#!/usr/bin/ruby

require 'yaml'


#
# Allow access to our common-code.
#
$LOAD_PATH << '/usr/share/bytemark'
$LOAD_PATH << '../lib/bytemark' if ENV['TEST'] && ENV['TEST_PREFIX']

require 'healthcheck/command_output'



#
# Reads the Linux software-RAID summary (/proc/mdstat) and turns it into
# a per-array hash of health details.
#
class SoftwareRAID
  #
  # Fetch the current contents of /proc/mdstat and parse them.
  #
  # Returns whatever #parse returns for the command's output.
  #
  def status
    # Use of "cat" here is to allow mocking for testing-purposes.
    parse(Bytemark::Healthcheck::CommandWrapper.run_command("cat /proc/mdstat"))
  end

  #
  # Parse mdstat-formatted text.
  #
  # text - the text to parse (String), or nil.
  #
  # Returns nil when text is nil or empty.  Otherwise returns a Hash keyed
  # by array name (e.g. "md0").  Every active array found gets:
  #
  #   'raid_level' - the word following "active" (and any parenthesised
  #                  flag), e.g. "raid1"
  #   'discs'      - the remainder of the header line (the member list)
  #
  # and, once the array's "[n/m] [UU]" line has been seen, additionally:
  #
  #   'raid_discs'    - n, as an Integer
  #   'working_discs' - m, as an Integer
  #   'faulty_discs'  - (n - m) plus the number of "(F)" markers in 'discs'
  #   'status'        - "FAILING" if any "(F)" marker was seen, else "OK"
  #                     when n == m, else "DEGRADED"
  #
  # Note the returned Hash creates an empty entry on access to an unknown
  # key, so use key? to test for presence.
  #
  def parse(text)
    return nil if text.nil? || text.empty?

    discs = Hash.new { |h, k| h[k] = {} }

    # Name of the array whose header was seen most recently and whose
    # "[n/m]" line has not been consumed yet.
    device = nil

    text.split("\n").each do |line|
      case line
      when /^([a-z]+\d+)\s+:\s+active\s+(?:\([^)]*\)\s+)?(\S+)(.*)/
        # Header, e.g. "md0 : active raid1 sdb1[1] sda1[0]".  A read-only
        # array is reported as "md0 : active (auto-read-only) raid1 ...";
        # the optional group skips that flag so it is not mistaken for the
        # RAID level.
        device = Regexp.last_match(1)
        discs[device]['raid_level'] = Regexp.last_match(2)
        # Record discs here so we can catch failed spares
        discs[device]['discs']      = Regexp.last_match(3)
      when /^.*\[(\d+)\/(\d+)\]\s+\[([U_F]+)\]/
        # Counts line, e.g. "104320 blocks [2/2] [UU]".  Ignored unless it
        # follows a header we recognised.
        next if device.nil?

        expected = Regexp.last_match(1).to_i
        working  = Regexp.last_match(2).to_i

        # Count the number of "(F)" we see in the devices entry, which
        # will cover the case of failed-spares.
        flagged = discs[device]['discs'].scan(/\(F\)/).length

        discs[device]['raid_discs']    = expected
        discs[device]['working_discs'] = working
        # NOTE(review): a failed member that is also missing from the
        # working count contributes to both terms below -- confirm whether
        # that double count is intended.
        discs[device]['faulty_discs']  = (expected - working) + flagged

        # Any "(F)" marker puts the array in a failing state, regardless
        # of whether the counts agree.
        discs[device]['status'] =
          if flagged > 0
            'FAILING'
          elsif expected == working
            'OK'
          else
            'DEGRADED'
          end

        # Only the first counts line after a header belongs to that array.
        device = nil
      end
    end

    discs
  end
end

if __FILE__ ==  $PROGRAM_NAME

  #
  # Write a progress message to standard error, leaving standard output
  # for the YAML document printed at the end.
  #
  def verbose(str)
    STDERR.puts(str)
  end

  # One hash per array that needs an alert raised.
  to_raise = []

  #
  #  Inspect every array reported; a nil result means there is nothing
  #  to inspect.
  #
  arrays = SoftwareRAID.new.status
  arrays = {} if arrays.nil?

  arrays.each do |name, info|
    has_status = info.respond_to?(:has_key?) && info.key?('status')
    unless has_status
      verbose "Skipping #{name} as no 'status' key defined"
      next
    end

    state   = info['status']
    listing = info.sort.collect { |k, v| " * #{k}: #{v}" }.join("\n")

    alert = {
      id: "raid-#{name}",
      summary: "RAID array status (#{name}) is #{state}",
      detail: " * #{name}\n#{listing}"
    }

    # Only these states are treated as healthy; anything else is alerted.
    if state =~ /^(OK|VERIFY(ING|-PAUSED))$/
      verbose "RAID #{name} status is #{state} -- not sending alert"
    else
      verbose "RAID #{name} status is #{state} -- sending alert"
      to_raise.push(alert)
    end
  end

  #
  #  Print the collected alerts as YAML and finish with a zero exit code.
  #
  puts YAML.dump(to_raise)
  exit(0)
end
