#!/usr/bin/ruby

require 'yaml'


#
# Allow access to our common-code.
#
$LOAD_PATH << '/usr/share/bytemark'
$LOAD_PATH << '../lib/bytemark' if ENV['TEST'] && ENV['TEST_PREFIX']

require 'healthcheck/command_output'


#
#  Health-check helper for HP hardware RAID controllers.
#
#  The class runs `hpacucli ctrl all show config detail` (via
#  Bytemark::Healthcheck::CommandWrapper), caches the text it gets back,
#  and turns it into hashes describing physical drives, logical drives
#  and the controller cache/battery.
#
#  Progress messages are sent to a global `verbose` method when one has
#  been defined (the script section of this file defines one); when the
#  class is loaded on its own and no such method exists the messages are
#  silently dropped instead of raising NoMethodError.
#
class HardwareRAID


  #
  #  The raw text read from the hpacucli command; nil until #status has
  #  been called for the first time.
  #
  attr_reader :data


  #
  # Constructor.  No command is executed until it is needed.
  #
  def initialize
    @data = nil
  end


  #
  #  Is this hardware present on this host?
  #
  #  Returns true when at least one line of `lspci -v` mentions all of
  #  "RAID", "controller" and "Hewlett-Packard" (case-insensitively),
  #  and false otherwise.
  #
  def present?
    lines = Bytemark::Healthcheck::CommandWrapper.run_command('lspci -v').split("\n")

    lines.any? do |line|
      (line =~ /RAID/i) && (line =~ /controller/i) && (line =~ /Hewlett-Packard/i)
    end
  end


  #
  # Get the status via the wrapper.
  #
  # NOTE: We only execute the command once, and cache the output in @data.
  #
  def status
    @data ||= Bytemark::Healthcheck::CommandWrapper.run_command('/usr/sbin/hpacucli ctrl all show config detail')
  end


  #
  # Parse the information about the drives, battery, & etc, from the
  # (cached) command output.
  #
  # Returns a hash with two keys:
  #
  #   'raid'         => { "<controller>"         => { 'status' => ... },
  #                       "<controller>_<thing>" => { 'status' => ...,
  #                                                   'serial' => ...,
  #                                                   'raid_level' => ...,
  #                                                   'device' => ...,
  #                                                   'raid_discs' => n,
  #                                                   'working_discs' => n } }
  #   'raid_battery' => { "<controller>" => { 'status' => ...,
  #                                           'cache_status' => ...,
  #                                           'write_cache' => ... } }
  #
  # where <controller> is "slot_N" and <thing> is one of "array_X",
  # "ld_N" or "array_X_pd_<id>".  Only the fields that actually appear
  # in the output are present.
  #
  def parse
    raid = Hash.new { |h, k| h[k] = {} }
    raidbattery = {}

    # Parser state: the section of the report we are currently inside.
    controller = nil
    array = nil
    physicaldrive = nil
    thing = nil

    status.split("\n").each do |line|
      line.chomp!
      case line
      when /^ +Slot: (\d+)/
        # A new controller starts here; forget everything beneath the old one.
        controller = "slot_#{$1}"
        array = nil
        physicaldrive = nil
        thing = nil

      when /^ +Array: (\S+)$/
        array = $1
        physicaldrive = nil
        thing = "array_#{array}"

      when /^ +Logical Drive: (\S+)$/
        physicaldrive = nil
        thing = "ld_#{$1}"

      when /^ +physicaldrive (\S+)$/
        # Only a line holding nothing but the drive id opens a section;
        # lines with trailing detail after the id do not match.
        next if array.nil?
        physicaldrive = $1.gsub(/\W/, '-')
        thing = "array_#{array}_pd_#{physicaldrive}"

      when /^ +Controller Status: (.*)$/
        next if controller.nil?
        raid[controller]['status'] = $1.strip

      when /^ +Cache Board Present: [Tt].*/
        # Battery/cache details are only recorded for controllers that
        # report a cache board.
        next if controller.nil?
        raidbattery[controller] ||= {}

      when /^ +Drive Write Cache: (.*)$/
        next if controller.nil?
        next unless raidbattery.key?(controller)
        raidbattery[controller]['write_cache'] = $1.strip

      when /^ +Battery\/Capacitor Status: (.*)$/
        next if controller.nil?
        next unless raidbattery.key?(controller)
        raidbattery[controller]['status'] = $1.strip

      when /^ +Cache Status: (.*)$/
        next if controller.nil?
        next unless raidbattery.key?(controller)
        raidbattery[controller]['cache_status'] = $1.strip

      when /^ +Status: (.*)$/
        next if thing.nil?
        # Strip like every other field: callers compare against the exact
        # string 'OK', so stray trailing whitespace must not leak through.
        this_status = $1.strip
        entry = raid["#{controller}_#{thing}"]

        # If this is a physicaldrive keep the disc counters up to date.
        unless physicaldrive.nil?
          entry['raid_discs'] ||= 0
          entry['working_discs'] ||= 0
          entry['raid_discs'] += 1
          entry['working_discs'] += 1 if this_status == 'OK'
        end
        entry['status'] = this_status

      when /^ +Serial Number: (.*)$/
        next if thing.nil?
        raid["#{controller}_#{thing}"]['serial'] = $1.strip

      when /^ +Fault Tolerance: (.*)$/
        next if thing.nil?
        raid["#{controller}_#{thing}"]['raid_level'] = $1.strip

      when /^ +Disk Name: (.*)$/
        next if thing.nil?
        raid["#{controller}_#{thing}"]['device'] = $1.strip
      end
    end

    { 'raid' => raid, 'raid_battery' => raidbattery }
  end


  #
  #  Check the status of the drives.
  #
  #  Returns a hash of physical-drive key => status string, using
  #  "unknown" when no status was parsed for a drive.
  #
  def drives?
    # We only care about "PD" == Physical Drives
    collect_statuses(/_pd_/i) do |_drive, info, state|
      serial = info['serial'] || "unknown"
      "Found physical drive, serial:#{serial} status:#{state}"
    end
  end


  #
  # Check the status of the array(s).
  #
  # Returns a hash of logical-drive key => status string, using
  # "unknown" when no status was parsed for a logical drive.
  #
  def arrays?
    # We only care about "LD" == Logical Drives (i.e. RAID array)
    collect_statuses(/_ld_/i) do |array, info, state|
      device = info['device'] || "unknown"
      "Found logical drive #{device}, #{array} status:#{state}"
    end
  end


  #
  #  Check the `cache_status` and battery-status
  #
  #  Returns a hash holding "battery_<slot>" and "cache_<slot>" entries
  #  for every controller that reported a cache board.  The result is
  #  empty when the check has been disabled by creating the file
  #  /etc/bytemark-healthcheck/local/battery.disable.
  #
  def battery?
    h = {}

    if File.exist?("/etc/bytemark-healthcheck/local/battery.disable")
      log_verbose("Disabling battery health-check.")
      return h
    end

    parse['raid_battery'].each do |slot, info|
      c_status = info['cache_status'] || "unknown"
      b_status = info['status']       || "unknown"

      #
      #  For each battery we want to both log and store the status.
      #
      log_verbose("Cache_status  :#{c_status}")
      log_verbose("Battery_status:#{b_status}")

      h["battery_#{slot}"] = b_status
      h["cache_#{slot}"]   = c_status
    end

    h
  end


  private


  #
  #  Forward a message to the global `verbose` method if one exists;
  #  otherwise do nothing.
  #
  def log_verbose(message)
    verbose(message) if respond_to?(:verbose, true)
  end


  #
  #  Walk the parsed 'raid' entries whose key matches `pattern` and
  #  return a hash of key => status ("unknown" when missing).  The block
  #  receives (key, entry, status) and returns the message to log.
  #
  def collect_statuses(pattern)
    found = {}

    parse['raid'].each do |name, info|
      next unless name =~ pattern

      state = info['status'] || "unknown"
      log_verbose(yield(name, info, state))
      found[name] = state
    end

    found
  end

end




if __FILE__ == $PROGRAM_NAME

  #
  #  Diagnostic output goes to STDERR, which keeps STDOUT free for the
  #  YAML document printed at the end.
  #
  def verbose(str)
    STDERR.puts(str)
  end

  #
  #  Set a sane path
  #
  ENV['PATH'] = '/sbin:/bin:/usr/sbin:/usr/bin:/usr/local/sbin:/usr/local/bin'

  #
  #  The alerts we'll raise, if any.
  #
  alerts = []

  #
  #  Log one check result.  A healthy item is only logged; an unhealthy
  #  one is logged and queued as an alert with :id, :summary and :detail.
  #
  record = lambda do |healthy, alert_id, summary, subject|
    if healthy
      verbose "#{subject} -- not sending alert"
    else
      alert = {
        :id      => alert_id,
        :summary => summary,
        :detail  => "#{subject} -- raising alert"
      }
      verbose(alert[:detail])
      alerts.push(alert)
    end
  end

  raid = HardwareRAID.new

  #
  #  Nothing to do unless the controller is present: emit the (empty)
  #  alert list and stop.
  #
  unless raid.present?
    verbose('Software/Hardware not present')
    puts YAML.dump(alerts)
    exit(0)
  end

  #
  #  Physical drives: anything other than exactly 'OK' is an alert.
  #
  raid.drives?.each_pair do |name, state|
    record.call(state == 'OK',
                "drive-#{name}",
                "Drive status (#{name}) is #{state}",
                "Drive #{name} status is #{state}")
  end

  #
  #  Logical drives (arrays): anything other than exactly 'OK' is an alert.
  #
  raid.arrays?.each_pair do |name, state|
    record.call(state == 'OK',
                "raid-#{name}",
                "RAID array (#{name}) is #{state}",
                "RAID array #{name} status is #{state}")
  end

  #
  #  Battery and cache: healthy when the status merely contains "OK".
  #
  raid.battery?.each do |name, state|
    record.call(!(state =~ /OK/).nil?,
                name,
                "#{name} status is #{state}",
                "#{name} is #{state}")
  end

  #
  #  Show the output.
  #
  puts YAML.dump(alerts)
  exit(0)

end
