#!/usr/bin/ruby

require 'yaml'

# A class for working with the MegaRAID hardware RAID controller.
#
# Everything is discovered by running external commands (`lspci` and the
# `storcli64` tool) through #cmd_output and parsing their text output.
class HardwareRAID
  #
  #  Full path of the storcli binary used to query the controller(s).
  #
  STORCLI = '/opt/MegaRAID/storcli/storcli64'.freeze

  #
  #  The controllers we've discovered, if any.  This stays nil until
  #  drives?, arrays? or state has been called for the first time.
  #
  attr_reader :controllers

  #
  # Constructor - controller discovery is deferred until first needed.
  #
  def initialize
    @controllers = nil
  end

  #
  #  Is this hardware present?
  #
  #  Returns true when any line of `lspci -v` mentions "megaraid"
  #  (case-insensitively), false otherwise.
  #
  def present?
    cmd_output('lspci -v').any? { |line| line =~ /megaraid/i }
  end

  #
  #  Run a command and capture output.
  #
  #  Returns the output of +cmd+ as an array of lines; each line keeps its
  #  trailing newline, as returned by IO#readlines.
  #
  def cmd_output(cmd)
    verbose("Running:#{cmd}")
    IO.popen(cmd, 'r', &:readlines)
  end

  #
  #  Find the controllers on this machine, if any.
  #
  #  Returns an array of controller numbers (as strings), taken from the
  #  lines of `storcli64 show` that start with whitespace and a number.
  #
  def find_controllers
    cmd_output("#{STORCLI} show").each_with_object([]) do |line, found|
      next unless line =~ /^\s+([0-9]+)\s/

      id = Regexp.last_match(1)
      found.push(id)
      verbose("Found controller: #{id} from: #{line}")
    end
  end

  #
  #  Check the status of the drives
  #
  #  Returns a hash mapping "drive-N" to the third whitespace-separated
  #  field of every report line that starts with "<digits>:".  N starts at
  #  one and keeps counting across controllers.
  #
  def drives?
    statuses = {}

    each_report_line do |line|
      next unless line =~ /^([0-9]+):/

      # Keys are never reused, so the hash size doubles as the counter.
      statuses["drive-#{statuses.size + 1}"] = line.split[2]
    end

    statuses
  end

  #
  # Check the status of the array(s)
  #
  # Returns a hash mapping the first field of every report line that
  # starts with "<digits>/<digit>" to the third field of that line.
  #
  def arrays?
    statuses = {}

    each_report_line do |line|
      next unless line =~ %r{^([0-9]+)/([0-9])}

      name, _, status = line.split
      statuses[name] = status
      verbose("Found virtual disk #{name} with status #{status}")
    end

    statuses
  end

  #
  #  Get the current state of the system - used for the detail if
  # any alert is raised.
  #
  #  Returns the full report of every controller as one string.  A newline
  #  is appended after every captured line, in addition to whatever line
  #  terminator the line already carries.
  #
  def state
    txt = ''.dup

    each_report_line { |line| txt << line << "\n" }

    txt
  end

  private

  #
  #  Discover the controllers if that has not happened yet, then yield
  #  every line of `storcli64 /cN show all` for each controller in turn.
  #
  def each_report_line(&block)
    @controllers ||= find_controllers

    @controllers.each do |c|
      cmd_output("#{STORCLI} /c#{c} show all").each(&block)
    end
  end

  #
  #  Diagnostic logging hook.
  #
  #  The class has always called a global `verbose` helper, which only
  #  exists when the surrounding program defines one.  Delegate to it when
  #  it is there (the bare `super` forwards the message) and stay silent
  #  otherwise, so the class can be used without such a helper.
  #
  def verbose(_msg)
    super if defined?(super)
  end
end

if __FILE__ == $PROGRAM_NAME

  # Print a diagnostic message on standard error.
  def verbose(str)
    STDERR.puts(str)
  end

  # Alerts collected so far; dumped as YAML on standard output at the end.
  alerts = []

  # Full controller report, fetched at most once and shared by all alerts.
  detail = ''

  raid = HardwareRAID.new

  # Nothing to monitor on machines without the hardware: report no alerts.
  unless raid.present?
    verbose('Software/Hardware not present')
    puts YAML.dump(alerts)
    exit(0)
  end

  # Without the storcli tool we cannot check anything, so raise a single
  # monitoring alert instead.  The check is skipped when the TEST
  # environment variable is set.
  if ENV['TEST'].nil? && !File.exist?('/opt/MegaRAID/storcli/storcli64')
    alerts << {
      id: 'monitoring-fail',
      summary: 'missing tool(s)',
      detail: '/opt/MegaRAID/storcli/storcli64 is not present'
    }

    puts YAML.dump(alerts)
    exit(0)
  end

  # Drive states.  Rather than enumerating bad states we alert on anything
  # that is not known-good:
  #
  #  Onln -> Online
  #  Optl -> Optimal
  #  GHS  -> Global hot spare
  healthy_drive_states = %w[Onln Optl GHS]

  raid.drives?.each_pair do |name, condition|
    if healthy_drive_states.include?(condition)
      verbose "Drive #{name} status is #{condition} -- not sending alert"
      next
    end

    verbose "Drive #{name} status is #{condition} -- raising alert"

    detail = raid.state if detail.empty?

    # NOTE(review): the names returned by drives? already start with
    # "drive-", so these ids come out as "drive-drive-N".  Kept as-is
    # because the id is externally visible.
    alerts << {
      id: "drive-#{name}",
      summary: "Drive status (#{name}) is #{condition}",
      detail: detail
    }
  end

  # Array states: only "Optl" (optimal) is considered healthy.
  raid.arrays?.each_pair do |name, condition|
    if condition == 'Optl'
      verbose "RAID array #{name} status is #{condition} -- not sending alert"
      next
    end

    detail = raid.state if detail.empty?
    verbose "RAID array #{name} status is #{condition} -- raising alert"

    alerts << {
      id: "raid-#{name}",
      summary: "RAID array (#{name}) is #{condition}",
      detail: detail
    }
  end

  # Show the output.
  puts YAML.dump(alerts)
  exit(0)

end
