#!/usr/bin/ruby
# NAME
#   symbiosis-monit - Symbiosis Service Monitor
#
# SYNOPSIS
#   symbiosis-monit <directory>
#                    [ --help | -h ] [ --manual | -n ] [ --always-mail | -a ]
#                    [ -m | --mailto <email> ] [ -f | --mailfrom <email> ] 
#                    [ -t | --template <template> ] [ -d | --template-dir <dir> ]
#                    [ -l | --max-load <load> ] [ -F | --force ]
#                    [ -s | --state-db <file> ] [ --verbose | -v ]
#
# OPTIONS
# <directory>                The directory containing the test scripts. If
#                            nothing is specified, /etc/symbiosis/monit.d is
#                            used.
#
# -m, --mailto <email>       Set the to address for the email notification.
#                            The default address is root@<hostname>.
#
# -f, --mailfrom <email>     Set the from address for the email notification.
#                            The default address is <user>@<hostname>, where
#                            <user> is taken from $USER, or "nobody" if unset.
#
# -a, --always-mail          Always send the email summary. If the command is
#                            run interactively, or all the tests pass then no
#                            mail is sent, unless this is specified.
#
# -t, --template <template>  Specify the template to be used in the output. The 
#                            options are sms, email, or verbose. The "sms"
#                            template produces a one-line summary. The "email"
#                            template gives a list of all the tests, and
#                            whether they passed or failed. The "verbose"
#                            template produces a longer listing, with the full
#                            output from each test.
#
# -d, --template-dir <directory>  Specify the directory where templates are
#                                 kept. The default is
#                                 /usr/share/symbiosis/monit.
#
# -s, --state-db <file>      Specify where the state database should be kept.
#                            The default is /var/lib/symbiosis/monit.db.
#
# -l, --max-load <load>      If the one-minute load average is greater than or
#                            equal to this number, do not run the tests, unless
#                            the --force flag is given. Defaults to the number
#                            of CPU cores in a machine, as per /proc/cpuinfo,
#                            or 2 if that file cannot be read.
#
# -F, --force                Run tests, even if symbiosis-monit would otherwise
#                            be disabled. See DISABLING below. 
#
# -v, --verbose              Synonym for "-t verbose".
#
# -h, --help                 Show help.
#
# -n, --manual               Show the full manual.
#
# TESTS
#
# Each of the test scripts in the script directory is expected to be self
# contained. That is to say, each script is responsible for testing, and
# starting/stopping/restarting a service as necessary.
#
# A test will be deemed successful if it exits with a status of EX_OK (0).
#
# This program will retry a script once more if it returns with an exit status
# of EX_TEMPFAIL (75).
#
# Tests that are continually failing will only get their failure reported once.
#
# LOGGING
#
# Any scripts that fail tests will be logged to the daemon syslog, along with
# the last 10 lines of any output they produce.
#
# EXIT STATUS
#
# This program will return
# 
#   0   if none of the tests fail;
#   1   if any of the tests fail;
#   75  if the program has been disabled for any reason (see DISABLING below).
#
# DISABLING
#
# This program may be disabled by creating an empty file called
# /etc/symbiosis/monit.d/disabled
#
# Also if /var/lib/initscripts/nologin exists, it is assumed that the system is
# still booting, and as such symbiosis-monit will not run.
#
# If the program detects dpkg is running, it will not start. This is to avoid
# restarting stopped services that might have been stopped by dpkg during an
# upgrade.
#
# Finally if the load average is deemed too high, i.e. a number greater than
# or equal to the number of CPUs in a machine, this program will not run. See
# also the --max-load option.
#
# To force the program to run regardless, use the --force flag.
#
# AUTHOR
#
# Patrick J Cherry <patrick@bytemark.co.uk>
#
# DATE
#
# 2011-11-25

require 'getoptlong'
require 'socket'

#
#  Pin the search path to a fixed list of system directories, regardless of
#  the PATH this program was started with.
#
ENV['PATH'] = %w[
  /usr/local/sbin
  /usr/local/bin
  /usr/sbin
  /usr/bin
  /sbin
  /bin
  /usr/X11R6/bin
].join(":")

#
# Report a fatal problem on standard error, point the user at the built-in
# help, and terminate the program with exit status 1.
#
# msg:: the text shown after the "*** Error: " prefix.
#
def error(msg)
  [
    "*** Error: #{msg}\n",
    "*** For help, type: #{$0} -h\n"
  ].each { |text| STDERR.print text }
  exit 1
end

#
#  Defaults.
#
# Debugging and verbosity may be switched on through the environment, but
# only when ruby itself has not already enabled them.
#
$DEBUG   ||= ( ENV['DEBUG']   ? true : nil )
$VERBOSE ||= ( ENV['VERBOSE'] ? true : nil )

# Default mailing addresses: from the invoking user ("nobody" when $USER is
# unset or empty) to root, both at this machine's hostname.
hostname      = Socket.gethostname
user          = ENV['USER'].to_s
user          = "nobody" if user.empty?
mailfrom_addr = "#{user}@#{hostname}"
# We only send mail if this is set
mailto_addr   = "root@#{hostname}"

# Mail and reporting switches.
always_mail   = false
send_mail     = false
level         = "INFO"

# Locations of the test scripts, the state database and the templates.
dir           = "/etc/symbiosis/monit.d/"
state_db_fn   = "/var/lib/symbiosis/monit.db"
template_dir  = "/usr/share/symbiosis/monit/"
template      = "default"

# Command-line state, filled in by the options parsing further down.
help          = false
manual        = false
opts          = nil

# A non-empty TERM is taken to mean a person is running this by hand;
# otherwise the flag is left as nil.
interactive   = (ENV['TERM'].to_s.empty? ? nil : true)

#
# Work out the maximum load, usually the same as the number of CPU cores.
#
# The cores are counted from the "processor :" lines of /proc/cpuinfo. If the
# file cannot be read, or contains no such lines, fall back to 2: a limit of
# zero would be met by every load average, so the tests would never run.
#
cpu_count = begin
  File.readlines('/proc/cpuinfo').count { |l| l =~ /^processor\s+:/i }
rescue SystemCallError, IOError
  0
end
max_load = cpu_count > 0 ? cpu_count : 2

# Set by --force: run even when a "do not run" condition is detected.
force    = false

#
# Options parsing
#
# Each recognised switch overrides one of the defaults set earlier in this
# file. GetoptLong's own error output is silenced (quiet = true); any parsing
# problem is reported through error() instead, which exits with status 1.
#
begin
  opts = GetoptLong.new(
                        [ "--always-mail","-a", GetoptLong::NO_ARGUMENT ],
                        [ "--force" ,     "-F", GetoptLong::NO_ARGUMENT ],
                        [ "--help" ,      "-h", GetoptLong::NO_ARGUMENT ],
                        [ "--manual" ,    "-n", GetoptLong::NO_ARGUMENT ],
                        [ "--verbose" ,   "-v", GetoptLong::NO_ARGUMENT ],
                        [ "--max-load",   "-l", GetoptLong::REQUIRED_ARGUMENT ],
                        [ "--mailto",     "-m", GetoptLong::REQUIRED_ARGUMENT ],
                        [ "--mailfrom",   "-f", GetoptLong::REQUIRED_ARGUMENT ],
                        [ "--state-db",   "-s", GetoptLong::REQUIRED_ARGUMENT ],
                        [ "--template",   "-t", GetoptLong::REQUIRED_ARGUMENT ],
                        [ "--template-directory", "-d", GetoptLong::REQUIRED_ARGUMENT ]
                       )
  opts.quiet = true
  opts.each do |opt, arg|
    case opt
      when "--always-mail"
        always_mail = true
      when "--help"
        help = true
      when "--manual"
        manual = true
      when "--mailto"
        mailto_addr = arg
      when "--mailfrom"
        mailfrom_addr = arg
      when "--state-db"
        state_db_fn = arg
      when "--template"
        template = arg
      when "--template-directory"
        template_dir = arg
      when "--max-load"
        # Float() rejects junk that String#to_f would silently turn into 0.0,
        # a limit every load average meets, which would disable the program.
        begin
          max_load = Float(arg)
        rescue ArgumentError
          error "Invalid value for --max-load: #{arg}"
        end
      when "--force"
        force = true
      when "--verbose"
        # Synonym for "-t verbose".
        template = "verbose"
    end
  end
rescue GetoptLong::Error => e
  # GetoptLong::Error is the parent of InvalidOption, MissingArgument,
  # NeedlessArgument and AmbiguousOption, so all of them are reported
  # cleanly rather than only unknown options.
  error e.to_s
end

# Whatever is left after the switches is the test directory; the last
# remaining argument wins.
dir = ARGV.pop if ARGV.length > 0

# The help text and the manual are both taken from the comment header at the
# top of this very file.
#
#  * --manual prints the header from the line after the shebang up to the
#    first completely empty line.
#  * --help skips forward to the SYNOPSIS heading and prints from the line
#    after it up to and including the next bare "#" line.
#
# In both cases the first two characters of each line are stripped.
#
if help || manual
  in_synopsis = false

  # drop(1) discards the shebang line.
  File.readlines(__FILE__).drop(1).each do |raw|
    text = raw.chomp

    # The first empty line marks the end of the header.
    break if text.empty?

    # For --help, stay silent until the SYNOPSIS heading has gone past; the
    # heading itself is not printed.
    if help && !in_synopsis
      in_synopsis = !(text =~ /^#\s+SYNOPSIS\s*$/).nil?
      next
    end

    # A bare "#" has nothing after its second character, hence the to_s.
    puts text[2..-1].to_s

    # For --help, the bare "#" that closes the synopsis ends the output.
    break if help && in_synopsis && text =~ /^#\s*$/
  end

  exit 0
end

#
# These requires are here such that manpage generation works on build machines.
#
require 'symbiosis/monitor/runner'
require 'symbiosis/utils'
require 'systemexit'
require 'log4r'
require 'log4r/outputter/syslogoutputter'

#
# Exit if we've been disabled, i.e. a file called "disabled" exists in the
# test directory. With --force the condition is noted and then ignored.
# Messages are only shown when $VERBOSE is set.
#
disabled_flag = File.join(dir, "disabled")
if File.exist?(disabled_flag)
  notice = "symbiosis-monit: disabled. "
  unless force
    warn notice + "Exiting." if $VERBOSE
    exit SystemExit::EX_TEMPFAIL
  end
  warn notice + "Ignoring as --force given." if $VERBOSE
end

#
# Don't run if the machine is still booting, which is signalled by the
# presence of /var/lib/initscripts/nologin. Again --force overrides this.
#
if File.exist?("/var/lib/initscripts/nologin")
  notice = "symbiosis-monit: This machine is still booting. "
  unless force
    warn notice + "Exiting." if $VERBOSE
    exit SystemExit::EX_TEMPFAIL
  end
  warn notice + "Ignoring as --force given." if $VERBOSE
end

#
# Acquire lock
#
# The lock file is opened (and created if need be), then handed to
# Symbiosis::Utils.lock. An Errno::ENOLCK is taken to mean the lock could not
# be obtained -- presumably because another symbiosis-monit is running;
# confirm against Symbiosis::Utils. In that case exit quietly with status 0,
# unless --force was given, in which case carry on without the lock.
#
lock_fh = nil
lock_fn = "/var/lock/symbiosis-monit.lock"

begin
  lock_fh = File.open(lock_fn, "w+")
  Symbiosis::Utils.lock(lock_fh)
rescue Errno::ENOLCK => err
  # The trailing ". " keeps the error text apart from the sentence appended
  # below; without it the two ran together.
  msg = "symbiosis-monit: #{lock_fn}: #{err.to_s}. "
  if force
    warn msg + "Ignoring as --force given." if $VERBOSE
  else
    warn msg + "Exiting." if $VERBOSE
    exit 0
  end
end

#
# Main run: set up logging, apply the remaining "do not run" checks (load
# average, dpkg), run the tests, and mail the report when required. Any
# StandardError raised along the way is reported through error(), which
# exits with status 1.
#
begin
  #
  # Open syslog
  #
  # NOTE(review): the logger is not passed on explicitly; presumably the
  # monitor code looks it up by its name, "Symbiosis::Monitor" -- confirm.
  #
  logger = Log4r::Logger.new("Symbiosis::Monitor")
  outputter = Log4r::SyslogOutputter.new("symbiosis-monit")
  outputter.level = Log4r::INFO
  logger.outputters << outputter

  #
  # Bail out if the load is too high
  #
  # The first field of /proc/loadavg is the one-minute load average. If it
  # does not look like a number the check is skipped, just as it is when the
  # file is unreadable, rather than aborting on a String/number comparison.
  #
  if File.readable?("/proc/loadavg")
    load1 = File.read("/proc/loadavg").split(" ").first
    load1 = (load1.to_s =~ /\A\d+(\.\d+)?\z/) ? load1.to_f : nil
    if load1 and load1 >= max_load
      msg = "symbiosis-monit: Load too high. "
      if force
        warn msg + "Ignoring as --force given." if $VERBOSE
      else
        warn msg + "Exiting." if $VERBOSE
        exit SystemExit::EX_TEMPFAIL
      end
    end
  end

  #
  # Bail out if dpkg is running. This is only checked when running as root.
  #
  if 0 == Process.uid
    if Symbiosis::Monitor::Check.dpkg_running?
      msg = "symbiosis-monit: dpkg running. "
      if force
        warn msg + "Ignoring as --force given." if $VERBOSE
      else
        warn msg + "Exiting." if $VERBOSE
        exit SystemExit::EX_TEMPFAIL
      end
    end
  end

  #
  # Run the tests and render the report with the chosen template.
  #
  runner = Symbiosis::Monitor::Runner.new(dir, state_db_fn, template_dir)
  runner.go
  report = runner.report(template)

  #
  # Mail the report if --always-mail was given, or if this is a
  # non-interactive run and the runner says a notification is due.
  #
  if always_mail or (!interactive and runner.should_notify?)
    msg =<<EOF
To: #{mailto_addr}
From: #{mailfrom_addr}
Subject: [symbiosis] Service test report

EOF
    msg += report
    # sendmail -t takes the recipients from the headers written above.
    IO.popen("/usr/sbin/sendmail -t","w") do |pipe|
      pipe.print msg
    end
  end

  #
  # Clean up entries in the database older than 30 days.
  #
  runner.clean_db(30)

rescue StandardError => err
  error err.to_s
end

# Show the report on standard output for interactive, verbose or debug runs.
puts report if $DEBUG or $VERBOSE or interactive

#
# Release the lock and close the lock file. A failure here is only worth a
# warning (shown when $VERBOSE is set), plus a backtrace when debugging.
#
begin
  Symbiosis::Utils.unlock(lock_fh)
  unless lock_fh.nil? or lock_fh.closed?
    lock_fh.close
  end
rescue SystemCallError => unlock_err
  warn "symbiosis-monit: Failed to release lock on #{lock_fn}: #{unlock_err.to_s}" if $VERBOSE
  puts unlock_err.backtrace.join("\n") if $DEBUG
end

# Return the code: 1 if any of the tests failed, 0 otherwise.
exit_status = runner.failed_tests.length > 0 ? 1 : 0
exit exit_status

