#!/usr/bin/ruby
#
# NAME
#
#  symbiosis-httpd-generate-stats - Generate statistics for domains.
#
# SYNOPSIS
#
#  symbiosis-httpd-generate-stats [ --template | -t <filename> ] [ --force | -f ]
#                           [ --prefix | -p <directory> ] [ -h | --help ]
#                           [-m | --manual] [ -v | --verbose ]
#
# OPTIONS
#
#  -t, --template <file>   Set the webalizer config template file. Defaults to
#                          /etc/symbiosis/apache.d/webalizer.conf.erb.
#
#  -f, --force             Force regeneration of the webalizer configuration
#                          snippet for all domains.
#
#  -p, --prefix <directory>  Prefix directory, defaults to /srv.
#
#  -h, --help              Show a help message, and exit.
#
#  -m, --manual            Show this manual, and exit.
#
#  -v, --verbose           Show verbose errors
#
# USAGE
#
# This script is designed to be invoked once per day and update the
# web-visible access statistics for each hosted Symbiosis domain.
#
# The script is assumed to be invoked once per day, via /etc/cron.daily/.
#
# AUTHOR
#
#   Steve Kemp <steve@bytemark.co.uk>
#


#
# Standard ruby
#
require 'getoptlong'
require 'symbiosis/utils'



#
#  The options set by the command line.
#
# Defaults, each of which may be overridden by a command-line flag below.
template = '/etc/symbiosis/apache.d/webalizer.conf.erb'
prefix   = "/srv"
help     = false
manual   = false
$FORCE   = false
$VERBOSE = false

# Recognised flags: long name, short alias, and whether a value is required.
opts = GetoptLong.new(
  [ '--help',     '-h', GetoptLong::NO_ARGUMENT ],
  [ '--manual',   '-m', GetoptLong::NO_ARGUMENT ],
  [ '--verbose',  '-v', GetoptLong::NO_ARGUMENT ],
  [ '--force',    '-f', GetoptLong::NO_ARGUMENT ],
  [ '--prefix',   '-p', GetoptLong::REQUIRED_ARGUMENT ],
  [ '--template', '-t', GetoptLong::REQUIRED_ARGUMENT ]
)

begin
  # Each recognised flag is reported by its long name, whichever spelling
  # was used on the command line.
  opts.each do |flag, value|
    case flag
    when '--help'     then help     = true
    when '--manual'   then manual   = true
    when '--verbose'  then $VERBOSE = true
    when '--force'    then $FORCE   = true
    when '--template' then template = value
    when '--prefix'   then prefix   = value
    end
  end
rescue StandardError => e
  # A parsing problem is reported on stderr and then treated as a request
  # for the help text.
  warn e.to_s
  help = true
end


#
# Show the manual, or the help
#
# When help and/or the manual was requested, display whichever was asked
# for (usage first, then the manual) and stop before doing any real work.
if help || manual
  Symbiosis::Utils.show_usage(__FILE__)  if help
  Symbiosis::Utils.show_manual(__FILE__) if manual

  exit 0
end


#
# Symbiosis libraries -- required here so they're not needed during the build
# process for manpage generation.
#
require 'symbiosis/domains'
require 'symbiosis/domain/http'
require 'symbiosis/config_files/webalizer'

# Print +s+ on standard output, but only when $VERBOSE is set; otherwise
# do nothing.  Always returns nil.
def verbose(s)
  return unless $VERBOSE

  puts s
end

#
#  Potentially we process each domain.
#
#
# For every domain found under the prefix: make sure a webalizer configuration
# exists (writing or refreshing it when appropriate), work out which rotated
# access logs are at least as new as the last webalizer run, and feed those
# logs to webalizer under the domain's own UID.
#
Symbiosis::Domains.each(prefix) do |domain|
  verbose "Considering domain: #{domain}"

  #
  # Are statistics disabled for this domain?
  #
  unless domain.should_have_stats?
    verbose "\tSkipping as stats have been disabled."
    next
  end

  #
  # Skip symlinks
  #
  if domain.is_alias?
    verbose "\tSkipping as it is an symlink to #{domain.directory}."
    next
  end

  #
  # Create a configuration file, if one wasn't found.
  #
  webalizer_conf = File.join( domain.config_dir, "webalizer.conf" )

  config          = Symbiosis::ConfigFiles::Webalizer.new(webalizer_conf, "#")
  config.template = template
  config.domain   = domain

  write_config = false

  #
  # Decide whether the configuration needs (re)writing.  This is taken from
  # symbiosis-create-sites.
  #
  if $FORCE
    verbose "\tForcing re-creation of configuration due to --force."
    write_config = true

  elsif config.exists?

    if config.changed?
      verbose "\tNot updating configuration, as it has been edited by hand."
      write_config = false

    elsif config.outdated?
      verbose "\tRe-creating configuration as it is out of date."
      write_config = true

    else
      verbose "\tConfiguring already present and up-to date."
      write_config = false

    end

  else
    verbose "\tConfiguring site for the first time"
    write_config = true

  end

  #
  # Write the config if we need to.
  #
  if write_config
    begin
      #
      # Make sure the domain's config directory exists
      #
      Symbiosis::Utils.mkdir_p(domain.config_dir)

      #
      # And write
      #
      config.write
    rescue StandardError => ex
      # A failure here only affects this domain; move on to the next one.
      verbose "\tCaught #{ex.to_s} when writing configuration"
      next
    end
  end

  #
  # OK now we need to look for a logfile, or two.
  #
  #  access.log     for HTTP accesses.
  #  ssl_access.log for HTTPS accesses.
  #
  #
  # Firstly check when webalizer was last run.
  #
  last_run = nil

  #
  # Read the webalizer config to find where the history file is kept, so we
  # can see when webalizer last ran.  These are the fallbacks used when the
  # config does not set HistoryName / OutputDir.
  #
  output_dir   = domain.config_dir
  history_name = "webalizer.hist"

  File.foreach(config.filename) do |line|
    case line.chomp
    when /^ *HistoryName +([^ ]+)/
      history_name = $1
    when /^ *OutputDir +([^ ]+)/
      output_dir = $1
    end
  end

  #
  # Make sure the output dir is absolute
  #
  unless output_dir =~ /^\//
    output_dir = File.join(domain.config_dir, output_dir)
  end

  #
  # Work out the history file name
  #
  if history_name =~ /^\//
    history_file = history_name
  else
    history_file = File.join(output_dir, history_name)
  end

  #
  # Now stat() it -- webalizer updates this file with each run.
  #
  last_run = File.stat(history_file).mtime if File.exist?(history_file)

  #
  # Now select all the rotated log files, drop those older than the last run,
  # and sort the remainder by mtime (oldest first).
  #
  candidates = Dir.glob(File.join(domain.log_dir, '{ssl_access,access}.log.*'))
  fresh_logs = candidates.reject do |log|
    last_run && File.stat(log).mtime < last_run
  end
  process = fresh_logs.sort_by { |log| File.stat(log).mtime }

  #
  # If we didn't find a logfile we're done.
  #
  if process.empty?
    verbose "\tSkipping this domain, no suitable logfiles found"
    next
  end

  #
  # Walk up from the stats directory until we find a directory that exists
  # (stopping at "/" at the latest).
  #
  cdir = output_dir
  cdir = File.dirname(cdir) until cdir == "/" || File.exist?(cdir)

  #
  # Now make sure that this directory is owned by the domain's owner.
  #
  unless File.stat(cdir).uid == domain.uid
    if File.symlink?(cdir)
      verbose "\tSkipping as #{File.readlink(cdir)} (symlinked from #{cdir}) is not owned by #{domain.user}."
    else
      verbose "\tSkipping as #{cdir} is not owned by #{domain.user}."
    end
    next
  end

  #
  # If we don't have a statistics directory then we need to create it, and we
  # need to create it with the domains' UID/GID.  This method will not error if
  # the directory already exists.
  #
  domain.create_dir( output_dir )

  #
  # Remove old output.
  #
  # The command is passed to system() as an argument vector rather than a
  # single string, so no shell is involved.  output_dir can come straight from
  # the domain's webalizer.conf (which may have been edited by hand), so it
  # must never be interpolated into a shell command line: anything after a
  # shell metacharacter would be run by the invoking shell, outside sudo's
  # privilege drop.
  #
  verbose "\tRemoving output from #{output_dir} older than 365 days"
  system( "sudo", "-u", "\##{domain.uid}",
          "find", output_dir, "-ctime", "+365", "-delete" )

  #
  #  Now process each logfile.
  #
  quiet = ($VERBOSE ? "-d" : "-Q")

  process.each do |stinking_log_file|
    verbose "\tRunning webalizer against #{stinking_log_file}"

    #
    #  Now run it under sudo, as the user ID of the domain, from within the
    #  domain's config directory.  Again an argument vector is used so that
    #  paths are passed through verbatim; :chdir replaces the old "cd ... &&".
    #
    system( "sudo", "-u", "\##{domain.uid}", "--",
            "webalizer", "-c", webalizer_conf, quiet, stinking_log_file,
            :chdir => domain.config_dir )
  end
end

