#!/usr/bin/perl

########################################################
# Please file all bug reports, patches, and feature
# requests under:
#      https://sourceforge.net/p/logwatch/_list/tickets
# Help requests and discussion can be filed under:
#      https://sourceforge.net/p/logwatch/discussion/
########################################################

##########################################################################
#
# Logwatch service for snort log
#
# Processes all messages and summarizes them
# Each message is given with a timestamp and RMS
#
########################################################
# (C) 2023 by MigOps Inc - https://www.migops.com/
# written by Gilles Darold.
#
########################################################
## Covered under the included MIT/X-Consortium License:
##    http://www.opensource.org/licenses/mit-license.php
## All modifications and contributions by other persons to
## this script are assumed to have been donated to the
## Logwatch project and thus assume the above copyright
## and licensing terms.  If you want to make contributions
## under your own copyright or a different license this
## must be explicitly stated in the contribution and the
## Logwatch project reserves the right to not accept such
## contributions.  If you have made significant
## contributions to this script and want to claim
## copyright please contact logwatch-devel@lists.sourceforge.net.
########################################################

use strict;
use Logwatch ':dates';
use Time::Local;
use POSIX qw(strftime);

# strftime-style layout of the timestamp that starts every alert line
# ("MM/DD-HH:MM:SS").  TimeFilter() is not defined in this file (presumably
# it comes from the Logwatch ':dates' import); its return value is used by
# the parsing loop as a regexp fragment matching the requested date range.
my $date_format1 = '%m/%d-%H:%M:%S';
my $filter1 = TimeFilter($date_format1);

# Detail level requested through the environment; 0 when it is not set.
my $detail = 0;
$detail = $ENV{'LOGWATCH_DETAIL_LEVEL'} if exists $ENV{'LOGWATCH_DETAIL_LEVEL'};

# Month abbreviation => zero-based month number.  Nothing in the visible
# part of this script reads the table; it is kept unchanged.
my %month2num;
@month2num{qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec)} = (0 .. 11);

# Builds one entry of @message_categories for a given priority level.
# Each entry is an array ref holding:
#   [0] the section title printed in the report,
#   [1] the regexp recognising an alert of that priority
#       ($1 = alert text, $2 = "Classification: ..." if present,
#        $3 = the text inside the trailing {...} if present),
#   [2] an (initially empty) hash ref that collects per-message statistics.
my $new_category = sub {
    my ($level) = @_;
    return [
        "Priority $level",
        qr/\[\*\*\] \[\d+:\d+:\d+\] (.*?) \[\*\*\](?: \[(Classification: [^\]]+)\])? \[Priority: ${level}\] (?:\{([^\}]+)\})?/,
        {},
    ];
};

# Priorities 5, 4 and 3 are always summarised.  Priority 2 is added for any
# true detail level and priority 1 only when the detail level exceeds 5.
# NOTE(review): Snort documents priority 1 as the most severe level, so the
# default report omits the most important alerts -- confirm this is intended.
my @message_categories = map { $new_category->($_) } (5, 4, 3);
push @message_categories, $new_category->(2) if $detail;
push @message_categories, $new_category->(1) if $detail > 5;

# The log timestamps carry no year, so remember the current one.  This is
# localtime's year field, i.e. the number of years since 1900.
my $cur_year = (localtime)[5];

# Parse messages from stdin.
#
# For every line that lies inside the requested date range and matches one of
# the patterns in @message_categories, the statistics stored in that
# category's hash (element [2]) are updated under a key built from the alert
# text and the protocol tag:
#   count            - number of occurrences
#   first_occurrence - epoch time of the first occurrence seen
#   sum / sqrsum     - sum of the time offsets (and of their squares) relative
#                      to first_occurrence; the summary uses them to compute
#                      the spread without handling huge absolute epoch values

# Reference point used to guess the year of a timestamp (the log has none).
my $now = time;
# Timestamps up to one day ahead of $now are accepted as "this year" to
# tolerate clock skew; anything later is assumed to belong to last year.
my $max_future = 86400;
# $cur_year is localtime's year field (years since 1900).
my $this_year = $cur_year + 1900;

LINE:
while (my $line = <>)
{
    # skipping messages that are not within the requested range
    next LINE unless $line =~ /^($filter1)/o;
    my $datetime = $1;

    # Decode "MM/DD-HH:MM:SS".  A line that passed the filter but cannot be
    # decoded is skipped instead of being counted with an empty timestamp.
    my ($mon, $mday, $hour, $min, $sec) =
        $datetime =~ /(\d{2})\/(\d{2})-(\d+):(\d+):(\d+)/
        or next LINE;

    # timelocal() croaks on out-of-range values (e.g. 02/29 in a non-leap
    # year), so call it inside eval.  Try the current year first and fall
    # back to the previous one when the date is invalid or lies in the
    # future, which happens for December lines processed in January.
    my $time;
    foreach my $year ($this_year, $this_year - 1)
    {
        my $candidate = eval { timelocal($sec, $min, $hour, $mday, $mon - 1, $year) };
        next unless defined $candidate;
        $time = $candidate;
        last if $candidate <= $now + $max_future;
    }
    next LINE unless defined $time;

    CATEGORY:
    foreach my $cur_cat (@message_categories)
    {
        # $1 = alert text, $2 = classification (unused), $3 = content of the
        # trailing {...}, i.e. the protocol tag such as TCP or UDP.
        my ($rule, undef, $proto) = $line =~ $cur_cat->[1]
            or next CATEGORY;

        # "alert text, PROTO", or whichever of the two is present.
        my $key = join ', ', grep { defined && length } ($rule, $proto);

        my $stats = $cur_cat->[2]->{$key} ||= {
            count            => 0,
            first_occurrence => $time,
            sum              => 0,
            sqrsum           => 0,
        };

        # Offsets are taken relative to the first occurrence so that the
        # accumulated squares stay small.
        my $offset = $time - $stats->{'first_occurrence'};
        $stats->{'count'}++;
        $stats->{'sum'}    += $offset;
        $stats->{'sqrsum'} += $offset ** 2;

        # a line belongs to at most one category
        last CATEGORY;
    }
}


# generating summary
#
# One section per category that collected at least one message.  Inside a
# section the messages are grouped by number of occurrences (highest first).
# A message seen once is printed with its timestamp; a message seen several
# times is printed with the mean time of its occurrences and the sample
# standard deviation of those times ("+/- ...").
foreach my $cur_cat (@message_categories)
{
    my ($name, undef, $msgs) = @{$cur_cat};

    # skipping non-requested message types
    next unless keys %{$msgs};

    print $name, ":\n";
    print '-' x (length($name)+1), "\n";
    my $last_count = 0;

    # sorting messages by count; the message text breaks ties so that the
    # report does not depend on hash ordering
    my @sorted_msgs = sort {
        $msgs->{$b}->{'count'} <=> $msgs->{$a}->{'count'}
            or $a cmp $b
    } keys %{$msgs};

    foreach my $msg (@sorted_msgs)
    {
        my $stats = $msgs->{$msg};
        my $count = $stats->{'count'};

        # grouping messages by number of occurrence
        print "\n", $count, " times:\n" unless $last_count == $count;

        # printing timestamp
        print '[';

        if ($count > 1) {
            # Sample standard deviation of the occurrence times, computed
            # from the offsets d_i relative to the first occurrence:
            #   s = sqrt( (n * sum(d_i^2) - (sum d_i)^2) / (n * (n - 1)) )
            # The numerator cannot be negative mathematically; the clamp
            # only guards against floating-point rounding.
            my $numerator = $count * $stats->{'sqrsum'} - $stats->{'sum'} ** 2;
            $numerator = 0 if $numerator < 0;
            my $rms = int(sqrt($numerator / ($count * ($count - 1))));

            # centre of the interval: mean time of all occurrences
            my $mean = $stats->{'first_occurrence'} + int($stats->{'sum'} / $count);
            print strftime($date_format1, localtime($mean));

            print ' +/-';

            # printing the deviation in the largest fitting unit
            if ($rms > 86400) {
                print int($rms/86400) , ' day(s)';
            } elsif ($rms > 3600) {
                print int($rms/3600) , ' hour(s)';
            } elsif ($rms > 60) {
                print int($rms/60) , ' minute(s)';
            } else {
                print $rms, ' seconds';
            }
        }
        else
        {
            # we have got this message a single time
            print strftime($date_format1, localtime($stats->{'first_occurrence'}));
        }

        print '] ', $msg, "\n";
        $last_count = $count;
    }

    print "\n";
}

