File: //ibin/sa-stats.pl
#!/usr/bin/perl
# -------------------------------------------------------------
# file: sa-stats.pl (SARE release)
# date: 2005-01-31
# version: 0.92
# author: Dallas Engelken <dallase@uribl.com>
# desc: Generates Top Spam/Ham Rules fired for SA 3.x installations.
#
# IMPORTANT NOTES
#
# See http://www.rulesemporium.com/programs/sa-stats-1.0.txt for
# a SA 3.1.x version that supports per-domain and per-user.
#
# If your top 5 does not contain URIBL_BLACK, see
# http://www.uribl.com/usage.shtml
# -------------------------------------------------------------
use Getopt::Long;
use Pod::Usage;
my ($LOG_DIR,$FILE,$TOPRULES,$PRINT_TO_WEB,$HELP);
GetOptions (
'logdir|l=s' => \$LOG_DIR,
'filename|f=s' => \$FILE,
'num|n=i' => \$TOPRULES,
'web|w' => \$PRINT_TO_WEB,
'help|h' => \$HELP
);
if ($HELP) {
print "usage: $0 [-l <dir>] [-f <file>] [-n <num>] [-w]\n";
print "\t--logdir|-l <dir>\tDirectory containing spamd logs\n";
print "\t--filename|-f <file>\tFile names or regex to look for in the logdir\n";
print "\t--num|-n <num>\tNumber of top rules to display\n";
print "\t--web|-w\tMake it web friendly output\n";
print "\t--help|-h\tPrints this help\n";
exit;
}
if (!defined $TOPRULES) { $TOPRULES=20 }
if (!defined $LOG_DIR) { $LOG_DIR="/var/log" }
if (!defined $FILE) { $FILE='^maillog$' } # regex
# LEAVE THE REST ALONE UNLESS YOU KNOW WHAT YOU ARE DOING...
################################################################
my $NUM_EMAIL=0; my $NUM_SPAM=0; my $NUM_HAM=0;
my $EMAIL_HITS=0; my $SPAM_HITS=0; my $HAM_HITS=0;
my %SPAM_RULES=(); my %HAM_RULES=();
my $TOTAL_SPAM_RULES=0; my $TOTAL_HAM_RULES=0;
my $ALSPAM=0; my $ALHAM=0; my $ALNO=0;
my $HAM_SEC=0; my $SPAM_SEC=0; my $EMAIL_SEC=0;
my $footer = '</div><div id="footer"><p>CGI by <a href="mailto:dallase@nmgi.com">Dallas Engelken</a></p></div>';
opendir (DIR,"$LOG_DIR");
my @logs = grep /$FILE/i, readdir DIR;
closedir DIR;
foreach my $log (@logs) {
&calcstats($LOG_DIR."/".$log);
}
&summarize();
exit;
#############################
sub calcstats {
my $log=shift;
if (!-e $log || -d $log) {
print "$log not found..\n";
return;
}
open(F,"$log");
while(<F>) {
my ($result,$score,$rules,$time,$size,$learn);
my $spam=0;
if (/.*result:\s+(\w|\.)\s+(\-?\d+)\s+\-\s+(.*)\s+scantime=([\d\.]+),size=(\d+).*autolearn=(\w+)/) {
$result=$1;
$score=$2;
$rules=$3;
$time=$4;
$size=$5;
$learn=$6;
}
else {
next;
}
if ($result eq "Y") {
$SPAM_SEC+=$time;
}
else {
$HAM_SEC+=$time;
}
$EMAIL_SEC+=$time;
$spam=1 if ($result =~ m/Y/);
if ($learn =~ /ham/) {
$ALHAM++;
}
elsif ($learn =~ /spam/) {
$ALSPAM++;
}
else {
$ALNO++;
}
my @tmprules=split(/\,/,$rules);
foreach my $r (@tmprules) {
if ($spam) {
$TOTAL_SPAM_RULES++;
if (defined $SPAM_RULES{$r}) {
$SPAM_RULES{$r}++;
}
else {
$SPAM_RULES{$r}=1;
}
}
else {
$TOTAL_HAM_RULES++;
if (defined $HAM_RULES{$r}) {
$HAM_RULES{$r}++;
}
else {
$HAM_RULES{$r}=1;
}
}
}
if ($spam) {
$NUM_SPAM++;
$SPAM_HITS += $score;
}
else {
$NUM_HAM++;
$HAM_HITS += $score;
}
$NUM_EMAIL++;
$EMAIL_HITS += $score;
}
close(F);
}
sub summarize {
my ($avgspamhits,$avghamhits,$avgemailhits);
print "Content-type: text/html\n\n" if ($PRINT_TO_WEB);
print "<pre>" if ($PRINT_TO_WEB);
if ($NUM_SPAM > 0) {
$avgspamhits= sprintf("%.2f",$SPAM_HITS/$NUM_SPAM);
$avgspamtime= sprintf("%.2f",$SPAM_SEC/$NUM_SPAM);
}
else {
$avgspamhits=0;
$avgspamtime=0;
}
if ($NUM_HAM > 0) {
$avghamhits= sprintf("%.2f",$HAM_HITS/$NUM_HAM);
$avghamtime= sprintf("%.2f",$HAM_SEC/$NUM_HAM);
}
else {
$avghamhits=0;
$avghamtime=0;
}
if ($NUM_EMAIL > 0) {
$avgemailhits= sprintf("%.2f",$EMAIL_HITS/$NUM_EMAIL);
$avgemailtime= sprintf("%.2f",$EMAIL_SEC/$NUM_EMAIL);
}
else {
$avgemailhits=0;
$avgemailtime=0;
}
my $ALTOT=$ALSPAM+$ALHAM;
printf("Email: %8s Autolearn: %5s AvgScore: %6.2f AvgScanTime: %5.2f sec\n",$NUM_EMAIL,$ALTOT,$avgemailhits,$avgemailtime);
printf("Spam: %8s Autolearn: %5s AvgScore: %6.2f AvgScanTime: %5.2f sec\n",$NUM_SPAM,$ALSPAM,$avgspamhits,$avgspamtime);
printf("Ham: %8s Autolearn: %5s AvgScore: %6.2f AvgScanTime: %5.2f sec\n",$NUM_HAM,$ALHAM,$avghamhits,$avghamtime);
&br;
printf "Time Spent Running SA: %7.2f hours\n",$EMAIL_SEC/60/60;
printf "Time Spent Processing Spam: %7.2f hours\n",$SPAM_SEC/60/60;
printf "Time Spent Processing Ham: %7.2f hours\n",$HAM_SEC/60/60;
&br;
my $count=0;
print "TOP SPAM RULES FIRED\n";
&hr;
printf("%4s\t%-24s\t%5s %8s %7s %7s %7s\n","RANK","RULE NAME","COUNT","\%OFMAIL","\%OFSPAM","\%OFHAM");
&hr;
foreach my $key (sort { $SPAM_RULES{$b} <=> $SPAM_RULES{$a} } keys %SPAM_RULES) {
#my $perc1=sprintf("%.2f",($SPAM_RULES{$key}/$NUM_EMAIL)*100);
my $perc1=sprintf("%.2f",(($SPAM_RULES{$key}+$HAM_RULES{$key})/$NUM_EMAIL)*100);
my $perc2=sprintf("%.2f",($SPAM_RULES{$key}/$NUM_SPAM)*100);
my $perc3=sprintf("%.2f",($HAM_RULES{$key}/$NUM_HAM)*100);
printf("%4d\t%-24s\t%5s\t%6.2f\t%6.2f\t%6.2f\n",$count+1,$key,$SPAM_RULES{$key},$perc1,$perc2,$perc3);
$count++;
if ($count >= $TOPRULES && $TOPRULES > 0) {
last;
}
}
&hr;
&br;
my $count=0;
print "TOP HAM RULES FIRED\n";
&hr;
printf("%4s\t%-24s\t%5s %8s %7s %7s %7s\n","RANK","RULE NAME","COUNT","\%OFMAIL","\%OFSPAM","\%OFHAM");
&hr;
foreach my $key (sort { $HAM_RULES{$b} <=> $HAM_RULES{$a} } keys %HAM_RULES) {
#my $perc1=sprintf("%.2f",($HAM_RULES{$key}/$NUM_EMAIL)*100);
my $perc1=sprintf("%.2f",(($SPAM_RULES{$key}+$HAM_RULES{$key})/$NUM_EMAIL)*100);
my $perc2=sprintf("%.2f",($SPAM_RULES{$key}/$NUM_SPAM)*100);
my $perc3=sprintf("%.2f",($HAM_RULES{$key}/$NUM_HAM)*100);
printf("%4d\t%-24s\t%5s\t%6.2f\t%6.2f\t%6.2f\n",$count+1,$key,$HAM_RULES{$key},$perc1,$perc2,$perc3);
$count++;
if ($count >= $TOPRULES && $TOPRULES > 0) {
last;
}
}
&hr;
&br;
print "</pre>\n" if ($PRINT_TO_WEB);
print $footer if ($PRINT_TO_WEB && $footer ne "");
print "\n";
}
#######################
sub hr {
if ($PRINT_TO_WEB) {
print "<hr size=1 width=50% align=left>";
}
else {
print "-" x 70 ."\n";
}
}
#######################
sub br {
if ($PRINT_TO_WEB) {
print "<br>";
}
else {
print "\n";
}
}