#!/usr/bin/perl -w
#
# spamdstats - Gives back rudimentary stats based on spamd logs
#
# Copyright Clint Byrum, 2003,2004. All Rights Reserved.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
# version 0.1  - Initial release
# version 0.5  - Suggestion from viz, widening user column for virtual domains
# version 0.6  - Adding total messages and spam percentage columns .. ty viz
#              - Changed name to spamdstats
# version 0.7  - Sorting by date
# version 0.8  - Sorting users by number of spams (descending)
# version 0.9  - Fixing div/zero problems
# version 0.10 - I'm an idiot
# version 0.11 - Fixing errors when you didn't get any clean messages
# version 0.12 - Adding 'max' scores
# version 0.13 - downcasing addresses
# version 1.4  - Switching to using CVS for version numbers (hence the jump)
#                Adding bytes stats
#                shuffling things on top to accommodate for bytes stats
# version 1.5  - Ending changelog in file. See CVS Repository for change hist
#                going forward.
#
# $Id: spamdstats,v 1.7 2004/06/02 19:48:38 clint Exp $

use strict;
use warnings;

# Month to Digits (zero-based, zero-padded so the strings sort correctly)
our %mtd = (
    'Jan' => '00',
    'Feb' => '01',
    'Mar' => '02',
    'Apr' => '03',
    'May' => '04',
    'Jun' => '05',
    'Jul' => '06',
    'Aug' => '07',
    'Sep' => '08',
    'Oct' => '09',
    'Nov' => '10',
    'Dec' => '11',
);

# Digits to Month (inverse of %mtd, indexed by the zero-based month number)
our @dtm = (
    'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
    'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec',
);

# Unit labels used by formatbytes(), indexed by the power of 1024.
my @leveltable = (" ", "KB", "MB", "GB", "TB", "PB");

# Run the report only when executed as a script.  When the file is loaded
# with require/do, only the tables and subs are defined, which allows the
# helpers to be exercised on their own.
exit main() unless caller;

###### SUBROUTINES ######

# main() - read spamd log lines, accumulate statistics, print the report.
#
# Input comes from <>, i.e. the files named on the command line or STDIN
# when there are none.  Each line is split on whitespace and is expected
# to look like:
#   Mon DD HH:MM:SS host spamd[pid]: clean|identified <word> (score/req)
#       for user[:uid][;user...] in N.N seconds, NNN bytes.
# Lines that are not from spamd, are not clean/identified results, or are
# too short to carry score, user and time fields are ignored.
#
# Returns the process exit status (always 0).
sub main {
    my (%cleandates, %spamdates, %spamdatebytes);
    my (%userspam, %userclean, %userspambytes);
    my ($cleanmsgs, $spams)                   = (0, 0);
    my ($totalscore, $cleanscore, $spamscore) = (0, 0, 0);
    my ($timespent, $spambytes)               = (0, 0);
    my ($maxuserlen, $maxspamscore)           = (0, 0);

    LINE:
    while (my $line = <>) {
        my ($month, $day, undef, undef, $daemon, $word1, undef,
            $scores, undef, $userfield, undef, $proctime, undef, $bytes)
            = split(' ', $line);

        # Skip anything that is not a complete spamd result line.
        next LINE unless defined $daemon && $daemon =~ /spamd/;
        next LINE unless defined $word1  && $word1  =~ /clean|identified/;
        next LINE
            unless defined $scores && defined $userfield && defined $proctime;

        # The byte count is the last field; treat it as 0 when absent.
        $bytes = 0 unless defined $bytes;

        # "(score/required)" -> score
        my ($score) = split(m{/}, $scores);
        $score = 0 unless defined $score;
        $score =~ s/^\(//;

        my $sortdate = sortabledate("$month $day");
        my $is_clean = ($word1 =~ /clean/) ? 1 : 0;

        # Sometimes there is more than one user (separated by ';');
        # the message is then counted once for each of them.
        for my $entry (split(/;/, $userfield)) {
            my ($user) = split(/:/, $entry);    # strip off UID
            $user = '' unless defined $user;

            $maxuserlen = length($user) if length($user) > $maxuserlen;

            # email addresses are case insensitive, so downcase them
            # for proper matching
            $user = lc($user) if $user =~ /\@/;

            $totalscore += $score;
            $timespent  += $proctime;

            if ($is_clean) {
                $cleanmsgs++;
                $userclean{$user}++;
                $cleandates{$sortdate}++;
                $cleanscore += $score;
            }
            else {
                $spams++;
                $userspam{$user}++;
                $userspambytes{$user}     += $bytes;
                $spamdates{$sortdate}++;
                $spamdatebytes{$sortdate} += $bytes;
                $spamscore                += $score;
                $spambytes                += $bytes;
                $maxspamscore = $score if $score > $maxspamscore;
            }
        }
    }

    my $totalmsgs = $spams + $cleanmsgs;
    if ($totalmsgs == 0) {
        print STDERR "You didn't get any messages. Exiting\n";
        return 0;
    }

    # Averages; 'inf' stands in wherever the divisor would be zero.
    my $averageptime  = $timespent / $totalmsgs;
    my $averagescore  = $totalscore / $totalmsgs;
    my $avgcleanscore = $cleanmsgs ? $cleanscore / $cleanmsgs : 'inf';
    my $avgspamscore  = $spams     ? $spamscore / $spams      : 'inf';
    # Never hand 'inf' to formatbytes(): it is not a byte count.
    my $avgspambytes  = $spams ? formatbytes($spambytes / $spams) : 'inf';

    print "Spams Found : $spams";
    printf("\tAvg. Score: %5.3f Max: %5.3f AvgBytes: %7s\n",
        $avgspamscore, $maxspamscore, $avgspambytes);
    print "Clean Messages: $cleanmsgs";
    printf("\tAvg. Score: %5.3f\n", $avgcleanscore);
    print "Total Messages: $totalmsgs";
    printf("\tAvg. Score: %5.3f\n", $averagescore);
    printf("Time spent: %10.3f\n", $timespent);
    printf("Avg. Time: %5.3f\n", $averageptime);
    print "\n";

    # The user column is as wide as the longest user name, but never
    # narrower than its own "User" heading.
    $maxuserlen = length('User') if $maxuserlen < length('User');

    my $uheadstr = "%" . $maxuserlen . "s%10s%10s%10s%9s%8s\n";
    my $headstr  = "%10s%10s%10s%10s%9s%8s\n";
    my $upfstr   = "%" . $maxuserlen . "s%10d%10d%10d %8.3f%8s\n";
    my $pfstr    = "%10s%10d%10d%10d %8.3f%8s\n";

    printf($uheadstr, "User", "Spams", "Clean", "Total", "Spam %", "Bytes");
    printf($uheadstr, '-' x ($maxuserlen - 1),
        '-' x 9, '-' x 9, '-' x 9, '-' x 8, '-' x 7);

    # List users, most spam first; ties are broken by name so the output
    # is reproducible.
    # XXX will only show users who got spam
    my @sortedusers =
        sort { $userspam{$b} <=> $userspam{$a} or $a cmp $b } keys %userspam;
    for my $user (@sortedusers) {
        my $uspam  = $userspam{$user};
        my $uclean = defined $userclean{$user} ? $userclean{$user} : 0;
        my $utotal = $uspam + $uclean;
        printf($upfstr, $user, $uspam, $uclean, $utotal,
            _spam_pct($uspam, $utotal),
            formatbytes($userspambytes{$user}));
    }
    print "\n";

    printf($headstr, "Date", "Spams", "Clean", "Total", "Spam %", "Bytes");
    printf($headstr, '-' x 9, '-' x 9, '-' x 9, '-' x 9, '-' x 8, '-' x 7);

    # XXX Will only show dates that spam was received on
    for my $date (sort { $a cmp $b } keys %spamdates) {
        my $dspam  = $spamdates{$date};
        my $dclean = defined $cleandates{$date} ? $cleandates{$date} : 0;
        my $dtotal = $dspam + $dclean;
        printf($pfstr, dateablesort($date), $dspam, $dclean, $dtotal,
            _spam_pct($dspam, $dtotal),
            formatbytes($spamdatebytes{$date}));
    }

    return 0;
}

# _spam_pct($spams, $total) - percentage of $total that is spam,
# or 'inf' when $total is zero.
sub _spam_pct {
    my ($spam_count, $total) = @_;
    return 'inf' if $total == 0;
    return ($spam_count / $total) * 100;
}

# sortabledate("Mon D") - returns a date suitable for sorting: a four
# character "MMDD" string with a zero-based month.  A month name that is
# not in %mtd sorts last, under month code '99'.
sub sortabledate {
    my $indate = shift();
    my ($month, $dom) = split(/ +/, $indate);
    my $mm = (defined $month && exists $mtd{$month}) ? $mtd{$month} : '99';
    return $mm . sprintf('%02d', defined $dom ? $dom : 0);
}

# dateablesort("MMDD") - reverse the process above, returning "Mon  D"
# with the day right-aligned in three columns.  A month code outside
# @dtm is shown as '???'.
sub dateablesort {
    my $indate = shift();
    my $dom    = sprintf('%d', substr($indate, 2, 2));
    my $month  = sprintf('%d', substr($indate, 0, 2));
    my $name   = defined $dtm[$month] ? $dtm[$month] : '???';
    return sprintf('%s %3d', $name, $dom);
}

# averagetimes(@values) - arithmetic mean of the arguments, or 'inf'
# when called with no arguments.
sub averagetimes {
    my @avgarray = @_;
    return 'inf' unless @avgarray;

    my $total = 0;
    for my $datum (@avgarray) {
        $total += $datum;
    }
    return $total / scalar(@avgarray);
}

# formatbytes($bytes [, $level]) - format a byte count as "%6.1f" followed
# by a unit label from @leveltable.
#
# $level is the power of 1024 to start at (default 1, i.e. KB).  The unit
# is raised while the value exceeds 999 of the current unit, but never
# past the last entry of @leveltable, so huge or infinite inputs terminate
# with a PB figure instead of running off the table.  An undefined $bytes
# is treated as 0.
sub formatbytes {
    my ($in, $level) = @_;
    $in    = 0 unless defined $in;
    $level = 1 unless defined $level;
    $level = $#leveltable if $level > $#leveltable;

    while ($level < $#leveltable && $in > 999 * (1024**$level)) {
        $level++;
    }

    return sprintf('%6.1f%s', $in / (1024**$level), $leveltable[$level]);
}

1;