unix:/ # cat /web/server/etc/squid-report.pl
#!/usr/local/bin/perl -w
#
# $Header: /web/server/etc/RCS/squid-report.pl,v 1.15 2012/01/20 00:23:37 mwd Exp mwd $
#
use strict;
use Getopt::Long;
use MIME::Entity;
use POSIX qw(strftime);
#----------------------------------------------------------------
my $VERSION = substr(q$Revision: 1.15 $, 10);
#-options--------------------------------------------------------
my $opt_debug     = 0;    # verbosity; every -d / --debug adds one
my $opt_ranklimit = 100;  # maximum number of rows per ranked table
my $opt_return    = '';   # return email address
my $opt_email     = '';   # to email address
my $opt_sep       = ',';  # CSV separator
# output toggles, set from --[no]csv, --[no]pdf and --[no]report
my $opt_csv    = 1;
my $opt_pdf    = 1;
my $opt_report = 1;
# report options: each one switches a pair of ranked tables on or off
my $opt_mimetypes = 1;
my $opt_fileexts  = 1;
my $opt_users     = 1;
my $opt_websites  = 1;
my $opt_clientips = 1;

# GetOptions returns false when it meets an unknown or malformed option
# (it has already printed its own diagnostic by then), so stop instead of
# producing a report with settings the caller did not ask for.
GetOptions(
    'report!'     => \$opt_report,
    'pdf!'        => \$opt_pdf,
    'csv!'        => \$opt_csv,
    'clientips!'  => \$opt_clientips,
    'websites!'   => \$opt_websites,
    'users!'      => \$opt_users,
    'fileexts!'   => \$opt_fileexts,
    'mimetypes!'  => \$opt_mimetypes,
    'debug|d+'    => \$opt_debug,
    'email=s'     => \$opt_email,
    'return=s'    => \$opt_return,
    'ranklimit=n' => \$opt_ranklimit,
    'sep=s'       => \$opt_sep,
) or die "$0: invalid command line options\n";

# The log is read with <>, which takes the files left in @ARGV and only
# falls back to STDIN when there are none.  Refuse an interactive STDIN
# only in that fallback case, so that naming a log file on the command
# line works from a terminal too.  This has to come after GetOptions,
# which is what strips the options out of @ARGV.
die "does not take input from a terminal" if ( !@ARGV && -t STDIN );

#-counters-------------------------------------------------------
# Grand totals, then one request-count hash and one byte-count hash per
# dimension that the report ranks.
my $totalsize = 0;
my $totalurls = 0;
my %mimeurls      = ();
my %mimebytes     = ();
my %fileexturls   = ();
my %fileextbytes  = ();
my %userurls      = ();
my %userbytes     = ();
my %siteurls      = ();
my %sitebytes     = ();
my %clientipurls  = ();
my %clientipbytes = ();
#----------------------------------------------------------------
# Timestamps (whole seconds) of the first and the most recent log line
# that parsed; 0 means no line has been accepted yet.
my $startdate  = 0;
my $finishdate = 0;
#----------------------------------------------------------------
# Read the access log line by line and accumulate the per-dimension
# counters.  The pattern expects ten whitespace-separated columns and
# keeps six of them: the integer part of the timestamp (1st column), the
# client address (3rd), the byte count (5th), the request URL (7th), the
# user name (8th) and the MIME type (10th).  This appears to be Squid's
# native access.log layout -- confirm against the squid.conf in use.
# Lines that do not match are reported on STDERR and skipped.
while (<>) {
    chomp;
    my @fields = m!^(\d*?)\.\d+\s+\S+\s+(\S+)\s+\S+\s+(\d+)\s+\S+\s+(\S+)\s+(\S+)\s+\S+\s+(\S+)$!;
    unless (@fields) {
        print STDERR "INVALID SQUID LOGFILE LINE: $_\n";
        next;
    }

    my ($reqdate, $clientip, $size) = @fields[0 .. 2];
    # NB we are only interested in the website name, so converting it to lower case doesn't matter
    my ($request, $username, $mimetype) = map { lc $_ } @fields[3 .. 5];

    # The first accepted line fixes the start of the reporting period,
    # every accepted line moves its end.
    $finishdate = $reqdate;
    $startdate  = $finishdate if ( $startdate == 0 );
    print STDERR "$startdate = $finishdate\n" if ($opt_debug);

    # Reduce the request to "scheme://host/".  Any URL scheme is accepted
    # (not only http://) so that https:// and ftp:// requests are grouped
    # per site as well instead of being counted once per distinct URL.
    my $website = $request || '';
    $website =~ s!^([a-z][a-z0-9+.-]*://[^/]+/).*$!$1!;
    # Requests logged as a bare "host:443" carry no scheme; present them
    # as https sites.
    if ( $website =~ s!:443$!/! ) {
        $website = "https://$website" if ( $website !~ m!^http! );
    }

    # File extension of the last path component, leading dot included;
    # '-' when there is none.  Declared unconditionally: a "my" guarded by
    # a statement modifier has undefined behaviour in Perl.
    my $fileext = '-';
    if ( $request =~ m!//.*/.*?(\.[\w]+).?$! ) {
        $fileext = $1;
    }
    # The capture includes the dot, so that is what has to be compared
    # when folding .htm into .html.
    $fileext = '.html' if ( $fileext eq '.htm' );

    $totalurls++;
    $totalsize += $size;

    $siteurls{$website}++;
    $sitebytes{$website} += $size;

    $mimeurls{$mimetype}++;
    $mimebytes{$mimetype} += $size;

    $fileexturls{$fileext}++;
    $fileextbytes{$fileext} += $size;

    $userurls{$username}++;
    $userbytes{$username} += $size;

    $clientipurls{$clientip}++;
    $clientipbytes{$clientip} += $size;
}
#----------------------------------------------------------------
#----------------------------------------------------------------
# Results of the table generation below:
#   @reportdata      - CSV text of each ranked table, in the order built
#   @reportfilenames - file name for the entry at the same index
#   $mainsummary     - plain-text rendering of all the tables
my @reportdata      = ();
my @reportfilenames = ();
my $mainsummary     = '';

# Append one ranked table to $mainsummary and queue its CSV twin.
#
# Named arguments:
#   title  - heading line; also the stem of the CSV file name
#   unit   - 'URLs' or 'Bytes'; 'Bytes' values go through print_bytes()
#   label  - heading of the key column
#   values - hash reference, key => number, ranked from high to low
#   width  - width of the value column in the text table
#   note   - optional code reference returning text to append after a
#            key in the text table (never added to the CSV)
#
# Every CSV starts with a header row.  Keys with equal values are ordered
# by name so that repeated runs over the same log give identical output.
# At most $opt_ranklimit rows are produced.
my $add_ranked_table = sub {
    my %table    = @_;
    my $value_of = $table{values};
    my $width    = $table{width};
    my $as_bytes = ( $table{unit} eq 'Bytes' );
    my $note_for = $table{note};

    $mainsummary .= "$table{title}\n\n";
    $mainsummary .= sprintf "%6s %*s %s\n\n", 'Rank', $width, $table{unit}, $table{label};
    my $csv = join( $opt_sep, 'Rank', $table{unit}, $table{label} ) . "\n";

    my @ranked = sort { $value_of->{$b} <=> $value_of->{$a} or $a cmp $b }
                 keys %{$value_of};
    my $rank = 0;
    foreach my $key (@ranked) {
        $rank++;
        my $shown = $as_bytes ? print_bytes( $value_of->{$key} ) : $value_of->{$key};
        my $note  = $note_for ? $note_for->($key) : '';
        $mainsummary .= sprintf "%6d %*s %s%s\n", $rank, $width, $shown, $key, $note;
        $csv .= join( $opt_sep, $rank, $shown, $key ) . "\n";
        last if ( $rank >= $opt_ranklimit );
    }

    push @reportfilenames, makecsvfilename( $table{title}, $startdate, $finishdate );
    push @reportdata, $csv;
    $mainsummary .= "\n\n\n";
};

# One entry per report option, in output order.  Each enabled entry
# yields two tables: ranked by request count, then by bytes downloaded.
# The header and the rows of a table share one width, which keeps the
# columns of the web-site tables lined up.
my @sections = (
    {   enabled    => $opt_websites,
        label      => 'Web Site',
        url_title  => 'TOP WEB SITES by URLs',
        byte_title => 'TOP WEB SITES by BYTEs',
        urls       => \%siteurls,
        bytes      => \%sitebytes,
        url_width  => 9,
    },
    {   enabled    => $opt_mimetypes,
        label      => 'MIME-Type',
        url_title  => 'TOP MIME-TYPES by URLs',
        byte_title => 'TOP MIME-TYPES by BYTES',
        urls       => \%mimeurls,
        bytes      => \%mimebytes,
        url_width  => 7,
    },
    {   enabled    => $opt_fileexts,
        label      => 'File Extension',
        url_title  => 'TOP FILE EXTENSIONS by URLs',
        byte_title => 'TOP FILE EXTENSIONS by BYTES',
        urls       => \%fileexturls,
        bytes      => \%fileextbytes,
        url_width  => 7,
    },
    {   enabled    => $opt_users,
        label      => 'Username',
        url_title  => 'TOP USERS by URLs',
        byte_title => 'TOP USERS by BYTES',
        urls       => \%userurls,
        bytes      => \%userbytes,
        url_width  => 7,
        # a user name of '-' is flagged in the text tables
        note       => sub { $_[0] eq '-' ? ' (anonymous user)' : '' },
    },
    {   enabled    => $opt_clientips,
        label      => 'Client IP Address',
        url_title  => 'TOP CLIENTIPs by URLs',
        byte_title => 'TOP CLIENTIPs by BYTES',
        urls       => \%clientipurls,
        bytes      => \%clientipbytes,
        url_width  => 7,
    },
);

foreach my $section (@sections) {
    next unless $section->{enabled};

    $add_ranked_table->(
        title  => $section->{url_title},
        unit   => 'URLs',
        label  => $section->{label},
        values => $section->{urls},
        width  => $section->{url_width},
        note   => $section->{note},
    );
    $add_ranked_table->(
        title  => $section->{byte_title},
        unit   => 'Bytes',
        label  => $section->{label},
        values => $section->{bytes},
        width  => 9,    # print_bytes() output is normally 9 characters wide
        note   => $section->{note},
    );
}
# Deliver the result: with --email a MIME message is written to STDOUT
# (text summary plus the CSV tables as attachments), otherwise the plain
# text summary alone is printed.
{
    # The subject line is derived from the CSV file name form: strip the
    # ".csv" suffix (literal dot) and put spaces back around the title
    # and around "to".
    my $subject = makecsvfilename( "Internet Report", $startdate, $finishdate );
    $subject =~ s!\.csv$!!i;
    $subject =~ s!Internet-Report-!Internet Report - !ig;
    $subject =~ s!-to-! to !ig;
    #----------------------------------------------------------------
    #
    # craft an email full of CSV
    #
    if ( $opt_email ne '' ) {
        # without --return the recipient is used as the sender as well
        $opt_return ||= $opt_email;
        print STDERR "$opt_return ||= $opt_email\n" if ( $opt_debug > 2 );

        my $entity = MIME::Entity->build(
            Type    => "multipart/mixed",
            From    => $opt_return,
            To      => $opt_email,
            Subject => $subject,
        );
        $entity->attach(
            Data => $mainsummary,
            Type => "text/plain",
        );

        # loop thru the reports, attaching them -- unless --nocsv was given
        if ($opt_csv) {
            for my $index ( 0 .. $#reportdata ) {
                my $content_type = 'text/comma-separated-values';
                ### Attach stuff to it:
                $entity->attach(
                    Data     => $reportdata[$index],
                    Filename => $reportfilenames[$index],
                    Type     => $content_type,
                    Encoding => "base64",
                );
                print STDERR "$reportfilenames[$index]\n$reportdata[$index]\n" if ( $opt_debug > 3 );
            }
        }
        $entity->print( \*STDOUT );
    }
    else {
        print "$subject\n\n$mainsummary\n";
    }
}
#----------------------------------------------------------------
# main program ends
#----------------------------------------------------------------
#-S-U-B-R-O-U-T-I-N-E-S------------------------------------------
# print_bytes($bytes)
#
# Format a byte count for the report tables.  The value is scaled to the
# largest binary unit (gb, mb, kb) it reaches and rendered with two
# decimals in a 7 character field, followed by a two character unit
# label; plain bytes get 'b ' so that every label has the same width.
# The unit boundaries are inclusive: exactly 1024 bytes is "1.00kb".
# An undefined argument is treated as 0.
#
# Returns the formatted string.
sub print_bytes {
    my $bytes = shift;
    $bytes = 0 unless defined $bytes;

    # largest unit first, so the first threshold reached wins
    my @units = (
        [ 1024 * 1024 * 1024, 'gb' ],
        [ 1024 * 1024,        'mb' ],
        [ 1024,               'kb' ],
    );
    foreach my $unit (@units) {
        my ( $unit_size, $unit_label ) = @{$unit};
        if ( $bytes >= $unit_size ) {
            return sprintf "%7.2f%s", $bytes / $unit_size, $unit_label;
        }
    }
    return sprintf "%7.2f%s", $bytes, 'b ';
}
# makecsvfilename($title, $startdate, $finishdate)
#
# Build the file name "<title> <start> to <finish>.csv" and turn every
# run of spaces into a single '-'.  The two dates are epoch seconds and
# are rendered in local time as e.g. "2014-Apr-10 14:05PM"; the month
# abbreviation and the AM/PM marker come from strftime and may therefore
# depend on the locale in effect.
#
# With --debug both conversions are traced on STDERR, one line per call.
#
# Returns the file name.
sub makecsvfilename {
    my $title      = shift;
    my $startdate  = shift;
    my $finishdate = shift;

    # one format for both ends of the period
    my $stamp_format = "%Y-%b-%d %H:%M%p";
    my $start_at     = strftime( $stamp_format, localtime($startdate) );
    my $finish_at    = strftime( $stamp_format, localtime($finishdate) );
    print STDERR "makecsvfilename: '$startdate' => '$start_at', '$finishdate' => '$finish_at'\n"
        if ($opt_debug);

    my $csvfilename = sprintf "%s %s to %s.csv", $title, $start_at, $finish_at;
    $csvfilename =~ s! +!-!g;
    return $csvfilename;
}
Various snippets of UNIX code that might just be a waste if I keep them to myself. This blog is very much a work in progress and is mainly for myself. Yes, I should probably host code on one of those fancy code hosting web sites and I probably will eventually.
Thursday, April 10, 2014
Perl Script: squid-report.pl
Subscribe to:
Post Comments (Atom)
No comments:
Post a Comment