Thursday, April 10, 2014

Perl Script: squid-report.pl

unix:/ # cat /web/server/etc/squid-report.pl
#!/usr/local/bin/perl -w
#
# $Header: /web/server/etc/RCS/squid-report.pl,v 1.15 2012/01/20 00:23:37 mwd Exp mwd $
#

use strict;

use Getopt::Long;
use MIME::Entity;
use POSIX qw(strftime);

#----------------------------------------------------------------

# RCS revision number, sliced out of the expanded Revision keyword.
my $VERSION = substr(q$Revision: 1.15 $, 10);

#----------------------------------------------------------------
# Option defaults.  Every option bound with "!" in the GetOptions call
# below is a negatable flag (--name / --noname).

my $opt_debug       = 0;   # verbosity; each --debug / -d raises it by one
my $opt_ranklimit   = 100; # maximum number of rows in each ranking
my $opt_return      = '';  # return email address
my $opt_email       = '';  # to email address
my $opt_sep         = ','; # CSV separator (no command line switch is bound to it)
my $opt_csv         = 1;
my $opt_pdf         = 1;
my $opt_report      = 1;

# report options: which rankings are generated
my $opt_mimetypes   = 1;
my $opt_fileexts    = 1;
my $opt_users       = 1;
my $opt_websites    = 1;
my $opt_clientips   = 1;

# GetOptions returns false when it found a bad option (it has already
# warned about it by then), so stop instead of reporting with settings
# the caller did not ask for.
GetOptions('report!'         => \$opt_report,
           'pdf!'            => \$opt_pdf,
           'csv!'            => \$opt_csv,
           'clientips!'      => \$opt_clientips,
           'websites!'       => \$opt_websites,
           'users!'          => \$opt_users,
           'fileexts!'       => \$opt_fileexts,
           'mimetypes!'      => \$opt_mimetypes,
           'debug|d+'        => \$opt_debug,
           'email=s'         => \$opt_email,
           'return=s'        => \$opt_return,
           'ranklimit=n'     => \$opt_ranklimit,
           ) or die "$0: invalid command line options\n";

# The log is read with <>, which takes the files left in @ARGV after
# option parsing and only falls back to STDIN when there are none.  So
# an interactive STDIN is a problem only when no log file was named.
die "does not take input from a terminal" if ( !@ARGV && -t STDIN );


#-counters-------------------------------------------------------

# Grand totals over every accepted log line.
my ($totalsize, $totalurls) = (0, 0);

# Per-key tallies.  Each *urls hash counts requests and each *bytes hash
# sums the logged sizes for the same key.
my (%mimeurls,     %mimebytes);      # keyed by MIME type
my (%fileexturls,  %fileextbytes);   # keyed by file extension
my (%userurls,     %userbytes);      # keyed by user name
my (%siteurls,     %sitebytes);      # keyed by web site
my (%clientipurls, %clientipbytes);  # keyed by client IP address

#----------------------------------------------------------------

# Timestamps (whole seconds) of the first and the last accepted log
# line; 0 means no line has been accepted yet.
my ($startdate, $finishdate) = (0, 0);

#----------------------------------------------------------------

# Main input loop: read the access log one line at a time (from the files
# named on the command line, else STDIN) and add every line that matches
# the expected layout to the totals and the per-key tallies.  Lines that
# do not match are reported on STDERR and otherwise ignored.
while (<>) {

    chomp;

    # Ten whitespace-separated fields are expected; six are captured:
    # whole seconds of the timestamp, client address, size, requested
    # URL, user name and MIME type.
    # NOTE(review): presumably Squid's native access.log layout - confirm.
    if (m!^(\d*?)\.\d+\s+\S+\s+(\S+)\s+\S+\s+(\d+)\s+\S+\s+(\S+)\s+(\S+)\s+\S+\s+(\S+)$!) {

        # NB we are only interested in the website name, so converting it to lower case doesn't matter
        my ($reqdate, $clientip, $size, $request, $username, $mimetype)
            = ($1, $2, $3, lc $4, lc $5, lc $6);

        $finishdate = $reqdate;
        $startdate  = $finishdate if ( $startdate == 0 );

        print STDERR "$startdate = $finishdate\n" if ($opt_debug);

        # No clamp for negative sizes is needed: the pattern above only
        # accepts digits in the size field.

        # Reduce the request to scheme://host/ so that every URL on one
        # site shares a key.  https:// URLs are collapsed as well as
        # http:// ones.
        my $website = $request || '';
        $website =~ s!^(https?://[^/]+/).*$!$1!i;

        # A bare "host:443" request becomes "https://host/".
        if ( $website =~ s!:443$!/! ) {
            $website = "https://$website" if ( $website !~ m!^http!i );
        }

        # File extension of the requested path, or '-' when there is
        # none.  Declared unconditionally: "my $x = ... if COND" has
        # undefined behaviour in Perl.
        my $fileext = '-';
        if ( $request =~ m!//.*/.*?(\.[\w]+).?$! ) {
            $fileext = $1;
        }

        # The capture keeps its leading dot, so that is what has to be
        # compared when folding .htm into .html.
        $fileext = '.html' if ( $fileext eq '.htm' );

        # ++ and += create missing hash entries on their own, so the
        # counters need no explicit zero initialisation.
        $totalurls++;
        $totalsize += $size;

        $siteurls{$website}++;
        $sitebytes{$website} += $size;

        $mimeurls{$mimetype}++;
        $mimebytes{$mimetype} += $size;

        $fileexturls{$fileext}++;
        $fileextbytes{$fileext} += $size;

        $userurls{$username}++;
        $userbytes{$username} += $size;

        $clientipurls{$clientip}++;
        $clientipbytes{$clientip} += $size;

    } else {
        print STDERR "INVALID SQUID LOGFILE LINE: $_\n";
    }
}

#----------------------------------------------------------------


#----------------------------------------------------------------


# Output accumulators filled by the report sections.
my @reportdata      = ();  # CSV text of each generated report
my @reportfilenames = ();  # file name for the CSV at the same index
my $mainsummary     = '';  # plain-text rendering of all generated reports

# Web site rankings: one by request count, one by bytes.  Each ranking
# appends a text table to $mainsummary and pushes one CSV document plus
# its file name.  At most $opt_ranklimit rows are listed per ranking.
if ($opt_websites) {

    # top sites by url count
    {
        my $title = "TOP WEB SITES by URLs";
        my $csv   = join($opt_sep, 'Rank', 'URLs', 'Web Site') . "\n";
        my $rank  = 0;

        # The title is appended as plain text, not used as a format string.
        $mainsummary .= "$title\n\n";
        $mainsummary .= sprintf "%6s  %8s      %s\n\n", 'Rank', 'URLs', 'Web Site';

        # Largest count first; ties are ordered by name so the ranking
        # does not depend on hash iteration order.
        foreach my $key ( sort { $siteurls{$b} <=> $siteurls{$a} || $a cmp $b } keys %siteurls ) {
            $rank++;
            $mainsummary .= sprintf "%6d %9d      %s\n", $rank, $siteurls{$key}, $key;
            $csv .= join($opt_sep, $rank, $siteurls{$key}, $key) . "\n";
            last if ($rank >= $opt_ranklimit);
        }
        push @reportfilenames, makecsvfilename($title, $startdate, $finishdate);
        push @reportdata, $csv;
    }

    $mainsummary .= "\n\n\n";

    #----------------------------------------------------------------

    # top sites by bytes downloaded
    {
        my $title = "TOP WEB SITES by BYTEs";
        # Header row, so this CSV has the same shape as the by-URLs one.
        my $csv   = join($opt_sep, 'Rank', 'Bytes', 'Web Site') . "\n";
        my $rank  = 0;

        $mainsummary .= "$title\n\n";
        $mainsummary .= sprintf "%6s  %10s    %s\n\n", 'Rank', 'Bytes', 'Web Site';

        foreach my $key ( sort { $sitebytes{$b} <=> $sitebytes{$a} || $a cmp $b } keys %sitebytes ) {
            $rank++;
            my $amount = print_bytes($sitebytes{$key});
            $mainsummary .= sprintf "%6d   %s    %s\n", $rank, $amount, $key;
            $csv .= join($opt_sep, $rank, $amount, $key) . "\n";
            last if ($rank >= $opt_ranklimit);
        }
        push @reportfilenames, makecsvfilename($title, $startdate, $finishdate);
        push @reportdata, $csv;
    }

    $mainsummary .= "\n\n\n";

}

#----------------------------------------------------------------

# MIME type rankings: one by request count, one by bytes.  Each ranking
# appends a text table to $mainsummary and pushes one CSV document plus
# its file name.  At most $opt_ranklimit rows are listed per ranking.
if ( $opt_mimetypes ) {
    # top mimetypes by url count
    {
        my $title = "TOP MIME-TYPES by URLs";
        # Header row, as in the CSV of the web site report by URLs.
        my $csv   = join($opt_sep, 'Rank', 'URLs', 'MIME-Type') . "\n";
        my $rank  = 0;

        # The title is appended as plain text, not used as a format string.
        $mainsummary .= "$title\n\n";
        $mainsummary .= sprintf "%6s  %7s    %s\n\n", 'Rank', 'URLs', 'MIME-Type';

        # Largest count first; ties are ordered by name so the ranking
        # does not depend on hash iteration order.
        foreach my $key (sort { $mimeurls{$b} <=> $mimeurls{$a} || $a cmp $b } keys %mimeurls) {
            $rank++;
            $mainsummary .= sprintf "%6d  %7d    %s\n", $rank, $mimeurls{$key}, $key;
            $csv .= join($opt_sep, $rank, $mimeurls{$key}, $key) . "\n";
            last if ($rank >= $opt_ranklimit);
        }
        push @reportfilenames, makecsvfilename($title, $startdate, $finishdate);
        push @reportdata, $csv;
    }

    $mainsummary .= "\n\n\n";

    #----------------------------------------------------------------

    # top mimetypes by bytes downloaded
    {
        my $title = "TOP MIME-TYPES by BYTES";
        my $csv   = join($opt_sep, 'Rank', 'Bytes', 'MIME-Type') . "\n";
        my $rank  = 0;

        $mainsummary .= "$title\n\n";
        $mainsummary .= sprintf "%6s  %9s    %s\n\n", 'Rank', 'Bytes', 'MIME-Type';

        foreach my $key (sort { $mimebytes{$b} <=> $mimebytes{$a} || $a cmp $b } keys %mimebytes) {
            $rank++;
            my $amount = print_bytes($mimebytes{$key});
            $mainsummary .= sprintf "%6d  %s    %s\n", $rank, $amount, $key;
            $csv .= join($opt_sep, $rank, $amount, $key) . "\n";
            last if ($rank >= $opt_ranklimit);
        }
        push @reportfilenames, makecsvfilename($title, $startdate, $finishdate);
        push @reportdata, $csv;
    }

    $mainsummary .= "\n\n\n";

}

#----------------------------------------------------------------

# File extension rankings: one by request count, one by bytes.  Each
# ranking appends a text table to $mainsummary and pushes one CSV
# document plus its file name.  At most $opt_ranklimit rows are listed
# per ranking.
if ( $opt_fileexts ) {

    # top file extensions by url count
    {
        my $title = "TOP FILE EXTENSIONS by URLs";
        # Header row, as in the CSV of the web site report by URLs.
        my $csv   = join($opt_sep, 'Rank', 'URLs', 'File Extension') . "\n";
        my $rank  = 0;

        # The title is appended as plain text, not used as a format string.
        $mainsummary .= "$title\n\n";
        $mainsummary .= sprintf "%6s  %7s    %s\n\n", 'Rank', 'URLs', 'File Extension';

        # Largest count first; ties are ordered by name so the ranking
        # does not depend on hash iteration order.
        foreach my $key (sort { $fileexturls{$b} <=> $fileexturls{$a} || $a cmp $b } keys %fileexturls) {
            $rank++;
            $mainsummary .= sprintf "%6d  %7d    %s\n", $rank, $fileexturls{$key}, $key;
            $csv .= join($opt_sep, $rank, $fileexturls{$key}, $key) . "\n";
            last if ($rank >= $opt_ranklimit);
        }
        push @reportfilenames, makecsvfilename($title, $startdate, $finishdate);
        push @reportdata, $csv;
    }

    $mainsummary .= "\n\n\n";

    #----------------------------------------------------------------

    # top file extensions by bytes downloaded
    {
        my $title = "TOP FILE EXTENSIONS by BYTES";
        my $csv   = join($opt_sep, 'Rank', 'Bytes', 'File Extension') . "\n";
        my $rank  = 0;

        $mainsummary .= "$title\n\n";
        $mainsummary .= sprintf "%6s  %9s    %s\n\n", 'Rank', 'Bytes', 'File Extension';

        foreach my $key (sort { $fileextbytes{$b} <=> $fileextbytes{$a} || $a cmp $b } keys %fileextbytes) {
            $rank++;
            my $amount = print_bytes($fileextbytes{$key});
            $mainsummary .= sprintf "%6d  %s    %s\n", $rank, $amount, $key;
            $csv .= join($opt_sep, $rank, $amount, $key) . "\n";
            last if ($rank >= $opt_ranklimit);
        }
        push @reportfilenames, makecsvfilename($title, $startdate, $finishdate);
        push @reportdata, $csv;
    }

    $mainsummary .= "\n\n\n";
}

#----------------------------------------------------------------
# User rankings: one by request count, one by bytes.  Each ranking
# appends a text table to $mainsummary and pushes one CSV document plus
# its file name.  At most $opt_ranklimit rows are listed per ranking.
# In the text tables the user name '-' is annotated as the anonymous
# user; the CSV carries the name unchanged.
if ($opt_users) {
    # top usernames by url count
    {
        my $title = "TOP USERS by URLs";
        # Header row, as in the CSV of the web site report by URLs.
        my $csv   = join($opt_sep, 'Rank', 'URLs', 'Username') . "\n";
        my $rank  = 0;

        # The title is appended as plain text, not used as a format string.
        $mainsummary .= "$title\n\n";
        $mainsummary .= sprintf "%6s  %7s    %s\n\n", 'Rank', 'URLs', 'Username';

        # Largest count first; ties are ordered by name so the ranking
        # does not depend on hash iteration order.
        foreach my $key (sort { $userurls{$b} <=> $userurls{$a} || $a cmp $b } keys %userurls) {
            $rank++;
            my $note = ($key eq '-') ? ' (anonymous user)' : '';
            $mainsummary .= sprintf "%6d  %7d    %s%s\n", $rank, $userurls{$key}, $key, $note;
            $csv .= join($opt_sep, $rank, $userurls{$key}, $key) . "\n";
            last if ($rank >= $opt_ranklimit);
        }
        push @reportfilenames, makecsvfilename($title, $startdate, $finishdate);
        push @reportdata, $csv;
    }

    $mainsummary .= "\n\n\n";

    #----------------------------------------------------------------
    # top users by bytes downloaded
    {
        my $title = "TOP USERS by BYTES";
        my $csv   = join($opt_sep, 'Rank', 'Bytes', 'Username') . "\n";
        my $rank  = 0;

        $mainsummary .= "$title\n\n";
        $mainsummary .= sprintf "%6s  %9s    %s\n\n", 'Rank', 'Bytes', 'Username';

        foreach my $key (sort { $userbytes{$b} <=> $userbytes{$a} || $a cmp $b } keys %userbytes) {
            $rank++;
            my $amount = print_bytes($userbytes{$key});
            my $note   = ($key eq '-') ? ' (anonymous user)' : '';
            $mainsummary .= sprintf "%6d  %s    %s%s\n", $rank, $amount, $key, $note;
            $csv .= join($opt_sep, $rank, $amount, $key) . "\n";
            last if ($rank >= $opt_ranklimit);
        }
        push @reportfilenames, makecsvfilename($title, $startdate, $finishdate);
        push @reportdata, $csv;
    }

    $mainsummary .= "\n\n\n";
}

#----------------------------------------------------------------
# Client IP rankings: one by request count, one by bytes.  Each ranking
# appends a text table to $mainsummary and pushes one CSV document plus
# its file name.  At most $opt_ranklimit rows are listed per ranking.
if ($opt_clientips) {
    # top clientip by url count
    {
        my $title = "TOP CLIENTIPs by URLs";
        # Header row, as in the CSV of the web site report by URLs.
        my $csv   = join($opt_sep, 'Rank', 'URLs', 'Client IP Address') . "\n";
        my $rank  = 0;

        # The title is appended as plain text, not used as a format string.
        $mainsummary .= "$title\n\n";
        $mainsummary .= sprintf "%6s  %7s    %s\n\n", 'Rank', 'URLs', 'Client IP Address';

        # Largest count first; ties are ordered by address string so the
        # ranking does not depend on hash iteration order.
        foreach my $key (sort { $clientipurls{$b} <=> $clientipurls{$a} || $a cmp $b } keys %clientipurls) {
            $rank++;
            $mainsummary .= sprintf "%6d  %7d    %s\n", $rank, $clientipurls{$key}, $key;
            $csv .= join($opt_sep, $rank, $clientipurls{$key}, $key) . "\n";
            last if ($rank >= $opt_ranklimit);
        }
        push @reportfilenames, makecsvfilename($title, $startdate, $finishdate);
        push @reportdata, $csv;
    }

    $mainsummary .= "\n\n\n";

    #----------------------------------------------------------------

    # top clientip by bytes downloaded
    {
        my $title = "TOP CLIENTIPs by BYTES";
        my $csv   = join($opt_sep, 'Rank', 'Bytes', 'Client IP Address') . "\n";
        my $rank  = 0;

        $mainsummary .= "$title\n\n";
        $mainsummary .= sprintf "%6s  %9s    %s\n\n", 'Rank', 'Bytes', 'Client IP Address';

        foreach my $key (sort { $clientipbytes{$b} <=> $clientipbytes{$a} || $a cmp $b } keys %clientipbytes) {
            $rank++;
            my $amount = print_bytes($clientipbytes{$key});
            $mainsummary .= sprintf "%6d  %s    %s\n", $rank, $amount, $key;
            $csv .= join($opt_sep, $rank, $amount, $key) . "\n";
            last if ($rank >= $opt_ranklimit);
        }
        push @reportfilenames, makecsvfilename($title, $startdate, $finishdate);
        push @reportdata, $csv;
    }

    $mainsummary .= "\n\n\n";
}

# Deliver the result.  With --email a MIME message is written to STDOUT:
# the text summary as the first part and every CSV report as a base64
# attachment.  Without it, the subject line and the text summary are
# printed to STDOUT.
{
    # Build the subject from the file name helper, then undo the parts
    # of its file-name formatting that read badly in a subject line.
    my $subject = makecsvfilename("Internet Report", $startdate, $finishdate );
       $subject =~ s!\.csv$!!i;    # dot escaped: strip the literal ".csv" suffix only
       $subject =~ s!Internet-Report-!Internet Report - !ig;
       $subject =~ s!-to-! to !ig;

    #----------------------------------------------------------------
    #
    # craft an email full of CSV
    #
    if ($opt_email ne '') {

        # Without an explicit return address the recipient doubles as sender.
        $opt_return ||= $opt_email;

        print STDERR "$opt_return ||= $opt_email\n" if ( $opt_debug > 2 );

        my $entity = MIME::Entity->build(Type    =>"multipart/mixed",
                                         From    => $opt_return,
                                         To      => $opt_email,
                                         Subject => $subject );

        $entity->attach(Data     => $mainsummary,
                        Type     => "text/plain");


        # loop thru the reports, attaching them; @reportdata and
        # @reportfilenames are filled in step, so one index serves both
        for my $index ( 0 .. $#reportdata ) {

            my $content_type = 'text/comma-separated-values';

            ### Attach stuff to it:
            $entity->attach(Data     => $reportdata[$index],
                            Filename => $reportfilenames[$index],
                            Type     => $content_type,
                            Encoding => "base64");

            print STDERR "$reportfilenames[$index]\n$reportdata[$index]\n" if ( $opt_debug > 3 );

        }

        $entity->print(\*STDOUT);

    } else {

        print "$subject\n\n$mainsummary\n";

    }
}

#----------------------------------------------------------------
# main program ends
#----------------------------------------------------------------

#-S-U-B-R-O-U-T-I-N-E-S------------------------------------------

# print_bytes($bytes)
#
# Format a byte count for the reports: the value is scaled to the largest
# binary unit (gb, mb, kb) that it reaches and rendered as a number with
# two decimals in a seven character field, followed by a two character
# unit label.  Plain bytes use the label 'b ' so every result has the
# same width as long as the number fits its field.
#
# Thresholds are inclusive: exactly 1024 bytes is "   1.00kb", not
# "1024.00b ".
#
# Returns the formatted string.
sub print_bytes {
    my $bytes = shift;

    # Largest unit first, so the first threshold reached wins.
    my @units = (
        [ 1024 * 1024 * 1024, 'gb' ],
        [ 1024 * 1024,        'mb' ],
        [ 1024,               'kb' ],
    );

    for my $unit (@units) {
        my ($size, $label) = @{$unit};
        return sprintf "%7.2f%s", $bytes / $size, $label if ($bytes >= $size);
    }

    return sprintf "%7.2f%s", $bytes, 'b ';
}


# makecsvfilename($title, $startdate, $finishdate)
#
# Build the file name for one CSV report: "<title> <start> to <finish>.csv"
# with every run of spaces replaced by a single dash.  Both dates are
# passed to localtime() and rendered with the strftime format held in
# $stamp_format.
#
# When $opt_debug is set, a trace of the finish date conversion is
# written to STDERR.
#
# Returns the file name.
sub makecsvfilename {
    my ($title, $startdate, $finishdate) = @_;

    my $stamp_format = "%Y-%b-%d %H:%M%p";
    my $start_at     = strftime($stamp_format, localtime($startdate));
    my $finish_at    = strftime($stamp_format, localtime($finishdate));

    if ( $opt_debug ) {
        print STDERR qq{$finish_at = strftime "%Y-%b-%d %H:%M", localtime($finishdate);};
    }

    my $csvfilename = join(' ', $title, $start_at, 'to', $finish_at) . '.csv';

    # Dashes instead of spaces, one dash per run of spaces.
    $csvfilename =~ s! +!-!sg;

    return $csvfilename;
}

No comments:

Post a Comment