#!/usr/bin/perl -w
use strict;
use IO::File;

# Month abbreviation => zero-padded two-digit month number ("Jan" => "01").
# Built from the ordered name list so position and number cannot drift apart.
my @month_abbrevs= qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec);
my %mnum;
@mnum{@month_abbrevs}= map { sprintf("%02d", $_) } 1 .. 12;

# Alias table: display name => whitespace-separated list of sender addresses
# that are all counted as that one person.  Most addresses are stored in an
# abbreviated "user@d..." form (first letter of the domain followed by "...");
# they are compared literally against the lower-cased sender of each mail.
my %aliases= (
Jip=>'r.g.meijer@s... jip@w... jip_roel@y...',
stefan=>'stefan_koster@h... s.koster@k... stefan@w... io470662@s...',
Toby=>'tobias_oberdorfer@h... t.f.w.oberdorfer@w... das_heimatmensch@h...',
Junior=>'van.der.hee@c... rogier@v... rvanderh@r...',
Lotte=>'lopheij@b... lophey@t... l.ophey@c... oppietwo@h...',
Rogier=>'r.dehaan@b... rogierdehaan@h... rogier85@h... rogiero@g...',
Gul=>'gulmans@g... rgu@p... gul@c... r.gulmans@d... ronald.gulmans@i... ronald.gulmans@a... rgulmans@s... ronald.gulmans@p...',
Chris=>'christiaan.basting@g... chrisbasting@m... christiaan.basting@v... christiaan.basting@d...',
An3s=>'an3s@g... a.schukking@a... an3s@n... an3s@c... an3s@h... andries@k... andries@s... kampstransport@h...',
Robin=>'yahoo@p... robin@k... de.kluizenaar@l... orthelius@y... robink@h... rketelaars@e...',
Jarjan=>'jjf@j... jarjan@e... jarjan@d... jarjan_fisher@h...',
Nop=>'koster.nb@w... nkoster@h... nkoster@d... internop@x...',
JJ=>'jessica@c... j.m.jansen@i...',
# NOTE(review): the last entry below ('hogew059@') has no domain part -- confirm it is intended.
HWH=>'huugken@p... hwh@c... hwh@s... huugken@w... hwh@p... hwhogeweg@p... hhogeweg@s... h.w.hogeweg@s... hogew059@w... hogeweg@s... hogeweg@d... h.w.hogeweg@p... ct352054@s... .ct352054.student.citg.tudelft@s... .ct352054.student.tudelft.nl@s... 352054@s... hogew059@',
Sef=>'sef_teerink@h... sef.teerink@o... sef.teerink@l... s.j.w.p.teerink@b... s.j.w.p.teerink@g...',
Tessa=>'tessa@c... tessa14@x... tessa.koster@w... t.koster@x... t.koster@t...',
# NOTE(review): the last entry below ends in a ')' and otherwise repeats
# 'marcel@r...' -- looks like a typo; confirm before changing.
Marcel=>'marcel@c... marcel-spam@w... marcel@r... marcel@a... marcelammerlaan@h... marcel@r...)',
Patrick=>'patrickpoland@g... patrieknl@y...',
Marco=>'marcoh@c... marcoh@m...',
Nico=>'noky@g... noki@r... noki@c... amfi@w...',
Rene=>'elmo@c... intron@x... elmo@w...',
Teo=>'tla@p... theo.landgraf@p... theo.landgraaf@e... j.landgraf@t... t.landgraf@k... mafti@x... mafti@h... s499435@d...',
Willem=>'itsme@x... itsme@c... itsme@n... itsme@z... w.j.hengeveld@e... hengeveld_wj@p... wjhengeveld@h... willem@e...',
Lennard=>'weurt@g... debar@g... debar@d... weurt@d... vdhulst@i...',
# The only entry holding full, unabbreviated addresses.
Egroups=>'wolbonet@yahoogroups.com wolbonet@egroups.com',
Debbie=>'d.r.a.vanwaardenburg@s...',
Remi=>'dropremix@g...',
Jaap=>'ganswijk@x...',
JW=>'j.w.tendam@s... io169134@s...',
Merten=>'mert_en@h...',
Erik=>'erik@f... erikroozen@f... e.s.roozen@w...',
Jonathan=>'jddevries@w... j.d.dvries@b...',
BartJan=>'l.j.moree@t... s583605@d...',
# Note: the addresses in this entry are separated by two spaces, not one.
GamblePit=>'rp-1ckbjr-18i-laewjl@r...  rp-1ckbjr-18j-laewjl@r...  rp-1ckbjr-18n-laewjl@r...  rp-1ckbjr-194-laewjl@r...  rp-1ckbjr-1c4-laewjl@r...  rp-1izdlr-1ro-laewjl@r...  rp-1izdlr-1vk-laewjl@r...  rp-1izdlr-1xm-laewjl@r...  rp-1izdlr-1zn-laewjl@r...',
Greg=>'gregdevries@g...',
Xoip=>'cs_question@x...',
);

# Reverse lookup built from %aliases: sender address => alias name.
my %aliasrmap;
# Sorted iteration keeps the result deterministic should one address ever be
# listed under two different aliases (the alphabetically last alias wins).
for my $alias (sort keys %aliases) {
    # split ' ' skips leading whitespace and treats any run of whitespace as a
    # single separator.  Splitting on /\s/ produced empty fields for lists
    # separated by two spaces, which registered a bogus "" address.
    for my $address (split ' ', $aliases{$alias}) {
        $aliasrmap{$address}= $alias;
    }
}

# Accumulators filled by process():
#   $stats_per_month_per_from{"YYYY-MM"}{$from}{$stat}
#   $stats_total_per_month{"YYYY-MM"}{$stat}
#   $stats_total_per_from{$from}{$stat}
my %stats_per_month_per_from;
my %stats_total_per_month;
my %stats_total_per_from;
my $path= "/home/itsme/wolbonet";
my $i= 0;   # not referenced anywhere in the visible code

# Feed every plain file in $path to process().  A lexical directory handle is
# used, and the loop only runs when opendir succeeded, so a missing directory
# yields one warning instead of readdir/closedir on an unopened handle.
if (opendir(my $dh, $path)) {
    # explicit defined(): a file named "0" must not end the loop
    while (defined(my $file= readdir $dh))
    {
        process("$path/$file") if (-f "$path/$file");
    }
    closedir $dh;
}
else {
    warn "$!: reading $path\n";
}

printf("found %d dates\n", scalar keys %stats_per_month_per_from);

# Derive the words-per-mail figure ("wpm") for every bucket: each sender
# within each month, each month as a whole, and each sender over all months.
for my $month (keys %stats_per_month_per_from) {
    for my $sender_stats (values %{ $stats_per_month_per_from{$month} }) {
        $sender_stats->{wpm}= $sender_stats->{words} / $sender_stats->{mails};
    }

    my $month_total= $stats_total_per_month{$month};
    $month_total->{wpm}= $month_total->{words} / $month_total->{mails};
}

for my $sender_total (values %stats_total_per_from) {
    $sender_total->{wpm}= $sender_total->{words} / $sender_total->{mails};
}

#output top 5 lists per month
# For every statistic, print one line per month: the month, the month's total
# for that statistic, and the (at most five) senders with the highest values.
for my $what (qw(mails lines words chars wpm)) {
    print "----------------------$what\n";
    for my $t (sort keys %stats_per_month_per_from) {
        my $per_from= $stats_per_month_per_from{$t};

        # Highest value first; ties are broken by sender name so the output
        # does not depend on hash ordering.
        my @top= sort { $per_from->{$b}{$what} <=> $per_from->{$a}{$what} or $a cmp $b } keys %$per_from;

        # Keep at most five.  Slicing [0..4] unconditionally produced undef
        # entries for months with fewer senders; using undef as a hash key
        # autovivified an empty "" sender that then showed up in later passes.
        $#top= 4 if @top > 5;

        printf("%s : %7d %s\n",
            $t,
            $stats_total_per_month{$t}{$what},
            join(", ", map { sprintf("%7d: %-15s", $per_from->{$_}{$what}, $_); } @top));
    }
}

print "\n\n";
# output stats per user
# One line per sender, sorted by name: the five statistics as right-aligned
# integers, followed by the sender.
my @report_columns= qw(mails lines words chars wpm);
for my $sender (sort keys %stats_total_per_from) {
    my @cells= map { sprintf("%7d", $_) } @{ $stats_total_per_from{$sender} }{@report_columns};
    print join(', ', @cells), " : ", $sender, "\n";
}

# process($file)
#
# Reads one mail file, takes sender, month and year from its first line
# (expected form: "From <sender> <weekday> <month> <day> <time> <year>") and
# adds the content statistics returned by countcontent() to the three global
# accumulators: %stats_per_month_per_from, %stats_total_per_month and
# %stats_total_per_from.
#
# Files that cannot be opened, are empty, have an unrecognised first line or
# an unknown month name are reported with warn and skipped.  Returns nothing.
sub process {
    my $file= shift;
    my $fh= IO::File->new($file, "r");
    if (!$fh) {
        # IO::File->new reports failure through $!, not $@
        warn "$file: $!\n";
        return;
    }
    my @lines= <$fh>;
    $fh->close();

    if (!@lines) {
        warn "$file: empty\n";
        return;
    }

    my ($rawfrom, $month, $year)= ($lines[0] =~ /^From (.+) \w+ (\w+) \d+ \S+ (\d+)\s*$/);
    if (!defined $rawfrom) {
        warn "unknown: $file : $lines[0]\n";
        return;
    }

    # An unknown month name would otherwise interpolate undef and create a
    # malformed "YYYY-" bucket.
    if (!exists $mnum{$month}) {
        warn "$file: unknown month '$month'\n";
        return;
    }

    # Normalise the sender: prefer the part between <...>, lower-case it,
    # drop the known "(name)" decorations and remove all whitespace.
    my $from= $rawfrom;
    if ($from =~ /<(\S+)>/) {
        $from= $1;
    }
    $from= lc($from);
    $from =~ s/\((?:marcel|oberdorfer_+tfw_+)\)//;
    $from =~ s/\s//g;

    if ($from eq "") {
        # report the raw sender text; $1 is no longer reliable at this point
        warn "$file: empty from '$rawfrom'\n";
    }

    # Fold all known addresses of one person into a single alias name.
    if (exists $aliasrmap{$from}) {
        $from = $aliasrmap{$from};
    }

    my $yearmonth= "$year-$mnum{$month}";
    my %contentstats= countcontent(\@lines);
    for my $stat (keys %contentstats) {
        $stats_per_month_per_from{$yearmonth}{$from}{$stat}+= $contentstats{$stat};
        $stats_total_per_month{$yearmonth}{$stat}+= $contentstats{$stat};
        $stats_total_per_from{$from}{$stat}+= $contentstats{$stat};
    }
    return;
}
# countcontent(\@lines)
#
# Takes a reference to the lines of one mail.  Everything up to and including
# the first blank (whitespace-only) line is treated as header and skipped; the
# remaining lines are counted.
#
# Returns a key/value list:
#   mails => 1,
#   lines => number of body lines,
#   words => number of \w+ runs in the body,
#   chars => total length of the body lines (line endings included).
# A mail without a blank separator line, or with nothing after it, yields
# zero lines, words and chars.
sub countcontent {
    my $lines= shift;

    # find the blank line that ends the header
    my $i= 0;
    while ($i<=$#$lines && $lines->[$i] !~ /^\s*$/ ) { $i++; }

    # step over the separator itself
    $i++;

    my ($nlines, $nwords, $nchars)= (0,0,0);
    for my $line (@{$lines}[$i .. $#$lines]) {
        $nlines++;

        # Count the words themselves.  Splitting on /\W+/ counted an extra
        # empty leading field on every line that starts with a non-word
        # character (for example quoted "> text" lines).
        my $wordsinline= () = $line =~ /\w+/g;
        $nwords += $wordsinline;

        $nchars += length($line);
    }
    return (mails=>1, lines=>$nlines, words=>$nwords, chars=>$nchars);
}

