#!/usr/bin/perl
# $Id: ipacsum,v 1.10 1998/02/23 14:08:13 moritz Exp $
#
# Summarize all IP accounting files from start to end time
#
# Copyright (C) 1997, 1998 Moritz Both
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
# The author can be reached via email: moritz@daneben.de, or by
# snail mail: Moritz Both, c/o Comlink, Im Moore 26, 30167 Hannover,
#             Germany. Phone: +49-511-1617811
#

use Getopt::Std;
use Sys::Hostname;

# Cumulative number of days before the first of each month in a non-leap
# year (index 0 = January); used by makeunixtime.
@moff = (0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334 );
# Days per month in a non-leap year (index 0 = January); used by
# set_time_frame.
@mofg = (31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31);

# NOTE(review): the "# =()<...>()=" comments below look like markers for a
# build-time substitution step that rewrites the assignment following each
# of them - presumably they must stay verbatim; confirm against the build.
# =()<$datdir="@<ACCTDIR>@";>()=
$datdir="/var/log/ip-acct";
# =()<$datdelim="@<DATDELIM>@";>()=
$datdelim="#-#-#-#-#";
# =()<$version="@<VERSION>@";>()=
$version="0.96";

$me=$0;
$now=time;
$replace=0;
$exact = 0;
$progression = 0;		# if 1, collect progressions (needs memory!)
$graph = 0;			# make ascii graphs
$rule_regex = ".*";		# match rules with this regex only

$graph_width = 55;
$graph_intervall = 60*60;	# seconds

# calculate time zone offset in seconds - use difference of output of date
# command and time function, rounded to the nearest full minute.
# The date command runs after $now was taken, so the raw difference can be
# off by a second; truncating it (as int() alone would) loses a whole
# minute for zones west of UTC, hence the explicit rounding.
$tzoffset = 0;			# ! makeunixtime needs this!
my $tzdiff = $now - makeunixtime(`date +"%Y%m%d%H%M%S"`);
$tzoffset = 60 * int($tzdiff / 60 + ($tzdiff < 0 ? -0.5 : 0.5));
# get time zone name
$tzname = `date +%Z`; chomp $tzname;
# get host name
$hostname = &hostname;

# default time frame: from the epoch (in local wall clock terms) until now
$starttime=0 + $tzoffset;
$endtime=$now;

# Parse the command line; Getopt::Std stores each option in $opt_<letter>.
&getopts("d:e:f:ghi:rs:t:x");

# NOTE(review): the doubled "$opt_x || $opt_x" tests here and below
# presumably exist only to silence perl's "used only once" warning.
&usage if ($opt_h || $opt_h);

# -s / -e: first try to read the argument as an absolute time stamp; if
# makeunixtime returns 0 treat it as a duration counted back from now.
if ($opt_s) {
	$starttime=makeunixtime($opt_s);
	$starttime = $now - &parse_cmd_time($opt_s) if (!$starttime);
}
if ($opt_e) {
	$endtime=makeunixtime($opt_e);
	$endtime = $now - &parse_cmd_time($opt_e) if (!$endtime);
}
$exact=1 if ($opt_x || $opt_x);

$replace = 1 if ($opt_r || $opt_r);

# graphs are drawn from the per-file progression data, so -g turns on
# progression collecting as well
$graph = 1 if ($opt_g || $opt_g);
$progression = 1 if ($graph);
$rule_regex = $opt_f if ($opt_f);

if ($opt_i) {
	die "$me: invalid option -i without -g\n" if (! $graph);
	$graph_intervall=parse_cmd_time($opt_i);
}

# -t is evaluated after -s and -e and overwrites $starttime and $endtime
if ($opt_t) {
	&set_time_frame($opt_t);
}

$datdir = $opt_d if ($opt_d);

# never look into the future; then convert both limits into the
# YYYYMMDD-HHMMSS form the file names are compared against
$endtime = $now if ($endtime > $now);
$mystarttime = makemytime($starttime);
$myendtime = makemytime($endtime);
%rule_firstfile =  %rule_lastfile = ( );

# Loop through dir, identify files
# Names starting with "." are skipped; every other name is compared as a
# string against the start and end stamps and kept if it lies in between.
opendir(DIR, $datdir) || die "$me: can't open datdir $datdir\n";
while(defined($file = readdir DIR)) {
	next if ($file =~ /^\./);
	next if ($file lt $mystarttime || $file gt $myendtime);
	push(@files, $file);
}
closedir DIR;

# Read the selected files in sorted name order; $rulenumber numbers the
# rules in the order in which their names are first seen.
$rulenumber=0;
@files = sort @files;

for ($ifile=0; $ifile<=$#files; $ifile++) {
	&collect_data_from_file($ifile);
}

# rule names ordered by first appearance
@rules_sorted = sort { $rulenames{$a} <=> $rulenames{$b} } keys %rulenames;

# -r: replace the files just read by a single summary file
&make_one_file_from_many() if $replace;

# report header
printf "IP accounting summary\nHost: $hostname / Time created: %s $tzname\n",
	nice_date(makemytime($now));
printf "Data from %s $tzname to %s $tzname\n", 
	nice_date($mystarttime), nice_date($myendtime);

# one line per rule whose name matches the -f regex; print_sum_line
# counts incomplete rules in $incomplete_data
$incomplete_data=0;
foreach (@rules_sorted) {
	if (/$rule_regex/) {
		&print_sum_line($_);
	}
}
if ($incomplete_data) {
	print "* = data incomplete, rule was not there all the time\n";
}

if ($graph) {
	&out_graph;
}

# Print the summary line for one rule.
# Argument: the rule name (key of %filter, %bytes and %rule_count).
# The line is prefixed with "*" and $incomplete_data is incremented when
# the rule was not found in every selected file. The byte total is printed
# exactly if $exact is set, otherwise shortened by nice_number.
sub print_sum_line {
	my($f) = shift;
	my($s) = " ";

	# @files in numeric context is the number of files read. (Comparing
	# the last index $#files instead let a rule that was missing from
	# exactly one file pass as complete.)
	my($seen) = defined($rule_count{$f}) ? $rule_count{$f} : 0;
	if (@files > $seen) {
		$incomplete_data++;
		$s="*";
	}

	# A rule that was named in a file but never had a counter line has no
	# entries in %filter and %bytes; fall back to the padded name and 0.
	my($label) = defined($filter{$f}) ? $filter{$f} : sprintf("%-48s", $f);
	my($total) = defined($bytes{$f}) ? $bytes{$f} : 0;

	printf("%s %s: %15s\n",
		$s,
		$label,
		$exact ? $total : &nice_number($total)
	);
}

# read one file - must be called in correct sorted file name order
#
# Argument: index of the file in @files.
# Layout as parsed here: rule names, one per line (lines starting with "#"
# are skipped), then a line equal to $datdelim (or one containing
# "IP accounting rules"), then one "<packets> <bytes>" line per rule in the
# same order as the names.
# Adds the counters to %bytes and %pkts, counts the file in %rule_count,
# records it in %rule_lastfile, registers new names in %rulenames and, if
# $progression is set, stores the per-file values for the graphs.
# Dies if the file cannot be opened.
sub collect_data_from_file {
	my($file, $ifile, $s, $indata, $i, @rulenames);
	my($pkts, $bytes);
	my($fh, $line);

	$ifile=shift;
	$file=$files[$ifile];
	$indata=0;

	# three-argument open: the name comes from readdir and must never be
	# interpreted as a mode or a pipe
	open($fh, '<', "$datdir/$file")
		|| die "$me: cant open file $file: $!\n";
	$i=-1;
	# read into a lexical so the caller's $_ stays untouched
	while(defined($line = <$fh>)) {
		$indata=1 if (!$indata && $line =~ /IP accounting rules/);
		if ($line =~ /^\s*(\d+)\s+(\d+)/) {
			$indata=1;
			$i++;
			# found accouting data
			($pkts, $bytes) = ($1, $2);

			# the n-th counter line belongs to the n-th rule name
			$s = $rulenames[$i];
			if (!defined($s)) {
				print "$me: more data than rules in $file - extra ignored\n";
				last;
			}
			init_filter_id($s, $ifile);
			$bytes{$s} += $bytes;
			$pkts{$s} += $pkts;
			$rule_count{$s}++;
			# per-rule entry (this used to assign a stray scalar)
			$rule_lastfile{$s} = $file;
			if ($progression) {
				# NOTE: this stores into the hash referenced by the
				# scalars $prog_bytes / $prog_pkts, which is also
				# where single_graph reads the values from.
				$$prog_bytes{$s}[$ifile] = $bytes;
				$$prog_pkts{$s}[$ifile] = $pkts;
				$prog_bytes_max{$s} = $bytes
					if ($prog_bytes_max{$s} < $bytes);
				$prog_pkts_max{$s} = $pkts
					if ($prog_pkts_max{$s} < $pkts);
			}
		}
		elsif (! $indata) {
			# the delimiter is a literal string, not a pattern
			if ($line =~ /^\Q$datdelim\E$/) {
				$indata=1;
				next;
			}
			next if ($line =~ /^#/);
			# chomp, not chop: an unterminated last line keeps its
			# final character
			chomp($line);
			push(@rulenames, $line);
			$rulenames{$line}=$rulenumber++
				if (!defined($rulenames{$line}));
		}
	}
	close $fh;
}

# Create the per-rule bookkeeping entries the first time a rule is seen.
# Arguments: rule name, index (into @files) of the file being read.
# Does nothing if %bytes already has an entry for the rule.
sub init_filter_id {
	my $rule = $_[0];
	my $file_index = $_[1];

	# known rule - keep what has been accumulated so far
	return if (defined $bytes{$rule});

	# totals, and the 48 column left-aligned label used in the report
	$bytes{$rule} = 0;
	$pkts{$rule} = 0;
	$rule_count{$rule} = 0;
	$filter{$rule} = sprintf("%-48s", $rule);

	# the file in which the rule showed up first; no last file yet
	$rule_firstfile{$rule} = $files[$file_index];
	$rule_lastfile{$rule} = "";

	# progression slots are only set up when progressions are collected
	return if (!$progression);
	$prog_pkts{$rule} = [ ];
	$prog_bytes{$rule} = [ ];
	$prog_pkts_max{$rule} = 0;
	$prog_bytes_max{$rule} = 0;
}

# given a string in format YYYYMMDD[hh[mm[ss]]], make unix time
# use time zone offset $tzoffset (input=wall clock time, output=UTC)
#
# A "-" between date and time (the accounting file name format
# YYYYMMDD-hhmmss) is accepted as well; missing time fields count as 0 and
# anything after the recognized fields is ignored.
# Returns 0 if the string does not start with eight digits.
# The leap year rule used is "every fourth year", which holds for
# 1970 through 2099.
sub makeunixtime {
	my($y, $m, $d, $h, $i, $e);
	# lexical copy - a global $s used to be overwritten here
	my($s) = shift;
	my($days);

	return 0 if (!defined($s));
	if ($s !~ /^(\d\d\d\d)(\d\d)(\d\d)(?:-?(\d\d)(?:(\d\d)(\d\d)?)?)?/) {
		return 0;
	}
	($y, $m, $d) = ($1, $2, $3);
	$h = defined($4) ? $4 : 0;
	$i = defined($5) ? $5 : 0;
	$e = defined($6) ? $6 : 0;

	# days since 1970-01-01: whole years, one extra day per leap year
	# begun so far, days before this month, days within the month
	$y-=1970;
	$days = (($y)*365) + int(($y+2)/4) + $moff[$m-1] + $d-1;
	# in January/February of a leap year its extra day has not happened yet
	$days-- if (($y+2)%4 == 0 && $m < 3);
	$days*86400 + $h*3600 + $i*60 + $e + $tzoffset;
}

# Convert a unix time into the local time stamp "YYYYMMDD-HHMMSS", the
# format the accounting file names are compared against.
sub makemytime {
	# localtime fields: 0=sec 1=min 2=hour 3=mday 4=mon 5=year
	my @t = localtime($_[0]);

	return sprintf("%04d%02d%02d-%02d%02d%02d",
		$t[5] + 1900, $t[4] + 1, @t[3, 2, 1, 0]);
}

# parse time as a duration
# syntax is
# cmd_time:		seconds | cmd_time_with_size
# cmd_time_with_size:	cmd_time_atom | cmd_time_with_size cmd_time_atom
# cmd_time_atom:	number size
# size:			"s"|"m"|"h"|"D"|"W"|"M"|"Y"
#			(sec, min, hours, Days, Weeks, Months, Years)
# seconds:		number
# return number of seconds
#
# A month counts as 30 days and a year as 365 days.
# Dies with a syntax error message if the string does not follow the
# grammar; this includes trailing garbage such as the "0" in "1h0", which
# used to be dropped silently.
# The caller's $_ is left alone.
sub parse_cmd_time {
	my($spec) = shift;
	my($sec) = 0;
	# seconds per unit
	my(%unit) = (
		"s" => 1,
		"m" => 60,
		"h" => 60*60,
		"D" => 60*60*24,
		"W" => 60*60*24*7,
		"M" => 60*60*24*30,
		"Y" => 60*60*24*365,
	);

	$spec = "" if (!defined($spec));
	return $spec if ($spec =~ /^\d+$/);

	# consume one "number size" atom per round until nothing is left
	while(length($spec)) {
		if ($spec !~ /^(\d+)\s?([smhDWMY])(.*)$/) {
			die "$me: syntax error in time (duration)\n";
		}
		$sec += $1 * $unit{$2};
		$spec = $3;
	}
	$sec;
}

# Set $starttime and $endtime from a time frame description (option -t).
# Argument: the description, either one of the short forms listed in
# @alias below or the general form "the <unit> <n> <unit>s ago" with unit
# being hour, day, week, month or year. Matching is case insensitive and
# only looks at the beginning of the string.
# Frames are relative to $now; a week starts on Monday.
# Dies with "Unknown time frame" for anything else.
sub set_time_frame {
	my($opt_t) = shift;
	my($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) =
                                                 localtime($now);
	my($i, $a);
	my($day) = 60*60*24;

	# short forms and the general form each of them stands for
	my(@alias) = (
		[ 'this hour',			"the hour 0 hours ago" ],
		[ 'last hour',			"the hour 1 hour ago" ],
		[ 'today',			"the day 0 days ago" ],
		[ 'yesterday',			"the day 1 day ago" ],
		[ 'the day before yesterday',	"the day 2 days ago" ],
		[ 'this week',			"the week 0 weeks ago" ],
		[ 'last week',			"the week 1 week ago" ],
		[ 'the week before last week',	"the week 2 weeks ago" ],
		[ 'this month',			"the month 0 months ago" ],
		[ 'last month',			"the month 1 month ago" ],
		[ 'this year',			"the year 0 years ago" ],
		[ 'last year',			"the year 1 year ago" ],
	);
	foreach $a (@alias) {
		if ($opt_t =~ /^$$a[0]/i) {
			$opt_t = $$a[1];
			last;
		}
	}

	if ($opt_t =~ /^the hour (\d+) hours? ago/i) {
		$i=$1;
		my($thishour)=makeunixtime(sprintf("%04d%02d%02d%02d0000",
			1900+$year, $mon+1, $mday, $hour));
		$starttime=$thishour - 60*60 * $i;
		$endtime = $thishour - 60*60 * ($i-1) -1;
	}
	elsif ($opt_t =~ /^the day (\d+) days? ago/i) {
		$i=$1;
		my($thismorning)=makeunixtime(sprintf("%04d%02d%02d000000",
			1900+$year, $mon+1, $mday));
		$starttime=$thismorning - $day * $i;
		$endtime=$thismorning - $day * ($i-1) -1;
	}
	elsif ($opt_t =~ /^the week (\d+) weeks? ago/i) {
		$i=$1;
		# Step back from today's midnight in seconds. Subtracting the
		# weekday from $mday before formatting the date produced a
		# negative day of month whenever Monday was in the previous
		# month, which makeunixtime cannot parse.
		my($thismorning)=makeunixtime(sprintf("%04d%02d%02d000000",
			1900+$year, $mon+1, $mday));
		# $wday: 0 = Sunday, which is six days after Monday
		my($monday)=$thismorning - $day * ($wday >0 ? $wday-1 : 6);
		$starttime=$monday - $day*7 * $i;
		$endtime=$monday- $day*7 * ($i-1) -1;
	}
	elsif ($opt_t =~ /^the month (\d+) months? ago/i) {
		# go back month by month, borrowing from the year as needed
		$mon = $mon - $1;
		while ($mon < 0) {
			$year--;
			$mon += 12;
		}

		$starttime=makeunixtime(sprintf("%04d%02d01000000",
			1900+$year, $mon+1));
		$endtime=$starttime + $day*$mofg[$mon] -1;
		# February of a leap year (every-fourth-year rule) has 29 days
		$endtime += $day if ((1900+$year)%4 ==0 && $mon==1);
	}
	elsif ($opt_t =~ /^the year (\d+) years? ago/i) {
		$i=$1;

		$starttime=makeunixtime(sprintf("%04d0101000000",
			1900+$year-$i));
		$endtime=makeunixtime(sprintf("%04d0101000000",
			1900+$year-$i+1))-1;
	}
	else {
		die "$me: Unknown time frame: \"$opt_t\"\n";
	}
}

# Shorten a byte count for display.
# Values above 9999*1024 are returned as whole megabytes ("<n>M", units of
# 1048576), values above 9999 as whole kilobytes ("<n>K", units of 1024),
# anything else is returned unchanged. Fractions are cut off, not rounded.
sub nice_number {
	# lexical copy - a global $n used to be overwritten here
	my($n) = shift;

	if ($n > 9999 * 1024) {
		$n = sprintf("%dM", $n/1048576);
	}
	elsif ($n > 9999) {
		$n = sprintf("%dK", $n/1024);
	}
	$n;
}

# format date in format YYYYMMDD-HHMMSS nicely.
# Returns "YYYY/MM/DD HH:MM:SS"; a string that does not start with that
# format is returned unchanged.
sub nice_date {
	# lexical copy - a global $s used to be overwritten here
	my($s) = shift;

	$s =~ s@^(\d\d\d\d)(\d\d)(\d\d)-(\d\d)(\d\d)(\d\d)@$1/$2/$3 $4:$5:$6@;
	$s;
}


# replace: summarize all selected files to one
#
# Builds a summary file (comment header, rule names in rule number order,
# $datdelim, one "packets bytes" line per rule) from the totals in %pkts
# and %bytes and stores it under the name of the newest selected file.
# The summary is first written to a temporary file whose name starts with
# "." (such names are skipped when the directory is scanned), then renamed
# over the newest file; only after that are the other source files
# removed, so a failed write no longer destroys the data.
# If the summary cannot be written, the source files are kept and the
# summary text is printed to STDOUT.
# Does nothing if no files were selected.
sub make_one_file_from_many {
	return if (! @files);
	my($fname) = $files[$#files]; # it's sorted
	my(@rules) = sort { $rulenames{$a} <=> $rulenames{$b} } keys %rulenames;
	my($rule, $file, $out, $err);

	# we "print" the file in memory
	my(@text);
	push(@text, sprintf("# ipac summary file generated %s\n", makemytime(time)));
	push(@text, sprintf("# source files: %s to %s\n", $files[0], $fname));
	push(@text, "#\n");

	foreach $rule (@rules) {
		push(@text, "$rule\n");
	}
	push(@text, "$datdelim\n");
	foreach $rule (@rules) {
		# rules without any counter line are written as 0
		push(@text, sprintf("%10d %15d\n",
			$pkts{$rule} || 0, $bytes{$rule} || 0));
	}

	# write the summary to a hidden temporary file first
	my($target) = "$datdir/$fname";
	my($tmpname) = "$datdir/.ipacsum.$$";
	if (open($out, '>', $tmpname)) {
		if ((print $out @text) && close($out)
				&& rename($tmpname, $target)) {
			# summary is in place - now remove the other sources
			foreach $file (@files) {
				next if ($file eq $fname);
				unlink("$datdir/$file")
					|| warn "$me: could not remove $datdir/$file: $!\n";
			}
			return;
		}
		$err = $!;
		close($out);
		unlink($tmpname);
	}
	else {
		$err = $!;
	}

	warn "$me: could not write $datdir/$fname ($err), "
		. "source files kept, writing to STDOUT\n";
	print STDOUT @text;
}

# Print a titled progression graph for every rule in @rules_sorted whose
# name matches $rule_regex; the drawing itself is done by single_graph.
sub out_graph {
	my @wanted = grep { /$rule_regex/ } @rules_sorted;

	for my $name (@wanted) {
		print "Graph for rule \"$name\"\n";
		&single_graph($name);
	}
}

# Print an ascii bar graph of the bytes counted for one rule over time.
# Argument: the rule name.
# The time from $starttime on is cut into slices of $graph_intervall
# seconds. The byte count each file holds for the rule is treated as
# spread evenly over the time between the previous file's time stamp and
# its own (both taken from the file names), and is divided among the
# slices that period touches. One line is printed per slice, with a bar
# scaled so that the largest slice fills $graph_width.
sub single_graph {
	my($rule) = shift;
	my($max, $i, $ifile, $s, $inter_st, $inter_end, $value, $iut, $oldiut);
	my(@values, @valtime, $bytes);

#	$max = $prog_bytes_max{$rule};
	$max=0;


	# $inter_st/$inter_end: limits of the slice being filled
	# $value: bytes collected for that slice so far
	# $iut/$oldiut: time stamps of the current and the previous file
	$inter_st = $starttime;
	$inter_end = $inter_st + $graph_intervall;
	$value=0;
	$iut=$starttime;
	for ($ifile=0; $ifile<=$#files; $ifile++) {
		DBXXX("new file: nr=$ifile, name=$files[$ifile]\n");
		# a file without data for this rule counts as 0 bytes
		if (defined($$prog_bytes{$rule}[$ifile])) {
			$bytes = $$prog_bytes{$rule}[$ifile];
		}
		else {
			$bytes = 0;
		}
		$oldiut = $iut;
		$iut = makeunixtime($files[$ifile]);
		# the file's period reaches beyond the current slice: close the
		# slice with its proportional share and move on to the next one
		while ($iut > $inter_end) {
			# number of bytes still in current time frame
			$i = int($bytes * ($inter_end-$oldiut)/($iut-$oldiut)+.5);
			DBXXX(sprintf("- add rest: \$bytes=$bytes, \$value=$value, \$inter_st=%s, \$inter_end=%s,\n", 
				makemytime($inter_st), makemytime($inter_end)));
			DBXXX(sprintf("\t\$iut=%s, \$oldiut=%s; so \$i=$i\n",
				makemytime($iut), makemytime($oldiut)));
			$value += $i;
			push(@values, $value);
			$max = $value if ($value > $max);
			$value=0;
			# the rest of the bytes is spread over the remaining time
			$oldiut=$inter_end;
			$inter_st = $inter_end;
			$inter_end += $graph_intervall;
			$bytes -= $i;
		}
		DBXXX("  add \$value += \$bytes: $value + $bytes ="); 
		# what is left belongs to the slice the file's time stamp is in
		$value += $bytes;
		DBXXX(" $value\n");
	}
	# the last, still open slice
	push(@values, $value);
	$max = $value if ($value > $max);
		
	# scale line: 0 at the left end of the bars, the maximum at the right
	printf "time          bytes 0%s%5s\n", string(" ", $graph_width-6), 
			nice_number($max);

	# one line per slice: its start time and a bar relative to $max
	$inter_st=$starttime;
	for ($i=0; $i<=$#values; $i++) {
		$s= $max ? string("*", 
			int(($values[$i]/$max)*$graph_width+.5)
			) : "";
		printf "%s %s\n", nice_date(makemytime($inter_st)), $s;
		$inter_st += $graph_intervall;
	}
}

# repeat string n times
# Arguments: the string, the repeat count.
# Returns the empty string for a count below 1 or a missing argument.
# (The former counting loop started at 1 and therefore produced only n-1
# copies.)
sub string {
	my($text, $count) = @_;

	return "" if (!defined($text) || !defined($count) || $count < 1);
	return $text x int($count);
}

# Print the help text (program version, option summary) to standard
# output and terminate the program with exit status 1.
sub usage {
	print <<EOF;
ipacsum V$version (C) 1997, 1998 Moritz Both; see file COPYING for license
Usage: $me [Options]
Generates summary of ip accounting
Options:
 -s time 	Start time, default: The epoch
 -e time	End time, default: now
Times are either absolute in format YYYYMMDD[hh[mm[ss]]] !Note year is 4 digit!
 or relative in format n{s|m|h|D|W|M|Y}... (=sec, min, hours, Days, Weeks...)
 -f regex	filter output by rule names on regular expression
 -g		print progression graph for every rule
 -i intervall	specify prograssion graph (-g) intervall; default 1 hour;
		 format: any combination of (number size) pairs, where size is
		 one of smhDWMY (sec,min,hours,days,weeks,months,years)
 -t time_frame	Start and End time in one; time_frame is one of these:
		 today, yesterday, "the day before yesterday",
		 "the day n days ago", "this week", "last week", 
		 "the week n weeks ago" and so on with (months, years).
 -h 		Print this help
 -r 		replace all summarized accounting files by one
  		 file name will be according to end time (= highest file name)
 -x		eXact values (dont use K or MByte values)
 -d dir		specify directory conaining the accounting data
EOF
	exit 1;
}

# Debug trace hook, called by single_graph with the text to trace.
# It does nothing as long as the print statement below is commented out.
sub DBXXX {
#	print @_;

}

# EOF
