#ifdef _WIN32
#define NOMINMAX
#endif
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <math.h>

// some functions handling be<->le conversion

// value : 0x12345678
// bigendian = network order = 68k/motorola/ppc order = hi-lo  : 12 34 56 78
// littleendian = intel order = lo-hi  : 78 56 34 12
// Reports whether the host stores multi-byte integers most significant
// byte first.
//
// The first byte of a 16 bit integer holding the value 1 is inspected through
// an unsigned char pointer: it is 0 on a big-endian host and 1 on a
// little-endian one.  ( reading the inactive member of a union, as was done
// before, is not defined behaviour in C++ )
inline bool i_am_bigendian()
{
    const uint16_t probe= 1;
    const unsigned char *first= reinterpret_cast<const unsigned char*>(&probe);
    return *first==0;
}
// Converts a 32 bit value whose in-memory bytes are in big-endian ( network )
// order to the host's native representation.
//
// The bytes are copied out with memcpy and reassembled with shifts, so the
// result is correct for either host byte order without a runtime endianness
// check and without reading an inactive union member.
inline uint32_t decode_be32(uint32_t netval)
{
    unsigned char b[sizeof(netval)];
    memcpy(b, &netval, sizeof(b));
    return ((uint32_t)b[0]<<24)
         | ((uint32_t)b[1]<<16)
         | ((uint32_t)b[2]<<8)
         |  (uint32_t)b[3];
}
// Converts a 16 bit value whose in-memory bytes are in big-endian ( network )
// order to the host's native representation.
//
// Works for either host byte order: the two bytes are copied out with memcpy
// and reassembled with a shift.
inline uint16_t decode_be16(uint16_t netval)
{
    unsigned char b[sizeof(netval)];
    memcpy(b, &netval, sizeof(b));
    return (uint16_t)(((unsigned)b[0]<<8) | (unsigned)b[1]);
}

#ifdef _WIN32
// On windows builds every use of snprintf in this file is redirected to _snprintf.
// NOTE(review): presumably needed for older msvc runtimes without snprintf - confirm it is still required.
#define snprintf _snprintf
#endif

// building with msvc requires a copy of stdint.h to be put in the include path
//     ( i put it in 'c99' )
//   cl rdlog.cpp -I c99 /EHsc ws2_32.lib
//
// with cygwin / gcc
//   g++ -O3 rdlog.cpp
//


//   compiler defs for optional functionality
// I_XREF		- print duplicates
// GLOBALINFO		- keep one global info list, instead of creating a new one per datafile
// _KEEP_BROKEN		- keep broken record in 27-13, or delete broken record

#include <vector>
#include <map>
#include <string>

// TODO: print station statistics

//
// --- timestamp  : every file lasts exactly 3600 seconds, except the first, which lasts 40 secs.
//          almost no gaps, except these:
// log-2006-12-27-12
//    !ts 4592632b -> 459263ed ( +194 )
//    !ts 45926433 -> 45926488 (  +85 )
// log-2006-12-28-15
//    !ts 4593df71 -> 4593dfef ( +126 )
// log-2006-12-28-17
//    !ts 4593fca0 -> 4593fe70 ( +464 )
// log-2006-12-28-21
//    !ts 45943875 -> 459439cc ( +343 )
// log-2006-12-29-00
//    !ts 45945fd6 -> 45946047 ( +113 )
//
// --- timestamp
//  00:=45
//  01: 92 .. 96
//  02: 00 .. ff
//  03: 00 .. ff
// --- reader_ip  ( one of 24 ip addresses )
//  04: 02 .. fe
//  05: 00 .. 08
//  06:=fe
//  07:=0a
//  -- unknown constant
//  08:=02
//  09:=00
//  0a:=79
//  0b:=41
//  --- reversed ip of reader
//  0c:=0a
//  0d:=fe
//  0e: 00 .. 08
//  0f: 02 .. fe
//
//  10:=10         - size of frame
//  11:=17         - protocol
//  12: 00,02      - flags  ( 0 : 10930798, 2 : 213435)
//  13: 00,55,aa,ff- strength ( 182874, 568413, 1167287, 9225659 )
//  --- sequence nr
//  14: 00 .. ff  ( some files 00 .. 02 )
//  15: 00 .. ff  ( some files smaller ranges )
//  16: 00 .. ?   ( upper limit file dependent )
//  17: 00 .. ff
//
// -> real data is:
//    * timestamp value from 0x45926017 .. 0x45967eef
//    * readerid ( 0..23 )
//    * flag     ( 0..1 )
//    * strength ( 0..3 )
//    * sequencenr ( 32bit )
//
// One raw record exactly as it is stored in the log file.
// The input buffer is cast to an array of these, so the member order and
// sizes are the file format ( see the byte offset table above ).
struct binframe {
	uint32_t timestamp;	// 00..03 : stored big-endian, read with decode_be32
	uint32_t src_ip;	// 04..07 : ip address of the reader

	uint32_t unknown;	// 08..0b : constant in all observed data ( 02 00 79 41 )

	uint32_t rev_ip;	// 0c..0f : ip of the reader again, bytes in reverse order

	uint8_t size;		// 10 : size of frame ( always 0x10 in observed data )
	uint8_t proto;		// 11 : protocol ( always 0x17 in observed data )
	uint8_t flags;		// 12 : 00 or 02 in observed data
	uint8_t strength;	// 13 : 00, 55, aa or ff in observed data

	uint16_t seqhi;		// 14..15 : sequence nr, high half, stored big-endian
	uint16_t seqlo;		// 16..17 : sequence nr, low half, stored big-endian
};

// Decoded, host byte order form of a binframe, holding only the fields that
// carry real information. Filled in by processdata.
struct eventinfo {
	uint32_t timestamp;		// decoded binframe timestamp
	uint16_t seqhi;			// decoded high half of the sequence nr
	uint16_t seqlo;			// decoded low half of the sequence nr
	unsigned char reader;		// station id as returned by get_station_id
	unsigned char strength;		// low 2 bits of the binframe strength byte
	unsigned char flag;		// binframe flags byte shifted right by one
	unsigned char __padding;	// always set to 0, not used in comparisons
};

// ordering used by info_map: lexicographic over timestamp, seqhi, seqlo,
// reader, strength and flag, in that order. __padding is ignored.
bool operator<(const eventinfo& a, const eventinfo& b)
{
	// the first field that differs decides the order
	if (a.timestamp!=b.timestamp) return a.timestamp<b.timestamp;
	if (a.seqhi!=b.seqhi)         return a.seqhi<b.seqhi;
	if (a.seqlo!=b.seqlo)         return a.seqlo<b.seqlo;
	if (a.reader!=b.reader)       return a.reader<b.reader;
	if (a.strength!=b.strength)   return a.strength<b.strength;
	return a.flag<b.flag;
}
// two events are equal when all fields except __padding are equal.
bool operator==(const eventinfo& a, const eventinfo& b)
{
	return a.timestamp==b.timestamp
	    && a.seqhi==b.seqhi
	    && a.seqlo==b.seqlo
	    && a.reader==b.reader
	    && a.strength==b.strength
	    && a.flag==b.flag;
}
// list of decoded events, in the order they were read
typedef std::vector<eventinfo> info_list;

#ifdef GLOBALINFO
// one list shared by all processed files, instead of a local list per file
info_list info;
#endif

// set by '-v' : print extra information
bool g_verbose=false;

// set by '-full' : dump every record as a line of text
bool g_fulltext=false;
// directory for the '-full' output files, empty means write to stdout
std::string fulldir;

// set by '-stats' : collect and print per field statistics
bool g_fieldstats=false;
// set by '-bytes' : collect and print per byte statistics
bool g_bytestats=false;

bool g_split_ranges=false;	// set by '-split' : write each range of line ids to its own file
double g_slope= 0;		// 0.396 when splitting ranges
double g_width= 0;		// 1 when splitting ranges, 256 when broadly analyzing data
// directory for the '-split' output files
std::string rangedir;

typedef std::map<uint32_t,int> uint_map;
typedef std::map<int32_t,int> int_map;

// maps an ip address to its station id, ids are handed out by get_station_id
uint_map stations;

// used by g_fieldstats
// these count how often each raw src_ip, flags and strength value was seen
uint_map srcxref;
uint_map flagxref;
uint_map strengthxref;


typedef std::map<eventinfo,int> info_map;

#ifdef I_XREF
// occurrence count per event, entries with a count above 1 are printed by main
// NOTE(review): the only code filling this map is currently commented out in processdata
info_map ixref;
#endif

// helper template for analyzing min/max/average/standard deviation of a value
//
// Samples are passed in with add(). The destructor prints one summary line to
// stdout when at least one sample was added, so results show up when the
// object goes out of scope.
//
// An instance constructed without a name takes the next value of a per-T
// counter as its id, and is labeled with that id modulo sizeof(struct binframe)
// ( the byte offset when an array with one instance per frame byte is used ).
//
// Intended for integral T: the values are printed as long long.
// The sum of squares is kept as a double, since squaring in T overflows
// quickly ( a squared 32 bit timestamp nearly fills an int64_t by itself ).
// NOTE: _sum is still accumulated in T, so avg() is only valid as long as the
// sum of all samples fits in T.
template<typename T>
class mima {
	static int g_id;	// id for the next unnamed instance
	const std::string _name;
	T _min;
	T _max;
	T _sum;
	double _sum2;	// sum of squares
	int _count;
	int _id;	// -1 for named instances
public:
	mima(const std::string& name="") : _name(name), _min(), _max(), _sum(), _sum2(0), _count(0), _id(-1)
	{
		if (name.empty()) _id=g_id++;
	}
	~mima() {
		char buf[32];
		if (_name.empty())
			snprintf(buf, sizeof(buf), "  %3d", (int)(_id%sizeof(struct binframe)));
		const char *label= _name.empty() ? buf : _name.c_str();
		if (_count) {
			// casting to long long prints the same text as the former %d / %lld pair,
			// without depending on how int32_t and int64_t are defined.
			printf("%-10s : %10lld .. %10lld  avg=%10lld  err=%10g\n", label,
					(long long)_min, (long long)_max, (long long)avg(), err());
		}
//	else
//		printf("%-10s : ---\n", label);

		// NOTE(review): _id==0 only holds for the very first unnamed instance of
		// this T, so the separator is printed once, not once per file.
		if (_id==0 && _name.empty() && g_bytestats)
			printf("---------\n");
	}
	// adds one sample
	void add(T x)
	{
		const double sq= (double)x*(double)x;
		if (_count++) {
			if (_min>x) _min=x;
			if (_max<x) _max=x;
			_sum += x;
			_sum2 += sq;
		}
		else {
			_min= x;
			_max= x;
			_sum= x;
			_sum2= sq;
		}
	}
	T min() { return _min; }
	T max() { return _max; }
	T count() { return _count; }
	// average of all samples, T() when there are none
	T avg() { return _count ? _sum/_count : T(); }

	// 'err' calculates the standard deviation
	// sqrt(sum[(x-avg)^2]/n) = sqrt(sum2/n-avg*avg) = sqrt(sum2*n-sum*sum)/n
	// returns 0 when there are no samples.
	double err()
	{
		if (_count==0)
			return 0;
		const double n= _count;
		const double s= (double)_sum;
		double v= _sum2*n-s*s;
		if (v<0)	// rounding can push a zero variance slightly below zero
			v= 0;
		return sqrt(v)/n;
	}
};
// global static id in mima template.
template<typename T> int mima<T>::g_id;

// used by g_fieldstats: one named mima per field of the binary frames.
template<typename T>
struct stats {
	// The mima destructors do the printing, and members are destroyed in
	// reverse order of declaration, so this order determines the output order.
	mima<T> timestamp;
	mima<T> src_ip;
	mima<T> unknown;
	mima<T> rev_ip;
	mima<T> size;
	mima<T> proto;
	mima<T> flags;
	mima<T> strength;
	mima<T> seqhi;
	mima<T> seqlo;

	// gives every mima the name of the field it tracks
	stats()
		: timestamp("timestamp")
		, src_ip("src_ip")
		, unknown("unknown")
		, rev_ip("rev_ip")
		, size("size")
		, proto("proto")
		, flags("flags")
		, strength("strength")
		, seqhi("seqhi")
		, seqlo("seqlo")
	{
	}
};

////////////////////////////////////////////////////////////////////////////
int get_station_id(uint32_t ip)
{
	std::pair<uint_map::iterator,bool> i= stations.insert(uint_map::value_type(ip,stations.size()));
	return (*i.first).second;
}

// Reads a text file with one hexadecimal station ip address per line, and
// registers the addresses with get_station_id, in file order.
//
// Lines that do not start with a hex number ( blank lines for instance ) are
// skipped; before they were registered as station address 0.
//
// Returns false, after printing the reason, when the file can not be opened.
bool read_station_list(const std::string& fn)
{
	FILE *f= fopen(fn.c_str(), "r");
	if (f==NULL) {
		perror(fn.c_str());
		return false;
	}
	char line[256];
	while (fgets(line, sizeof(line), f))
	{
		char *end= NULL;
		const unsigned long ip= strtoul(line, &end, 16);
		if (end==line)
			continue;	// nothing was parsed on this line
		get_station_id((uint32_t)ip);
	}
	fclose(f);
	if (g_verbose)
		printf("loaded %ld stations from %s\n", (long)stations.size(), fn.c_str());
	return true;
}
// maps a ( seqlo, timestamp ) pair to the id of the horizontal band it is in.
//  - with a slope : seqlo is first corrected by g_slope times the second
//    within the hour, then divided by g_width.
//  - without slope : seqlo divided by g_width, or seqlo itself when g_width is 1.
int lineid(int seqlo, int ts)
{
	if (g_slope) {
		const int second_in_hour= ts%3600;
		return floor(((double)seqlo-g_slope*second_in_hour)/g_width);
	}
	if (g_width==1)
		return seqlo;
	return floor(seqlo/g_width);
}

bool save_seq_range(const info_list& list, int low_id, int high_id, const std::string& fn)
{
	printf("saving %d - %d to %s\n", low_id, high_id, fn.c_str());
	FILE *f= fopen(fn.c_str(), "w+");
	if (f==NULL) {
		perror(fn.c_str());
		return false;
	}
	for (info_list::const_iterator i=list.begin() ; i!=list.end() ; ++i)
	{
		int lid= lineid((*i).seqlo, (*i).timestamp);
		if (lid>=low_id && lid<high_id)
			fprintf(f, "%10d %6d %6d %2d %d %d\n", (*i).timestamp, (*i).seqhi, (*i).seqlo, (*i).reader, (*i).flag, (*i).strength);
	}
	fclose(f);
	return true;
}

// this converts and analyses an array of binary frames
//
//  fn        : name of the file the frames came from, used in messages and
//              to derive the names of the output files.
//  framelist : the raw frames
//  nframes   : number of frames in framelist
//
// Depending on the global options this
//  - appends a decoded eventinfo for every frame to 'info'
//  - writes every event as text to stdout or to fulldir/<basename> ( g_fulltext )
//  - collects per byte and per field statistics ( g_bytestats, g_fieldstats )
//  - writes every range of consecutive line ids to its own file in rangedir
//    ( g_split_ranges )
//
// returns false only when the g_fulltext output file can not be opened.
bool processdata(const std::string& fn, const struct binframe*framelist, size_t nframes)
{
	// used by g_fieldstats
	stats<int64_t> fieldstats;

	// used by g_bytestats
	mima<int32_t> bytestats[sizeof(struct binframe)];

	int_map idhistogram;		// used for g_split_ranges

#ifndef GLOBALINFO
	info_list info;
#endif
	// strip the directory part of the name.
	// a name without any path separator is used as is: calling substr(npos)
	// on it, as was done before, throws std::out_of_range.
	const size_t slashidx= fn.find_last_of("/\\");
	const std::string basename= (slashidx==std::string::npos) ? fn : fn.substr(slashidx+1);

	FILE *of= NULL;
	if (g_fulltext) {
		if (fulldir.empty()) {
			of= stdout;
		}
		else {
			std::string ofn= fulldir+"/"+basename;
			of= fopen(ofn.c_str(), "w");
			if (of==NULL) {
				perror(ofn.c_str());
				return false;
			}
		}
	}

  	info.reserve(info.size()+nframes);
	for (size_t i=0 ; i<nframes ; i++)
	{
		info.resize(info.size()+1);
		eventinfo *pinfo= &(info.back());

		pinfo->timestamp=decode_be32(framelist[i].timestamp);
		pinfo->seqhi    =decode_be16(framelist[i].seqhi    );
		pinfo->seqlo    =decode_be16(framelist[i].seqlo    );

		pinfo->reader   = get_station_id(framelist[i].src_ip);

		pinfo->flag     = framelist[i].flags >> 1;
		pinfo->strength = framelist[i].strength&3;
		pinfo->__padding= 0;

		if (g_fulltext) {
			fprintf(of, "%10d %6d %6d %2d %d %d\n", pinfo->timestamp, pinfo->seqhi, pinfo->seqlo, pinfo->reader, pinfo->flag, pinfo->strength);
		}

		if (g_split_ranges) {
			idhistogram[lineid(pinfo->seqlo, pinfo->timestamp)]++;
		}

/*
		if (!info.empty()) {
			if (info[info.size()-2]==info.back() || (info.size()>2 && info[info.size()-3]==info.back())) {
				info.resize(info.size()-1);
			}
			else {
#ifdef I_XREF
				ixref[*pinfo]++;
#endif
			}
		}
*/

		if (g_bytestats) {
			const unsigned char *pb= (const unsigned char *)&framelist[i];
			for (size_t ib=0 ; ib<sizeof(struct binframe) ; ib++)
				bytestats[ib].add(pb[ib]);
		}


		if (g_fieldstats) {
			fieldstats.timestamp.add(decode_be32(framelist[i].timestamp));

			if (g_verbose && i) {
				// dump timestamps that take a step larger than 2
				uint32_t t0= decode_be32(framelist[i-1].timestamp);
				uint32_t t1= decode_be32(framelist[i].timestamp);
				if (t0!=t1 && (t0+1!=t1) && (t0+2!=t1)) {
					printf("%08lx : !ts %08x -> %08x ( %+d )\n", (unsigned long)i, t0, t1, (int)(t1-t0));
				}
			}
			fieldstats.src_ip   .add(decode_be32(framelist[i].src_ip   ));
			srcxref[framelist[i].src_ip]++;
			fieldstats.unknown  .add(     (framelist[i].unknown  ));
			fieldstats.rev_ip   .add(     (framelist[i].rev_ip   ));
			fieldstats.size     .add(     (framelist[i].size     ));
			fieldstats.proto    .add(     (framelist[i].proto    ));
			fieldstats.flags    .add(     (framelist[i].flags    ));
			flagxref[framelist[i].flags]++;
			fieldstats.strength .add(     (framelist[i].strength ));
			strengthxref[framelist[i].strength]++;
			fieldstats.seqhi    .add(decode_be16(framelist[i].seqhi    ));
			fieldstats.seqlo    .add(decode_be16(framelist[i].seqlo    ));

			// verify that the reverse ip is always the reverse of 'src_ip'
			if (decode_be32(framelist[i].src_ip)!=framelist[i].rev_ip) {
				printf("%s : %8ld : %08x != %08x\n", fn.c_str(), (long)i, decode_be32(framelist[i].src_ip), framelist[i].rev_ip);
			}
		}
	}
	if (of && of!=stdout)
		fclose(of);
	if (g_verbose) printf("processed %s - %8ld records\n", fn.c_str(), (long)nframes);

	if (g_split_ranges) {
		if (g_verbose) {
			for (int_map::const_iterator i=idhistogram.begin() ; i!=idhistogram.end() ; ++i)
				printf(" %d:%d", (*i).first, (*i).second);
			printf("\n");
		}

		// Every key in the histogram has a count of at least one, so a range
		// of used line ids is a run of consecutive keys. Walking the map in
		// key order finds these runs, also copes with an empty histogram, and
		// does not add zero entries for the unused ids in between.
		int_map::const_iterator it= idhistogram.begin();
		while (it!=idhistogram.end())
		{
			const int firstnonzero= (*it).first;
			int lastnonzero= firstnonzero;	// becomes one past the last id of the run
			do {
				lastnonzero= (*it).first+1;
				++it;
			} while (it!=idhistogram.end() && (*it).first==lastnonzero);

			char buf[64];
			snprintf(buf, sizeof(buf), "%+d%+d", firstnonzero, lastnonzero);
			save_seq_range(info, firstnonzero, lastnonzero, rangedir+"/"+basename+buf);
		}
	}

	// NOTE: the stats above automatically dump their results to stdout when they go out of scope.
	return true;
}
// returns the size in bytes of the open file 'inf', and leaves the file
// positioned at the start.
// returns 0, after printing the reason, when seeking to or reading the end
// position fails.
size_t filesize(FILE *inf)
{
    if (fseek(inf, 0, SEEK_END)!=0) {
        perror("seek-end");
        return 0;
    }

    const long endpos= ftell(inf);
    if (endpos==-1L) {
        perror("tell-end");
        return 0;
    }

    // back to the start, so the caller can read the file
    fseek(inf, 0, SEEK_SET);
    return (size_t)endpos;
}
// Reads the entire file 'fn' in memory, and hands its frames to processdata.
//
// The file size must be a multiple of sizeof(struct binframe), with one
// exception: a file containing the known truncated record at offset 0xc5000
// ( which leaves a remainder of 8 bytes ) is repaired first.
//
// returns false, after printing the reason, when the file can not be opened
// or read, is empty, or has an unsupported size.
// NOTE: the result of processdata is not passed on to the caller.
bool processfile(const std::string& fn)
{
	// the truncated record is 8 bytes long and ends at this file offset.
	const size_t broken_ofs= 0xc5000;

	// read entire file at once
	FILE *inf=fopen(fn.c_str(), "rb");
	if (inf==NULL) {
		perror(fn.c_str());
		return false;
	}

	size_t fsize= filesize(inf);
	if (fsize==0) {
        fclose(inf);
		perror(fn.c_str());
		fprintf(stderr, "getting size");
		return false;
	}
	size_t asize= fsize;
	bool fix_corrupted_record=false;
	const size_t remainder= fsize%sizeof(struct binframe);
	if (remainder==8 && fsize>=broken_ofs) {
		// the repair below moves everything from broken_ofs onward, so it
		// is only attempted when the file actually extends that far.
		fix_corrupted_record=true;
		asize += 16;
	}
	else if (remainder) {
        fclose(inf);
		fprintf(stderr, "fsize not multiple of %ld: %s\n", (long)sizeof(struct binframe), fn.c_str());
		return false;
	}


	// the vector zero fills the buffer, and releases it on every return path.
	std::vector<unsigned char> buf(asize);
	size_t n= fread(&buf[0], fsize, 1, inf);
	fclose(inf);
	if (n!=1) {
		fprintf(stderr, "error reading from file %s\n", fn.c_str());
		return false;
	}

	if (fix_corrupted_record) {
		if (g_verbose) printf("fixed %s\n", fn.c_str());
#ifdef _KEEP_BROKEN
		// only useful when interested in this specific broken record.
		// make room for the 16 missing bytes
		memmove(&buf[0]+broken_ofs+0x10, &buf[0]+broken_ofs, fsize-broken_ofs);
		// copy data from earlier same ip
		memcpy(&buf[0]+broken_ofs, &buf[0]+broken_ofs-2*sizeof(struct binframe), 0x10);
#else
		// drop the broken record, by moving the rest of the data over it
		memmove(&buf[0]+broken_ofs+0x10-sizeof(struct binframe), &buf[0]+broken_ofs, fsize-broken_ofs);
		asize-=sizeof(struct binframe);
#endif
	}
	processdata(fn, (const struct binframe*)&buf[0], asize/sizeof(struct binframe));

	return true;
}
// prints the commandline help to stderr
void usage()
{
	static const char *const helptext[]= {
		"Usage: rdlog [options] [file(s)]\n",
		"\t-full [DIR] : dump binary logs as ascii to stdout, or to DIR/filename\n",
		"\t-split DIR  : split file in chunks disjunct horizontal blocks\n",
		"\t-slope VAL  : rotate graph by 'VAL', before splitting ( for instance 0.396 )\n",
		"\t-width VAL  : horizontal splitting resolution\n",
		"\t-bytes      : dump per byte statistics\n",
		"\t-stats      : dump per field statistics\n",
		"\t-stations FN: specify filename containing station ip addresses\n",
		"\t-v          : print extra info, like notes about file corruption\n",
	};
	for (size_t i=0 ; i<sizeof(helptext)/sizeof(helptext[0]) ; i++)
		fputs(helptext[i], stderr);
}
// true when argv[i] is followed by an argument that does not start with '-',
// so it can be used as the value of the option at argv[i].
static bool option_has_value(int argc, char **argv, int i)
{
	return i+1<argc && argv[i+1][0]!='-';
}

// Handles the arguments from left to right: options change the global
// settings, every other argument is processed as a log file with the settings
// in effect at that point.
// After all files are done the cross reference tables are printed.
//
// returns 1 on a usage error, or when a file could not be processed.
int main(int argc, char **argv)
{
	for (int i=1 ; i<argc ; i++) {
		if (strcmp(argv[i], "-full")==0) {
			g_fulltext= true;
			// the directory is optional, a single '-' explicitly selects stdout
			if (i+1<argc && !(argv[i+1][0]=='-' && argv[i+1][1])) {
				fulldir= argv[++i];
				if (fulldir=="-")
					fulldir.erase();
			}
			// else: output to stdout
		}
		else if (strcmp(argv[i], "-stats")==0) {
			g_fieldstats= true;
		}
		else if (strcmp(argv[i], "-bytes")==0) {
			g_bytestats= true;
		}
		else if (strcmp(argv[i], "-split")==0) {
			if (!option_has_value(argc, argv, i)) {
				usage();
				return 1;
			}
			g_split_ranges= true;
			rangedir= argv[++i];
		}
		else if (strcmp(argv[i], "-slope")==0) {
			if (!option_has_value(argc, argv, i)) {
				usage();
				return 1;
			}
			g_slope= strtod(argv[++i],0);
		}
		else if (strcmp(argv[i], "-width")==0) {
			if (!option_has_value(argc, argv, i)) {
				usage();
				return 1;
			}
			g_width= strtol(argv[++i], 0, 0);
		}
		else if (strcmp(argv[i], "-stations")==0) {
			if (!option_has_value(argc, argv, i)) {
				usage();
				return 1;
			}
			if (!read_station_list(argv[++i]))
				return 1;
		}
		else if (strncmp(argv[i], "-v", 2)==0) {
			// note: matches every argument starting with '-v'
			g_verbose= true;
		}
		else if (argv[i][0]=='-') {
			usage();
			return 1;
		}
		else {
			// default for width, when not explicitly specified.
			if (g_width==0)
				g_width=g_slope ? 1 : 256;
			if (!processfile(argv[i]))
				return 1;
		}
	}
#ifdef GLOBALINFO
	// size() is a size_t, which does not match %d without the cast
	if (g_verbose) printf("total: %d\n", (int)info.size());
#endif
	if (g_fieldstats) {
		printf("src ip : \n");
		for (uint_map::iterator i=srcxref.begin() ; i!=srcxref.end() ; ++i)
			printf("%08x : %8d\n", (*i).first, (*i).second);
		printf("flags : \n");
		for (uint_map::iterator i=flagxref.begin() ; i!=flagxref.end() ; ++i)
			printf("%08x : %8d\n", (*i).first, (*i).second);
		printf("strength : \n");
		for (uint_map::iterator i=strengthxref.begin() ; i!=strengthxref.end() ; ++i)
			printf("%08x : %8d\n", (*i).first, (*i).second);
	}

#ifdef I_XREF
	printf("dups : \n");
	for (info_map::iterator i=ixref.begin() ; i!=ixref.end() ; ++i)
		if ((*i).second>1)
		// the timestamp is a uint32_t, which does not match %lx without the cast
		printf("%08lx-%04x-%04x-%02x-%02x-%02x : %8d\n", 
				(unsigned long)(*i).first.timestamp, 
				(*i).first.seqhi    , 
				(*i).first.seqlo    , 
				(*i).first.reader   , 
				(*i).first.strength , 
				(*i).first.flag     , 
				(*i).second);
#endif
	return 0;
}

