#!/usr/bin/perl -w
# extracting data from an easy-archive database.
use strict;

use Dumpvalue;
my $d= new Dumpvalue(veryCompact=>1);
$|=1;

##############################################################
package File;

use strict;
use IO::File;

sub new {
    my $class= shift;
    my $filename= shift;
    return bless { filename=>$filename }, $class;
}

sub open {
    my $self= shift;
    my $filename= $self->{filename};
    my $fh= IO::File->new();
    $fh->open($filename) or die "File::open $filename: $!\n";
    binmode($fh);

    $self->{fh}= $fh;
}

sub eof {
    my $self= shift;
    my $fh= $self->{fh};

    return $fh->eof;
}
sub close {
    my $self= shift;
    my $fh= $self->{fh};

    $fh->close();
    delete $self->{fh};
}

##############################################################
package EISFile;
our @ISA= qw(File);

sub ReadRecord {
    my $self= shift;
    my $fh= $self->{fh};

    my $data;
    $fh->read($data, 1024) or die "EISFile::ReadRecord read(1024): $!\n";

    my %record;

    (
        $record{int1},   # v
        $record{int2},   # v
        $record{int3},   # V       reclength
        $record{str1},   # A14
        $record{int4},   # V       timestamp
        $record{int5},   # V       timestamp
        $record{str2},   # A256
        $record{int6},   # v
        $record{int7},   # v
        $record{str3},   # A256
        $record{int8},   # V       timestamp
        $record{int9},   # V
        $record{int10},  # V
        $record{int11},  # V
        $record{str4},   # A13
        $record{str5},   # A449
    )= unpack("v v V A14 V V A256 v v A256 V V V V A13 A449", $data);

    return \%record;
}

sub DumpRecord {
    my $self= shift;
    my $rec= shift;

    my @e;
    if (@e) { print "ERRORS: @e\n"; }
}

##############################################################
package ENFFile;
our @ISA= qw(File);

sub ReadRecord {
    my $self= shift;
    my $fh= $self->{fh};

    my $data;
    $fh->read($data, 896) or die "ENFFile::ReadRecord read(896): $!\n";

    my %record;

    my $longdata;
    (
        $record{str1},   # A12
        $record{str2},   # A5
        $record{int3},   # V      # always 0
        $longdata,       # a866
        $record{word4},  # v
        $record{rest2},  # a*     # always 7 NUL bytes
    )= unpack("A12 A5 V a866 v a*", $data);

    $record{rest0}= substr($longdata, 0, $record{word4});
    $record{rest1}= substr($longdata, $record{word4});     # always 0

    if ($record{str2} =~ /CIN\d/) {
        require Crypt::DES;

# this des-key was recovered from one of the dll's - I don't remember which.
        my $crypt= new Crypt::DES(pack("H*", "07a7137045da2a16"));

        my $completeblocks= int($record{word4}/8);
        my $partialblock= $record{word4}%8;

        $record{decrypted}= join "", map {$crypt->decrypt(substr($longdata, $_*8, 8));} (0..$completeblocks-1);
        $record{decrypted} .= xordecrypt(substr($longdata, $completeblocks*8, $partialblock));
    }
    return \%record;
}

sub xordecrypt {
    my $ciphertext= shift;
    my $plaintext= "";
    for (my $i=0 ; $i<length($ciphertext) ; $i++) {
        my $c= ord(substr($ciphertext, $i, 1)) ^ 0xaa;
        $plaintext .= chr(($c>>4) | ($c<<4)&0xff);
    }
    return $plaintext;
}

sub DumpRecord {
    my $self= shift;
    my $rec= shift;

    my @e;
    if ($rec->{int3} != 0 ) { push(@e, sprintf("int3=%08x", $rec->{int3})); }
    if ($rec->{rest1} !~ /^\0*$/) { push(@e, sprintf("rest1=%s",unpack("H*", $rec->{rest1}))); }
    if ($rec->{rest2} !~ /^\0*$/) { push(@e, sprintf("rest2=%s",unpack("H*", $rec->{rest2}))); }
    if (defined $rec->{decrypted}) {
        printf("%-12s %-5s %04lx\n", $rec->{str1}, $rec->{str2}, $rec->{word4});
        print "$rec->{decrypted}\n";
    }
    else {
        printf("%-12s %-5s %04lx %s\n", $rec->{str1}, $rec->{str2}, $rec->{word4}, unpack("H*", $rec->{rest0}));
    }
    if (@e) { print "ERRORS: @e\n"; }
}

##############################################################
package EDBFile;
our @ISA= qw(File);

sub ReadRecord {
    my $self= shift;
    my $fh= $self->{fh};

    my $data;
    $fh->read($data, 256) or die "EDBFile::ReadRecord read(256): $!\n";

    my %record;

    (
        $record{str1},    # A9   
        $record{str2},    # A4
        $record{str3},    # A81
        $record{str4},    # A12
        $record{int5},    # V
        $record{int6},   # V
        $record{int7},   # V
        $record{word8},  # v
        $record{word9},  # v
        $record{int10},   # V
        $record{int11},   # V
        $record{int11a},   # v
        $record{int11b},   # v
        $record{rest12},  # a8
        $record{str13},   # A80  - 'TITEL' most of the time
        $record{rest14},  # a*  - 46 bytes ?
    )= unpack("A9 A4 A81 A12 V V V v v V V v v a8 A80 a*", $data);

    return \%record;
}

sub DumpRecord {
    my $self= shift;
    my $rec= shift;

    my @e;
    if (unpack("H*", $rec->{rest12}) ne "0000ffffffffffff") { push(@e, sprintf("rest12=%s", unpack("H*", $rec->{rest12}))); }

    if ($rec->{str13} ne "TITEL") { push(@e, sprintf("str13=%s", $rec->{str13})); }
    if ($rec->{rest14} !~ /^\0*$/) { push(@e, sprintf("rest14=%s", unpack("H*", $rec->{rest14}))); }

    printf("%-8s %-3s %-11s %08x %08x %08x %04x %04x %08x %08x %d %d %s\n",
            $rec->{str1},  $rec->{str2},  $rec->{str4},
            $rec->{int5},  $rec->{int6},  $rec->{int7},
            $rec->{word8}, $rec->{word9}, $rec->{int10}, $rec->{int11}, $rec->{int11a}, $rec->{int11b},  $rec->{str3});
    if (@e) { print "ERRORS: @e\n"; }
}

##############################################################
package EFDFile;
our @ISA= qw(File);

sub ReadRecord {
    my $self= shift;
    my $fh= $self->{fh};

    my $data;
    $fh->read($data, 65) or die "EFDFile::ReadRecord read(65): $!\n";

    my %record;

    (
        $record{fieldid},    # v
        $record{fieldname},  # A20
        $record{word1},      # v
        $record{fieldtype},  # V
        $record{word2},      # v
        $record{rest},       # a*
    )= unpack("v A20 v V v a*", $data);

    return \%record;
}

sub DumpRecord {
    my $self= shift;
    my $rec= shift;

    my @e;
    if ($rec->{rest} !~ /^\0*$/) { push(@e, sprintf("rest=%s", unpack("H*", $rec->{rest}))); }
    printf("%04x %4d %1x %04x %04x: %s\n", $rec->{fieldid}, $rec->{fieldid}, 
        $rec->{fieldtype}, $rec->{word1}, $rec->{word2},
        $rec->{fieldname});

    if (@e) { print "ERRORS: @e\n"; }
}

##############################################################
package ECFFile;
our @ISA= qw(File);

use strict;

use POSIX;
use Time::local;

sub ReadRecordHeader {
    my $self= shift;
    my $fh= $self->{fh};

    my $data;
    $fh->read($data, 192) or die "ECFFile::ReadRecordHeader read(192): $!\n";

    # lowercase 'v' = 16 bit int
    # uppercase 'V' = 32 bit int
    my %rec;
    (   $rec{docnr},           # A9                  #101
        $rec{docver},          # A5                  #102
        $rec{recsize},         # V
        $rec{nul1},            # v
        $rec{archivedate},     # V                   #103
        $rec{modifydate},      # V                   #104
        $rec{word1},           # v  - always 4
        $rec{word2},           # v  - always 3
        $rec{rest1},           # a8 - always 0000ffffffffffff
        $rec{titel},           # A80                 #110
        $rec{nrfields},        # v                   #105
        $rec{nrfixedfields},   # v                   #106
        $rec{nrvariablefields},# v                   #107
        $rec{long1},           # V  - always 00000000
        $rec{word3},           # v
        $rec{rest2},           # a* - always 60 x 0x00
    ) = unpack("A9 A5 V v V V v v a8 A80 v v v V v a*", $data);

    if ($rec{recsize}==0) {
        %rec=();

        ( $rec{skipsize}, $rec{skipped} )= unpack("V a*", $data);
        if ($rec{skipsize} > 188) {
            my $skipdata;
            $fh->read($skipdata, $rec{skipsize}-192+4) or die "ECFFile::ReadRecordHeader skip: read($rec{skipsize}-192+4): $!\n";
            $rec{skipped} .= $skipdata;
        }
        elsif ($rec{skipsize} < 188) {
            die "ECFFile::ReadRecordHeader skip: too small : $rec{skipsize} at fileoffset ", $fh->tell();
        }
    }

#print "\n\n$rec{docnr}.$rec{docver} : ";
#print POSIX::strftime("%c", localtime $rec{archivedate}), "  ";
#print POSIX::strftime("%c", localtime $rec{modifydate}), "\n";
    return \%rec;
}

# 
sub ReadRecord {
    my $self= shift;
    my $fh= $self->{fh};

    my $startofs= $fh->tell();

    my $header= $self->ReadRecordHeader();
    $header->{startofs}= $startofs;

    if ($header->{skipsize}) {
        return $header;
    }

    my @fieldsizes;
    my @fields;
    while ($fh->tell()<$startofs+$header->{recsize}) {
        my $field= $self->ReadField();
        push @fields, $field;
        push @fieldsizes, $field->{datasize};
    }
    $header->{realfieldcount}= scalar @fields;
    $header->{fieldsizes}= \@fieldsizes;

    return {header=>$header, fields=>\@fields};
}

sub ReadField {
    my $self= shift;
    my $fh= $self->{fh};

    my $startofs= $fh->tell();

    my $fieldsize= $self->ReadInt32();

    my $data;
    $fh->read($data, $fieldsize-4) or die "ECFFile::ReadField read($fieldsize-4): $!\n";

    my $fielddata;

    my %fieldinfo;
    $fieldinfo{datasize}= $fieldsize;
    $fieldinfo{startofs}= $startofs;

    (   $fieldinfo{datalen},    # V
        $fieldinfo{fieldid},    # v
        $fieldinfo{fieldtype1a},# C
        $fieldinfo{fieldtype1b},# C
        $fieldinfo{fieldtype2}, # v
        $fielddata              # a*
    ) = unpack("V v C C v a*", $data);
    my $descriptionlen= $fieldsize - $fieldinfo{datalen}-0x1b;

    if ($fieldinfo{fieldtype1b} == 0) {       # field offset info
        my @fieldoffsets= unpack("V*", $fielddata);
        $fieldinfo{offsets}= \@fieldoffsets;

        #print "\n", sprintf("%08x : ", $startofs), join " ", map {sprintf("%08x", $_);} @fieldoffsets;
#    } elsif ($fieldinfo{fieldtype2} == 1) {  # tiff image
#        $fieldinfo{description}= substr($descript_data, 0, $descriptionlen);
    } else {  # type 1 and 2

        my $descript_data;
        (
            $fieldinfo{sourcefile}, # A9
            $fieldinfo{sourceext},  # A4
            $descript_data          # a*
        )= unpack("A9 A4 a*", $fielddata);

        ($fieldinfo{description}= substr($descript_data, 0, $descriptionlen)) =~ s/\0*$//;

        $fieldinfo{data}= substr($descript_data, $descriptionlen);
        if (length($fieldinfo{data}) != $fieldinfo{datalen}) {
            $d->dumpValue(\%fieldinfo);
            die "error : fieldsize mismatch dlen=$descriptionlen flen=".length($fieldinfo{data})."\n";
        }
    }

    return \%fieldinfo;
}
sub ReadInt32 {
    my $self= shift;
    my $fh= $self->{fh};

    my $data;
    $fh->read($data, 4) or die "ECFFile::ReadInt32 read(4): $!\n";
    return unpack("V", $data);
}

sub DumpRecord {
    my $self= shift;
    my $rec= shift;

    my @e;

    if ($rec->{skipsize}) {
        if ($rec->{skipped} !~ /^\0*$/) { push(@e, "skipped data not all 0"); }
        printf("%08x: skipping %08x bytes\n", $rec->{startofs}, $rec->{skipsize});
    }
    else {
        my $hdr= $rec->{header};

        # report anything out of the ordinary:
        # nul1 is sometimes 4
        if ($hdr->{nul1}!=0) { push(@e, sprintf("nul1=%04x", $hdr->{nul1})); }

        # word1 and word2 are occasionally both 1, or both 0.
        if ($hdr->{word1}!=4 && $hdr->{word1}!=1) { push(@e, sprintf("word1=%04x", $hdr->{word1})); }
        if ($hdr->{word2}!=3 && $hdr->{word2}!=1) { push(@e, sprintf("word2=%04x", $hdr->{word2})); }

        # rest1 sometimes is all 0.
        if ($hdr->{rest1} ne pack("H*", "0000ffffffffffff")
                && $hdr->{rest1} !~ /^\0*$/) { push(@e, sprintf("rest1=%s", unpack("H*", $hdr->{rest1}))); }
        if ($hdr->{long1}!=0) { push(@e, sprintf("long1=%04x", $hdr->{long1})); }
        if ($hdr->{rest2} !~ /^\0*$/) { push(@e, sprintf("rest2=%s", unpack("H*", $hdr->{rest2}))); }

        printf("%08x: %1s %1s %1x %1x %04x\n", $hdr->{startofs}, $hdr->{docnr}, $hdr->{docver}, $hdr->{word1}, $hdr->{word2}, $hdr->{word3});
    }

    if (@e) { print "ERRORS: @e\n"; }

    for my $field (@{$rec->{fields}}) {
        DumpField($field);
    }
}

sub DumpField {
    my $field= shift;

    printf("   %4d %1d %1d %1d %1s.%1s : %1s\n", $field->{fieldid},
            $field->{fieldtype1a}, $field->{fieldtype1b}, $field->{fieldtype2},
            $field->{sourcefile} || "-", $field->{sourceext} || "-",
            $field->{description} || "-");
}
##############################################################
##############################################################
package main;

use strict;
use IO::File;
use Getopt::Long;

my $usage= "
Usage:
$0 { ecffile | enffile | edbfile | efdfile }
    will dump the contents of the file as ascii

$0 ecffile outputdir
    will convert the ecf file to a index.out file + image files.

$0 btrfile binfile
    will convert the btrieve dump file to a plain binary file.
";

# ... no options yet.
my $filetype= undef;
my $startoffset= 0;
my $length= undef;
GetOptions(
        "type=s" => \$filetype,
        "length=i" => \$length,
        "startoffset=i" => \$startoffset) 
    or die "invalid option\n\n$usage";

my $dbfile= shift || die "need dbfile\n\n$usage";

print "start at ", POSIX::strftime("%c", localtime time()), "\n";

if (!defined $filetype) {
    ($filetype)= (lc($dbfile) =~ /\.(\w+)$/);
}

my $g_corrected= 0;

if (@ARGV) {
    if ($filetype eq "ecf") {
        my $imageroot= shift;

        my $indexfile= "$imageroot/index.out";
        ConvertECF($dbfile, $imageroot, $indexfile);
    }
    elsif ($filetype eq "btr") {
        my $outputfile= shift;
        ConvertBTR($dbfile, $outputfile);
    }
    elsif ($filetype eq "enf") {
        my $outputfile= shift;
        ConvertENF($dbfile, $outputfile);
    }
    else {
        die "cannot convert $dbfile\n";
    }
}
else {
    DumpFile($dbfile, $filetype, $startoffset, $length);
}

print "finish at ", POSIX::strftime("%c", localtime time()), "\n";

exit(0);

sub DumpFile {
    my $dbfile= shift;
    my $filetype= shift;
    my $startoffset= shift;
    my $length= shift;

    my $class= uc($filetype)."File";

    my $file= $class->new($dbfile);
    $file->open();
    if (defined $startoffset) {
        $file->{fh}->seek($startoffset, 0);
    }

    while (!$file->eof() && (!defined $length || $file->{fh}->tell()-($startoffset||0)<$length))
    {
        my $rec= $file->ReadRecord();

        $file->DumpRecord($rec);
    }
    $file->close();
}

sub ConvertECF {
    my $dbfile= shift;
    my $imageroot= shift;
    my $indexfile= shift;

    $g_corrected= 0;

    my $ecf= ECFFile->new($dbfile);
    $ecf->open();

    my $indexfh= IO::File->new();
    $indexfh->open($indexfile, ">") or die "ConvertECF open >$indexfile: $!\n";

    while (!$ecf->eof())
    {
        my $rec= $ecf->ReadRecord();

        if ($rec->{header} && $rec->{header}{titel} && $rec->{fields}) {
            WriteRecord($rec, $indexfh, $imageroot);
        }
        # else empty record
    }

    $indexfh->close();
    $ecf->close();

    if ($g_corrected) {
        print "corrected $g_corrected records\n";
    }
}


sub WriteRecord {
    my $rec= shift;
    my $indexfh= shift;
    my $imageroot= shift;

    my %values;
    GetHeaderFields(\%values, $rec->{header});

    my $corrected= 0;       # detects corrupt data field.

    my $datafieldid= 1001;
    my $imgfieldid= 1;
    for my $field (@{$rec->{fields}}) {
        if ($field->{fieldtype1b}==1) {     # data record
            if ($datafieldid != $field->{fieldid}) {
                $corrected= 1;
            }
            ($values{$datafieldid}= $field->{data}) =~ s/\0*$//;

            $datafieldid++;
        }
        elsif ($field->{fieldtype1b}==2) {
            if ($imgfieldid+2000 != $field->{fieldid}) {
                $corrected= 1;
            }
            my $imgfilename= MakeFileName($rec->{header}{docnr}, $rec->{header}{docver},
                        $imgfieldid, $field->{sourceext}, $imageroot);

            $values{$imgfieldid+2000}= $imgfilename;
            saveimage($field->{data}, "$imageroot/$imgfilename");

            $imgfieldid++;
        }
    }
    $indexfh->print( "\@IMPHDR,^$rec->{header}{titel}^,", join(",", sort {$a<=>$b} keys %values), "\n");
    $indexfh->print( join(",", map { "^$values{$_}^" } sort {$a<=>$b} keys %values), "\n");

    if ($corrected) {
        $g_corrected++;
    }
}

sub MakeFileName {
    my ($docnr, $docver, $imgid, $ext, $root) = @_;

    my $filename= sprintf("%s-%s-%03d.%s", $docnr, $docver, $imgid, $ext);
    if (-e "$root/$filename") {
        for my $letter ('a' .. 'z') {
            $filename= sprintf("%s-%s-%s%03d.%s", $docnr, $docver, $letter, $imgid, $ext);
            if (! -e "$root/$filename") {
                return $filename;
            }
        }
        print "WARNING could not generate a unique name for $filename\n";
    }
    return $filename;
}

sub GetHeaderFields {
    my ($values, $header)= @_;

    $values->{101}= $header->{docnr};
    $values->{102}= $header->{docver};
    $values->{103}= datestr($header->{archivedate});
    $values->{104}= datestr($header->{modifydate});
}
sub datestr {
    return POSIX::strftime("%Y-%m-%d %H:%M:%S", localtime shift);
}
sub saveimage {
    my $data= shift;
    my $filename= shift;

    if (-e $filename) { warn "overwriting $filename\n"; }

    my $fh= IO::File->new();
    $fh->open($filename, ">") or die "saveimage open >$filename: $!\n";
    binmode($fh);
    $fh->print($data);
    $fh->close();
}

sub ConvertBTR {
    my ($dbfile, $outputfile)= @_;
    my $in= IO::File->new();
    $in->open($dbfile, "<") or die "ConvertBTR open <$dbfile: $!\n";
    binmode($in);
    my $out= IO::File->new();
    $out->open($outputfile, ">") or die "ConvertBTR open >$outputfile: $!\n";
    binmode($out);

    my $fixedlen= undef;

    while (! $in->eof) {
        my $len= ReadRecordLength($in);
        if ($len<0) { last; }
        if (!defined $fixedlen) {
            $fixedlen= $len;
        }
        elsif ($fixedlen != $len) {
            print "Warning found non fixed length record\n";
        }
        my $data;
        $in->read($data, $len);
        my $eof;
        $in->read($eof, 2);  # read CRLF
        if ($eof ne "\r\n") {
            print "warning: expected CRLF\n";
        }
        $out->print($data);
    }
    $out->close();
    $in->close();
}

sub ReadRecordLength {
    my $fh= shift;
    my $number= "";
    while (1) {
        my $c;
        $fh->read($c, 1) or return -1;
        if ($c eq ",") {
            last;
        }
        $number .= $c;
    }
    return $number;
}

sub ConvertENF {
    my ($dbfile, $outputfile)= @_;

    my %notes;

    print "reading notes\n";

    my $enf= ENFFile->new($dbfile);
    $enf->open();

    while (!$enf->eof())
    {
        my $rec= $enf->ReadRecord();

        if ($rec->{str2} =~ /CIN\d/) {
            $notes{$rec->{str1}}{$rec->{str2}}= $rec->{decrypted};
        }
    }

    $enf->close();

    print "writing notes\n";
    my $out= IO::File->new();
    $out->open($outputfile, ">") or die "ConvertENF open >$outputfile: $!\n";
    binmode($out);

    for my $docnr (sort keys %notes) {
        $out->print(pack("A12A16A896", $docnr, $notes{$docnr}{CIN1} || "", $notes{$docnr}{CIN0} || ""));
    }
    $out->close();

}

