
use strict;

# field numbers offset by 1 because we prepend a random number
use constant WORD => 1;
use constant ROOTTYPE => 2;
use constant GLOSS => 3;


# ---------------------------------------------------------------------
# Configuration and command line.
#
# Usage:  SCRIPT [-r ROOTTYPES] [DICTFILE]
#
#   -r ROOTTYPES  characters that are later interpolated into a regex
#                 character class and matched against each row's
#                 root-type field; only matching rows are quizzed.
#   DICTFILE      tab-separated lexicon to quiz from; a built-in
#                 default file name is used when none is given.
# ---------------------------------------------------------------------
my $dictfile  = undef;
my $roottypes = undef;

# Percentage of quizzed words answered as known; filled in by the
# summary at the end of the session.
my $pct = 0;

my $unknownfile = "unknown.rdb";
my $knownfile = "known.rdb";

# Test with defined() so that an argument of "0" or "" does not end
# option processing prematurely.
while ( defined( my $arg = shift @ARGV ) ) {
    if ( $arg eq "-r" ) {
        $roottypes = shift @ARGV;
        defined $roottypes
            or die "option -r requires a value\n"
                 . "usage: $0 [-r ROOTTYPES] [DICTFILE]\n";
        next;
    }
    if ( $arg =~ /^-/ ) {
        die "unknown option '$arg'\n"
          . "usage: $0 [-r ROOTTYPES] [DICTFILE]\n";
    }
    # A non-option argument is itself the dictionary file name.
    $dictfile = $arg;
}

if ( not defined $dictfile ) {
    $dictfile = "conlang13c_lexicon_dated.rdb";
}

## Slurp in both unknown.rdb and DICTFILE.rdb, prepend a random 
## number to each row, and sort by the random number
## Reading in unknown.rdb means that unknown words occur twice
## as often in the resultant array.
## Also, use of various factors for multiplying the random
## numbers means definitely unknown words are more likely
## to show up early, and words not yet quizzed are 
## more likely to show up early than words definitely known.

# Every candidate quiz row, stored as "<random key>\t<original line>".
my @rows;
# Scratch array for the split columns of the current line; also used
# by the quiz loop later in the file.
my @fields;
srand;

# Rows answered as unknown in earlier sessions.  They get the smallest
# key range (0..100) so they tend to sort first.  The file is created
# later by the append-open, so on a first run it may not exist yet;
# that is treated as "no unknown words" rather than a fatal error.
if ( -e $unknownfile ) {
    open my $unknown_in, '<', $unknownfile
        or die "can't open $unknownfile for reading: $!";
    while ( my $line = <$unknown_in> ) {
        push @rows, ( rand() * 100 ) . "\t" . $line;
    }
    close $unknown_in;
}

# Words answered as known in earlier sessions, one word per line.
my @knownwords;
if ( -e $knownfile ) {
    open my $known_in, '<', $knownfile
        or die "can't open $knownfile for reading: $!";
    @knownwords = <$known_in>;
    close $known_in;
}

# Exact-match lookup table.  Interpolating the dictionary word into a
# regex would let metacharacters in a word misfire and would also treat
# a word as known whenever it is a substring of some known word.
my %is_known;
foreach my $entry (@knownwords) {
    my $word = $entry;    # copy, so chomp leaves @knownwords untouched
    chomp $word;
    $is_known{$word} = 1;
}

# Three-argument open: the file name comes from the command line and
# must not be interpreted as an open mode or a pipe.
open my $dict_in, '<', $dictfile
    or die "can't open $dictfile for reading: $!";
while ( my $line = <$dict_in> ) {
    @fields = split( /\t/, $line );

    # skip wordforms with no definition assigned
    next if !defined $fields[4] or $fields[4] =~ /^$/;

    # A larger factor spreads the random key over a wider range, so
    # the row is less likely to land near the front after sorting.
    my $factor;
    if ( $fields[4] =~ /(archaic|nonce)/ ) {
        $factor = 1000;
    }
    elsif ( exists $is_known{ $fields[0] } ) {
        $factor = 500;
    }
    else {
        $factor = 200;
    }
    push @rows, ( rand() * $factor ) . "\t" . $line;
}
close $dict_in;

# Order the rows by the random key at the front of each string (the
# numeric comparison only looks at the leading number).
@rows = sort { $a <=> $b } @rows;
my $known = 0;
my $unknown = 0;
my $total = 0;

print "dxiql-zox pxoq {h(oqnx)}, cxoq twax-txy kax-i hyw-tq-van henx,\n";
print "rej pxoq {} cxoq te kax-i hyw-van, rej {gx(yl)} boq sru-van gxyl-zox.\n\n";

# Session results are appended: full rows of missed words to the
# unknown file, bare words answered as known to the known file.
open my $unknown_out, '>>', $unknownfile
    or die "can't open $unknownfile for writing: $!";
open my $known_out, '>>', $knownfile
    or die "can't open $knownfile for writing: $!";

# Compile the root-type filter once.  Without -r there is nothing to
# interpolate and "[]" is not a valid character class, so in that case
# no filter is applied and every row is quizzed.
my $roottype_filter;
if ( defined $roottypes and length $roottypes ) {
    $roottype_filter = qr/[$roottypes]/;
}

ROW:
foreach my $row (@rows) {
    my @cols = split( /\t/, $row );

    if ( defined $roottype_filter ) {
        my $roottype = defined $cols[ ROOTTYPE ] ? $cols[ ROOTTYPE ] : '';
        next ROW if $roottype !~ $roottype_filter;
    }

    $total++;
    print $cols[ WORD ] . "\n";
    my $hyw = <STDIN>;

    # End of input: stop the session instead of treating every
    # remaining word as "known".  The word just shown was not answered.
    if ( !defined $hyw ) {
        $total--;
        last ROW;
    }

    if ( $hyw =~ /^h(oq(nx)*)*/ ) {
        # Not known: record the original row without its random key.
        # Strip up to the first tab so a key printed in exponent form
        # (e.g. 3.1e-05) is removed completely.
        ( my $entry = $row ) =~ s/\A[^\t]*\t//;
        $unknown++;
        print {$unknown_out} $entry;
    }
    elsif ( $hyw =~ /^g(x(y(l)*)*)*/ ) {
        # Quit request: the word just shown does not count.
        $total--;
        last ROW;
    }
    elsif ( $hyw =~ /^ *$/ ) {
        # Blank answer means the word is known.
        $known++;
        print {$known_out} $cols[ WORD ] . "\n";
    }
    else {
        print "?twax-noq-tq-zox. ";
    }
#    select( undef, undef, undef, 5);
    print "\t" . $cols[ GLOSS ] . "\n";
}

# Summary, guarded against division by zero when nothing was answered.
$pct = $total > 0 ? ( $known / $total ) * 100 : 0;
print "$total total, $known known, $unknown unknown, percent known = $pct\n";

# Close the append handles explicitly; a bare close() would act on the
# currently selected output handle instead, and buffered write errors
# only surface here.
close $unknown_out or die "can't close $unknownfile: $!";
close $known_out   or die "can't close $knownfile: $!";
