# Generate all possible morphemes for a given phonology.
# Can filter output with another script to get more redundant
# vocabulary.

my $debug = 0;
my $inputfile = shift;

if  ( ! $inputfile ) {
    die ( "Argument: input file with phoneme lists\n" );
}

open (PHONEMES, $inputfile) || die ("Couldn't find $inputfile\n");

my @phoneme_sets;
my $phoneme_set_idx = 0;
my $phoneme_idx = 0;
my $input_line;
my $phoneme_set_count = -1;
my $forbidden_seq_section = 0;
my @forbidden_sequences;

while (defined ( $input_line = <PHONEMES>) ) {
   
   chop ($input_line);
   $input_line =~ s/\r//g;
   $input_line =~ s/#.*//;  # remove comments
   $input_line =~ s/[\t ]+//g;  # remove whitespace

   if ( $input_line =~ /^SLOT *([0-9]+)/ ) {
       $phoneme_set_count++;
       $phoneme_set_idx = 0;
       $forbidden_seq_section = 0;
       next;
   }

   if ( $input_line =~ /^FORBIDDEN/ ) {
       $forbidden_seq_section = 1;
   }

   if ($input_line !~ /^ *$/ ) {
       if ( $forbidden_seq_section == 0 ) {
	   print "assigning $input_line to row $phoneme_set_count column $phoneme_set_idx\n" if $debug;
	   $phoneme_sets[ $phoneme_set_count ][ $phoneme_set_idx ] = $input_line;
	   $phoneme_set_idx++;
       } else {
	   push @forbidden_sequences, $input_line;
       }
   }
}



# now generate real morphemes.

my @loopindices;
my $dimensions;
my $i = 0;
# , $j = 0, $k = 0;
my @dimension_size;
for ( $i = 0; $i < scalar(@phoneme_sets); $i++ ) {
    my $slotref = $phoneme_sets[ $i ];
    $dimension_size[ $i ] = scalar( @{$slotref} );
    print "\$dimension_size\[ $i \]  = " .  $dimension_size[ $i ] . "\n" if $debug;
}


# new architecture won't use a morpheme prism approach.  instead,
# iteratively generate all possible morphemes and then filter out the
# ones that are too similar to each other.

$dimensions = $phoneme_set_count;

# this will call itself recursively and print or save morphemes when
# it gets to the limit

&gen_syllables( 0, "" );


sub gen_syllables {
    my ($dimension, $string) = @_;
    my $i;
    print "\$dimension: $dimension, \$string: [$string]\n"   if $debug;

    my $bad;
    foreach $bad ( @forbidden_sequences ) {
	if ( $string =~ $bad ) {
	    print "abandon work on [$string] because of forbidden sequence [$bad]\n"   if $debug;
	    return;
	}
    }

    if ( $dimension == $dimensions ) { # at limit of max depth
	for ( $i = 0; $i < $dimension_size[ $dimension ]; $i++ ) {
	    my $gen_string = $string . $phoneme_sets[ $dimension ][ $i ];
	    foreach $bad ( @forbidden_sequences ) {
		if ( $gen_string =~ $bad ) {
		    print "abandon work on [$gen_string] because of forbidden sequence [$bad]\n"   if $debug;
		    return;
		}
	    }
	    print $gen_string . "\n";
	}
    } else {
	for ( $i = 0; $i < $dimension_size[ $dimension ]; $i++ ) {
	    &gen_syllables( $dimension+1, $string . $phoneme_sets[ $dimension ][ $i ] );
	}
    }
}
