# Generate redundant morphemes given a sequence of phoneme sets.
# Resultant morpheme set will have no two morphemes which differ by
# fewer than two phonemes.

###TODO: Fix defect where middle slot has more options than first and last.

# Last fix takes care of 3x5x3 case, but 3x5x3x5 produces 42 instead of 45;
# doesn't leave any cells unused by uses cells in the last two layers of the
# last cube inefficiently.

# 6 10 6 10 -- same proportions -- gives 344 actually generated, should do 360.
# 6 9 6 9  and  6 9 6 10 produce 324 (correct) while 6 10 6 9 produces only 319 (should be 324)
# but 3 5 3 4 produces 36 (correct)
#  3 5 3 6 and  3 5 3 7 both correctly produce 45

# Several similar 4-dim and 5-dim cases (first coord width less than second)
# all work correctly however: 2 3 2 3,  2 5 3 6,  7 8 3 5,   2 8 3 5, 2x3x2x3x2

###TODO also in rev6: randomizing phoneme lists' orders

###TODO: handle FORBIDDEN section like gen-all-possible-morphemes.pl

###TODO: currently it assumes you are working with at least 3 dimensions,
# because why not figure a 2- dimension problem by hand?  But would be wiser to fix it
# so it makes no such assumption...

# For some reason doing
#     use strict
# messed up the original version of
#     eval $print_prism loop;
# But now it works.  I'm not sure what I changed to fix it.

use strict;

my $debug = 0;


my $arg;
my $inputfile;
my $randomize = 0;
while ( $arg = shift ) {
    print "processing \$arg == $arg\n" if $debug;
    if ( $arg eq "-d" ) {
	$debug = shift;
	print "setting \$debug to $debug\n"  if $debug;
	next;
    }
    if ( $arg eq "-r" ) {
	$randomize = 1; ###TODO implement this scrambling of phoneme lists later...
	print STDERR "Warning: -r randomize option not implemented yet \n";
	next;
    }
    ###TODO print arg help if any other dash '-' option is given
    
    $inputfile = $arg;
}

if  ( ! $inputfile ) {
    die ( "Argument: input file with phoneme lists\n" );
}

open (PHONEMES, $inputfile) || die ("Couldn't find $inputfile\n");

my @phoneme_sets;
my $phoneme_set_idx = 0;
my $phoneme_idx = 0;
my $input_line;
my $phoneme_set_count = -1;
my $forbidden_seq_section = 0;
my @forbidden_sequences;

while (defined ( $input_line = <PHONEMES>) ) {
    
   $input_line =~ s/#.*//;            # remove comments
   $input_line =~ s/\s+//g;           # remove whitespace

   next   if $input_line =~ m/^$/;    # skip blank lines

   if ( $input_line =~ /^SLOT/ ) {
       $phoneme_set_count++;
       $phoneme_set_idx = 0;
       $forbidden_seq_section = 0;
       next;
   }

   if ( $input_line =~ /^FORBIDDEN/ ) {
       $forbidden_seq_section = 1;
       print STDERR "Warning: FORBIDDEN section in format files not fully supported yet\n";
       next;
   }

   if ( $forbidden_seq_section == 0 ) {
       print "assigning $input_line to row $phoneme_set_count column $phoneme_set_idx\n" if $debug;
       $phoneme_sets[ $phoneme_set_count ][ $phoneme_set_idx ] = $input_line;
       $phoneme_set_idx++;
   } else {
       push @forbidden_sequences, $input_line;
   }
}


# now generate real morphemes.
# note this only works if we have 3 dimensions; needs extensive work to do 4 or 5

my $i = 0;

my @dimension_size;
my $n_dimensions = scalar(@phoneme_sets);
my @coord;

for ( $i = 0; $i < $n_dimensions; $i++ ) {
    my $slotref = $phoneme_sets[ $i ];
    $dimension_size[ $i ] = scalar( @{$slotref} );
    $coord[$i] = 0;
    print "\$dimension_size\[ $i \]  = " .  $dimension_size[ $i ] . "\n" if $debug;
}

my @morpheme_prism;
my $finished = 0;

&clear_whole_prism;

my $cell_code = "\$morpheme_prism";
for ( $i = 0; $i < $n_dimensions; $i++ ) {
    $cell_code .= "[ \$coord[$i] ]";
}

my $current_layer_coord = 2;
my $iter = 0;
my $current_cell_coord_debug_string = "";


CELL: while ( $finished == 0 ) {
    ++$iter;
    if ( $debug ) {
	$current_cell_coord_debug_string = "";
	for ( $i = 0; $i < $n_dimensions; $i++ ) {
	    $current_cell_coord_debug_string .= $coord[$i];
	    if ( $i < $n_dimensions - 1 ) {
		$current_cell_coord_debug_string .= ", ";
	    }
	}
	print "step $iter -- try cell at coordinates: (" . $current_cell_coord_debug_string . ")\n";
    }

    &print_whole_prism  if $debug >= 2;
    print "==============\n"  if $debug >= 2;

    my $cell_val = eval $cell_code;

    if ( $cell_val != 1 && $cell_val != 2 ) {
	if ( $debug >= 2 ) {
	    print "cell ($current_cell_coord_debug_string) == $cell_val\n";
	}
	&mark_used( \@morpheme_prism, \@coord );

	# always increment at least two coords.  Once a given cell is
	# marked, motion along a single coordinate in any direction
	# will necessarily find a blocked cell.

	$coord[0] = ($coord[0] + 1) % $dimension_size[ 0 ];
	$coord[1] = ($coord[1] + 1) % $dimension_size[ 1 ];
	next;
    } else {
	print "skipping cell ($current_cell_coord_debug_string) as it's already used or blocked\n"  if $debug;

	# if the cell we tried is blocked, look for an open cell on this
	# plane.  Only if we can't find any open cells on this plane
	# should we go to the next plane or cube...

	my $j;
	my $init_0 = $coord[0];
	my $plane_area = ( $dimension_size[0] *  $dimension_size[1] );
	print "searching plane for open cell (1)... \n"   if $debug;
	for ( $j = 0; $j <= $plane_area; $j++ ) {
	    if ( ( $coord[0] = ($coord[0] + 1) % $dimension_size[ 0 ] ) == $init_0 ) {
		$coord[1] = ($coord[1] + 1) % $dimension_size[ 1 ];
	    }
	    print "checking cell (" . $coord[0] . "," . $coord[1] . ")\n"   if $debug >= 3;

	    if ( (eval $cell_code ) == 0 ) {
		next CELL;		 
	    }
	}
    }

    $coord[0] = ($coord[0] + 1) % $dimension_size[ 0 ];
    $coord[2]++;

    if ( $coord[2] >= $dimension_size[2] ) {
	for ( $i = 2; $i < $n_dimensions; $i++ ) {
	    #$coord[ $current_layer_coord ] = 0;
	    $coord[ $i ]++;
	    if ( $coord[ $i ] < $dimension_size[ $i ] ) {
		last;
	    }
	    $coord[ $i ] = 0;	    
	    if ( $i == $n_dimensions - 1 ) {
		$finished = 1;
		last CELL;
	    }
	}

	# YAY! this worked fine with 5-dim 3x3x3x3x3 matrix.  Now test with simpler and more complex cases.....
	
	###TODO maybe unneeded with search code now copied above... retest with 4+ dim cases and remove if not used
	###OK: it is still used with CVCV_17x5x17x5.fmt.  But is it really needed?  Try commenting it out and see
	### if it still works fine.

=pod
	my $j;
	my $init_0 = $coord[0];
	my $plane_area = ( $dimension_size[0] *  $dimension_size[1] );
	print "searching new plane for open cell (2)... "   if $debug;
	for ( $j = 0; $j <= $plane_area; $j++ ) {
	    if ( ( $coord[0] = ($coord[0] + 1) % $dimension_size[ 0 ] ) == $init_0 ) {
		$coord[1] = ($coord[1] + 1) % $dimension_size[ 1 ];
	    }
	    print "checking cell (" . $coord[0] . "," . $coord[1] . ")"   if $debug >= 3;

	    if ( (eval $cell_code ) == 0 ) {
		last;
	    }
	}
	if ( $j == $plane_area ) {
	    ###TODO: increment a higher dimensional coordinate? which one?
	    if ( $debug ) {
		print "bailing out at (";
		for ( my $k = 0; $k < $n_dimensions; $k++  ) {
		    print $coord[$i] . ", ";
		}
		print ") because searched whole plane without finding open cell\n";
	    }
#	    $finished = 1;
#	    last
	}
=cut
    }
}

&print_whole_prism  if $debug;

####################################################################################


my $print_prism_loop = "";

sub init_print_whole_prism {
    my $i;

    my $cell_code = "\$morpheme_prism";
    my $word_string_code = "";
    for ( $i = 0; $i < $n_dimensions; $i++ ) {
	$cell_code .= "[ \$indices[$i] ]";
	$word_string_code .= qq(\$phoneme_sets[ $i ][ \$indices[$i] ] . );	
    }
    my $blocked_cell_sign = ("#" x $n_dimensions) . " ";
    my $open_cell_sign = ("_" x  $n_dimensions) . " ";

    $print_prism_loop = <<LOOPCODE
	if ( $cell_code == 1 ) {
	    print \"$blocked_cell_sign\";
	} elsif ( $cell_code == 2 ) {
	    print $word_string_code " ";
	} else {
	    print \"$open_cell_sign\";
	}
LOOPCODE
    ;

#    for ( $i = $n_dimensions - 1; $i >= 0; $i-- ) {
    for ( $i = 0; $i < $n_dimensions; $i++ ) {
	$print_prism_loop = "for ( \$indices[$i] = 0; \$indices[$i] < $dimension_size[$i]; \$indices[$i]++ ) {\n" 
	    . $print_prism_loop;
	#if ( $i < $n_dimensions - 1 ) {
	if ( $i > 0 ) {
	    $print_prism_loop .= qq(\nprint "\\n";);
	}
	$print_prism_loop .= "\n}";
    }

#    $print_prism_loop = qq(print "in eval of generated print_prism_loop now\\n";\n) 
#	. $print_prism_loop . qq(\nprint "done with eval of print_prism_loop\\n";\n)  if $debug >= 3;

    $print_prism_loop = "my \@indices;\n" . $print_prism_loop;

    print "loop code: \n" . $print_prism_loop . "\n"    if $debug >= 3;
}



sub print_whole_prism {
    if ( $print_prism_loop eq "" ) {
	&init_print_whole_prism;
    }

    eval $print_prism_loop;
}

##################################################


sub clear_whole_prism {
    my @indices;
    my $i = 0;
    my $clear_loop_code = "\$morpheme_prism";
    for ( $i = 0; $i < $n_dimensions; $i++ ) {
	$clear_loop_code .= "[ \$indices[$i] ]";
    }
    $clear_loop_code .= " = 0;";

    # ok, now iterate over values of each member of @indices within range indicated by @dimension_size
    # then eval  $cell_code . " = 0;"  for each set of values for indices.

    # will have to build up said loop from inside out, concatenating
    #another for () {} around the string at each step

    for ( $i = $n_dimensions - 1; $i >= 0; $i-- ) {
	$clear_loop_code = "for ( \$indices[$i] = 0; \$indices[$i] < $dimension_size[$i]; \$indices[$i]++ ) {\n" 
	    . $clear_loop_code . "\n}";
    }
    $clear_loop_code = qq(print "in eval of generated clear_loop_code now\\n";\n) . $clear_loop_code   if $debug >= 3;

    print "clear_whole_prism loop: \n" . $clear_loop_code . "\n\n"   if $debug >= 3;

    eval $clear_loop_code;
}

##################################################

my $mark_used_loop = "";

sub init_mark_used {

# hard-coded section will map incoming args - two array refs
# @arr & @mark_coord

    my ($i, $j);
    $mark_used_loop = qq(print "marking these cells as blocked: "  if \$debug >= 3; \n);

    my $current_cell_coord_debug_string = "";
    
    for ( $i = 0; $i < $n_dimensions; $i++ ) {
	$mark_used_loop .= "for ( \$z=0; \$z< $dimension_size[ $i ]; \$z++ ) {\n";
	$mark_used_loop .= "\t\$arr";
	my $cell_coord_code = "";
	$current_cell_coord_debug_string = "(";
	for ( $j = 0; $j < $n_dimensions; $j++ ) {
	    if ( $i == $j ) {
		$cell_coord_code .= "[ \$z ]";
		$current_cell_coord_debug_string .= "\$z, ";
	    } else {
		$cell_coord_code .= "[ \$mark_coord[$j] ]";
		$current_cell_coord_debug_string .= "\$mark_coord[$j], ";
	    }

	}
	$current_cell_coord_debug_string .= ")";
	$mark_used_loop .= $cell_coord_code . " = 1;";
	$mark_used_loop .= qq(\n\tprint "$current_cell_coord_debug_string "  if \$debug >= 3;\n}\n);
	$mark_used_loop .= qq(print "\\n"   if \$debug >= 3;\n\n);
    }

    my $mark_target_cell_code = "\$arr";
    my $target_cell_debug_string = "( ";
    my $print_word_for_marked_cell = "print ";
    for ( $i = 0; $i < $n_dimensions; $i++ ) {
	$mark_target_cell_code .= "[ \$mark_coord[$i] ]";
	$target_cell_debug_string .= "\$mark_coord[$i]";
	$print_word_for_marked_cell .= "\$phoneme_sets[ $i ][ \$mark_coord[$i] ] . ";
	if ( $i < $n_dimensions - 1 ) {
	    $target_cell_debug_string .= ", ";
	} else {
	    $target_cell_debug_string .= ")";
	}
    }
    $mark_target_cell_code .= " = 2;\n";
    my $debug_print_string = qq(print "marking cell $target_cell_debug_string as used: *** "    if \$debug;\n);
    $print_word_for_marked_cell .= qq("\\n");
    
    $mark_used_loop .= $mark_target_cell_code . $debug_print_string . $print_word_for_marked_cell;
    print "\nmark_used code:\n" . $mark_used_loop . "\n"     if $debug >= 3;
}

sub mark_used {
    my ( $arr_ref, $mark_coord_ref ) = @_;
    my @arr = @{ $arr_ref };
    my @mark_coord = @{ $mark_coord_ref };
    my $z;

    if ( $mark_used_loop eq "" ) {
	&init_mark_used;
    }

    eval $mark_used_loop;
}
