# Generate redundant morphemes given a sequence of phoneme sets.
# Resultant morpheme set will have no two morphemes which differ by
# fewer than two phonemes.

# This works theoretically with any number of phonemes in any number of slots;
# have tested anywhere from 3 to 6, will test 7+ soon.
# Also all tests so far have had as many or more phonemes in first slot as second;
# should do some tests with fewer in first than second.

### TODO: currently it assumes you are working with at least 3 dimensions,
# because why not figure a 2- dimension problem by hand?  But would be wiser to fix it
# so it makes no such assumption...

# For some reason doing
#     use strict
# messed up the original version of
#     eval $print_prism loop;
# But now it works.  I'm not sure what I changed to fix it.

use strict;

my $debug = 0;


my $arg;
my $inputfile;
my $randomize = 0;
while ( $arg = shift ) {
    print "processing \$arg == $arg\n" if $debug;
    if ( $arg eq "-d" ) {
	$debug = shift;
	print "setting \$debug to $debug\n"  if $debug;
	next;
    }
    if ( $arg eq "-r" ) {
	$randomize = 1; ###TODO implement this scrambling of phoneme lists later...
	print STDERR "Warning: -r randomize option not implemented yet \n";
	next;
    }
    ###TODO print arg help if any other dash '-' option is given
    
    $inputfile = $arg;
}

if  ( ! $inputfile ) {
    die ( "Argument: input file with phoneme lists\n" );
}

open (PHONEMES, $inputfile) || die ("Couldn't find $inputfile\n");

my @phoneme_sets;
my $phoneme_set_idx = 0;
my $phoneme_idx = 0;
my $input_line;
my $phoneme_set_count = -1;
while (defined ( $input_line = <PHONEMES>) ) {
    
#   chop ($input_line);
#   $input_line =~ s/\r//g;
   $input_line =~ s/#.*//;  # remove comments
   $input_line =~ s/\s+//g;  # remove whitespace
   next if $input_line =~ m/^$/;

   if ( $input_line =~ /SLOT/ ) {
       $phoneme_set_count++;
       $phoneme_set_idx = 0;
       next;
   }
   if ($input_line !~ /^ *$/) {
       print "assigning $input_line to row $phoneme_set_count column $phoneme_set_idx\n" if $debug;
       $phoneme_sets[ $phoneme_set_count ][ $phoneme_set_idx ] = $input_line;
       $phoneme_set_idx++;
   }
}


# now generate real morphemes.
# note this only works if we have 3 dimensions; needs extensive work to do 4 or 5

my $i = 0;

my @dimension_size;
my $n_dimensions = scalar(@phoneme_sets);
my @coord;

for ( $i = 0; $i < $n_dimensions; $i++ ) {
    my $slotref = $phoneme_sets[ $i ];
    $dimension_size[ $i ] = scalar( @{$slotref} );
    $coord[$i] = 0;
    print "\$dimension_size\[ $i \]  = " .  $dimension_size[ $i ] . "\n" if $debug;
}

my @morpheme_prism;
my $finished = 0;

&clear_whole_prism;

my $cell_code = "\$morpheme_prism";
for ( $i = 0; $i < $n_dimensions; $i++ ) {
    $cell_code .= "[ \$coord[$i] ]";
}

my $current_layer_coord = 2;
my $iter = 0;
my $current_cell_coord_debug_string = "";

# need to have a var for whether we just advanced to the next plane or not.
# set true when we advance and set false at the end of each cell eval.
# if we find a cell blocked and we have just advanced to this plane, then we need
# to do a search for an open cell.  (this may obviate the need for a search when
# advancing to the next cube or hypercube...?)

# whether we have just gone from one plane to the next
my $just_advanced = 0;

CELL: while ( $finished == 0 ) {
    ++$iter;
    if ( $debug ) {
	$current_cell_coord_debug_string = "";
	for ( $i = 0; $i < $n_dimensions; $i++ ) {
	    $current_cell_coord_debug_string .= $coord[$i];
	    if ( $i < $n_dimensions - 1 ) {
		$current_cell_coord_debug_string .= ", ";
	    }
	}
	print "step $iter -- try cell at coordinates: (" . $current_cell_coord_debug_string . ")\n";
    }

    &print_whole_prism  if $debug >= 2;
    print "==============\n"  if $debug >= 2;

    my $cell_val = eval $cell_code;

    if ( $cell_val != 1 && $cell_val != 2 ) {
	if ( $debug >= 2 ) {
	    print "cell ($current_cell_coord_debug_string) == $cell_val\n";
	}
	&mark_used( \@morpheme_prism, \@coord );

	# always increment at least two coords.  Once a given cell is
	# marked, motion along a single coordinate in any direction
	# will necessarily find a blocked cell.

	$coord[0] = ($coord[0] + 1) % $dimension_size[ 0 ];
	$coord[1] = ($coord[1] + 1) % $dimension_size[ 1 ];
	$just_advanced = 0;
	next;
    } else {
	print "skipping cell ($current_cell_coord_debug_string) as it's already used or blocked\n"  if $debug;
	if ( $just_advanced ) {
	    # if we just went to a new plane and the first cell we try
	    # is blocked, look for an open cell on this plane.  Only
	    # if we can't find any open cells on this plane should we
	    # go to the next cube.

	    my $j;
	    my $init_0 = $coord[0];
	    my $plane_area = ( $dimension_size[0] *  $dimension_size[1] );
	    print "searching new plane for open cell (1)... \n"   if $debug;
	    for ( $j = 0; $j <= $plane_area; $j++ ) {
		if ( ( $coord[0] = ($coord[0] + 1) % $dimension_size[ 0 ] ) == $init_0 ) {
		    $coord[1] = ($coord[1] + 1) % $dimension_size[ 1 ];
		}
		print "checking cell (" . $coord[0] . "," . $coord[1] . ")"   if $debug >= 3;

		if ( (eval $cell_code ) == 0 ) {
		    $just_advanced = 0;
		    next CELL;		 
		}
	    }
	}
	$just_advanced = 0;
    }



    # this works for 3 or 4 dimensions but breaks down for 5

    $coord[0] = ($coord[0] + 1) % $dimension_size[ 0 ];
    $coord[2]++;
    $just_advanced = 1;
    if ( $coord[2] >= $dimension_size[2] ) {
	for ( $i = 2; $i < $n_dimensions; $i++ ) {
	    #$coord[ $current_layer_coord ] = 0;
	    $coord[ $i ]++;
	    if ( $coord[ $i ] < $dimension_size[ $i ] ) {
		last;
	    }
	    $coord[ $i ] = 0;	    
	    if ( $i == $n_dimensions - 1 ) {
		$finished = 1;
		last CELL;
	    }
	}

	# YAY! this worked fine with 5-dim 3x3x3x3x3 matrix.  Now test with simpler and more complex cases.....
	
	###TODO maybe unneeded with search code now copied above... retest with 4+ dim cases and remove if not used

	my $j;
	my $init_0 = $coord[0];
	my $plane_area = ( $dimension_size[0] *  $dimension_size[1] );
	print "searching new plane for open cell (2)... "   if $debug;
	for ( $j = 0; $j <= $plane_area; $j++ ) {
	    if ( ( $coord[0] = ($coord[0] + 1) % $dimension_size[ 0 ] ) == $init_0 ) {
		$coord[1] = ($coord[1] + 1) % $dimension_size[ 1 ];
	    }
	    print "checking cell (" . $coord[0] . "," . $coord[1] . ")"   if $debug >= 3;

	    if ( (eval $cell_code ) == 0 ) {
		last;
	    }
	}
	if ( $j == $plane_area ) {
	    ###TODO: increment a higher dimensional coordinate? which one?
	    if ( $debug ) {
		print "bailing out at (";
		for ( my $k = 0; $k < $n_dimensions; $k++  ) {
		    print $coord[$i] . ", ";
		}
		print ") because searched whole plane without finding open cell\n";
	    }
#	    $finished = 1;
#	    last
	}
    }
}

&print_whole_prism  if $debug;

####################################################################################


my $print_prism_loop = "";

sub init_print_whole_prism {
    my $i;

    my $cell_code = "\$morpheme_prism";
    my $word_string_code = "";
    for ( $i = 0; $i < $n_dimensions; $i++ ) {
	$cell_code .= "[ \$indices[$i] ]";
	$word_string_code .= qq(\$phoneme_sets[ $i ][ \$indices[$i] ] . );	
    }
    my $blocked_cell_sign = ("x" x $n_dimensions) . " ";
    my $blocked_cell_sign = ("x" x $n_dimensions) . " ";
    my $open_cell_sign = ("0" x  $n_dimensions) . " ";

    $print_prism_loop = <<LOOPCODE
	if ( $cell_code == 1 ) {
	    print \"$blocked_cell_sign\";
	} elsif ( $cell_code == 2 ) {
	    print $word_string_code " ";
	} else {
	    print \"$open_cell_sign\";
	}
LOOPCODE
    ;

#    for ( $i = $n_dimensions - 1; $i >= 0; $i-- ) {
    for ( $i = 0; $i < $n_dimensions; $i++ ) {
	$print_prism_loop = "for ( \$indices[$i] = 0; \$indices[$i] < $dimension_size[$i]; \$indices[$i]++ ) {\n" 
	    . $print_prism_loop;
	#if ( $i < $n_dimensions - 1 ) {
	if ( $i > 0 ) {
	    $print_prism_loop .= qq(\nprint "\\n";);
	}
	$print_prism_loop .= "\n}";
    }

#    $print_prism_loop = qq(print "in eval of generated print_prism_loop now\\n";\n) 
#	. $print_prism_loop . qq(\nprint "done with eval of print_prism_loop\\n";\n)  if $debug >= 3;

    $print_prism_loop = "my \@indices;\n" . $print_prism_loop;

    print "loop code: \n" . $print_prism_loop . "\n"    if $debug >= 3;
}



sub print_whole_prism {
    if ( $print_prism_loop eq "" ) {
	&init_print_whole_prism;
    }

    eval $print_prism_loop;
}



sub old_print_whole_prism {

    my ($m, $n, $o);
    for ( $o = 0; $o < $dimension_size [ 2 ]; $o++ ) {
	for ( $n = 0; $n < $dimension_size [ 1 ]; $n++ ) {
	    for ( $m = 0; $m < $dimension_size [ 0 ]; $m++ ) {
		if ( $morpheme_prism[ $m ][ $n ][ $o ] == 1 ) {
		    print "xxx ";
		} elsif ( $morpheme_prism[ $m ][ $n ][ $o ] == 2 ) {
###		    print $phoneme_sets[ 0 ][ $m ] . $phoneme_sets[ 1 ][ $n ] . $phoneme_sets[ 2 ][ $o ] . " ";
		    print "=== ";
		} else {
   		    print "000 ";
		}
	    }
	    print "\n";
	}
	print "\n";
    }
}


sub clear_whole_prism {
    my @indices;
    my $i = 0;
    my $clear_loop_code = "\$morpheme_prism";
    for ( $i = 0; $i < $n_dimensions; $i++ ) {
	$clear_loop_code .= "[ \$indices[$i] ]";
    }
    $clear_loop_code .= " = 0;";

    # ok, now iterate over values of each member of @indices within range indicated by @dimension_size
    # then eval  $cell_code . " = 0;"  for each set of values for indices.

    # will have to build up said loop from inside out, concatenating
    #another for () {} around the string at each step

    for ( $i = $n_dimensions - 1; $i >= 0; $i-- ) {
	$clear_loop_code = "for ( \$indices[$i] = 0; \$indices[$i] < $dimension_size[$i]; \$indices[$i]++ ) {\n" 
	    . $clear_loop_code . "\n}";
    }
    $clear_loop_code = qq(print "in eval of generated clear_loop_code now\\n";\n) . $clear_loop_code   if $debug >= 3;

    print "clear_whole_prism loop: \n" . $clear_loop_code . "\n\n"   if $debug >= 3;

    eval $clear_loop_code;
}

sub old_clear_whole_prism {
### old code ###
    
    my ($m, $n, $o);
    for ( $o = 0; $o < $dimension_size [ 2 ]; $o++ ) {
	for ( $n = 0; $n < $dimension_size [ 1 ]; $n++ ) {
	    for ( $m = 0; $m < $dimension_size [ 0 ]; $m++ ) {
		$morpheme_prism[ $m ][ $n ][ $o ] = 0;
	    }
	}
    }
}


my $mark_used_loop = "";

sub init_mark_used {

# hard-coded section will map incoming args - two array refs
# @arr & @mark_coord

    my ($i, $j);
    $mark_used_loop = qq(print "marking these cells as blocked: "  if \$debug >= 3; \n);

    my $current_cell_coord_debug_string = "";
    
    for ( $i = 0; $i < $n_dimensions; $i++ ) {
	$mark_used_loop .= "for ( \$z=0; \$z< $dimension_size[ $i ]; \$z++ ) {\n";
	$mark_used_loop .= "\t\$arr";
	my $cell_coord_code = "";
	$current_cell_coord_debug_string = "(";
	for ( $j = 0; $j < $n_dimensions; $j++ ) {
	    if ( $i == $j ) {
		$cell_coord_code .= "[ \$z ]";
		$current_cell_coord_debug_string .= "\$z, ";
	    } else {
		$cell_coord_code .= "[ \$mark_coord[$j] ]";
		$current_cell_coord_debug_string .= "\$mark_coord[$j], ";
	    }

	}
	$current_cell_coord_debug_string .= ")";
	$mark_used_loop .= $cell_coord_code . " = 1;";
	$mark_used_loop .= qq(\n\tprint "$current_cell_coord_debug_string "  if \$debug >= 3;\n}\n);
	$mark_used_loop .= qq(print "\\n"   if \$debug >= 3;\n\n);
    }

    my $mark_target_cell_code = "\$arr";
    my $target_cell_debug_string = "( ";
    my $print_word_for_marked_cell = "print ";
    for ( $i = 0; $i < $n_dimensions; $i++ ) {
	$mark_target_cell_code .= "[ \$mark_coord[$i] ]";
	$target_cell_debug_string .= "\$mark_coord[$i]";
	$print_word_for_marked_cell .= "\$phoneme_sets[ $i ][ \$mark_coord[$i] ] . ";
	if ( $i < $n_dimensions - 1 ) {
	    $target_cell_debug_string .= ", ";
	} else {
	    $target_cell_debug_string .= ")";
	}
    }
    $mark_target_cell_code .= " = 2;\n";
    my $debug_print_string = qq(print "marking cell $target_cell_debug_string as used: *** "    if \$debug;\n);
    $print_word_for_marked_cell .= qq("\\n");
    
    $mark_used_loop .= $mark_target_cell_code . $debug_print_string . $print_word_for_marked_cell;
    print "\nmark_used code:\n" . $mark_used_loop . "\n"     if $debug >= 3;
}

sub mark_used {
    my ( $arr_ref, $mark_coord_ref ) = @_;
    my @arr = @{ $arr_ref };
    my @mark_coord = @{ $mark_coord_ref };
    my $z;

    if ( $mark_used_loop eq "" ) {
	&init_mark_used;
    }

    eval $mark_used_loop;
}

sub old_mark_used {

    my ( $arr_ref, $p, $q, $r ) = @_;
    my @arr = @{ $arr_ref };
    my $z;
    
    my $previous = $arr[ $p ][ $q ][ $r ];
#    print "previous value: [" . $arr[ $p ][ $q ][ $r ] . "] " if $debug; 
    print "previous value: [ $previous ] " if $debug; 
    if ( $previous == 1 || $previous == 2 ) {
	print "bailing out of mark_used() as previous value of this cell was $previous\n";
	return;
    }
    print "marking these cells as blocked: " if $debug;
    for ( $z=0; $z< $dimension_size[ 0 ]; $z++ ) {
	$arr[ $z ][ $q ][ $r ] = 1;
	print "($z, $q, $r) " if $debug;
    }
    print "\n" if $debug;
    for ( $z=0; $z< $dimension_size[ 1 ]; $z++ ) {
	$arr[ $p ][ $z ][ $r ] = 1;
	print "($p, $z, $r) " if $debug;
    }
    print "\n" if $debug;
    for ( $z=0; $z< $dimension_size[ 2 ]; $z++ ) {
	$arr[ $p ][ $q ][ $z ] = 1;
	print "($p, $q, $z) " if $debug;
    }
    print "\n" if $debug;

    $arr[ $p ][ $q ][ $r ] = 2;
    print "marking cell [ $p, $q, $r ] as used ***** " if $debug;

    print $phoneme_sets[ 0 ][ $p ] . $phoneme_sets[ 1 ][ $q ] . $phoneme_sets[ 2 ][ $r ] . "\n";
#    print "1:" . $phoneme_sets[ 0 ][ $p ] . "2:" . $phoneme_sets[ 1 ][ $q ] .  "3:" . $phoneme_sets[ 2 ][ $r ] . "\n" . "$p $q $r\n";
    
}
