#!/usr/bin/perl

# (c) 2006 by Jim Henry III.  Creative Commons license.
# http://www.pobox.com/~jimhenry/conlang/redundancy.htm

# Generate redundant morphemes given a sequence of phoneme sets.
# Resultant morpheme set will have no two morphemes which differ by
# fewer than two phonemes.

###TODO: Fix defect where middle slot has more options than first and last.

# Last fix takes care of 3x5x3 case, but 3x5x3x5 produces 42 instead of 45;
# doesn't leave any cells unused but uses cells in the last two layers of the
# last cube inefficiently.

# 6 10 6 10 -- same proportions -- gives 344 actually generated, should do 360.
# 6 9 6 9  and  6 9 6 10 produce 324 (correct) while 6 10 6 9 produces only 319 (should be 324)
# but 3 5 3 4 produces 36 (correct)
#  3 5 3 6 and  3 5 3 7 both correctly produce 45

# Several similar 4-dim and 5-dim cases (first coord width less than second)
# all work correctly however: 2 3 2 3,  2 5 3 6,  7 8 3 5,   2 8 3 5, 2x3x2x3x2


# For some reason doing
#     use strict
# messed up the original version of
#     eval $print_prism loop;
# But now it works.  I'm not sure what I changed to fix it.

use strict;

my $debug = 0;


my $arg;
my $inputfile;
my $randomize = 0;
my $shifting = 0;

while ( $arg = shift ) {
    print "processing \$arg == $arg\n" if $debug;
    if ( $arg eq "-d" ) {
	$debug = shift;
	if ( $debug !~ /[0-9]+/ ) { 	# bare -d with no number argument
	    unshift @ARGV, $debug;
	    $debug = 1;
	}
	print "setting \$debug to $debug\n";
	next;
    }
    if ( $arg eq "-r" ) {
	print "will randomize phoneme sequences\n"  if $debug;
	$randomize = 1;
	srand;
	next;
    }

    if ( $arg eq "-s" ) {
	$shifting = shift;
	if ( $shifting !~ /[0-9]+/ ) {    # bare -s with no number argument 
	    unshift @ARGV, $shifting;
	    $shifting = 1;
	}
	next;
    }

    if ( $arg =~ m/^-/ ) {
	&display_usage;
	exit;
    }
    $inputfile = $arg;
}

if  ( ! $inputfile ) {
    &display_usage;
    exit;
    die ( "Argument: input file with phoneme lists\n" );
}

open (PHONEMES, $inputfile) || die ("Couldn't find $inputfile\n");

my @phoneme_sets;
my $phoneme_set_idx = 0;
my $phoneme_idx = 0;
my $input_line;
my $phoneme_set_count = -1;
my $forbidden_seq_section = 0;
my @forbidden_sequences;

while (defined ( $input_line = <PHONEMES>) ) {
    
   $input_line =~ s/#.*//;            # remove comments
   $input_line =~ s/\s+//g;           # remove whitespace

   next   if $input_line =~ m/^$/;    # skip blank lines

   if ( $input_line =~ /^SLOT/ ) {
       $phoneme_set_count++;
       $phoneme_set_idx = 0;
       $forbidden_seq_section = 0;
       next;
   }

   # note there is no reason not to have multiple FORBIDDEN sections,
   # they'll all append to the same list; it/they can be before or after the SLOT sections
   # or among them

   if ( $input_line =~ /^FORBIDDEN/ ) {
       $forbidden_seq_section = 1;
       next;
   }

   if ( $forbidden_seq_section == 0 ) {
       if ( $phoneme_set_count < 0 ) {
	   die "Unknown text found before first SLOT or FORBIDDEN section\n";
       }
       print "assigning $input_line to row $phoneme_set_count column $phoneme_set_idx\n" if $debug;
       $phoneme_sets[ $phoneme_set_count ][ $phoneme_set_idx ] = $input_line;
       $phoneme_set_idx++;
   } else {
       push @forbidden_sequences, $input_line;
   }
}

if ( $phoneme_set_count <= 0 ) {
    die "Must have at least two SLOT sections in format file\n";
}



my $i = 0;

my @dimension_size;
my $n_dimensions = scalar(@phoneme_sets);
my @coord;

for ( $i = 0; $i < $n_dimensions; $i++ ) {
    my $slotref = $phoneme_sets[ $i ];

    if ( ! defined $slotref ) {
	die "No phonemes listed in SLOT $i\n";
    }

    $dimension_size[ $i ] = scalar( @{$slotref} );
    $coord[$i] = 0;
    print "\$dimension_size\[ $i \]  = " .  $dimension_size[ $i ] . "\n" if $debug;

#    if ( $shifting && $i % 2 == 0 ) {
    if ( $shifting ) {
	my $shift_amount = $randomize ? int(rand $dimension_size[ $i ]) : $shifting;

	if ( $debug ) {
	    print "shifting phoneme set $i by $shift_amount spaces ";
	    print (( $i % 2 == 0 ) ? "backward\n" : "forward\n");
	}

	for ( my $j = 0; $j < $shift_amount; $j++ ) {
	    if ( $i % 2 == 0 ) {
		my $phoneme = pop @{ $phoneme_sets[ $i ] };
		unshift @{ $phoneme_sets[ $i ] }, $phoneme;
	    } else {
		push @{ $phoneme_sets[ $i ] }, shift @{ $phoneme_sets[ $i ] };
	    }
	}
    } elsif ( $randomize ) {
	print "randomly shuffling phonemes in set $i...\n"   if $debug;
	# is square of the size of each dimension enough swaps to thoroughly randomize them?
	for ( my $j = 0; $j < $dimension_size[ $i ] ** 2; $j++ ) {
	    my $pos1 = int(rand $dimension_size[ $i ]);
	    my $pos2 = int(rand $dimension_size[ $i ]);
	    my $swaptemp = $phoneme_sets[ $i ][ $pos1 ];
	    $phoneme_sets[ $i ][ $pos1 ] = $phoneme_sets[ $i ][ $pos2 ];
	    $phoneme_sets[ $i ][ $pos2 ] = $swaptemp;
	}
    }
}


# now generate real morphemes.

my @morpheme_prism;
my $finished = 0;

&clear_whole_prism;

my $cell_code = "\$morpheme_prism";
for ( $i = 0; $i < $n_dimensions; $i++ ) {
    $cell_code .= "[ \$coord[$i] ]";
}

my $current_layer_coord = 2;
my $iter = 0;
my $current_cell_coord_debug_string = "";


CELL: while ( $finished == 0 ) {
    ++$iter;
    if ( $debug ) {
	print "==============\n"  if $debug >= 2;
	$current_cell_coord_debug_string = "";
	for ( $i = 0; $i < $n_dimensions; $i++ ) {
	    $current_cell_coord_debug_string .= $coord[$i];
	    if ( $i < $n_dimensions - 1 ) {
		$current_cell_coord_debug_string .= ", ";
	    }
	}
	print "step $iter -- try cell at coordinates: (" . $current_cell_coord_debug_string . ")\n";
	&print_whole_prism   if $debug >= 2;
    }

    my $cell_val = eval $cell_code;

#    if ( $cell_val != 1 && $cell_val != 2 ) {
    if ( $cell_val == 0 ) {
	if ( $debug >= 2 ) {
	    print "cell ($current_cell_coord_debug_string) == $cell_val\n";
	}
	&mark_used( \@morpheme_prism, \@coord );

	# always increment at least two coords.  Once a given cell is
	# marked, motion along a single coordinate in any direction
	# will necessarily find a blocked cell.

	$coord[0] = ($coord[0] + 1) % $dimension_size[ 0 ];
	$coord[1] = ($coord[1] + 1) % $dimension_size[ 1 ];
	next;
    } else {
	print "skipping cell ($current_cell_coord_debug_string) as it's already used or blocked\n"  if $debug;

	# if the cell we tried is blocked, look for an open cell on this
	# plane.  Only if we can't find any open cells on this plane
	# should we go to the next plane or cube...

	my $j;
	my $init_0 = $coord[0];
	my $plane_area = ( $dimension_size[0] *  $dimension_size[1] );
	print "searching plane for open cell (1)... \n"   if $debug;
	for ( $j = 0; $j <= $plane_area; $j++ ) {
	    if ( ( $coord[0] = ($coord[0] + 1) % $dimension_size[ 0 ] ) == $init_0 ) {
		$coord[1] = ($coord[1] + 1) % $dimension_size[ 1 ];
	    }
	    print "checking cell (" . $coord[0] . "," . $coord[1] . ")\n"   if $debug >= 3;

	    if ( (eval $cell_code ) == 0 ) {
		next CELL;		 
	    }
	}
    }

    if ( $n_dimensions == 2 ) {
	last CELL;
    }

    $coord[0] = ($coord[0] + 1) % $dimension_size[ 0 ];
    $coord[2]++;

    if ( $coord[2] >= $dimension_size[2] ) {
	for ( $i = 2; $i < $n_dimensions; $i++ ) {
	    #$coord[ $current_layer_coord ] = 0;
	    $coord[ $i ]++;
	    if ( $coord[ $i ] < $dimension_size[ $i ] ) {
		last;
	    }
	    $coord[ $i ] = 0;	    
	    if ( $i == $n_dimensions - 1 ) {
		$finished = 1;
		last CELL;
	    }
	}
    }
}

&print_whole_prism  if $debug;

####################################################################################

sub display_usage {

    print<<HELP;

Usage: $0 <file>  [-d N] [-r] 

<file>   input file: for format see examples in *.fmt
-d [N]:  set debug trace level to N (1-3), N optional (set to 1 if next arg is non-numeric)
-r:      randomly shuffle the phonemes within each list in the input file before
         generating words
-s [N]:  shift (rotate) phonemes within each list before generating words by N spaces.
         N optional (set to 1 if next arg is non-numeric).  If combined with -r, shift
	 each list by a random amount instead of shuffling them.

HELP

}

my $print_prism_loop = "";

sub init_print_whole_prism {
    my $i;

    my $cell_code = "\$morpheme_prism";
    my $word_string_code = "";
    for ( $i = 0; $i < $n_dimensions; $i++ ) {
	$cell_code .= "[ \$indices[$i] ]";
	$word_string_code .= qq(\$phoneme_sets[ $i ][ \$indices[$i] ] . );	
    }
    my $blocked_cell_sign = ("#" x $n_dimensions) . " ";
    my $open_cell_sign = ("_" x  $n_dimensions) . " ";
    my $forbidden_cell_code = ( "*" x $n_dimensions) . " ";

    $print_prism_loop = <<LOOPCODE
	if ( $cell_code == 1 ) {
	    print \"$blocked_cell_sign\";
	} elsif ( $cell_code == 2 ) {
	    print $word_string_code " ";
	} elsif ( $cell_code == 3 ) {
	    print \"$forbidden_cell_code\";
	} else {
	    print \"$open_cell_sign\";
	}
LOOPCODE
    ;

#    for ( $i = $n_dimensions - 1; $i >= 0; $i-- ) {
    for ( $i = 0; $i < $n_dimensions; $i++ ) {
	$print_prism_loop = "for ( \$indices[$i] = 0; \$indices[$i] < $dimension_size[$i]; \$indices[$i]++ ) {\n" 
	    . $print_prism_loop;
	#if ( $i < $n_dimensions - 1 ) {
	if ( $i > 0 ) {
	    $print_prism_loop .= qq(\nprint "\\n";);
	}
	$print_prism_loop .= "\n}";
    }

#    $print_prism_loop = qq(print "in eval of generated print_prism_loop now\\n";\n) 
#	. $print_prism_loop . qq(\nprint "done with eval of print_prism_loop\\n";\n)  if $debug >= 3;

    $print_prism_loop = "my \@indices;\n" . $print_prism_loop;

    print "loop code: \n" . $print_prism_loop . "\n"    if $debug >= 3;
}



sub print_whole_prism {
    if ( $print_prism_loop eq "" ) {
	&init_print_whole_prism;
    }

    eval $print_prism_loop;
}

##################################################


sub clear_whole_prism {
    my @indices;
    my $i = 0;
    my $clear_loop_code = "\$morpheme_prism";
    for ( $i = 0; $i < $n_dimensions; $i++ ) {
	$clear_loop_code .= "[ \$indices[$i] ]";
    }
    $clear_loop_code .= " = 0;";

    # ok, now iterate over values of each member of @indices within range indicated by @dimension_size
    # then eval  $cell_code . " = 0;"  for each set of values for indices.

    # will have to build up said loop from inside out, concatenating
    #another for () {} around the string at each step

    for ( $i = $n_dimensions - 1; $i >= 0; $i-- ) {
	$clear_loop_code = "for ( \$indices[$i] = 0; \$indices[$i] < $dimension_size[$i]; \$indices[$i]++ ) {\n" 
	    . $clear_loop_code . "\n}";
    }
    $clear_loop_code = qq(print "in eval of generated clear_loop_code now\\n";\n) . $clear_loop_code   if $debug >= 3;

    print "clear_whole_prism loop: \n" . $clear_loop_code . "\n\n"   if $debug >= 3;

    eval $clear_loop_code;
}

##################################################

my $mark_used_loop = "";

sub init_mark_used {

# hard-coded section will map incoming args - two array refs
# @arr & @mark_coord

    my ($i, $j);

    ###### First, check if cell has a forbidden phoneme sequence.  Then mark it blocked and return
    # early if so.

    $mark_used_loop = "my \$this_cell_word = ";
    my $target_cell_code = "\$arr";
    my $target_cell_debug_string = "(";

    for ( $i = 0; $i < $n_dimensions; $i++ ) {
	$mark_used_loop .= "\$phoneme_sets[ $i ][ \$mark_coord[$i] ]";
	$target_cell_code .= "[ \$mark_coord[$i] ]";
	$target_cell_debug_string .= "\$mark_coord[$i]";

	if ( $i < $n_dimensions - 1 ) {
	    $mark_used_loop .= " . ";
	    $target_cell_debug_string .= ", ";
	} else {
	    $target_cell_debug_string .= ")";
	}
    }
    $mark_used_loop .= ";\n";

    my $check_forbidden =<<IF_FORBIDDEN
    my \$bad;
    foreach \$bad ( \@forbidden_sequences ) {
	if ( \$this_cell_word =~ \$bad ) {
	    print qq(marking "\$this_cell_word" blocked because of forbidden sequence "\$bad"\\n)   if \$debug;
	    $target_cell_code = 3;
	    return;
	}
    }
IF_FORBIDDEN
    ;

    $mark_used_loop .= $check_forbidden;

    ######

    $mark_used_loop .= qq(print "marking these cells as blocked: "  if \$debug >= 3; \n);

    my $current_cell_coord_debug_string = "";
    
    for ( $i = 0; $i < $n_dimensions; $i++ ) {
	$mark_used_loop .= "for ( \$z=0; \$z< $dimension_size[ $i ]; \$z++ ) {\n";
	$mark_used_loop .= "\t\$arr";
	my $cell_coord_code = "";
	$current_cell_coord_debug_string = "(";
	for ( $j = 0; $j < $n_dimensions; $j++ ) {
	    if ( $i == $j ) {
		$cell_coord_code .= "[ \$z ]";
		$current_cell_coord_debug_string .= "\$z, ";
	    } else {
		$cell_coord_code .= "[ \$mark_coord[$j] ]";
		$current_cell_coord_debug_string .= "\$mark_coord[$j], ";
	    }

	}
	$current_cell_coord_debug_string .= ")";
	$mark_used_loop .= $cell_coord_code . " = 1;";
	$mark_used_loop .= qq(\n\tprint "$current_cell_coord_debug_string "  if \$debug >= 3;\n}\n);
	$mark_used_loop .= qq(print "\\n"   if \$debug >= 3;\n\n);
    }

    my $print_word_for_marked_cell = qq(print \$this_cell_word . "\\n";);

    my $mark_target_cell_code .= $target_cell_code . " = 2;\n";
    my $debug_print_string = qq(print "marking cell $target_cell_debug_string as used: *** "    if \$debug;\n);
    $print_word_for_marked_cell .= qq("\\n");
    
    $mark_used_loop .= $mark_target_cell_code . $debug_print_string . $print_word_for_marked_cell;
    print "\nmark_used code:\n" . $mark_used_loop . "\n"     if $debug >= 3;
}

sub mark_used {
    my ( $arr_ref, $mark_coord_ref ) = @_;
    my @arr = @{ $arr_ref };
    my @mark_coord = @{ $mark_coord_ref };
    my $z;

    if ( $mark_used_loop eq "" ) {
	&init_mark_used;
    }

    eval $mark_used_loop;
}
