# SymbolGenerator class for Boris

# By Jim Henry, based largely on code by John Fisher, with
# design/implementation ideas from Alex Fink and Mark J. Reed
# http://www.pobox.com/~jimhenry/conlang/conlang.htm

# John Fisher's orginal credits lines:

# Boris - Generate symbol sequences
# John Fisher Ver.1 Nov 96
# Do what you like with it but leave these three lines

# This class abstracts out the symbol generator from Boris so we can
# have multiple generators in use at once.  A symbol generator
# consists of a set of rules for expanding tokens into sequences or
# alternations of other tokens and/or literals, and a set of rules for
# doing string substitutions on the final symbol sequence.

# It also has code for handling symbol-trees, but as yet I don't think
# I need class overhead for the symbol trees.  A symbol tree is a hash
# with a string value and a reference to an array of children, possibly
# empty.  The children, if existent, are references to symbol trees.

# The SymbolGenerator has these public functions:

# ctor 'new'

# &generate_sequence generates a symbol sequence and returns it as a string.

# &mutate is a kind of copy constructor which returns a new generator
# with the same metaformat but a different symbol tree, roughly
# similar to the generator for which it's called

# &get_sequence returns a string representation of the current symbol tree

# Note that although this library uses rand a lot, it doesn't do an srand.
# The caller should normally do srand -- or not do it, if we're in debug
# mode and want consistent, predictable output.


###TODO: make probability biases arbitrary nonnegative real numbers, not just integers

###TODO add %include directive support to read_parameters()




# $Log: SymbolGenerator.pm,v $
# Revision 1.16  2010-10-31 01:58:41  jim
# More work on eval'd replacement text.  Still doesn't work right.
#
# Revision 1.15  2010-09-23 16:54:37  jim
# Handle comments at end of file better.
#
# Revision 1.14  2009-08-09 04:15:42  jim
# Move more file-scope variables into the %global_vars hash so the user
# can set them at runtime.  Improvements to cmd_set() incl. error checks
# on var names and values and parsing of \t \n \s for separator.
#
# Revision 1.13  2009-08-08 05:43:25  jim
# Implement 'save' command.  Add 'file=' argument to 'dump' command.
# Let user get help on several commands at once with serial args to
# 'help' command.
#
# Revision 1.12  2009-08-08 04:24:33  jim
# mutate finally works consistently even when mutating a mutated language. Yay!
#
# Revision 1.11  2009-08-07 21:52:34  jim
# Another fix to mutate and mutate_tree: start with an empty saved_vals hash on each
# mutation, and add node to saved_vals every time mutate_tree finishes calling itself
# recursively on its children.  First-order mutation now works consistently, but
# second-order mutation fails more often than not.
#
# Revision 1.10  2009-08-07 07:39:26  jim
# More tracing, and more fixes for mutate; works more often but not always.
#
# Revision 1.9  2009-08-06 05:27:59  jim
# More trace functions.  Fix memory leak problem with original language being mangled
# and its saved_vals hash growing indefinitely as we created mutated daughter langs from
# it.  Partial fix of problem with parenthesized tokens; but still doesn't work right
# more often than not.
#
# Revision 1.8  2009-08-05 02:18:42  jim
# Finished refactoring interactive_mode() to use a hash of subroutine refs for
# functions per specific commmands.   Further work on mutate_tree(): it now
# works intermittently, about a third of the time, which is better than before,
# but it also crashes sometimes due to stringify() encountering a node with
# no 'value'.
#
# Revision 1.7  2009-08-05 00:36:06  jim
# Working on fixes to mutate_tree() to handle parenthesized tokens correctly;
# still doesn't work right.   Start work on rewriting interactive_mode()
# with more flexible architecture; not finished, but it's stable and works
# as it is.
#
# Revision 1.6  2009-08-04 00:57:08  jim
# Better tracing, writing to file if in interactive mode.  More work on user interface,
# including 'show' and 'words' commands.  Fix bug in UI's use of mutate and get_sequence.
# But mutate function itself still doesn't work right; can't figure out why but it's
# making identical copies instead of mutated ones.
#
# Revision 1.5  2009-08-03 02:56:46  jim
# Converting SymbolGenerator to use instance variables instead of package variables
# for most purposes.   Not finished yet; code is untestable at the moment.
#
# Revision 1.4  2009-08-02 08:11:30  jim
# first draft, untested, of mutate and mutate_tree functions
# can't test this until I rewrite the whole package as a proper
# class moving package variables into the anon hash etc.
#

use strict;
use warnings;

package SymbolGenerator;

use constant INTERACTIVE => 4;

# bitwise values for $debug and trace() local variable $level
use constant GENERAL => 1;
use constant DETAILS => 2;
use constant MUTATE  => 4;
use constant DEEP_COPY => 8;
use constant EXPAND => 16;
use constant PARENS => 32;


# this can be a package var, no need AFAIK to have some objects of this class
# run in debug mode and others not...
my $debug = 0;

my $run_mode = 0;


BEGIN {
    use Exporter();
    our @ISA = qw(Exporter);
    our @EXPORT = qw( &generate_sequence
        &get_sequence
  	&mutate
	&dump_info
  	&new );

    return 1;
}

my $init_trace = 0;
my $debug_file = "symbolgenerator_log.txt";
sub trace {
    my $msg = $_[0];
    my $level = defined $_[1] ? $_[1] : 1;
    if ( not ($debug & $level) ) {
	return;
    }
    ###TODO make trace output filename configurable
    if ( $run_mode & INTERACTIVE ) {
	if ( not $init_trace ) {
	    open DEBUG, ">>$debug_file"	or die "can't append to $debug_file\n";
	    print DEBUG "\n========================================\n\nTraced run started at " . localtime() . "\n\n";
	    $init_trace = 1;
	}
	print DEBUG $msg;
    } else {
	print STDERR $msg;
    }

#    if ( $debug & MUTATE and $msg =~ /count/ ) { ####TODO remove
#	print $msg;
#    }
}



sub new {
    my $class = shift;
    my %args = @_;
    my $self = {};
    bless $self, $class;

    # create refs to empty hashes and arrays
    $$self{totals} 	= {};
    $$self{lits} 	= {};
    $$self{biases}	= {};
    $$self{saved_vals}	= {};
    $$self{main_tree} 	= {};
    $$self{matches}	= [];
    $$self{subs}	= [];    

    # set valid to true; if we have errors later, reset it to false.  Caller should check it after new() returns.
    $$self{valid} = 1;

    if ( defined $args{debug} ) {
	$debug = $args{debug};
    }

    if ( defined $args{run_mode} ) {
	$run_mode = $args{run_mode};
    }

    if (defined $args{name} ) {
	$$self{name} = $args{name};
    } elsif ( defined $args{paramfile} ) {
	$$self{name} = $args{paramfile};
    } else {
	$$self{name} = "(nameless SymbolGenerator)";
    }

    if ( defined $args{paramfile} ) {
	$self->read_paramfile( $args{paramfile} );
    } else {
	if ( defined $args{paramstring} ) {
	    $$self{parameter_string} = $args{paramstring};
	    $self->read_parameters();
	} else {
	    print STDERR "no paramfile or paramstring argument to SymbolGenerator::new()\n";
	    $$self{valid} = 0;
	}
    }

    if ( defined $args{"tree"} ) {
	###TODO handle  tree argument with deep copy
	die "unimplemented use of tree argument to SymbolGenerator::new";
    } # it's an optional argument so no 'else'
    
    if ( defined $args{treefile} ) {
	###TODO read tree from file, save in $$self{main_tree}
	die "unimplemented use of treefile argument to SymbolGenerator::new";
    }

    ###TODO other arguments: direct or meta mode? number of sequences
    # to generate?... probably don't need those here, that behavior
    # should be taken care of in main.

    return $self;
}

sub generate_sequence {
    my $self = shift;
    undef $$self{saved_vals};

    $$self{main_tree} = &expand_string ($self, "TOP");
    trace "\nafter primary call to &expand_tree\n\n";
    my $terminals = &stringify( $$self{main_tree} );
    
    trace "sequence before doing substitutions: $terminals\n";
    $$self{sequence} = &substitute( $self, $terminals );
    trace "sequence after doing substitutions: " . $$self{sequence} . "\n";
    return $$self{sequence};
}


sub get_sequence {
    my $self = shift;

    if ( not defined $$self{sequence} ) {
	if ( defined $$self{main_tree} ) {
	    my $terminals = &stringify(  $$self{main_tree} );
	    $$self{sequence} =  &substitute ($self, $terminals);
	} else {
	    return generate_sequence( $self );
	}
    }

    return $$self{sequence};
}



sub dump_info {
    my $self = shift;
    my $file = shift;
    my $saved_vals = $$self{saved_vals};
    if ( defined $file ) {
	if ( not open OUT, ">>" . $file ) {
	    print STDERR qq(Can't open "$file" for appending\n);
	    return;
	}
	select OUT;
    }
    print "\nlanguage name: $$self{name}\n";
    print "\nsaved_vals hash:\n\n";
    foreach ( sort keys %$saved_vals ) {
	print $_ . "\n";
	my $arr = $$saved_vals{ $_ };
	foreach ( @$arr ) {
	    print "\t" . &stringify( $_ ) . "\n";
	}
    }
    print "\ntree:\n\n";
    print &make_tree_diagram( $$self{main_tree} );
    if ( defined $file ) {
	select STDOUT;
    }
}



sub read_paramfile {
    my $self = shift;
    my $param_file = shift;
    if ( not open( PARAMS, $param_file ) ) {
	print STDERR qq(Couldn't read parameter file "$param_file"\n);
	$$self{valid} = 0;
	return;
    }

    my @params = <PARAMS>;
    close PARAMS;
    $$self{parameter_string} = join "\n", @params;

    $self->read_parameters();
}


sub read_parameters {
    my $self = shift;

    # make a local copy of it because we mangle it in various ways in the course of
    # parsing it, and we may want to refer to the original unaltered parameter string
    # later.
    my $param_string = $$self{parameter_string};

    # protect escaped metachars.  substitute() will change these internal symbols back into literal metachars
    # without the preceding slashes.
    $param_string =~ s/\\\+/___plus___/g;
    $param_string =~ s/\\-/___minus___/g;
    $param_string =~ s/\\:/___colon___/g;
    $param_string =~ s/\\;/___semicolon___/g;
    $param_string =~ s/\\\(/___leftparen___/g;
    $param_string =~ s/\\\)/___rightparen___/g;
    $param_string =~ s/\\\#/___octothorpe___/g;

    # remove comments
    $param_string =~ s/#[^\n]*\n//g;
    # handle comments at the end of a file with no newline after
    $param_string =~ s/#[^\n]*$//g;
	
    my $head;
    my $sign;
    my $value;

    my @statements = split ";", $param_string;
    foreach (@statements) {
	# any/all whitespace sequence becomes single space -- this
	# simplifies other regexes which can now have ' ' in place of '\s+'
	s/\s+/ /g;
	# strip trailing spaces
	s/ *$//;
	if ( m/^ *([^=> ]*) *([=>]) *(.*)$/) {
	    $head=$1; $sign=$2; $value=$3;
	    
	    trace "read_parameters(): \$head = [$head]\n\$sign = [$sign]\n\$value = [$value]\n", GENERAL;
	       
	    if ($value =~ m/[^\\][=>]/) {
		print "$_ \n";
		print STDERR "Missing semicolon after '$head'\n";
		$$self{valid} = 0;
		return;
	    }
	    if ($sign eq "=") {
		# remove spacing around plus/minus signs and colon
		$value =~ s/ +([\+\-:])/$1/g;
		$value =~ s/([\+\-:]) +/$1/g;
		
		my $total = 0;
		my $litstr = "";
		my $biasstr = "";
		my $item;
		foreach $item (split (/ /,$value)) {
		    my $lit;
		    my $bias;
		    ($lit, $bias) = split(/:/, $item);
		    
		    if ( defined $bias and $bias !~ m/^[0-9]+$/ ) {
			print STDERR qq(Probability bias "$bias" is not an integer:\n$_\n);
			$$self{valid} = 0;
			return;
		    }
		    
		    $total += defined($bias) ? $bias : 1 ;
		    $litstr .= "$lit ";
		    $biasstr .= "$total ";
		}
		trace "read_parameters(): \$head = $head, \$total = $total, \$biasstr = $biasstr\n", GENERAL;
		$$self{totals}->{$head} = $total;
		chop $litstr;		#removing the trailing space makes split work right later
		$$self{lits}->{$head} = $litstr;
		$$self{biases}->{$head} = $biasstr;
	    } else {
		# $sign must be eq '>' so we have a substition rule
		push @{ $$self{matches} }, $head;
		push @{ $$self{subs} }, $value;
	    }
	}
    }

    trace "\n\nFinished parsing parameters.  Found " . scalar( keys %{ $$self{totals} } ) . " expansion rules and "
	. scalar( @{ $$self{subs} } ) . " substitution rules.\n\n";
}

# it's not thread-safe to leave this and other recursion counters as
# package var... but when would boris use threads?  leave it for now.
my $recursion_depth = 0;

sub expand_string {
    my $self = shift;
    my $target = shift;

    if ( $recursion_depth++ > 100 ) {
	die "Possible infinite recursion while expanding string '$target'\n";
    }

    trace "expand_string: \$self = $self, \$target = $target, \$recursion_depth = $recursion_depth\n", EXPAND;

    # $tree is a reference to an anonymous hash representing a node and possible children; it will be our return value.
    my $tree = {};
    $$tree{"value"} = $target;
    $$tree{"catenator"} = '';

    # $val is the string value of this tree node.
    my $val;

    # if target is a parenthesized token, then reuse a randomly chosen
    # earlier expansion of the token if possible.  We don't need to
    # make a copy of it; just reusing the ref to the same object is
    # safe in this context.  The point is we want everything from here
    # on down to be the same as when the token was expanded on some
    # recent occasion.
    if ( $target =~ m/^\(([A-Za-z0-9_]+)\)$/ ) {
	trace "expand_string() found parenthesized token $1\n", PARENS | EXPAND;
	if ( defined $$self{saved_vals}->{ $1 } ) {
	    my $arrayref = $$self{saved_vals}->{ $1 };
	    my $r = int rand scalar @$arrayref;
	    my $subtree = $$arrayref[ $r ];
	    trace "got index $r,  value " . stringify( $subtree ) . " for parenthesized token $1\n", PARENS | EXPAND;

	    # reset subtree's catenator in case it was last used in a context where space was required
	    # we never return a tree with a non-empty catenator; we sometimes set catenator to space 
	    # on the value returned by our recursive calls of ourselves.
	    $$subtree{"catenator"} = '';
	    
	    if ( defined $$tree{ "children"}  and  ref $$tree{ "children"}  and  scalar @{ $$tree{ "children"} } >= 1 ) {
		trace "children list exists so push subtree onto it\n", PARENS | EXPAND;
		push @{ $$tree{ "children"} }, $subtree;
	    } else {
		trace "children list does not exist so assign to it\n", PARENS | EXPAND;
		$$tree{ "children"} = [ $subtree ];
	    }

	    trace "returning tree with value " . $$tree{"value"} . "\n", PARENS | EXPAND;
	    $recursion_depth--;
	    return $tree;
	} else {
	    warn "no saved values found for target $1 yet, so expand it now -- this is probably not what you want\n";
	    my $subtree = &expand_string( $self, $1 );

	    if ( defined $$tree{ "children"}  and  ref $$tree{ "children"}  and  scalar @{ $$tree{ "children"} } >= 1 ) {
		trace "children list exists so push subtree onto it\n", PARENS | EXPAND;
		push @{ $$tree{ "children"} }, $subtree;
	    } else {
		trace "children list does not exist so assign list consisting of subtree to it\n", PARENS | EXPAND;
		$$tree{ "children"} = [ $subtree ];
	    }

	    trace "returning tree with value " . $$tree{"value"} . "\n", PARENS | EXPAND;
	    push @{ $$self{saved_vals}->{ $target } }, $tree;
	    $recursion_depth--;
	    return $tree;
	}
    }
    
    my $total = $$self{totals}->{$target};
    if ( not defined $total ) {
	trace "token $target is undefined so treat it as literal\n", EXPAND;
	# we return a leaf node with no children.  and because it's literal we don't need
	# to stash its expansion in $$self{saved_vals}.
	$$tree{ "is_leaf" } = 1;
	$recursion_depth--;
	return $tree;
    }

    if ($total == 0) {
	$val = $$self{lits}->{$target};
    } else {
	my @itemlits = split (/ /, $$self{lits}->{$target});
	my @itembiases = split (/ /, $$self{biases}->{$target});
	my $random = int (rand ($total));
	for (my $i = 0; $i < @itembiases; $i++) {
	    if ($random < $itembiases[$i]) {
		$val = $itemlits[$i];
		last;
	    }
	}
    }

    # $vv used to be where we built up the string over successive recursive calls to self
    # now it is used only for debugging
    # we may get rid of it entirely after &debug_print_tree() is finished
    my $vv = "";
    my $loopcount = 0;
    my $subtree;
    while ($val =~ m/^([^\+\-]*)([\+\-])(.*)/) {
	$loopcount++;
	my $left = $1; 
	my $sign = $2; 
	$val = $3;
	trace "parsing catenation of 2+ symbols... "
		. "\$left = $left, \$sign = $sign, \$val = $val, \$loopcount = $loopcount\n"
		. "\$vv == $vv\n", EXPAND;
	$subtree = &expand_string ( $self, $left);
	if ( $sign eq '-' ) {
	    $$subtree{ "catenator" } = ' ';
	}
	$vv .= &stringify( $subtree ) . $$subtree{"catenator"};

	if ( defined $$tree{ "children"}  and  ref $$tree{ "children"}  and  scalar @{ $$tree{ "children"} } >= 1 ) {
	    trace "children list exists so push subtree onto it\n", EXPAND;
	    push @{ $$tree{ "children"} }, $subtree;
	} else {
	    trace "children list does not exist so assign to it\n", EXPAND;
	    $$tree{ "children"} = [ $subtree ];
	}
	trace &stringify( $tree ) . "\n", EXPAND;
    }

    trace "after check for catenation... remaining string is either rightmost element of catenation" 
		. "or a solitary non-catenated item\n", EXPAND;

    # now $val is either the residual rightmost element of a +/- catenation sequence, or 
    # the sole element chosen from a sequence of options (if there were no + or - so we
    # skipped the above while loop)
    $subtree = &expand_string ($self, $val);

    if ( defined $$tree{ "children"}  and  ref $$tree{ "children"}  and  scalar @{ $$tree{ "children"} } >= 1 ) {
	trace "children list exists so push subtree onto it\n", EXPAND;
	push @{ $$tree{ "children"} }, $subtree;
    } else {
	trace "children list does not exist so assign to it\n", EXPAND;
	$$tree{ "children"} = [ $subtree ];
    }
    
    push @{ $$self{saved_vals}->{ $target } }, $tree;
    $recursion_depth--;
    if ( $debug & EXPAND ) {
	# don't waste time on stringify call if we're not in debug mode
	trace "about to return; stringification of tree: " .  &stringify($subtree) . "\n", EXPAND;
    }
    return $tree;
}


# stringify() doesn't need a copy of $self; it doesn't use any
# instance variables, only its argument and the package variable
# $debug

my $stringify_recursion = 0;

sub stringify {
    my $treeref = shift;
    my $literal = "";
    trace "&stringify() starting at recursion depth: " . ++$stringify_recursion . "\n", DETAILS;

    if ( $debug ) {
	trace "\tworking on the tree whose root is this node:\n", DETAILS;
	foreach ( keys %$treeref ) {
	    trace "\t\tkey: [" . $_ . "]\tvalue: [" . $$treeref{ $_ } . "]\n", DETAILS;
	}
    }

    if ( not defined $$treeref{"value"} ) {
	die "internal error: node has no value defined";
    }

    if ( not defined $$treeref{"catenator"} ) {
	die "internal error: node has no catenator defined";
    }

    if ( $$treeref{"is_leaf"} ) {
	trace "\tleaf node\n", DETAILS;
	$literal .= $$treeref{"value"} . $$treeref{"catenator"};
    } else {
	if ( not defined $$treeref{"children"} ) {
	    die "internal error: this should be a non-leaf node but its children array is undefined";
	}
	my $n = 0;
	trace "\nchildren of this node :" . scalar @{ $$treeref{"children"} } . "\n", DETAILS;
	foreach (  @{ $$treeref{"children"} } ) {
	    trace "\tchild #" . ++$n . " - " . $$_{"value"} . "\n", DETAILS;
	    $literal .= &stringify( $_ );
	}
	$literal .= $$treeref{"catenator"};
    }

    &debug_print_tree( $treeref )	if $debug & DETAILS;
    --$stringify_recursion;
    trace "\t&stringify is returning [$literal]\n", DETAILS;
    return $literal;
}


sub debug_print_tree {
    # it's the caller's responsibility to make sure we're in the right debug mode
    # before calling us.
    my $tree = shift;
    my $tree_diagram = "";

    $tree_diagram = make_tree_diagram( $tree );
    trace $tree_diagram, 255;
}

my $make_tree_diagram_recursion = 0;

sub make_tree_diagram {
    ++$make_tree_diagram_recursion;
    my $tree = shift;

    my $pfx = "  " x ($make_tree_diagram_recursion - 1);
    my $val = defined $$tree{value}
	? ( $$tree{value} ne '' 
	    ? $$tree{value}  
	    : "(empty string)" )
	: "***undefined***";
    my $diagram = $pfx . $val . "\n";
    if ( $$tree{is_leaf} ) {
	--$make_tree_diagram_recursion;
	return $diagram;
    } else {
	if ( not defined $$tree{children} ) {
	    warn "inconsistent node, supposedly nonleaf but has no children\n";
	    $diagram .= "**** inconsistent node, supposedly nonleaf but has no children ****\n";
	    foreach ( keys %$tree ) {
		$diagram .= "***\t$_\t$$tree{$_}\n";
	    }
	} else {
	    foreach ( @{ $$tree{children} } ) {
		$diagram .= make_tree_diagram( $_ );
	    }
	}
	--$make_tree_diagram_recursion;
	return $diagram;
    }
}


###TODO: fix this so it can handle regex backreferences like \1 \2 \3 in the
### search text, the $match value and also $

# tried doing /eeg (recursive execute) but when a match occurs
# where the match regex text has a \1 matching a parenthetical expr earlier
# in it
# the main regex, I get this error:

# Use of uninitialized value $replace in substitution iterator at . line 56.

# but if I have /eg or /g, then I just get literal $1 in the substituted
# text, which is not good

# OTOH with /eeg it can handle $1 backreferences in the substitution text fine

#... now I find that I sometimes get that error even when there are no
# parentheses or backreferences at all, only a literal replacement text.

=pod old code:
sub substitute {
    my $self = shift;
    my $term = shift;
    for (my $i = 0; $i < @{ $$self{matches} }; $i++) {
	#$term =~ s/$matches[$i]/$subs[$i]/eeg;
	#$term =~ s/$matches[$i]/$subs[$i]/eg;
	my $search = @{ $$self{matches} }[ $i ];
	my $replace = @{ $$self{subs} }[ $i ];
	trace "substitute() about to replace /$search/ with /$replace/\n", GENERAL;
	$term =~ s/$search/$replace/eg;
	#$term =~ s/$search/\$replace/eeg;
	#$term =~ s/$search/$replace/eeg;
    }
    
    # fix slash-escaped characters
    $term =~ s/\\t/\t/g;
    $term =~ s/\\n/\n/g;
    $term =~ s/\\s/ /g;
    
    $term =~ s/___plus___/\+/g;
    $term =~ s/___minus___/\-/g;
    $term =~ s/___colon___/:/g;
    $term =~ s/___semicolon___/;/g;
    $term =~ s/___leftparen___/\(/g;
    $term =~ s/___rightparen___/\)/g;
    $term =~ s/___octothorpe___/\#/g;
    
    $term =~ s/\\(.)/$1/g;

    return $term;
}
=cut

# Fix by William Annis based on code by Kent Fredric posted on
# Stack Overflow
# http://stackoverflow.com/questions/392643/how-to-use-a-variable-in-the-replacement-side-of-the-perl-substitution-operator

sub repl {
   my $find = shift;
   my $replace = shift;
   my $var = shift;

   # Capture first
   my @items = ( $var =~ $find );
   $var =~ s/$find/$replace/;
   for( reverse 0 .. $#items ){
       my $n = $_ + 1;
       #  Many More Rules can go here, ie: \g matchers  and \{ }
       $var =~ s/\\$n/${items[$_]}/g ;
       $var =~ s/\$$n/${items[$_]}/g ;
   }
   return $var;
}

sub substitute {
   my $self = shift;
   my $term = shift;
   for (my $i = 0; $i < @{ $$self{matches} }; $i++) {
       #$term =~ s/$matches[$i]/$subs[$i]/eeg;
       #$term =~ s/$matches[$i]/$subs[$i]/eg;
       my $search = @{ $$self{matches} }[ $i ];
       my $replace = @{ $$self{subs} }[ $i ];
       trace "substitute() about to replace /$search/ with /$replace/\n", GENERAL;
       $term = repl($search, $replace, $term);
   }

   # fix slash-escaped characters
   $term =~ s/\\t/\t/g;
   $term =~ s/\\n/\n/g;
   $term =~ s/\\s/ /g;

   $term =~ s/___plus___/\+/g;
   $term =~ s/___minus___/\-/g;
   $term =~ s/___colon___/:/g;
   $term =~ s/___semicolon___/;/g;
   $term =~ s/___leftparen___/\(/g;
   $term =~ s/___rightparen___/\)/g;
   $term =~ s/___octothorpe___/\#/g;

   $term =~ s/\\(.)/$1/g;

   return $term;
}



my $default_mutation_pct = 1;  ###TODO put this in a config file or something
 
sub mutate {
    my $self = shift;
    my $mutation_pct = shift;
    my $name = shift;
    if ( not defined $mutation_pct ) {
	$mutation_pct = $default_mutation_pct;
    } elsif ( $mutation_pct < 0  or  $mutation_pct > 100 ) {
	warn "invalid mutation percentage $mutation_pct (must be 0 <= n <= 100), using default $default_mutation_pct\n";
	$mutation_pct = $default_mutation_pct;
    }

    my $mutated_self = {};
    if ( $debug & MUTATE ) {
	trace "before deep_copy: self's saved_vals count: "  . &count_struct_items( $$self{saved_vals}, 0 ) . "\n", MUTATE; 
    }

    foreach ( keys %$self ) {
	if ( ref $$self{$_} ) {
	    if ( $_ ne 'main_tree' and $_ ne 'saved_vals' ) {
		trace "mutate() deep-copying value of key $_ \n", MUTATE;
		$$mutated_self{ $_ }  =  &deep_copy( $$self{$_} );
	    }
	# don't copy the sequence because it will need to be recreated after we mutate
	# the tree it's derived from
	} elsif ( $_ ne 'sequence' ) {
	    trace "mutate() shallow-copying value of key $_ \n", MUTATE;
	    $$mutated_self{ $_ } = $$self{ $_ };
	}
    }
    $$mutated_self{name} = defined $name ? $name : "(unnamed mutation of " . $$self{name} . ")";

    trace "new language name: $$mutated_self{name}\n", MUTATE;
    bless $mutated_self, ref($self);

    trace "mutate() starting mutate_tree at " . localtime() . " with old tree " .  $$self{main_tree}  . "\n", MUTATE;
    $$mutated_self{main_tree} = &mutate_tree( $mutated_self, $$self{main_tree}, $mutation_pct );

    if ( $debug & MUTATE ) {
	trace "mutate() finished mutate_tree at " . localtime() . "\nold tree: \n", MUTATE;
	&debug_print_tree( $$self{main_tree} );
	trace "new tree: " . $$mutated_self{main_tree} . "\n", MUTATE;
	&debug_print_tree( $$mutated_self{main_tree} );
#	trace "saved_vals hash now looks like this:\n" . &trace_struct( $$mutated_self{saved_vals} ), MUTATE;
	trace "after mutate: self's saved_vals count: "  . &count_struct_items( $$self{saved_vals}, 0 ) . "\t mutated_self's saved_vals count: " .  &count_struct_items( $$mutated_self{saved_vals}, 0 )  . "\n", MUTATE; 
    }

    return $mutated_self;
}



# memory issues: memory growth with lots of mutated languages is less than before, but could
# probably be improved by not saving so much in saved_vals hash.  200 mutations of the initial
# lang = 103.2MB  (was 1.6MB just after starting, creating one lang and 25 words of it)

# ...version 1.11: after fixes for mutate_tree to handle parenthesized tokens correctly, 200 mutations
# only takes up 55MB.  That's probably fine.

###TODO 	 I am sometimes getting warnings like this:

# 	Deep recursion on subroutine "SymbolGenerator::count_struct_items" at . line 408, <> line 7.

# -- more often with test-parens.boris than with metaphonology.boris.

my $mutate_recursion = 0;

sub mutate_tree {
    my $self = shift;
    my $tree = shift;
    my $mutation_pct = shift;
    if ( ++$mutate_recursion > 1000 ) {
	die "Apparently infinite recursion in mutate_tree()";
    }

    trace "mutate_tree: node value " . $$tree{"value"} . ", recursion depth $mutate_recursion\n", MUTATE;

    my $nodecopy = {};

    # if we're at a leaf node, return unaltered copy
    if ( $$tree{"is_leaf"} ) {
	foreach ( keys %$tree ) {
	    die "unexpected reference variable at key $_ of node with value $$tree{value}"	if ( ref $$tree{ $_ } );
	    $$nodecopy{ $_ } = $$tree{ $_ };
	}
	trace "leaf node so copy it unaltered.  returning $$nodecopy{value}\n", MUTATE;
	--$mutate_recursion;
	return $nodecopy;
    }

    # else we might return it as-is or we might recreate the tree from here on down using expand_string

    # if we have e.g. 10% mutation_pct then we have 10% chance of
    # mutating from the top node, but the probability increases at
    # each deeper level we go in the tree (measured by the recursion
    # depth of calls to mutate_tree()) - 19% at level 2, 27% at level
    # 3, and so on.
    my $probability_of_mutation = (1 - ( 1 - ( $mutation_pct / 100 )) ** $mutate_recursion);
    my $mutating = 0;
    if ( (rand) <= $probability_of_mutation ) {
	$mutating = 1;
    }
    trace "\$probability_of_mutation == $probability_of_mutation, \$mutating ==  $mutating \n", MUTATE;

    # FIRST check if we have parenthesized token.  We can't mutate it.
    if ( $$tree{value} =~  m/^\(([A-Za-z0-9_]+)\)$/ ) {

	my $token = $1;
	trace "mutate_tree() found parenthesized token $token\n", MUTATE | PARENS;
	# if we have a parenthesized token, then check to see if there is a value for
	# it in the saved_val hash.  if so, copy ref to said node.  if not, redo expand_string.
	if ( defined $$self{saved_vals}->{ $token } ) {
	    my @saved_vals = @{ $$self{saved_vals}->{ $token } };
	    my $match_found = 0;

	    # a parenthesized token should have exactly one child
	    my $child = $$tree{children}->[0];
	    die "missing child of node with parenthesized token"  if not defined $child;
	    foreach ( @saved_vals ) {
		my $saved_string = &stringify( $_ );
		my $child_string = &stringify( $child );
		if ( $saved_string eq $child_string ) {
		    my $subtree = $_;
		    trace "found matching node $saved_string in saved_vals for parenthesized token $$tree{value}\n", MUTATE | PARENS;
		    foreach ( keys %$tree ) {
			$$nodecopy{ $_ } = $$tree{ $_ } 	if not ref $$tree{ $_ };
		    }
		    $$subtree{catenator} = '';
		    $$nodecopy{children} = [ $subtree ];
		    $match_found = 1;
		    debug_print_tree( $nodecopy );
		    last;
		}
	    }

	    if ( not $match_found ) {
		my $r = int rand scalar @saved_vals;
		my $subtree = $saved_vals[ $r ];
		trace "no exact match found so reuse random expansion for parenthesized token\n", MUTATE | PARENS;
		# reset subtree's catenator in case it was last used in a context where space was required
		$$subtree{catenator} = '';
		$$nodecopy{children} = [ $subtree ];
		
		foreach ( keys %$tree ) {
		    $$nodecopy{ $_ } = $$tree{ $_ }  if not ref $$tree{ $_ };
		}
	    }
	} else {
	    $nodecopy = &expand_string( $self, $$tree{"value"} );
	    warn "mutate_tree() had to recreate node with parenthesized token $$tree{value} as no entries for it were found in saved_vals hash\n";
	    
	}
	--$mutate_recursion;
	return $nodecopy;

    } elsif ( $mutating ) {
	my $nodecopy = &expand_string( $self, $$tree{"value"} );
	$$nodecopy{catenator} = $$tree{catenator};
	if ( $debug & MUTATE ) { # don't waste time on stringify() if not debug mode
	    trace "mutate_tree() recreated node from here down, returning new node " . &stringify($nodecopy) . "\n", MUTATE;
	}
	--$mutate_recursion;
	return $nodecopy;

    } else { 	# not handling parenthesized token and not mutating this node; call self recursively on children,
		# decide then whether to mutate them or their
		# children...
	if ( not defined $$tree{"children"} ) {
	    die "internal error: " . $$tree{"value"} . " should be a non-leaf node but its children array is undefined";
	}

	trace "iterating over " . scalar @{ $$tree{"children"} } . " children and calling mutate_tree recursively on them\n", MUTATE;

	foreach ( keys %$tree ) {
	    $$nodecopy{ $_ } = $$tree{ $_ }  	unless ( ref ( $$tree{ $_ } ) );
	}
	my @new_children = ();
	foreach (  @{ $$tree{"children"} } ) {
	    my $subtree = &mutate_tree( $self, $_, $mutation_pct );
	    push @new_children, $subtree;
	}
	$$nodecopy{"children"} = \@new_children;
	push @{ $$self{saved_vals}->{ $$nodecopy{value} } }, $nodecopy;
#	push @{ $$self{saved_vals}->{ $target } }, $tree;

	if ( $debug & MUTATE ) {
	    my $new_expansion = &stringify( $nodecopy );
	    trace "mutate_tree() returning node with expansion $new_expansion\n", MUTATE;
	}
	--$mutate_recursion;
	return $nodecopy;
    }

    die "this point should not be reached! ";	# each branch of the if/else returns a value
}


# Remove the saved value, if any, for a single node of the tree.
sub remove_one_saved_val {
    my $self = shift;
    my $tree = shift;

    my $string_this_node = &stringify( $tree );

    my $match_found = 0;
    my $arr = $$self{saved_vals}->{  $$tree{value} };    
    if ( defined $arr ) {
	for ( my $n = 0; $n < (scalar @$arr) - 1; $n++ ) {
	    my $saved_val_string = &stringify( $$arr[$n] );
	    if ( $saved_val_string eq $string_this_node ) {
		trace "removing saved value $string_this_node for $$tree{value} \n", MUTATE | PARENS;
		splice ( @$arr, $n, 1 );
		++$match_found;
	    } else {
		###TODO prob remove
		trace "not a match for $$tree{value} expansion [$string_this_node]: [$saved_val_string]\n", 255;
	    }
	}
	if ( $match_found == 0 ) {
	    trace "saved_val array found for $$tree{value}, but no exact match found for this node's expansion $string_this_node\n", MUTATE|PARENS;
	}
    } else {
	trace "no saved value array found for $$tree{value}\n", MUTATE|PARENS;
    }

}


# Remove saved values for this node and all its descendants.
sub remove_saved_vals {
    my $self = shift;
    my $tree = shift;
    
    remove_one_saved_val( $self, $tree );

    if ( defined $$tree{children} ) {
	foreach ( @{ $$tree{children} } ) {
	    if ( not $_->{is_leaf} ) {
		remove_saved_vals( $self, $_ );
	    }
	}
    }
}



sub deep_copy {
    my $this = shift;

    if (not ref $this) {
	trace "deep_copy of literal value [" . $this . "]\n", DEEP_COPY;
	$this;
    } elsif (ref $this eq "ARRAY") {
	trace "deep-copy of array\n", DEEP_COPY;
	[map deep_copy($_), @$this];
    } elsif (ref $this eq "HASH"  or  ref $this eq "SymbolGenerator") {
	trace "deep-copy of hash\n", DEEP_COPY;
	+{map { $_ => deep_copy($this->{$_}) } keys %$this};
    } else {
	die "what type is $this ? ref returned: ["  . ref( $this ) . "]";
    }
}

my $trace_struct_recursion = 0;
my $trace_struct_item_count = 0;

sub trace_struct {
    my $struct = shift;
    $trace_struct_item_count = 0;
    my $msg = &trace_struct_real( $struct ) . "total items: $trace_struct_item_count\t";
    return $msg;
}

sub trace_struct_real {
    my $this = shift;
    ++$trace_struct_recursion;
    ++$trace_struct_item_count;
    my $pfx = "  " x ($trace_struct_recursion - 1);
    if (not ref $this) {
	--$trace_struct_recursion;
	return $pfx . $this . "\n"; 
    } elsif (ref $this eq "ARRAY") {
	my $ret = $pfx . "array:\n";
	map { $ret .= &trace_struct_real( $_ ) } @$this;
	--$trace_struct_recursion;
	return $ret;
    } elsif (ref $this eq "HASH"  or  ref $this eq "SymbolGenerator") {
	my $ret = $pfx . "hash:\n";
	foreach ( keys %$this ) {
	    $ret .= $pfx . $_ . " == ";
	    if ( ref $$this{$_} ) {
		$ret .= "\n" . &trace_struct_real( $$this{$_} );
	    } else {
		$ret .= $$this{$_} . "\n";
	    }
	}
	--$trace_struct_recursion;
	return $ret;
    } else {
	die "what type is $this ? ref returned: ["  . ref( $this ) . "]";
    }
}



sub count_struct_items {
    my $this = shift;
    my $count = shift;
    if ( not defined $count ) {
	$count = 0;
    }
    my $pfx = "  " x ($trace_struct_recursion - 1);
    if (not ref $this) {
	return $count + 1;
    } elsif (ref $this eq "ARRAY") {
	my $array_count = 0;
	foreach ( @$this ) {
	    $array_count += &count_struct_items( $_, 0 );
	}
	# count the array itself as an item, also each of its members
	return $count + 1 + $array_count;
    } elsif (ref $this eq "HASH"  or  ref $this eq "SymbolGenerator") {
	my $hash_count = 0;
	foreach ( keys %$this ) {
	    $hash_count += &count_struct_items( $$this{$_}, 0 );
	}
	# count the hash itself as an item, also each of its members
	return $count + 1 + $hash_count;
    } else {
	die "what type is $this ? ref returned: ["  . ref( $this ) . "]";
    }

}

return 1;
