#!/usr/bin/perl -w
# Everyword reads rules and patterns for words from the standard input
# and generates every possible word described by them.
# Exclusions allow illegal combinations to be suppressed.
# A rule looks like "name: token ..."
# An exclusion declaration looks like "/ token ..."
# A pattern looks like "token ..."

# by John Cowan, 2003
# http://listserv.brown.edu/archives/cgi-bin/wa?A2=ind0301B&L=CONLANG&P=R36719&I=-3 

# revised a bit by Jim Henry, 2006
# to fix treatment of exclusions so one can use regexes instead of 
# literal strings.

# Also fix so one can specify exclusions on multiple lines instead
# of all on one line

# and fix defect where '/' was always stored as an exclusion

# TO DO: accept some command line argument to say whether exclusions
# are literals or regexes?  Or more likely, this should be specified
# in format file.  E.g. '/' introduces a literal string exclusion and
# '!' introduces a regex exclusion...?

# TO DO: Revise generation code so input file is not senstive to order of sections.
# I.e. could put C V C patterns before or after the rules and exclusions.

use strict;

my %rules;              # maps rule names to token arrays
my @exclusions;         # strings to be excluded
my @tokens;             # tokens of a pattern to generate

my $debug = 1;

while (<>) 
{
        chomp;
        s/#.*$//;       # Kill comments
	s/\s+$//;       # trailing whitespace
        next if /^$/;   # and blank lines
        @tokens = split;
        my $name = $tokens[0];
        if ( $name =~ m/:$/ ) {            # rule names end in ":"
                $name =~ s/:$//;
                shift @tokens;
                $rules{$name} = [@tokens];      # store rule in %rules
		# recursively expand token list into new rule set
#		$rules{ $name } = \();
#		$rules{ $name } = [()];
#		&expand( $rules{$name}, \@tokens );
#		print "done with expand of line, now: ", join( ",", @{ $rules{$name} } ), "\n";
	} 
	elsif ($name eq "/") {
	    shift @tokens;                      # throw away the '/' character
	    push @exclusions, @tokens;          # add this line's exclusions to list
	}
        else {
	    # For every line containing a pattern, generate words for that pattern.
	    generate();
	}
}

##################################################

sub expand {
    print join( "\t", @_ ), "\n";
    my $newruleref = $_[0];
    my @tokens = @{ $_[1] };
    print join( ",", @tokens), "\n";
    foreach ( $_[0] ) {
	if ( exists( $rules{ $_ } ) ) {
	    expand( $newruleref, $rules{ $_ } );
	} else {
	    push @{ $newruleref }, $_;
	}
    }
    
}

sub generate {
        # Replace each token with an array of alternatives
        for (my $i = 0; $i < @tokens; $i++) {
                my $token = $tokens[$i];
                if (exists($rules{$token})) {
		    $tokens[$i] = $rules{$token};
		}
                else {                  # no rule? create singleton array
		    $tokens[$i] = [$token];
		}
	    }

        # Set up odometers to step through all cases
        my @odometer = (0) x scalar(@tokens);
        my @limits = map(scalar @{$_}, @tokens);
        while (1) {
                # Assemble a word based on the current odometer setting
                my $word = "";
                for (my $i = 0; $i < @tokens; $i++) {
                        $word .= $tokens[$i]->[$odometer[$i]];
                        }

                # Print unless excluded

# old code: exclude if literal substring matches
#                print $word, "\n"
#                        unless grep(index($word, $_) != -1, @exclusions);

# new code: exclude if regex matches
                print $word, "\n"
                        unless grep( $word =~ m/$_/, @exclusions);

                # Increment odometer
                for (my $i = @tokens - 1; $i >= 0; $i--) {
                        last unless ++$odometer[$i] >= $limits[$i];
                        $odometer[$i] = 0;
                        }
                # Check for termination (odometer is all zeros)
                last unless grep($_ != 0, @odometer);
                }
        }

=pod

Test rules:

/ wu yi                                 # suppress wu and yi
C: p t k s h m n q l w y                # basic consonants
V: i e u a                              # basic vowels
VV: ii ei ui ai ee uu ia ea ua aa       # diphthongs
N: n l                                  # liquids
T: p t k                                # stops
TT: pp tt kk                            # geminated stops

C V C V
V C V
C VV
VV
C V TT V
V TT V
C V N T V
V N T V
C VV C V
VV C V


Content-Type: text/plain; charset=us-ascii
Message-ID:  <[log in to unmask]>
Date:         Mon, 13 Jan 2003 15:40:45 -0500
Reply-To:     Constructed Languages List <[log in to unmask]>
Sender:       Constructed Languages List <[log in to unmask]>
From:         John Cowan <[log in to unmask]>
Subject:      Re: Language-generating software (was Re: Replies to my
In-Reply-To:  <[log in to unmask]> from
              "Jan van Steenbergen" at Jan 13,
              2003 05:16:33 PM

http://listserv.brown.edu/archives/cgi-bin/wa?A2=ind0301B&L=CONLANG&P=R36719&I=-3 

=cut
