# Tag words (conlang 13c) with their part of speech and a weighted length:
# the sum of each word's phonemes, weighted by approximate time to pronounce.

use strict;
use warnings;

# Contribution of a single phoneme of each class to a word's weighted
# length (an approximation of the time needed to pronounce it).
my ( $fricative_weight, $nasal_weight, $liquid_weight ) = ( 0.25, 0.5, 0.375 );
my ( $plosive_weight, $vowel_weight ) = ( 0.1, 1 );

# Tallies filled in while the word list is read: occurrences of each
# consonant/vowel pattern and of each part-of-speech category.
my ( %patterncount, %categorycount );

# Output field separator: print inserts a tab between its list items.
$, = "\t";

# Return the weighted length of $word: the number of phonemes of each class
# multiplied by that class's weight, summed over all classes.
sub weighted_length {
    my ($word) = @_;

    # tr/// with an empty replacement list counts the matching characters
    # and leaves the string unchanged.
    my $fricatives = ( $word =~ tr/xGSZszfv// );
    my $nasals     = ( $word =~ tr/mnN// );
    my $liquids    = ( $word =~ tr/lrjw// );
    my $plosives   = ( $word =~ tr/ktpbdg// );
    my $vowels     = ( $word =~ tr/aeiouA// );

    return $vowels * $vowel_weight
        + $fricatives * $fricative_weight
        + $nasals * $nasal_weight
        + $liquids * $liquid_weight
        + $plosives * $plosive_weight;
}

# Return the consonant/vowel skeleton of $word: every consonant becomes "C"
# and every vowel "V"; any other character is left as it is.
sub cv_pattern {
    my ($word) = @_;

    ( my $pattern = $word ) =~ s/[xGSZszfvktpgdbmnNlrjw]/C/g;

    # NOTE(review): this class also accepts E, I, O and U, which the
    # weighting above does not count as vowels -- confirm which is intended.
    $pattern =~ s/[AEIOUaeiou]/V/g;

    return $pattern;
}

# Return the two-letter part-of-speech code for $word, or nothing (undef in
# scalar context) when no rule applies.  A final vowel marks a particle;
# otherwise the code is chosen by the classes of the first and last phoneme:
#
#                     final nasal   final liquid
#   initial plosive       qu            re
#   initial fricative     pr            su
sub categorize {
    my ($word) = @_;

    return "gp" if $word =~ m/[aeiouA]$/;               # grammatical particle
    return "qu" if $word =~ m/^[ktpbdg].*[mnN]$/;       # quality, quantity, or state
    return "pr" if $word =~ m/^[xGSZszfv].*[mnN]$/;     # process
    # Anchored at the start like the other rules; unanchored, any fricative
    # inside the word would claim plosive-initial words that belong to "re".
    return "su" if $word =~ m/^[xGSZszfv].*[lrwj]$/;    # substance
    return "re" if $word =~ m/^[ktpbdg].*[lrwj]$/;      # relationship
    return;
}

# Read one word per line; print word, pattern, weighted length and category
# as tab-separated fields, and tally patterns and categories on the way.
while ( my $word = <> ) {
    $word =~ s/\s+$//;
    next if $word eq '';    # nothing to tag on a blank line

    my $pattern = cv_pattern($word);
    $patterncount{$pattern}++;

    my $category = categorize($word);
    if ( !defined $category ) {
        print STDERR "no category for $word\n";
        $category = "??";    # explicit label instead of an undefined hash key
    }
    $categorycount{$category}++;

    # A single pre-joined string keeps the global field separator ($,) from
    # adding a stray tab in front of the newline.
    print join( "\t", $word, $pattern, weighted_length($word), $category ) . "\n";
}



# Write one "count<TAB>key" line to STDERR for every entry of the tally hash
# referenced by $count_for.  Keys are sorted so the report comes out in the
# same order on every run.
sub report_counts {
    my ($count_for) = @_;

    for my $key ( sort keys %{$count_for} ) {

        # One interpolated string, so the global field separator ($,) cannot
        # insert extra tabs between the count and the key.
        print STDERR "$count_for->{$key}\t$key\n";
    }
    return;
}

report_counts( \%patterncount );
report_counts( \%categorycount );
