in reply to unglue words joined together by juncture rules
you will be installing rules of the form:t|d => dd
The solution is in a subroutine (below) called functional_unglue and is called like this:dd => t|d
Here is a template you should use:my @results = functional_unglue( @arguments );
If you think about it, that is the only way a solution makes sense; You want your models to be extensible and you want to be able to change your minds later, and so it must be up to you to install new rules.use strict; use Data::Dumper; my @results = functional_unglue ( target => TARGET_STRING, lexicon => { morpheme_1 => undef, morpheme_2 => undef, ... morpheme_n => undef, }, pre_images => { pre_image_function_1 => sub { CODE }, pre_image_function_2 => sub { CODE }, ... pre_image_function_n => sub { CODE }, } ); @results = map{join('-',@$_)}@results; print Dumper([@results]);
So, for example, use it like this:sub functional_unglue { my %x = @_; my $x = $x{target}; my $l = $x{lexicon}; my $f = $x{pre_images}; my @q = ([$x]); my @r; while(@q){ my $t = shift @q; my @w = @{$t}; my $w = pop @w; for(keys %$f){ if(my @y = $f->{$_}->($w,$l,$f)){ result: for my $i(0..$#y){ my $n = $#{$y[$i]} == 0 ? 0 : $#{$y[$i]} - 1; for my $j(0..$n){ next result unless exists $l->{$y[$i][$j]} } push @q, [@w,@{$y[$i]}] } } } if(exists $l->{$t->[$#{$t}]}){ push @r, $t } } @r }
Produces:use strict; use Data::Dumper; my @z = functional_unglue ( target => 'cowboycaddog', lexicon => { cowboy => undef, cow => undef, boy => undef, cat => undef, dog => undef, }, pre_images => { concat => sub { ### this turns one long word into two my($x,$l) = @_; my @x; for(keys %$l){ my $s = $_; if($x =~ /^\Q$s\E/){ push @x, [$s,substr($x,length($s))]; } } @x }, simple => sub { ### this turns "WORD1ddWord2" into [WORD1t, dWORD2] my $x = shift; my @x; my %x = ( dd => [ [qw(t d)], ], ); ### it can handle arbitrary substitions, ### not just dd => t|d, but XY => A|B ### for any strings X,Y,A,B for(keys %x){ my $s = $_; if($x =~ /\Q$s\E/){ for my $i(0..$#{$x{$s}}){ my @y = ( $`.$x{$s}->[$i][0], $x{$s}->[$i][1].$' ); push @x, length($y[1]) ? [@y] : [$y[0]] } } } @x }, } ); @z = map{join('-',@$_)}@z; print Dumper([@z]);
Or you could try this:$VAR1 = [ 'cowboy-cat-dog', 'cow-boy-cat-dog' ];
Produces:my @z = functional_unglue ( target => 'zivAzvaH', lexicon => { ziva => 1, azvas => 1, zivA => 1, Azvas => 1, }, pre_images => { simple => sub { my $x = shift; my @x; my %x = ( A => [[qw(a a)], [qw(A a)], [qw(a A)], ['A',''], ['','A'], [qw(A A)]], H => [['s','']], ); for(keys %x){ my $s = $_; if($x =~ /\Q$s\E/){ for my $i(0..$#{$x{$s}}){ my @y = ($`.$x{$s}->[$i][0], $x{$s}->[$i][1].$'); push @x, length($y[1]) ? [@y] : [$y[0]] } } } @x }, } ); @z = map{join('-',@$_)}@z; print Dumper([@z]);
Now why this works and how this works is your job to figure out. (It sounds like you need to get some more advanced books on Perl, try Object Oriented Perl and Higher Order Perl)$VAR1 = [ 'ziva-azvas', 'zivA-azvas', 'ziva-Azvas', 'zivA-Azvas' ];
Produces:use strict; use Data::Dumper; my @z = functional_unglue ( target => 'boycowboycaddogdodogcaddyeyescatdogboycaddyecatt', lexicon => { cowboy => undef, caddy => undef, boytoy => undef, co => undef, cow => undef, boy => undef, cat => undef, cats => undef, do => undef, dog => undef, dye => undef, eddie => undef, eyes => undef, kowtow => undef, toy => undef, tow => undef, tyco => undef, yes => undef, ye => undef, }, pre_images => { concat => sub { my($x,$l) = @_; my @x; for(keys %$l){ my $s = $_; if($x =~ /^\Q$s\E/){ push @x, [$s,substr($x,length($s))]; } } @x }, simple => sub { my $x = shift; my @x; my %x = ( dd => [[qw(t d)]], db => [['d','']], td => [[qw(d t)]], td => [[qw(d d)]], tt => [['t','']], ye => [[qw(eye eye)]], caddye => [[qw(cats eyes)]], ); for(keys %x){ my $s = $_; if($x =~ /\Q$s\E/){ for my $i(0..$#{$x{$s}}){ my @y = ( $`.$x{$s}->[$i][0], $x{$s}->[$i][1].$' ); push @x, length($y[1]) ? [@y] : [$y[0]] } } } @x }, } ); @z = map{join('-',@$_)}@z; print Dumper([@z]);
|
|---|