#!/usr/bin/perl use strict; use warnings; sub int_1 ( $ ) { my $template = shift; # split template on the characters to switch in and out my @chunks = split /(\s*\S+)_DEL/, $template; # make sure the odd elements are the switched chars. unshift @chunks, '' if $template =~ /^\S+_DEL\b/; # check for boundary conditions my $n_toggles = int( @chunks / 2 ); if ( !$n_toggles ) { return $template; } elsif ( $n_toggles > 10 ) { die "max of 10 toggles (found $n_toggles) in '$template'"; } my @rv; my $n_bits = ( 1 << $n_toggles ) - 1; foreach my $bits ( 0 .. $n_bits ) { my $s = ''; for ( my $i = 0; $i < @chunks; $i += 2 ) { $s .= $chunks[$i]; $s .= $chunks[$i+1] if $bits & 1; $bits >>= 1; } push @rv, $s; } return @rv; } my $MAX_BIT = 0x10000; my $ALL_BITS = $MAX_BIT - 1; sub int_2 ( $ ) { my $template = shift; # split template on the phonemes to switch in and out my @chunks = split /(\s*\S+_DEL)/, $template; # mark which are toggles my @toggles; my $next_bit = 1; foreach ( @chunks ) { if ( s/_DEL$// ) { push @toggles, $next_bit; $next_bit <<= 1; die "too many toggles!" if $next_bit > $MAX_BIT; } else { push @toggles, $ALL_BITS; } } # anything to do? return $template if $next_bit == 1; my @rv; foreach my $bits ( 1 .. $next_bit ) { my $s = ''; for my $i ( 0 .. $#chunks ) { $s .= $chunks[$i] if $bits & $toggles[$i]; } push @rv, $s; } return @rv; } sub int_3 ( $ ) { my $template = shift; # split template on the phonemes to switch in and out my @chunks = split /(\s*\S+_DEL)/, $template; # mark which are toggles my $next_bit = 1; foreach ( @chunks ) { if ( s/_DEL$// ) { $_ = [ $next_bit, $_ ]; $next_bit <<= 1; die "too many toggles!" if $next_bit > $MAX_BIT; } else { $_ = [ $ALL_BITS, $_ ]; } } # anything to do? return $template if $next_bit == 1; my @rv; foreach my $bits ( 1 .. $next_bit ) { push @rv, join '', map { $_->[0] & $bits ? $_->[1] : '' } @chunks; } return @rv; } sub re_1 ( $ ) { my @templates = @_; # yet another variant on the repeat-and-double-until-done while # loop. while (1) { my @new_templates; foreach ( @templates ) { if ( m/(.*?)(\s*\S)_DEL(.*)/ ) { push @new_templates, $1.$2.$3, $1.$3; } } return @templates unless @new_templates; @templates = @new_templates; } } sub re_2 ( $ ) { my $template = shift; # make sure it ends with exactly one newline $template =~ s/\s*$/\n/; # make all the breaks unique: { my $i = 0; $template =~ s/(_DEL\b)/$1 . ++$i/ge; } # for each break, replace template with two copies of itself, one # with the replacement, one without: while ( $template =~ /((\s*\S+)_DEL(\d+)\b)/ ) { my ( $break, $rep ) = ( $1, $2 ); my ( $with, $without ) = ( $template ) x 2; $with =~ s/\Q$break/$rep/g; $without =~ s/\Q$break//g; $template = $with . $without; } # break back into individual chunks. (trailing \n should be # handled gracefully.) return split /\n/, $template; } sub re_3 ( $ ) { my $template = shift; # basic idea is to find every line with a blah_DEL in it, then # replace that line with two lines: one with "blah" inserted, one # without. Since there can be multiple _DEL directives per line, # we need to iterate until they're all gone. do {} while $template =~ s{ ^ (.*?) (\s* \S+)_DEL\b (.*) $ } {$1$3\n$1$2$3}xmg; # break back into individual chunks. (trailing \n should be # handled gracefully.) return split /\n/, $template; } while ( my $template = ) { $template =~ s/\s+$//; print "=== template:\n$template\n"; print "--- int_1:\n", map "$_\n", int_1 $template; print "--- int_2:\n", map "$_\n", int_2 $template; print "--- int_3:\n", map "$_\n", int_3 $template; print "--- re_1:\n", map "$_\n", re_1 $template; print "--- re_2:\n", map "$_\n", re_2 $template; print "--- re_3:\n", map "$_\n", re_3 $template; } exit 0; __DATA__ ch i_DEL ts a k u_DEL s i_DEL #