use strict; use warnings; use constant LEARN => 1; # Set true to learn by doing open WORDS, '<', '2of12.txt'; my @words = ; close WORDS; chomp @words; @words = map {[$_, asSMS ($_)]} @words; my $lookup = bless {lookup => {}, nextWordNIndex => 0}; $lookup->addWord ($_) for @words; $lookup->training (); $lookup->{scan} = $lookup->{lookup}; my %dict = map {($_->[0] => $_->[1])} @words; my $strokes = 0; my $characters = 0; my $txtWords = 0; while () { chomp; my @dataWords = split; for my $word (@dataWords) { $word =~ tr/a-z//cd; next if ! exists $dict{$word}; ++$txtWords; my @digits = @{asSMS ($word)}; my @pressed; $characters += @digits; while (1) { my $digit = (shift @digits) || '-'; my $result = $lookup->addDigit ($digit); ++$strokes; push @pressed, $digit; if ($result eq $word) { $lookup->addDigit ('+'); ++$characters; ++$strokes; push @pressed, '+'; last; } } print "@pressed ($word)\n"; } } print "$strokes strokes to send a $characters character message containing $txtWords words.\n"; sub addWord { my ($self, $word) = @_; my @digits = @{$word->[1]}; my $text = $word->[0]; my $scan = $self->{lookup}; for my $digit (@digits) { if (defined $scan->{$digit}) { push @{$scan->{$digit}{words}}, [$text, 0]; } else { $scan->{$digit} = {words => [[$text, 0]]}; } $scan = $scan->{$digit}; } } sub addDigit { my ($self, $digit) = @_; if ($digit eq '+') { $self->train ($self->{lastGuess}[0], 1) if LEARN; $self->doneWord (); return $self->{lastGuess}[0]; } elsif ($digit eq '-') { if (! exists $self->{scan}{wordsN}) { @{$self->{scan}{wordsN}} = grep {$self->{wordlen} == length $_->[0]} @{$self->{scan}{words}}; } die "Unknown word @{$self->{digits}}" if $self->{nextWordNIndex} == @{$self->{scan}{wordsN}}; # Skip words that have been tried already my $wordsN = $self->{scan}{wordsN}; ++$self->{nextWordNIndex} while exists $self->{tries}{$wordsN->[$self->{nextWordNIndex}][0]}; $self->{lastGuess} = $wordsN->[$self->{nextWordNIndex}] } else { die "Unknown word @{$self->{digits}} $digit" if ! exists $self->{scan}{$digit}; push @{$self->{digits}}, $digit; $self->{scan} = $self->{scan}{$digit}; ++$self->{wordlen}; my $index = 0; my $words = $self->{scan}{words}; # Skip words that have been tried already ++$index while exists $self->{tries}{$words->[$index][0]}; $self->{lastGuess} = $words->[$index] } $self->{tries}{$self->{lastGuess}[0]} = 1; return $self->{lastGuess}[0]; } sub train { my ($self, $word, $weight) = @_; my @digits = @{asSMS ($word)}; my $wordlen = length $word; $weight ||= 1; $weight += $wordlen / 10.0; # Sort the word lists by frequency my $scan = $self->{lookup}; for my $digit (@digits) { $scan = $scan->{$digit}; my @match = grep {$_->[0] eq $word} @{$scan->{words}}; last if ! @match; $_->[1] += $weight for @match; @{$scan->{words}} = sort {$b->[1] <=> $a->[1]} @{$scan->{words}}; } # Sort the wordN lists by frequency $scan = $self->{lookup}; $scan = $scan->{$_} for @digits; if (! exists $scan->{wordsN}) { @{$scan->{wordsN}} = grep {$wordlen == length $_->[0]} @{$scan->{words}}; } my @match = grep {$_->[0] eq $word} @{$scan->{wordsN}}; $_->[1] += $weight for @match; @{$scan->{wordsN}} = sort {$b->[1] <=> $a->[1]} @{$scan->{wordsN}}; } sub doneWord { my $self = shift; $self->{scan} = $self->{lookup}; $self->{digits} = (); $self->{wordlen} = 0; $self->{nextWordNIndex} = 0; $self->{tries} = (); } sub asSMS { my $word = shift; my @SMSWord; $word =~ tr /a-z//dc; for (split //, $word) { if (/[abc]/) { push @SMSWord,'2'; } elsif (/[def]/) { push @SMSWord,'3'; } elsif (/[ghi]/) { push @SMSWord,'4'; } elsif (/[jkl]/) { push @SMSWord,'5'; } elsif (/[mno]/) { push @SMSWord,'6'; } elsif (/[pqrs]/) { push @SMSWord,'7'; } elsif (/[tuv]/) { push @SMSWord,'8'; } elsif (/[wxyz]/) { push @SMSWord,'9'; } } return \@SMSWord; } sub training { my $self = shift; my %freqs = ( i => 1, an => 1, no => 1, so => 1, by => 1, at => 1, has => 1, far => 1, see => 1, key => 1, who => 1, but => 1, are => 1, each => 1, used => 1, with => 1, here => 1, only => 1, same => 1, been => 1, pure => 1, below => 1, might => 1, could => 1, value => 1, enter => 1, solve => 1, rules => 1, minus => 1, space => 1, pearl => 1, number => 1, winner => 1, fewest => 1, reduce => 1, secret => 1, amount => 1, chosen => 1, except => 1, resume => 1, return => 1, texting => 1, presses => 1, thought => 1, correct => 1, contain => 1, support => 1, fastest => 1, problem => 1, between => 1, present => 1, category => 1, thousand => 1, returned => 1, provided => 1, expected => 1, suggested => 1, incorrect => 1, lowercase => 1, technique => 1, challenge => 1, interface => 1, following => 1, algorithm => 1, submitted => 1, solutions => 1, implement => 1, suggestion => 1, dictionary => 1, authorized => 1, completion => 1, considered => 1, keystrokes => 1, predictive => 1, programming => 1, interesting => 1, punctuation => 1, information => 1, application => 1, if => 2, one => 2, and => 2, for => 2, that => 2, time => 2, text => 2, plus => 2, best => 2, digit => 2, being => 2, means => 2, digits => 2, entries => 2, instead => 2, spelled => 2, finished => 2, must => 3, words => 3, passed => 3, of => 4, it => 4, in => 4, word => 4, to => 5, all => 5, is => 7, will => 7, a => 8, be => 9, the => 12, ); if (! LEARN) { # Cheat by knowing the word frequencey up front $self->train ($_, $freqs{$_}) for keys %freqs; } } __DATA__ all predictive texting is a technique to reduce the number of key presses used to enter text with i thought it might be an interesting challenge to see who could implement the best algorithm here are the rules the problem text to solve is secret it will contain between and thousand words all words will be present in only authorized dictionary all words will be lowercase no punctuation except a space solutions must be pure pearl all entries must be submitted by one winner for each category below will be chosen fewest keystrokes fastest completion time all entries must support the same application programming interface to be considered digits will be passed in one at a time the expected return value will be the best suggestion for the amount of information provided so far if instead of a digit a plus is passed in it means that the word is finished being spelled and the correct word has been returned if instead of a digit a minus is passed in it means that the word is finished being spelled but the suggested word is incorrect digits will resume following a plus #### 844 strokes to send a 901 character message containing 170 words.