#! /usr/bin/perl -w use strict; use Getopt::Std; getopts('v'); use vars qw($opt_v); # Read the input into an array of hashes from normalized to denormalized # words my @denorm_word; print "Scanning input\n" if $opt_v; while (<>) { chomp; $denorm_word[ length($_) ]{ join '', sort split //, $_ } = $_; } my $longest_seq = []; my $longest_chrs = []; # Find what the shortest word length is so we can break out of the # reasoning early. my $shortest = 0; $shortest++ until $denorm_word[$shortest]; # Let's walk unvisited words by length first, and do depth first # searches of sequences. Clearing as we go of course. LEN: foreach my $i (reverse 0...$#denorm_word) { if (not $denorm_word[$i]) { # No words next; } if ($i-$shortest < @$longest_seq) { print "Length $i to shortest $shortest can't improve, aborting\n" if $opt_v; last LEN; } print "Working on length $i\n" if $opt_v; foreach my $word (values %{$denorm_word[$i]}) { my ($seq, $chrs) = find_seq_chrs($word); if (@$longest_seq < @$seq) { $longest_seq = $seq; $longest_chrs = $chrs; print " New longest:\n", map " $_\n", @$seq if $opt_v; } if ($i-$shortest < @$longest_seq) { print "Length $i to shortest $shortest can't improve, aborting\n" if $opt_v; last LEN; } } } # And dump it. foreach my $i (0..$#$longest_chrs) { print "$longest_seq->[$i] + $longest_chrs->[$i] =\n"; } print uc ($longest_seq->[-1]) . " (length " . (scalar @$longest_seq) . ")\n"; # Takes sub find_seq_chrs { my $word = shift || return; my $best_seq = [$word]; my $best_chrs = []; my ($out_chr, @end_list, @begin_list) = sort split //, $word; my $len = scalar @end_list; while (@end_list) { if (my $subword = delete($denorm_word[$len]{ join '', @begin_list, @end_list })) { my ($seq, $chrs) = find_seq_chrs($subword); if (@$best_seq <= @$seq) { push @$seq, $word; push @$chrs, $out_chr; $best_seq = $seq; $best_chrs = $chrs; } } push @begin_list, $out_chr; $out_chr = shift @end_list; } return ($best_seq, $best_chrs); } __END__ #### bash-2.01$ time perl add_chain.pl /etc/dictionary at + r = tar + t = tart + s = start + i = traits + n = transit + e = straiten + o = stationer + i = iterations + c = recitations + n = interactions + d = INDOCTRINATES (length 12) real 0m6.708s user 0m6.090s sys 0m0.180s