Win8 Strawberry 5.8.9.5 (32) Sat 08/20/2022 10:53:36 C:\@Work\Perl\monks >perl use 5.008; # i think code should work with 5.6, but this is untested use strict; use warnings; use autodie; use List::MoreUtils qw(uniq); use Data::Dump qw(dd pp); # for debug # word-per-line dictionary. use constant DICTIONARY => 'C:/@Work/moby/mwords/354984si.ngl'; open my $fh, '<', DICTIONARY; chomp(my @dictionary = <$fh>); # remove all newlines close $fh; printf "%d words in dictionary \n", scalar @dictionary; my @templates = ('', ' ', qw( neocene abcdbab 0123101 &*?+*&* Mississippi abccbccbddb 0000 )); printf "%d scan templates \n", scalar @templates; my $start = time; my $template_scans; TEMPLATE: for my $template (@templates) { my $rx_template = templ2regex($template); $rx_template = qr{ \A $rx_template \z }xms; # add appropriate bounds # print "template '$template' $rx_template: \n"; ; # for debug print "template '$template': \n"; my $count = 0; WORD: for my $word (@dictionary) { chomp $word; next WORD unless $word =~ $rx_template; print " '$word' \n"; ++$count; } # end while WORD print " ($count words) \n"; ++$template_scans; } # end for TEMPLATE printf "%d template scans. total time for all scans: %d secs. \n", $template_scans, time - $start; sub templ2regex { my ($template, ) = @_; # empty template never matches. # (what is proper match behavior of empty template?) return qr{ (?!) }xms if $template eq ''; my %t2g; # map template char => capture group number my $n_cg = 1; # number of current capture group my $rs = # regex string join ' ', map $t2g{$_} ? "\\$t2g{$_}" : scalar($t2g{$_} = $n_cg++, '(.)'), split '', $template ; --$n_cg; # undo last post-increment: now true highest capture group use re 'eval'; # print "=== \$template '$template' \$rs $rs \n"; ; # for debug return qr{ $rs }xms if $n_cg < 2; # no uniqueness test needed my $cap_vars = join ', ', map "\$$_", 1 .. $n_cg; # capture vars active $rs .= "\n(?(?{ $n_cg != uniq $cap_vars }) (?!))"; # add uniqueness test # print "=== \$template '$template' \$rs $rs \n"; ; # for debug return qr{ $rs }xms; } # end sub templ2regex() ^Z 354984 words in dictionary 9 scan templates template '': (0 words) template ' ': '2' 'a' 'b' 'c' 'd' 'e' 'f' 'g' 'h' 'i' 'j' 'k' 'l' 'm' 'n' 'o' 'p' 'q' 'r' 's' 't' 'u' 'v' 'w' 'x' 'y' 'z' (27 words) template 'neocene': 'kaitaka' 'lauhala' 'metreme' 'neocene' 'tempete' (5 words) template 'abcdbab': 'kaitaka' 'lauhala' 'metreme' 'neocene' 'tempete' (5 words) template '0123101': 'kaitaka' 'lauhala' 'metreme' 'neocene' 'tempete' (5 words) template '&*?+*&*': 'kaitaka' 'lauhala' 'metreme' 'neocene' 'tempete' (5 words) template 'Mississippi': 'mississippi' (1 words) template 'abccbccbddb': 'mississippi' (1 words) template '0000': 'mmmm' 'oooo' (2 words) 9 template scans. total time for all scans: 4 secs.