# sort_weekdays_2.pl # method relies on left-anchored nature of "short" day names, # e.g., th/thu/thur/thurs/thursday. # automatic pattern generation. possibility of false-positive # matches exists, e.g., 'wedne' for wednesday. use 5.018; # need reliable regex interaction with lexicals use 5.010; # need regex extended patterns (?|...) branch reset use strict; use warnings; use Test::More 'tests' => 2; use Test::NoWarnings; use Data::Dump qw(dd); # works - false positive matches possible. my ($rx_days) = map qr{ (?i) $_ }xms, join ' | ', # map { dd $_; $_; } # for debug map ".*? \\b (${ \day_parts($_) })", # works # map { dd $_; $_; } # for debug map { @$_ == 3 or die "bad day '$_'"; $_; } # minimal validation map [ m{ \A ([motuwehfrsa]{2}) ([nesdritu]+?) (day) \z }xms ], # works # map [ m{ \A ([a-z]{2}) ([a-z]+?) (day) \z }xms ], # works qw/monday tuesday wednesday thursday friday saturday sunday/ ; # print "\$rx_days $rx_days \n"; # for debug my $test = <<'EOS'; Monday Saturday Thursday Saturday Sat Sun Mon Tue Th nothing on this or the following line should extract shdhsd mond s mondaytuesday mondayxtuesday xsun sunx xsunx saxyzzy Wen mo tu we th fr sa su mon tue wed thu fri sat sun blank lines don't matter mOnDaY TuE wEd ThUrS mO sUn false positive matches are possible with automatic pattern generation satu wednes EOS # day-names extracted in day order, then by order of # appearance in the source string. # (?{ ... }) interface to lexical variable only reliable for perl ver 5.18+ # this problem can be avoided by using package-global variables. my @got; # lexical unreliable in older perls $test =~ m{ \A (?| $rx_days) \b (?{ push @got, $^N }) (*FAIL) }xms; # dd \@got; # for debug is_deeply \@got, [ qw( Monday Mon mo mon mOnDaY mO Tue tu tue TuE we wed wEd wednes Thursday Th th thu ThUrS fr fri Saturday Saturday Sat sa sat satu Sun su sun sUn )], 'extraction sorted by day-order'; sub day_parts { my ($ar_parts, # substrings to assemble to nested regex ) = @_; # starts like [ 'tu', 'es', 'day' ] # intermediate ( 'tu', 'e', 's', 'day' ) # ends like tu (?: e (?: s (?: day)?)?)? my $start = $ar_parts->[0]; my @parts = (split('', $ar_parts->[1]), $ar_parts->[2]); return "$start @{[ _day_parts(@parts) ]}"; } sub _day_parts { return @_ ? "(?: @{[ shift, _day_parts(@_) ]})?" : (); }