use v6;   # Raku (Perl 6): strictures are on by default and failed I/O throws.

# Word-frequency report with "natural" (number-aware) ordering.
#
# Usage:  raku <this-script> <filename>
#
# Prints every distinct word of the file in natural order, then every
# word/count pair ordered by descending count with natural order as the
# tie-breaker.

# @*ARGS is the Raku counterpart of Perl 5's @ARGV.
die "You need to pass in a filename." unless @*ARGS;
my $filename = @*ARGS.shift;

my %words;

# IO::Path.lines opens the file, yields chomped lines lazily and closes the
# handle once the lines are exhausted; a file that cannot be opened raises
# an exception with a descriptive message.
for $filename.IO.lines -> $line {
    # .comb is sort of the opposite of split: rather than splitting on what
    # you don't want, comb out everything you do.
    #
    # A "word" here is one or more word characters, optionally followed by
    # any number of (punctuation mark + one or more word characters) groups.
    # This might not be the best "word" definition, but it keeps (most)
    # e-mail addresses, dotted-quad IP addresses and contractions
    # (can't, won't) together as a single entity.
    %words{$_}++ for $line.comb(rx/ \w+ [ <.punct> \w+ ]* /);
}

# Print the list of words sorted naturally.
.say for natural_sort(%words.keys);

# Print the word => count pairs sorted by number of times seen (descending)
# with a secondary natural sort. A one-argument sort block is treated as a
# key extractor, so each natural key is built once per word instead of
# twice per comparison.
.say for %words.sort({ -.value, natural_key(.key) });

# Build the string used to compare values in natural order.
#
# The stringified argument is lower-cased and every run of digits is
# rewritten as "0" ~ chr(run length) ~ digits, so that shorter digit runs
# sort before longer ones and equal-length runs compare digit by digit
# ("file2" sorts before "file10").
#
# $text - any value; it is stringified before use.
# Returns the comparison key as a Str.
sub natural_key ($text) {
    return $text.Str.lc.subst(
        /(\d+)/,
        -> $/ { sprintf("%s%c%s", 0, $0.chars, $0) },
        :g
    );
}

# Return the arguments in natural, case-insensitive order.
#
# Takes a flat list of values. Lists shorter than two elements are returned
# unchanged. Values whose keys compare equal (e.g. "The" and "the") keep
# their relative input order.
sub natural_sort {
    return @_ if @_ < 2;
    return @_.sort(&natural_key).list;
}

# Two-argument natural comparator, usable as a sort block.
#
# $a, $b - the values to compare; each is stringified before comparison.
# Returns the Order (Less, Same or More) of their natural keys.
sub natural_cmp ($a, $b) {
    return natural_key($a) cmp natural_key($b);
}