# Sort a list of strings in "natural" order: case-insensitively, with every
# run of digits ordered by numeric magnitude rather than character by
# character, so "a2" comes before "a10".
#
# Takes the strings to sort as its argument list (@_) and returns them
# reordered. The original spellings are returned, not the lowercased keys.
# Items whose keys compare equal keep their original relative order.
sub natural_sort {
    # Zero or one item is already sorted.
    return @_ if @_ < 2;

    # One comparison key per item. Each digit run becomes
    # "0" ~ chr(length of run) ~ run, so a shorter run (a smaller number)
    # compares lower than a longer one under plain string comparison.
    my @keys = @_.map({
        .lc.subst(/(\d+)/, -> $/ { sprintf("%s%c%s", 0, $0.chars, $0) }, :g)
    });

    # Sort positions rather than decorated strings: the keys are compared
    # untouched, and ties fall back to the numeric position. Gluing the
    # position onto the key text would let it leak into the comparison.
    my @order = (^@_).sort({ @keys[$^i] cmp @keys[$^j] || $^i <=> $^j });

    return @_[@order];
}
####
# Compare two strings in "natural" order: case-insensitively, with runs of
# digits ordered by numeric magnitude ("a2" before "a10").
#
# Reads its first two arguments from @_ and returns the result of `cmp`
# on their comparison keys (Less, Same or More). The arguments themselves
# are never modified: only non-mutating methods are used to build the keys.
sub natural_cmp {
    my ($one, $two) = @_[0, 1].map({
        # Lowercase first, then encode each digit run as
        # "0" ~ chr(length of run) ~ run. Lowercasing afterwards would
        # also case-fold the length marker character.
        .lc.subst(/(\d+)/, -> $/ { sprintf("%s%c%s", 0, $0.chars, $0) }, :g)
    });
    return ($one cmp $two);
}
####
# Compare $a and $b in "natural" order: case-insensitively, with runs of
# digits ordered by numeric magnitude ("a2" before "a10").
#
# Returns the result of `cmp` on the two comparison keys (Less, Same or
# More). Both arguments are stringified first, so any value with a string
# form can be passed; neither argument is modified.
sub natural_cmp ($a, $b) {
    my ($one, $two) = ($a, $b).map({
        # Lowercase first, then encode each digit run as
        # "0" ~ chr(length of run) ~ run. Lowercasing afterwards would
        # also case-fold the length marker character.
        .Str.lc.subst(/(\d+)/, -> $/ { sprintf("%s%c%s", 0, $0.chars, $0) }, :g)
    });
    return ($one cmp $two);
}
####
use v6;
# perl 6. automatic strict, warnings and autodie
my $filename = shift @*ARGS or die "You need to pass in a filename.";
# Command-line arguments live in @*ARGS (Perl 5's @ARGV).

my $handle = open $filename;
# No explicit check: a failed open is left to report itself.

# Number of times each word was seen, keyed by the word as it appeared.
my %words;
for $handle.lines -> $line {
    # .comb is roughly the opposite of split: instead of naming the
    # separators, describe what to keep. A "word" here is one or more word
    # characters, optionally followed by any number of groups made of a
    # single punctuation mark plus one or more word characters. That keeps
    # most email addresses, dotted-quad IP addresses and contractions
    # (can't, won't) together as one token.
    %words{ $_ }++ for $line.comb(rx/ \w+ [ <punct> \w+ ]* /);
}
$handle.close;

# Every distinct word, in natural order.
.say for natural_sort(%words.keys);

# Word/count pairs, most frequent first; equal counts are ordered by a
# natural comparison of the words. The key is rebuilt on every comparison,
# so this is slow on large inputs.
.say for %words.sort: { $^b.value <=> $^a.value || natural_cmp($^a.key, $^b.key) };
# Sort a list of strings in "natural" order: case-insensitively, with every
# run of digits ordered by numeric magnitude rather than character by
# character, so "a2" comes before "a10".
#
# Takes the strings to sort as its argument list (@_) and returns them
# reordered. The original spellings are returned, not the lowercased keys.
# Items whose keys compare equal keep their original relative order.
sub natural_sort {
    # Zero or one item is already sorted.
    return @_ if @_ < 2;

    # One comparison key per item. Each digit run becomes
    # "0" ~ chr(length of run) ~ run, so a shorter run (a smaller number)
    # compares lower than a longer one under plain string comparison.
    my @keys = @_.map({
        .lc.subst(/(\d+)/, -> $/ { sprintf("%s%c%s", 0, $0.chars, $0) }, :g)
    });

    # Sort positions rather than decorated strings: the keys are compared
    # untouched, and ties fall back to the numeric position. Gluing the
    # position onto the key text would let it leak into the comparison.
    my @order = (^@_).sort({ @keys[$^i] cmp @keys[$^j] || $^i <=> $^j });

    return @_[@order];
}
# Compare $a and $b in "natural" order: case-insensitively, with runs of
# digits ordered by numeric magnitude ("a2" before "a10").
#
# Returns the result of `cmp` on the two comparison keys (Less, Same or
# More). Both arguments are stringified first, so any value with a string
# form can be passed; neither argument is modified.
sub natural_cmp ($a, $b) {
    my ($one, $two) = ($a, $b).map({
        # Lowercase first, then encode each digit run as
        # "0" ~ chr(length of run) ~ run. Lowercasing afterwards would
        # also case-fold the length marker character.
        .Str.lc.subst(/(\d+)/, -> $/ { sprintf("%s%c%s", 0, $0.chars, $0) }, :g)
    });
    return ($one cmp $two);
}