WORD: while (my $word = pop @words) {
# lower-case it
$word = lc $word;
# search for the entry
for my $index (0 .. $#words) {
if ($counta[$index][0] eq $word) {
# when found, update the count and
# go to the next word
$counta[$index][1]++;
next WORD;
}
}
# If there's no entry, add a new entry
push @counta, [ $word, 1 ];
}
####
WORD: while (my $word = pop @words) {
# lower-case it
$word = lc $word;
# search for the entry. When found, update the
# count, if not found create new entry.
$counth{$word}++;
}
####
$ perl t.pl
***** Comparing a list of 100 words 10000 times *****
Rate greparray array hash
greparray 997/s -- -55% -94%
array 2210/s 122% -- -86%
hash 16026/s 1507% 625% --
***** Comparing a list of 1000 words 1000 times *****
Rate greparray array hash
greparray 8.80/s -- -79% -100%
array 41.5/s 372% -- -98%
hash 2070/s 23419% 4887% --
####
#!/usr/bin/perl
use strict;
use warnings;
use Benchmark qw(:all);
use Data::Dumper;
# Dump counts? 1=yes, 0=no
my $showall = 1;
my $letters = join("", "A" .. "Z", "a" .. "z");
my @words;
my %counth;
my @counta;
my @countg;
sub do_hash {
%counth = ();
for my $word (@words) {
$word = lc $word;
$counth{$word}++;
}
}
sub do_array {
@counta = ();
WORD: for my $word (@words) {
$word = lc $word;
for my $index (0 .. $#counta) {
if ($counta[$index][0] eq $word) {
$counta[$index][1]++;
next WORD;
}
}
push @counta, [ $word, 1 ];
}
}
sub do_greparray {
@countg = ();
WORD: for my $word (@words) {
$word = lc $word;
my @entries = grep { $word eq $$_[1] } @countg;
if (@entries) {
$entries[0][1]++;
}
else {
push @countg, [ $word, 1 ];
}
}
}
sub make_word {
# Half the time, reuse a word
if (@words and 0.5 > rand) {
return $words[int rand @words];
}
my $len = int( rand 3 * rand 3 )+1;
my $word;
$word .= substr($letters, int(rand length $letters), 1) for 1 .. $len;
return $word;
}
for my $num_words (100, 1000, 10000, 100000, 1000000) {
push @words, make_word() for 1 .. $num_words;
my $num_iterations = 1000000 / $num_words;
print "\n***** Comparing a list of $num_words words "
. "$num_iterations times *****\n\n";
cmpthese($num_iterations, {
'hash'=>\&do_hash,
'array'=>\&do_array,
'greparray'=>\&do_greparray,
});
}
=h1
# For debugging, enable this chunk (by removing the =h1 and =cut lines).
# It'll show any differences between the final run of the three methods.
for my $ar (@counta) {
my ($word, $cnta) = @$ar;
my $cnth = $counth{$word};
my @ag = grep { $word eq $$_[0] } @countg;
my $cntg = $ag[0][1] // -1;
next if $showall or $cnta != $cnth or $cnta != $cntg;
printf "%-10.10s % 6u % 6u % 6u\n",
$word, $cnth, $cnta, $cntg;
}
=cut