# FIXME: name is preliminary package List::LimitedSize::Fitness; # standard preliminaries use strict; use warnings; # core modules use Carp; use List::Util qw/min max/; # we don't export anything, but Exporter also handles module versions. require Exporter; our @ISA = qw/Exporter/; our $VERSION = 1; ### CONSTRUCTOR ### # FIXME: should the policy be passed in as a string, or some other way? sub new { my $class = shift; my $maximum_size = shift; my $policy = shift // "flexible"; # default to "flexible" # sanity check: sizes <= 0 don't make sense. croak "maximum size must be > 0" unless($maximum_size > 0); # sanity check: ensure a valid policy was passed. croak "invalid policy: $policy" unless($policy eq "strict" or $policy eq "flexible"); # create instance data my $self = { 'maximum-size' => $maximum_size, 'policy' => $policy, 'list' => {}, 'size' => 0, }; # bless instance data into class and return the new object bless $self, $class; return $self; } ### FITNESS ### # returns the worst fitness on the list. sub worst_fitness { my ($self) = @_; return min $self->fitnesses(); } # returns the best fitness on the list. sub best_fitness { my ($self) = @_; return max $self->fitnesses(); } # returns a list of all fitnesses on the list in list context, or the number of fitnesses in scalar context. sub fitnesses { my ($self) = @_; return wantarray ? keys %{ $self->{'list'} } : scalar keys %{ $self->{'list'} }; } ### ITEMS ### # returns a list containing all items on the list. sub _allitems { my ($self) = @_; # list of all items my @allitems = (); # iterate through fitnesses and copy items. foreach my $fitness ($self->fitnesses()) { push @allitems, $self->items($fitness); } # return collected items. return @allitems; } # returns items on the list in list context, or the number of items on the # list in scalar context, restricted to the given fitness if any. sub items { my ($self, $fitness) = @_; # don't do any work in void context return unless defined wantarray; # fitness specified? if(defined $fitness) { # if this fitness doesn't exist at all, return the empty list or 0. unless(exists $self->{'list'}->{$fitness}) { return wantarray ? () : 0; } # fitness exists, return the right data return wantarray ? @{ $self->{'list'}->{$fitness} } : scalar @{ $self->{'list'}->{$fitness} }; } else { # no fitness specified, caller wants all items on list return wantarray ? $self->_allitems() : $self->{'size'}; } } ### ADDING ITEMS ### # add an item to the list with the given fitness # FIXME: should undefined items be forbidden? # FIXME: should we ensure $fitness is a number? sub add { my ($self, $item, $fitness) = @_; croak "Fitness undefined for item $item" unless defined $fitness; if($self->{'policy'} eq "flexible") { $self->_flexible_add($item, $fitness); } else { $self->_strict_add($item, $fitness); } } # add an item to the list with the given fitness, using the "strict" policy. sub _strict_add { my ($self, $item, $fitness) = @_; # easy case: item fits on the list, just put it there. if($self->{'size'} < $self->{'maximum-size'}) { $self->_put($item, $fitness); # item does not fit on list. } else { my $worst_fitness = $self->worst_fitness(); if($fitness < $worst_fitness) { # if fitness is worse than current worst, do nothing. return; } elsif($fitness == $worst_fitness) { # if fitness matchs current worst, do nothing. return; } else { # if fitness exceeds current worst, add item... $self->_put($item, $fitness); # ...and delete one item from worst fitness class. $self->_pop($worst_fitness); } } } # add an item to the list with the given fitness, using the "flexible" policy. sub _flexible_add { my ($self, $item, $fitness) = @_; # easy case: item fits on the list, just put it there. if($self->{'size'} < $self->{'maximum-size'}) { $self->_put($item, $fitness); # item does not fit on list. } else { my $worst_fitness = $self->worst_fitness(); if($fitness < $worst_fitness) { # if fitness is worse than current worst, do nothing. return; } elsif($fitness == $worst_fitness) { # if fitness matchs current worst, simply add item. $self->_put($item, $fitness); } else { # if fitness exceeds current worst, add item... $self->_put($item, $fitness); # ...and delete worst fitness class if we safely can -- i.e. if we # can be sure that doing so doesn't open us up to the # possibility of new members being added to it later on. my $num_worst = $self->items($worst_fitness); if($self->{'size'} - $num_worst >= $self->{'maximum-size'}) { $self->{'size'} -= $self->items($worst_fitness); delete $self->{'list'}->{$worst_fitness}; } } } } # put an item on the list without any further checks. sub _put { my ($self, $item, $fitness) = @_; push @{ $self->{'list'}->{$fitness} }, $item; $self->{'size'}++; } ### REMOVING ITEMS ### # Note: there is no remove(), since removal of items from the list by the # user is explicitely not supported. (For now, unless/until someone # provides a use case, and ideally a patch.) # remove an item with the given fitness from the list (and return it). # No guarantees as to which item you'll get. sub _pop { my ($self, $fitness) = @_; # sanity check: fitness class must exist and not be empty return unless exists $self->{'list'}->{$fitness}; return unless scalar $self->items($fitness); # remove and remember item my $item = pop @{ $self->{'list'}->{$fitness} }; # adjust list size $self->{'size'}--; # if the class the item came from is empty now, remove it entirely delete $self->{'list'}->{$fitness} unless $self->items($fitness); # return removed item return $item; } # obligatory success! 1; #### #!/usr/bin/perl use Modern::Perl '2014'; use English; # make sure STD* uses UTF-8 use open IO => ':encoding(UTF-8)', ':std'; use FindBin; use lib $FindBin::Bin; use List::LimitedSize::Fitness; ### MAIN ### $OUTPUT_AUTOFLUSH = 1; my $num_words = 10; my $list = List::LimitedSize::Fitness->new($num_words, "flexible"); while(<>) { chomp; print "." unless $INPUT_LINE_NUMBER % 10000; $list->add($_, length $_); } say ""; foreach my $fitness (sort { $a <=> $b } $list->fitnesses()) { say "length $fitness"; say join "\n", map { s/_/ /g; "\t" . $_ } $list->items($fitness); } say $list->items() . " words total ($num_words requested)."; #### $ perl longestwords.pl wordsEn.txt .......... length 21 antienvironmentalists antiinstitutionalists counterclassification electroencephalograms electroencephalograph electrotheraputically gastroenterologically internationalizations mechanotheraputically microminiaturizations microradiographically length 22 counterclassifications counterrevolutionaries electroencephalographs electroencephalography length 23 disestablismentarianism electroencephalographic length 25 antidisestablishmentarian length 28 antidisestablishmentarianism 19 words total (10 requested). $