in reply to Re: Hash checking
in thread Hash checking

No need to use a hash ...

unless you are concerned with execution time. grep performs a linear search through the entire array each time through the loop, so the search is O(n²). A hash performs an essentially constant-time lookup, so the search is O(n).


DWIM is Perl's answer to Gödel

Replies are listed 'Best First'.
Re^3: Hash checking
by njcodewarrior (Pilgrim) on Apr 28, 2007 at 18:28 UTC

    Thanks for the reply, GrandFather. Not that I didn't believe you, but here's the proof:

    #! /usr/bin/perl
    
    use strict;
    use warnings;
    
    use File::Spec;
    use Data::Dumper;
    use Benchmark qw( timethese cmpthese );
    
    
    # File::Spec->splitpath returns (volume, directories, file); keep only the
    # file part of the script path.
    # NOTE(review): $app is not used anywhere in the visible script.
    my ( undef, undef, $app ) = File::Spec->splitpath( $0 );

    # Read the data file into @faclist, one element per line, with the line
    # endings removed. The path is named once so the error message can say
    # exactly which file could not be opened.
    my $facs_file = './AXP_FACS.DAT';
    open my $DATA, '<', $facs_file
        or die "Error opening file '$facs_file': $!";
    my @faclist = <$DATA>;
    chomp @faclist;
    close $DATA;
    
    # Find every integer in 1..5000 that occurs as a whole word in at least
    # one element of the given list, using one linear grep per candidate.
    #
    # Params:  $ref - reference to an array of strings to search.
    # Returns: reference to an array of the matching integers, largest first
    #          (each hit is unshifted onto the front of the result).
    sub grep_by_array {
        my ( $ref ) = @_;
        my @found;
        foreach my $integer ( 1..5000 ) {
            # Search the caller's array through the reference: the list is
            # only read, so copying it on every call is pure overhead.
            # grep in boolean context yields the number of matches, which is
            # true as soon as at least one element matched.
            if ( grep { /\b$integer\b/ } @$ref ) {
                unshift @found, $integer;
            }
        }

        return \@found;
    }
    
    # Build a lookup hash whose keys are the entries of @faclist; every key
    # gets the value 1, assigned in one go through a hash slice.
    my %list;
    @list{@faclist} = (1) x @faclist;
    
    # Find every integer in 1..5000 that is present as a key of the given
    # hash, using one hash lookup per candidate.
    #
    # Params:  $ref - reference to a hash whose keys are the values to test.
    # Returns: reference to an array of the matching integers, largest first
    #          (each hit is unshifted onto the front of the result).
    sub grep_by_hash {
        my ( $ref ) = @_;
        my @found;
        foreach my $integer ( 1..5000 ) {
            # Look the key up through the reference. Copying the whole hash
            # on every call would cost a full pass over its contents and
            # would be timed as if it were part of the lookup.
            if ( exists $ref->{$integer} ) {
                unshift @found, $integer;
            }
        }

        return \@found;
    }
    
    # Time both lookup strategies: timethese runs each code ref 1000 times
    # and returns the timing results, which cmpthese then prints as a
    # comparison chart.
    my %contenders = (
        'array' => sub { grep_by_array(\@faclist) },
        'hash'  => sub { grep_by_hash(\%list) },
    );
    my $r = timethese( 1000, \%contenders );

    cmpthese( $r );
    

    RESULTS:

    Benchmark: timing 5000 iterations of array, hash...
         array: 339 wallclock secs (339.02 usr +  0.04 sys = 339.06 CPU) @ 14.75/s (n=5000)
          hash:  8 wallclock secs ( 8.27 usr +  0.00 sys =  8.27 CPU) @ 604.59/s (n=5000)
            Rate array  hash
    array 14.7/s    --  -98%
    hash   605/s 4000%    --
    

    That's quite an improvement using a hash!
    You learn something every day...

    njcodewarrior