Number of Hashes = 305798
Number of Values = 4892785
Number of Hashes Filtered = 268718
Number of Values Filtered = 3435051
Filtering 88% Hashes
Filtering 70% Values
           s/iter map_grep_2 map_grep slice recursive iterative
map_grep_2   6.29         --      -1%   -8%      -10%      -16%
map_grep     6.22         1%       --   -7%       -9%      -15%
slice        5.81         8%       7%    --       -2%       -9%
recursive    5.68        11%      10%    2%        --       -7%
iterative    5.27        19%      18%   10%        8%        --
####
Number of Hashes = 405929
Number of Values = 6494881
Number of Hashes Filtered = 203883
Number of Values Filtered = 660915
Filtering 50% Hashes
Filtering 10% Values
           s/iter iterative map_grep map_grep_2 slice recursive
iterative    1.67        --      -1%        -1%   -7%      -12%
map_grep     1.65        1%       --        -0%   -6%      -11%
map_grep_2   1.65        1%       0%         --   -6%      -10%
slice        1.55        7%       6%         6%    --       -5%
recursive    1.48       13%      12%        12%    5%        --
####
Number of Hashes = 1579338
Number of Values = 6317357
Number of Hashes Filtered = 1369612
Number of Values Filtered = 4362177
Filtering 87% Hashes
Filtering 69% Values
           s/iter map_grep_2 slice map_grep recursive iterative
map_grep_2   10.7         --   -0%      -3%       -6%       -6%
slice        10.7         0%    --      -3%       -6%       -6%
map_grep     10.4         3%    3%       --       -3%       -3%
recursive    10.1         6%    6%       3%        --       -0%
iterative    10.0         7%    7%       3%        0%        --
####
Number of Hashes = 1226812
Number of Values = 4907253
Number of Hashes Filtered = 492351
Number of Values Filtered = 509410
Filtering 40% Hashes
Filtering 10% Values
           s/iter iterative slice map_grep map_grep_2 recursive
iterative    2.56        --  -16%     -17%       -17%      -22%
slice        2.15       19%    --      -1%        -1%       -7%
map_grep     2.13       20%    1%      -0%         --       -6%
map_grep_2   2.12       20%    1%       0%         --       -6%
recursive    1.99       28%    8%       7%        6%        --
####
sub hash_slice {
    my $n ;
    my @keysToGet = keys %{$_[1]};
    @{$n}{@keysToGet} = @{$_[0]}{@keysToGet};
    foreach ( @keysToGet ) {
        if ( ref $_[1]->{ $_ } eq 'HASH' ) {
            croak "bad filter: on '$_', expected HASH\n"
                unless ( ref $_[0]->{ $_ } eq 'HASH' ) ;
            $n->{$_} = hash_slice( $_[0]->{$_}, $_[1]->{$_} );
        }
    }
    return $n ;
}
####
#! /usr/bin/env perl
use strict ;
use warnings ;
use Data::Dumper ;
use Carp ;
use Test::More ;
use Benchmark qw(timethese cmpthese) ;
use Deep::Hash::Utils qw(reach nest deepvalue) ;

BEGIN {
    sub round {
        my $number = shift ;
        return int( $number + .5 * ( $number <=> 0 ) ) ;
    }

    sub _nextUnique {
        my $uniqueNmbr = 1 ;
        my $uniqueVal = -1 ;
        return sub {
            my $unique = '' ;
            ++$uniqueVal ;
            if ( ( int ( $uniqueVal / 256**$uniqueNmbr ) ) == 1 ) {
                ++$uniqueNmbr ;
                --$uniqueVal ;
                for ( my $i = $uniqueNmbr ; $i > 0 ; --$i ) {
                    $unique = $unique . chr( 0 ) ;
                }
                return $unique ;
            }
            my $copyUniqueVal = $uniqueVal ;
            for ( my $i = $uniqueNmbr ; $i > 0 ; --$i ) {
                my $devider = 256**($i - 1) ;
                ( my $c, $copyUniqueVal ) =
                    ( int $copyUniqueVal / $devider, $copyUniqueVal % $devider ) ;
                $unique = chr( $c ) . $unique ;
            }
            return $unique ;
        }
    }
    *nextUnique = _nextUnique ;

    # Test nextUnique
    # for ( my $i = 0 ; $i < 65538 ; ++$i ) {
    #     my $u = nextUnique() ;
    #     my @ASCII = unpack( "C*", $u ) ;
    #     print $i . " @ASCII" . "\n" ;
    # }
}

sub throwDice {
    my $dice = {
        # The value and its relative chance
        1 => 1,
        2 => 1,
        3 => 1,
        4 => 1,
        5 => 1
    } ;
    my $retitems = [] ;
    my $total = 5 ;
    my $nmbr = 2 ; # Roll two dice
    for ( my $i = 1 ; $i <= $nmbr ; ++$i ) {
        my $rnd = rand( $total ) ;
        my $counter = 0 ;
        foreach ( keys %{ $dice } ) {
            $counter = $counter + $dice->{ $_ } ;
            if ( $rnd < $counter ) {
                push( @{ $retitems }, $_ ) ;
                last ;
            }
        }
    }
    return $retitems ;
}

# Test throwDice
# my $numbers = throwDice() ;
# print "numbers = @{$numbers}\n" ;

my $nH = 0 ; # Number of Hashes
my $nV = 0 ; # Number of Values

sub createHash {
    # This sub is highly random: the generated hash will sometimes take a couple of MB and other times several GB
    my $depth = $_[0] // 0 ;
    ++$depth ;
    my $s = {} ;
    my $f = {} ;
    # Test 1 and 2
    for ( my $i = 1 ; $i <= 17 ; ++$i ) {
    # Test 3 and 4
    # for ( my $i = 1 ; $i <= 5 ; ++$i ) {
        my $rnd = throwDice() ;
        # Test 1 and 2
        # Fewer hashes, more values
        if ( $rnd->[0] > 4 && $depth <= 10 ) {
        # Test 3 and 4
        # More hashes, fewer values
        # if ( $rnd->[0] > 1 && $depth <= 10 ) {
            my $key = nextUnique() ;
            my $randval = rand( 1 ) ;
            # $rnd->[1] > ...
            # > 0 means 100% chance to add it to the filter, > 1 = 80%, > 2 = 60%, > 3 = 40%, > 4 = 20%, > 5 = 0%
            # Note that this is not the total chance, the filter may still be discarded.
            # To decrease the chance that an ENTIRE hash branch is rejected in the filter,
            # an additional 10% chance multiplier is added on top of this ( see: $randval > 0.1 )
            # Test 1 and 3
            if ( ( $rnd->[1] > 1 ) || ( $randval > 0.1 ) ) {
            # Test 2 and 4
            # if ( ( $rnd->[1] > 4 ) || ( $randval > 0.1 ) ) {
                ++$nH ;
                ( $s->{ $key }, $f->{ $key } ) = createHash( $depth ) ;
                # IF $f->{ $key } IS {} THEN DELETE $f->{ $key } (not a valid filter for testing)
                if ( !(%{$f->{ $key }}) ) {
                    delete $f->{ $key } ;
                }
            }
            else {
                ++$nH ;
                ( $s->{ $key }, undef ) = createHash( $depth ) ; # undef, hash is discarded (not added to the filter)
            }
        }
        else {
            my $key = nextUnique() ;
            my $value = $depth . '.' . $i . "ABCDEFGHIJKLMNOPQRSTUVWXYZ" ;
            ++$nV ;
            $s->{ $key } = $value ;
            # $rnd->[1] > ...
            # > 0 means 100% chance to add it to the filter, > 1 = 80%, > 2 = 60%, > 3 = 40%, > 4 = 20%, > 5 = 0%
            # Note that this is not the total chance, the filter may still be discarded
            # Test 1 and 3
            if ( $rnd->[1] > 1 ) {
            # Test 2 and 4
            # if ( $rnd->[1] > 4 ) {
                $f->{ $key } = 1 ;
            }
        }
    }
    return ($s, $f) ;
}

my $nHF = 0 ; # Number of Hashes Filtered
my $nVF = 0 ; # Number of Values Filtered

sub countHashesAndValuesFilter {
    my $f = shift ;
    foreach ( keys %{ $f } ) {
        my $k = $_ ;
        if ( ref $f->{ $k } eq 'HASH' ) {
            ++$nHF ;
            countHashesAndValuesFilter( $f->{ $k } ) ;
        }
        else {
            ++$nVF ;
        }
    }
}

my ( $source, $filter ) = createHash() ;
countHashesAndValuesFilter( $filter ) ;
print "Created test hash\n" ;
if ( $nH < 100000 || $nV < 100000 ) {
    # Can probably be set higher, but at least it helps a little to get better test data
    print "Invalid, test again\n" ;
    exit(0) ;
}
print "Number of Hashes = $nH\n" ;
print "Number of Values = $nV\n" ;
print "Number of Hashes Filtered = $nHF\n" ;
print "Number of Values Filtered = $nVF\n" ;
print "Filtering " . round(($nHF/$nH)*100) . "% Hashes\n" ;
print "Filtering " . round(($nVF/$nV)*100) . "% Values\n" ;
sleep( 2 ) ;

sub hash_filter_recursive {
    my $source = shift;
    my $filter = shift;
    my %output;
    foreach ( keys %$filter ) {
        if ( exists $source->{$_} ) {
            if ( ref $filter->{$_} eq 'HASH' ) {
                croak "bad filter: on '$_', expected HASH\n"
                    unless ( ref $source->{$_} eq 'HASH' );
                $output{$_} = hash_filter_recursive( $source->{$_}, $filter->{$_} );
            }
            else {
                $output{$_} = $source->{$_};
            }
        }
    }
    return \%output;
}

sub hash_filter_iterative {
    my $source = shift;
    my $filter = shift;
    my $output = {};
    my @queue = ( [ $source, $filter, $output ] );
    while ( my $a = shift @queue ) {
        my ( $s, $f, $o ) = @{$a};
        foreach ( keys %$f ) {
            if ( exists $s->{$_} ) {
                if ( ref $f->{$_} eq 'HASH' ) {
                    croak "bad filter: on '$_', expected HASH\n"
                        unless ( ref $s->{$_} eq 'HASH' );
                    $o->{$_} = {};
                    push @queue, [ $s->{$_}, $f->{$_}, $o->{$_} ];
                }
                else {
                    $o->{$_} = $s->{$_};
                }
            }
        }
    }
    return $output;
}

sub hash_filter_delete {
    my $source = shift;
    my $filter = shift;
    foreach ( keys %$source ) {
        if ( exists $filter->{$_} ) {
            if ( ref $filter->{$_} eq 'HASH' ) {
                croak "bad filter: on '$_', expected HASH\n"
                    unless ( ref $source->{$_} eq 'HASH' );
                hash_filter_delete( $source->{$_}, $filter->{$_} );
            }
        }
        else {
            delete $source->{$_};
        }
    }
    return $source;
}

sub hash_slice {
    # contributed by Veltro
    # Veltro: Made more efficient by using @keysToGet instead of another keys instruction.
    # Also added croak in case the filter is not valid.
    my $n ;
    my @keysToGet = keys %{$_[1]};
    @{$n}{@keysToGet} = @{$_[0]}{@keysToGet};
    foreach ( @keysToGet ) {
        if ( ref $_[1]->{ $_ } eq 'HASH' ) {
            croak "bad filter: on '$_', expected HASH\n"
                unless ( ref $_[0]->{ $_ } eq 'HASH' ) ;
            $n->{$_} = hash_slice( $_[0]->{$_}, $_[1]->{$_} );
        }
    }
    return $n ;
}

sub map_grep {
    # contributed by shmem
    my $source = shift;
    my $filter = shift;
    return {
        map {
            ref $filter->{$_} eq 'HASH'
                ? ref $source->{$_} eq 'HASH'
                    ? ( $_, map_grep( $source->{$_}, $filter->{$_} ) )
                    : croak "bad filter: on '$_', expected HASH\n"
                : ( $_, $source->{$_} )
        } grep { exists $source->{$_} } keys %$filter
    };
}

sub map_grep_2 {
    # contributed by shmem
    return {
        map {
            ref $_[1]->{$_} eq 'HASH'
                ? ref $_[0]->{$_} eq 'HASH'
                    ? ( $_, map_grep_2( $_[0]->{$_}, $_[1]->{$_} ) )
                    : croak "bad filter: on '$_', expected HASH\n"
                : ( $_, $_[0]->{$_} )
        } grep { exists $_[0]->{$_} } keys %{ $_[1] }
    };
}
sub hash_deep_utils {
    # contributed by mr_ron
    my ( $source, $filter ) = @_;
    my %rc;
    while ( my @l = reach($filter) ) {
        pop @l;
        if ( defined( my $source_val = deepvalue( $source, @l ) ) ) {
            # hint: work around nest behavior on even vs odd key count
            nest( \%rc, @l )->{ $l[$#l] } = $source_val;
        }
    }
    \%rc;
}

my $t = {
    source => $source,
    filter => $filter,
    output => map_grep_2( $source, $filter )
} ;

$nHF = 0 ;
$nVF = 0 ;
# Using this function again to do some additional checks
countHashesAndValuesFilter( $t->{ output } ) ;
print "Values should be the same as filter values:\n" ;
print "Number of hashes in output: $nHF\n" ;
print "Number of values in output: $nVF\n" ;

# Veltro: Removed hash_filter_delete, I don't trust that it will leave the test hash intact
is_deeply( $_->( $t->{source}, $t->{filter} ), $t->{output} )
    foreach ( \&hash_filter_recursive, \&hash_filter_iterative, \&hash_slice, \&map_grep, \&map_grep_2 );

cmpthese( 10, {
    recursive  => sub { hash_filter_recursive( $t->{source}, $t->{filter} ) },
    iterative  => sub { hash_filter_iterative( $t->{source}, $t->{filter} ) },
    # delete   => sub { hash_filter_delete( $t->{source}, $t->{filter} ) },
    map_grep_2 => sub { map_grep_2( $t->{source}, $t->{filter} ) },
    map_grep   => sub { map_grep( $t->{source}, $t->{filter} ) },
    slice      => sub { hash_slice( $t->{source}, $t->{filter} ) },
} );

# Check that the data is still the same after testing
is_deeply( $_->( $t->{source}, $t->{filter} ), $t->{output} )
    foreach ( \&hash_filter_recursive, \&hash_filter_iterative, \&hash_slice, \&map_grep, \&map_grep_2 );

done_testing();
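For anyone skimming the benchmark, here is a minimal, self-contained sketch of the filtering semantics that all of the benchmarked subs implement. The tiny source/filter hashes below are made up purely for illustration; the sub itself is copied verbatim from the script above.

#! /usr/bin/env perl
use strict ;
use warnings ;
use Carp ;
use Data::Dumper ;

# Copied from the benchmark script above
sub hash_filter_recursive {
    my $source = shift;
    my $filter = shift;
    my %output;
    foreach ( keys %$filter ) {
        if ( exists $source->{$_} ) {
            if ( ref $filter->{$_} eq 'HASH' ) {
                croak "bad filter: on '$_', expected HASH\n"
                    unless ( ref $source->{$_} eq 'HASH' );
                $output{$_} = hash_filter_recursive( $source->{$_}, $filter->{$_} );
            }
            else {
                $output{$_} = $source->{$_};
            }
        }
    }
    return \%output;
}

# Illustrative data only: keep 'a' and 'c'->'d', drop 'b' and 'c'->'e'
my $source = { a => 1, b => 2, c => { d => 3, e => 4 } } ;
my $filter = { a => 1, c => { d => 1 } } ;

print Dumper( hash_filter_recursive( $source, $filter ) ) ;
# Prints (hash key order may vary):
# $VAR1 = { 'a' => 1, 'c' => { 'd' => 3 } };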