Number of Hashes = 305798
Number of Values = 4892785
Number of Hashes Filtered = 268718
Number of Values Filtered = 3435051
Filtering 88% Hashes
Filtering 70% Values
            s/iter map_grep_2 map_grep slice recursive iterative
map_grep_2    6.29         --      -1%   -8%      -10%      -16%
map_grep      6.22         1%       --   -7%       -9%      -15%
slice         5.81         8%       7%    --       -2%       -9%
recursive     5.68        11%      10%    2%        --       -7%
iterative     5.27        19%      18%   10%        8%        --
####
Number of Hashes = 405929
Number of Values = 6494881
Number of Hashes Filtered = 203883
Number of Values Filtered = 660915
Filtering 50% Hashes
Filtering 10% Values
            s/iter iterative map_grep map_grep_2 slice recursive
iterative     1.67        --      -1%        -1%   -7%      -12%
map_grep      1.65        1%       --        -0%   -6%      -11%
map_grep_2    1.65        1%       0%         --   -6%      -10%
slice         1.55        7%       6%         6%    --       -5%
recursive     1.48       13%      12%        12%    5%        --
####
Number of Hashes = 1579338
Number of Values = 6317357
Number of Hashes Filtered = 1369612
Number of Values Filtered = 4362177
Filtering 87% Hashes
Filtering 69% Values
            s/iter map_grep_2 slice map_grep recursive iterative
map_grep_2    10.7         --   -0%      -3%       -6%       -6%
slice         10.7         0%    --      -3%       -6%       -6%
map_grep      10.4         3%    3%       --       -3%       -3%
recursive     10.1         6%    6%       3%        --       -0%
iterative     10.0         7%    7%       3%        0%        --
####
Number of Hashes = 1226812
Number of Values = 4907253
Number of Hashes Filtered = 492351
Number of Values Filtered = 509410
Filtering 40% Hashes
Filtering 10% Values
            s/iter iterative slice map_grep map_grep_2 recursive
iterative     2.56        --  -16%     -17%       -17%      -22%
slice         2.15       19%    --      -1%        -1%       -7%
map_grep      2.13       20%    1%       --        -0%       -6%
map_grep_2    2.12       20%    1%       0%         --       -6%
recursive     1.99       28%    8%       7%         6%        --
####
sub hash_slice {
    my $n ;
    my @keysToGet = keys %{$_[1]};
    @{$n}{@keysToGet} = @{$_[0]}{@keysToGet};
    foreach ( @keysToGet ) {
        if ( ref $_[1]->{ $_ } eq 'HASH' ) {
            croak "bad filter: on '$_', expected HASH\n" unless ( ref $_[0]->{ $_ } eq 'HASH' ) ;
            $n->{$_} = hash_slice( $_[0]->{$_}, $_[1]->{$_} );
        }
    }
    return $n ;
}
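# Usage sketch (hypothetical example data), in the same commented-out
# style as the test snippets in the full script below. Keys named in the
# filter are copied via a hash slice, and where the filter value is
# itself a hash, hash_slice recurses into that branch. Note there is no
# 'exists' check, so the filter is assumed to name only keys present in
# the source.
# my $source = { a => 1, b => { c => 2, d => 3 }, e => 4 } ;
# my $filter = { a => 1, b => { c => 1 } } ;
# my $sliced = hash_slice( $source, $filter ) ;
# # $sliced is now { a => 1, b => { c => 2 } }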
####
#! /usr/bin/env perl
use strict ;
use warnings ;
use Data::Dumper ;
use Carp ;
use Test::More ;
use Benchmark qw(timethese cmpthese) ;
use Deep::Hash::Utils qw(reach nest deepvalue) ;
BEGIN {
    sub round {
        my $number = shift ;
        return int( $number + .5 * ( $number <=> 0 ) ) ;
    }
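    # _nextUnique returns a closure that hands out unique hash keys by
    # counting in base 256: the counter is encoded as a byte string, one
    # chr() per "digit", and the string grows by one byte on overflow.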
    sub _nextUnique {
        my $uniqueNmbr = 1 ;
        my $uniqueVal = -1 ;
        return sub {
            my $unique = '' ;
            ++$uniqueVal ;
            if ( ( int ( $uniqueVal / 256**$uniqueNmbr ) ) == 1 ) {
                ++$uniqueNmbr ;
                --$uniqueVal ;
                for ( my $i = $uniqueNmbr ; $i > 0 ; --$i ) {
                    $unique = $unique . chr( 0 ) ;
                }
                return $unique ;
            }
            my $copyUniqueVal = $uniqueVal ;
            for ( my $i = $uniqueNmbr ; $i > 0 ; --$i ) {
                my $devider = 256**($i - 1) ;
                ( my $c, $copyUniqueVal ) = ( int $copyUniqueVal / $devider, $copyUniqueVal % $devider ) ;
                $unique = chr( $c ) . $unique ;
            }
            return $unique ;
        } ;
    }
    *nextUnique = _nextUnique() ;
    # Test nextUnique
    # for ( my $i = 0 ; $i < 65538 ; ++$i ) {
    #     my $u = nextUnique() ;
    #     my @ASCII = unpack( "C*", $u ) ;
    #     print $i . " @ASCII" . "\n" ;
    # }
}
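# throwDice returns an arrayref of two values picked from %{ $dice },
# each value weighted by its relative chance (all equal here, so the
# picks are uniform over 1..5).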
sub throwDice {
    my $dice = {
        # The value and its relative chance
        1 => 1,
        2 => 1,
        3 => 1,
        4 => 1,
        5 => 1
    } ;
    my $retitems = [] ;
    my $total = 5 ;
    my $nmbr = 2 ; # Roll two dice
    for( my $i = 1; $i <= $nmbr ; ++$i ) {
        my $rnd = rand( $total ) ;
        my $counter = 0 ;
        foreach ( keys %{ $dice } ) {
            $counter = $counter + $dice->{ $_ } ;
            if ( $rnd < $counter ) {
                push( @{ $retitems }, $_ ) ;
                last ;
            }
        }
    }
    return $retitems ;
}
# Test throwDice
# my $numbers = throwDice() ;
# print "numbers = @{$numbers}\n" ;
my $nH = 0 ; # Number of Hashes
my $nV = 0 ; # Number of Values
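# createHash recursively builds a random nested source hash and, in
# parallel, a filter hash that names a subset of its keys. It returns
# both as the list ($source, $filter).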
sub createHash {
    # This sub is highly random in nature: the structure it builds will sometimes take a couple of MB of memory and other times several GB
    my $depth = $_[0] // 0 ;
    ++$depth ;
    my $s = {} ;
    my $f = {} ;
    # Test 1 and 2
    for ( my $i = 1 ; $i <= 17 ; ++$i ) {
    # Test 3 and 4
    # for ( my $i = 1 ; $i <= 5 ; ++$i ) {
        my $rnd = throwDice() ;
        # Test 1 and 2
        # Fewer hashes, more values
        if ( $rnd->[0] > 4 && $depth <= 10 ) {
        # Test 3 and 4
        # More hashes, fewer values
        # if ( $rnd->[0] > 1 && $depth <= 10 ) {
            my $key = nextUnique() ;
            my $randval = rand( 1 ) ;
            # $rnd->[1] > ...
            # > 0 means a 100% chance to add it to the filter, > 1 = 80%, > 2 = 60%, > 3 = 40%, > 4 = 20%, > 5 = 0%
            # Note that this is not the total chance; the filter branch may still be discarded.
            # To decrease the chance that an ENTIRE hash branch is rejected from the filter,
            # an additional check keeps the branch in the filter 90% of the time regardless of the roll ( See: $randval > 0.1 )
            # Test 1 and 3
            if ( ( $rnd->[1] > 1 ) || ( $randval > 0.1 ) ) {
            # Test 2 and 4
            # if ( ( $rnd->[1] > 4 ) || ( $randval > 0.1 ) ) {
                ++$nH ;
                ( $s->{ $key }, $f->{ $key } ) = createHash( $depth ) ;
                # IF $f->{ $key } IS {} THEN DELETE $f->{ $key } (not a valid filter for testing)
                if ( !(%{$f->{ $key }}) ) {
                    delete $f->{ $key } ;
                }
            } else {
                ++$nH ;
                ( $s->{ $key }, undef ) = createHash( $depth ) ;
                # the returned filter is discarded (this branch is not added to the filter)
            }
        } else {
            my $key = nextUnique() ;
            my $value = $depth . '.' . $i . "ABCDEFGHIJKLMNOPQRSTUVWXYZ" ;
            ++$nV ;
            $s->{ $key } = $value ;
            # $rnd->[1] > ...
            # > 0 means a 100% chance to add it to the filter, > 1 = 80%, > 2 = 60%, > 3 = 40%, > 4 = 20%, > 5 = 0%
            # Note that this is not the total chance; the filter branch may still be discarded
            # Test 1 and 3
            if ( $rnd->[1] > 1 ) {
            # Test 2 and 4
            # if ( $rnd->[1] > 4 ) {
                $f->{ $key } = 1 ;
            }
        }
    }
    return ($s, $f) ;
}
my $nHF = 0 ; # Number of Hashes Filtered
my $nVF = 0 ; # Number of Values Filtered
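# countHashesAndValuesFilter walks a filter-shaped hash and counts the
# nested hashes ($nHF) and leaf entries ($nVF) it contains.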
sub countHashesAndValuesFilter {
    my $f = shift ;
    foreach ( keys %{ $f } ) {
        my $k = $_ ;
        if ( ref $f->{ $k } eq 'HASH' ) {
            ++$nHF;
            countHashesAndValuesFilter( $f->{ $k } ) ;
        } else {
            ++$nVF;
        }
    }
}
my ( $source, $filter ) = createHash() ;
countHashesAndValuesFilter( $filter ) ;
print "Created test hash\n" ;
if ( $nH < 100000 || $nV < 100000 ) {
    # These thresholds can probably be set higher, but they at least help a bit to get better test data
    print "Invalid, test again\n" ;
    exit(0) ;
}
print "Number of Hashes = $nH\n" ;
print "Number of Values = $nV\n" ;
print "Number of Hashes Filtered = $nHF\n" ;
print "Number of Values Filtered = $nVF\n" ;
print "Filtering " . round(($nHF/$nH)*100) . "% Hashes\n" ;
print "Filtering " . round(($nVF/$nV)*100) . "% Values\n" ;
sleep( 2 ) ;
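# hash_filter_recursive: depth-first recursion over the filter keys,
# copying matching source entries into a fresh output hash.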
sub hash_filter_recursive {
    my $source = shift;
    my $filter = shift;
    my %output;
    foreach ( keys %$filter ) {
        if ( exists $source->{$_} ) {
            if ( ref $filter->{$_} eq 'HASH' ) {
                croak "bad filter: on '$_', expected HASH\n"
                    unless ( ref $source->{$_} eq 'HASH' );
                $output{$_} = hash_filter_recursive( $source->{$_}, $filter->{$_} );
            }
            else {
                $output{$_} = $source->{$_};
            }
        }
    }
    return \%output;
}
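# hash_filter_iterative: same result as the recursive version, but it
# traverses the structure breadth-first with an explicit work queue
# instead of recursing.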
sub hash_filter_iterative {
    my $source = shift;
    my $filter = shift;
    my $output = {};
    my @queue = ( [ $source, $filter, $output ] );
    while ( my $a = shift @queue ) {
        my ( $s, $f, $o ) = @{$a};
        foreach ( keys %$f ) {
            if ( exists $s->{$_} ) {
                if ( ref $f->{$_} eq 'HASH' ) {
                    croak "bad filter: on '$_', expected HASH\n"
                        unless ( ref $s->{$_} eq 'HASH' );
                    $o->{$_} = {};
                    push @queue, [ $s->{$_}, $f->{$_}, $o->{$_} ];
                }
                else {
                    $o->{$_} = $s->{$_};
                }
            }
        }
    }
    return $output;
}
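# hash_filter_delete: destructive variant that deletes unwanted keys
# from the source hash in place; it is left out of the tests and the
# benchmark below because it would modify the shared test data.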
sub hash_filter_delete {
    my $source = shift;
    my $filter = shift;
    foreach ( keys %$source ) {
        if ( exists $filter->{$_} ) {
            if ( ref $filter->{$_} eq 'HASH' ) {
                croak "bad filter: on '$_', expected HASH\n"
                    unless ( ref $source->{$_} eq 'HASH' );
                hash_filter_delete( $source->{$_}, $filter->{$_} );
            }
        }
        else {
            delete $source->{$_};
        }
    }
    return $source;
}
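# hash_slice: copies every filter key in one hash slice, then recurses
# into the keys whose filter value is itself a hash. There is no
# 'exists' check, so filter keys missing from the source would come
# through as undef values; in this test every filter key exists in the
# source.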
sub hash_slice {
    # contributed by Veltro
    # Veltro: made more efficient by reusing @keysToGet instead of another keys call. Also added croak in case the filter is not valid.
    my $n ;
    my @keysToGet = keys %{$_[1]};
    @{$n}{@keysToGet} = @{$_[0]}{@keysToGet};
    foreach ( @keysToGet ) {
        if ( ref $_[1]->{ $_ } eq 'HASH' ) {
            croak "bad filter: on '$_', expected HASH\n" unless ( ref $_[0]->{ $_ } eq 'HASH' ) ;
            $n->{$_} = hash_slice( $_[0]->{$_}, $_[1]->{$_} );
        }
    }
    return $n ;
}
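# map_grep: builds the output in a single expression, grep'ing the
# filter keys that exist in the source and map'ping them to key/value
# pairs, recursing where the filter value is a nested hash.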
sub map_grep {
    # contributed by shmem
    my $source = shift;
    my $filter = shift;
    return {
        map {
            ref $filter->{$_} eq 'HASH'
                ? ref $source->{$_} eq 'HASH'
                    ? ( $_, map_grep( $source->{$_}, $filter->{$_} ) )
                    : croak "bad filter: on '$_', expected HASH\n"
                : ( $_, $source->{$_} )
        } grep { exists $source->{$_} } keys %$filter
    };
}
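# map_grep_2: same expression as map_grep, but it reads $_[0]/$_[1]
# directly instead of copying the arguments into lexicals.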
sub map_grep_2 {
    # contributed by shmem
    return {
        map {
            ref $_[1]->{$_} eq 'HASH'
                ? ref $_[0]->{$_} eq 'HASH'
                    ? ( $_, map_grep_2( $_[0]->{$_}, $_[1]->{$_} ) )
                    : croak "bad filter: on '$_', expected HASH\n"
                : ( $_, $_[0]->{$_} )
        } grep { exists $_[0]->{$_} } keys %{ $_[1] }
    };
}
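# hash_deep_utils: builds the output with reach/nest/deepvalue from
# Deep::Hash::Utils; it is defined here for reference but not included
# in the is_deeply checks or the benchmark below.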
sub hash_deep_utils {
    # contributed by mr_ron
    my ( $source, $filter ) = @_;
    my %rc;
    while ( my @l = reach($filter) ) {
        pop @l;
        if ( defined( my $source_val = deepvalue( $source, @l ) ) ) {
            # hint: work around nest behavior on even vs odd key count
            nest( \%rc, @l )->{ $l[$#l] } = $source_val;
        }
    }
    \%rc;
}
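# Test fixture: the reference output is produced once with map_grep_2,
# and every other implementation is compared against it with is_deeply.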
my $t = {
    source => $source,
    filter => $filter,
    output => map_grep_2($source, $filter)
} ;
$nHF = 0 ;
$nVF = 0 ;
# Using this function again to do some additional checks
countHashesAndValuesFilter( $t->{ output } ) ;
print "Values should be the same as filter values:\n" ;
print "Number of hashes in output: $nHF\n" ;
print "Number of values in output: $nVF\n" ;
# Veltro: Removed hash_filter_delete, I don't trust that it will leave the test hash intact
is_deeply( $_->( $t->{source}, $t->{filter} ), $t->{output} ) foreach ( \&hash_filter_recursive, \&hash_filter_iterative, \&hash_slice, \&map_grep, \&map_grep_2 );
cmpthese(
    10,
    {
        recursive  => sub { hash_filter_recursive( $t->{source}, $t->{filter} ) },
        iterative  => sub { hash_filter_iterative( $t->{source}, $t->{filter} ) },
        # delete   => sub { hash_filter_delete( $t->{source}, $t->{filter} ) },
        map_grep_2 => sub { map_grep_2( $t->{source}, $t->{filter} ) },
        map_grep   => sub { map_grep( $t->{source}, $t->{filter} ) },
        slice      => sub { hash_slice( $t->{source}, $t->{filter} ) },
    }
);
# Check if the data is still the same after testing
is_deeply( $_->( $t->{source}, $t->{filter} ), $t->{output} ) foreach ( \&hash_filter_recursive, \&hash_filter_iterative, \&hash_slice, \&map_grep, \&map_grep_2 );
done_testing();