Number of Hashes = 305798
Number of Values = 4892785
Number of Hashes Filtered = 268718
Number of Values Filtered = 3435051
Filtering 88% Hashes
Filtering 70% Values
           s/iter map_grep_2 map_grep slice recursive iterative
map_grep_2   6.29         --      -1%   -8%      -10%      -16%
map_grep     6.22         1%       --   -7%       -9%      -15%
slice        5.81         8%       7%    --       -2%       -9%
recursive    5.68        11%      10%    2%        --       -7%
iterative    5.27        19%      18%   10%        8%        --
####
Number of Hashes = 405929
Number of Values = 6494881
Number of Hashes Filtered = 203883
Number of Values Filtered = 660915
Filtering 50% Hashes
Filtering 10% Values
           s/iter iterative map_grep map_grep_2 slice recursive
iterative    1.67        --      -1%        -1%   -7%      -12%
map_grep     1.65        1%       --        -0%   -6%      -11%
map_grep_2   1.65        1%       0%         --   -6%      -10%
slice        1.55        7%       6%         6%    --       -5%
recursive    1.48       13%      12%        12%    5%        --
####
Number of Hashes = 1579338
Number of Values = 6317357
Number of Hashes Filtered = 1369612
Number of Values Filtered = 4362177
Filtering 87% Hashes
Filtering 69% Values
           s/iter map_grep_2 slice map_grep recursive iterative
map_grep_2   10.7         --   -0%      -3%       -6%       -6%
slice        10.7         0%    --      -3%       -6%       -6%
map_grep     10.4         3%    3%       --       -3%       -3%
recursive    10.1         6%    6%       3%        --       -0%
iterative    10.0         7%    7%       3%        0%        --
####
Number of Hashes = 1226812
Number of Values = 4907253
Number of Hashes Filtered = 492351
Number of Values Filtered = 509410
Filtering 40% Hashes
Filtering 10% Values
           s/iter iterative slice map_grep map_grep_2 recursive
iterative    2.56        --  -16%     -17%       -17%      -22%
slice        2.15       19%    --      -1%        -1%       -7%
map_grep     2.13       20%    1%      -0%         --       -6%
map_grep_2   2.12       20%    1%       0%         --       -6%
recursive    1.99       28%    8%       7%        6%        --
####
sub hash_slice {
    my $n ;
    my @keysToGet = keys %{$_[1]};
    @{$n}{@keysToGet} = @{$_[0]}{@keysToGet};
    foreach ( @keysToGet ) {
        if ( ref $_[1]->{ $_ } eq 'HASH' ) {
            croak "bad filter: on '$_', expected HASH\n"
                unless ( ref $_[0]->{ $_ } eq 'HASH' ) ;
            $n->{$_} = hash_slice( $_[0]->{$_}, $_[1]->{$_} );
        }
    }
    return $n ;
}
####
#! /usr/bin/env perl
use strict ;
use warnings ;
use Data::Dumper ;
use Carp ;
use Test::More ;
use Benchmark qw(timethese cmpthese) ;
use Deep::Hash::Utils qw(reach nest deepvalue) ;

BEGIN {
    sub round {
        my $number = shift ;
        return int( $number + .5 * ( $number <=> 0 ) ) ;
    }

    sub _nextUnique {
        my $uniqueNmbr = 1 ;
        my $uniqueVal = -1 ;
        return sub {
            my $unique = '' ;
            ++$uniqueVal ;
            if ( ( int ( $uniqueVal / 256**$uniqueNmbr ) ) == 1 ) {
                ++$uniqueNmbr ;
                --$uniqueVal ;
                for ( my $i = $uniqueNmbr ; $i > 0 ; --$i ) {
                    $unique = $unique . chr( 0 ) ;
                }
                return $unique ;
            }
            my $copyUniqueVal = $uniqueVal ;
            for ( my $i = $uniqueNmbr ; $i > 0 ; --$i ) {
                my $devider = 256**($i - 1) ;
                ( my $c, $copyUniqueVal ) =
                    ( int $copyUniqueVal / $devider, $copyUniqueVal % $devider ) ;
                $unique = chr( $c ) . $unique ;
            }
            return $unique ;
        }
    }
    *nextUnique = _nextUnique ;

    # Test nextUnique
    # for ( my $i = 0 ; $i < 65538 ; ++$i ) {
    #     my $u = nextUnique() ;
    #     my @ASCII = unpack( "C*", $u ) ;
    #     print $i . " @ASCII" . "\n" ;
    # }
}

sub throwDice {
    my $dice = {
        # The value and its relative chance
        1 => 1,
        2 => 1,
        3 => 1,
        4 => 1,
        5 => 1
    } ;
    my $retitems = [] ;
    my $total = 5 ;
    my $nmbr = 2 ; # Roll two dice
    for ( my $i = 1 ; $i <= $nmbr ; ++$i ) {
        my $rnd = rand( $total ) ;
        my $counter = 0 ;
        foreach ( keys %{ $dice } ) {
            $counter = $counter + $dice->{ $_ } ;
            if ( $rnd < $counter ) {
                push( @{ $retitems }, $_ ) ;
                last ;
            }
        }
    }
    return $retitems ;
}

# Test throwDice
# my $numbers = throwDice() ;
# print "numbers = @{$numbers}\n" ;

my $nH = 0 ; # Number of Hashes
my $nV = 0 ; # Number of Values

sub createHash {
    # This sub is highly random: the generated hash will sometimes take a couple of MB and other times several GB
    my $depth = $_[0] // 0 ;
    ++$depth ;
    my $s = {} ;
    my $f = {} ;
    # Test 1 and 2
    for ( my $i = 1 ; $i <= 17 ; ++$i ) {
    # Test 3 and 4
    # for ( my $i = 1 ; $i <= 5 ; ++$i ) {
        my $rnd = throwDice() ;
        # Test 1 and 2
        # Fewer hashes, more values
        if ( $rnd->[0] > 4 && $depth <= 10 ) {
        # Test 3 and 4
        # More hashes, fewer values
        # if ( $rnd->[0] > 1 && $depth <= 10 ) {
            my $key = nextUnique() ;
            my $randval = rand( 1 ) ;
            # $rnd->[1] > ...
            # > 0 means 100% chance to add it to the filter, > 1 = 80%, > 2 = 60%, > 3 = 40%, > 4 = 20%, > 5 = 0%
            # Note that this is not the total chance, the filter may still be discarded.
            # To decrease the chance that an ENTIRE hash branch is rejected in the filter,
            # an additional 10% chance multiplier is added on top of this ( see: $randval > 0.1 )
            # Test 1 and 3
            if ( ( $rnd->[1] > 1 ) || ( $randval > 0.1 ) ) {
            # Test 2 and 4
            # if ( ( $rnd->[1] > 4 ) || ( $randval > 0.1 ) ) {
                ++$nH ;
                ( $s->{ $key }, $f->{ $key } ) = createHash( $depth ) ;
                # IF $f->{ $key } IS {} THEN DELETE $f->{ $key } (not a valid filter for testing)
                if ( !(%{$f->{ $key }}) ) {
                    delete $f->{ $key } ;
                }
            }
            else {
                ++$nH ;
                ( $s->{ $key }, undef ) = createHash( $depth ) ; # undef, hash is discarded (not added to the filter)
            }
        }
        else {
            my $key = nextUnique() ;
            my $value = $depth . '.' . $i . "ABCDEFGHIJKLMNOPQRSTUVWXYZ" ;
            ++$nV ;
            $s->{ $key } = $value ;
            # $rnd->[1] > ...
            # > 0 means 100% chance to add it to the filter, > 1 = 80%, > 2 = 60%, > 3 = 40%, > 4 = 20%, > 5 = 0%
            # Note that this is not the total chance, the filter may still be discarded
            # Test 1 and 3
            if ( $rnd->[1] > 1 ) {
            # Test 2 and 4
            # if ( $rnd->[1] > 4 ) {
                $f->{ $key } = 1 ;
            }
        }
    }
    return ($s, $f) ;
}

my $nHF = 0 ; # Number of Hashes Filtered
my $nVF = 0 ; # Number of Values Filtered

sub countHashesAndValuesFilter {
    my $f = shift ;
    foreach ( keys %{ $f } ) {
        my $k = $_ ;
        if ( ref $f->{ $k } eq 'HASH' ) {
            ++$nHF ;
            countHashesAndValuesFilter( $f->{ $k } ) ;
        }
        else {
            ++$nVF ;
        }
    }
}

my ( $source, $filter ) = createHash() ;
countHashesAndValuesFilter( $filter ) ;
print "Created test hash\n" ;
if ( $nH < 100000 || $nV < 100000 ) {
    # Can probably be set higher, but at least it helps a little to get better test data
    print "Invalid, test again\n" ;
    exit(0) ;
}
print "Number of Hashes = $nH\n" ;
print "Number of Values = $nV\n" ;
print "Number of Hashes Filtered = $nHF\n" ;
print "Number of Values Filtered = $nVF\n" ;
print "Filtering " . round(($nHF/$nH)*100) . "% Hashes\n" ;
print "Filtering " . round(($nVF/$nV)*100) . "% Values\n" ;
sleep( 2 ) ;

sub hash_filter_recursive {
    my $source = shift;
    my $filter = shift;
    my %output;
    foreach ( keys %$filter ) {
        if ( exists $source->{$_} ) {
            if ( ref $filter->{$_} eq 'HASH' ) {
                croak "bad filter: on '$_', expected HASH\n"
                    unless ( ref $source->{$_} eq 'HASH' );
                $output{$_} = hash_filter_recursive( $source->{$_}, $filter->{$_} );
            }
            else {
                $output{$_} = $source->{$_};
            }
        }
    }
    return \%output;
}

sub hash_filter_iterative {
    my $source = shift;
    my $filter = shift;
    my $output = {};
    my @queue = ( [ $source, $filter, $output ] );
    while ( my $a = shift @queue ) {
        my ( $s, $f, $o ) = @{$a};
        foreach ( keys %$f ) {
            if ( exists $s->{$_} ) {
                if ( ref $f->{$_} eq 'HASH' ) {
                    croak "bad filter: on '$_', expected HASH\n"
                        unless ( ref $s->{$_} eq 'HASH' );
                    $o->{$_} = {};
                    push @queue, [ $s->{$_}, $f->{$_}, $o->{$_} ];
                }
                else {
                    $o->{$_} = $s->{$_};
                }
            }
        }
    }
    return $output;
}

sub hash_filter_delete {
    my $source = shift;
    my $filter = shift;
    foreach ( keys %$source ) {
        if ( exists $filter->{$_} ) {
            if ( ref $filter->{$_} eq 'HASH' ) {
                croak "bad filter: on '$_', expected HASH\n"
                    unless ( ref $source->{$_} eq 'HASH' );
                hash_filter_delete( $source->{$_}, $filter->{$_} );
            }
        }
        else {
            delete $source->{$_};
        }
    }
    return $source;
}

sub hash_slice {
    # contributed by Veltro
    # Veltro: Made more efficient by using @keysToGet instead of another keys instruction.
    # Also added croak in case the filter is not valid.
    my $n ;
    my @keysToGet = keys %{$_[1]};
    @{$n}{@keysToGet} = @{$_[0]}{@keysToGet};
    foreach ( @keysToGet ) {
        if ( ref $_[1]->{ $_ } eq 'HASH' ) {
            croak "bad filter: on '$_', expected HASH\n"
                unless ( ref $_[0]->{ $_ } eq 'HASH' ) ;
            $n->{$_} = hash_slice( $_[0]->{$_}, $_[1]->{$_} );
        }
    }
    return $n ;
}

sub map_grep {
    # contributed by shmem
    my $source = shift;
    my $filter = shift;
    return {
        map {
            ref $filter->{$_} eq 'HASH'
                ? ref $source->{$_} eq 'HASH'
                    ? ( $_, map_grep( $source->{$_}, $filter->{$_} ) )
                    : croak "bad filter: on '$_', expected HASH\n"
                : ( $_, $source->{$_} )
        } grep { exists $source->{$_} } keys %$filter
    };
}

sub map_grep_2 {
    # contributed by shmem
    return {
        map {
            ref $_[1]->{$_} eq 'HASH'
                ? ref $_[0]->{$_} eq 'HASH'
                    ? ( $_, map_grep_2( $_[0]->{$_}, $_[1]->{$_} ) )
                    : croak "bad filter: on '$_', expected HASH\n"
                : ( $_, $_[0]->{$_} )
        } grep { exists $_[0]->{$_} } keys %{ $_[1] }
    };
}
sub hash_deep_utils {
    # contributed by mr_ron
    my ( $source, $filter ) = @_;
    my %rc;
    while ( my @l = reach($filter) ) {
        pop @l;
        if ( defined( my $source_val = deepvalue( $source, @l ) ) ) {
            # hint: work around nest behavior on even vs odd key count
            nest( \%rc, @l )->{ $l[$#l] } = $source_val;
        }
    }
    \%rc;
}

my $t = {
    source => $source,
    filter => $filter,
    output => map_grep_2( $source, $filter )
} ;

$nHF = 0 ;
$nVF = 0 ;
# Using this function again to do some additional checks
countHashesAndValuesFilter( $t->{ output } ) ;
print "Values should be the same as filter values:\n" ;
print "Number of hashes in output: $nHF\n" ;
print "Number of values in output: $nVF\n" ;

# Veltro: Removed hash_filter_delete, I don't trust that it will leave the test hash intact
is_deeply( $_->( $t->{source}, $t->{filter} ), $t->{output} )
    foreach ( \&hash_filter_recursive, \&hash_filter_iterative, \&hash_slice, \&map_grep, \&map_grep_2 );

cmpthese( 10, {
    recursive  => sub { hash_filter_recursive( $t->{source}, $t->{filter} ) },
    iterative  => sub { hash_filter_iterative( $t->{source}, $t->{filter} ) },
    # delete   => sub { hash_filter_delete( $t->{source}, $t->{filter} ) },
    map_grep_2 => sub { map_grep_2( $t->{source}, $t->{filter} ) },
    map_grep   => sub { map_grep( $t->{source}, $t->{filter} ) },
    slice      => sub { hash_slice( $t->{source}, $t->{filter} ) },
} );

# Check that the data is still the same after testing
is_deeply( $_->( $t->{source}, $t->{filter} ), $t->{output} )
    foreach ( \&hash_filter_recursive, \&hash_filter_iterative, \&hash_slice, \&map_grep, \&map_grep_2 );

done_testing();
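For anyone skimming the benchmark, here is a minimal, self-contained sketch of the filtering semantics that all of the benchmarked subs implement. The tiny source/filter hashes below are made up purely for illustration; the sub itself is copied verbatim from the script above.

#! /usr/bin/env perl
use strict ;
use warnings ;
use Carp ;
use Data::Dumper ;

# Copied from the benchmark script above
sub hash_filter_recursive {
    my $source = shift;
    my $filter = shift;
    my %output;
    foreach ( keys %$filter ) {
        if ( exists $source->{$_} ) {
            if ( ref $filter->{$_} eq 'HASH' ) {
                croak "bad filter: on '$_', expected HASH\n"
                    unless ( ref $source->{$_} eq 'HASH' );
                $output{$_} = hash_filter_recursive( $source->{$_}, $filter->{$_} );
            }
            else {
                $output{$_} = $source->{$_};
            }
        }
    }
    return \%output;
}

# Illustrative data only: keep 'a' and 'c'->'d', drop 'b' and 'c'->'e'
my $source = { a => 1, b => 2, c => { d => 3, e => 4 } } ;
my $filter = { a => 1, c => { d => 1 } } ;

print Dumper( hash_filter_recursive( $source, $filter ) ) ;
# Prints (hash key order may vary):
# $VAR1 = { 'a' => 1, 'c' => { 'd' => 3 } };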