Category: Utility
Author/Contact Info Michael K. Neylon (mneylon-pm@masemware.com)
Description: Merges two arbitarily deep/nested hashes. Note that as of this current version, there may be problems with circular references, something that I wish to fix at some point, but at the current time, I'm looking for feedback while I think of a way to overcome this problem.

As always, comments and critique are appriciated. I am looking for comments for I upload this to CPAN.

#!/usr/bin/perl -w

package Hash::Merge;

#=====================================================================
+========
#
# $Id: Merge.pm,v 0.01.1.1 2001/10/23 03:01:34 mneylon Exp $
# $Revision: 0.01.1.1 $
# $Author: mneylon $
# $Date: 2001/10/23 03:01:34 $
# $Log: Merge.pm,v $
# Revision 0.01.1.1  2001/10/23 03:01:34  mneylon
# Slight fixes
#
# Revision 0.01  2001/10/23 03:00:21  mneylon
# Initial Release to PerlMonks
#
#
#=====================================================================
+========

use strict;
#use warnings;

BEGIN {
    use Exporter   ();
    use vars       qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
    $VERSION     = sprintf( "%d.%02d", q($Revision: 0.01.1.1 $) =~ /\s
+(\d+)\.(\d+)/ );
    @ISA         = qw(Exporter);
    @EXPORT      = qw();
    @EXPORT_OK   = qw( merge );
    %EXPORT_TAGS = ( );
}

my %left_precedent = (
    SCALAR => {
        SCALAR => sub { $_[0] },
        ARRAY  => sub { $_[0] },
        HASH   => sub { $_[0] } },
    ARRAY => {
        SCALAR => sub { [ @{$_[0]}, $_[1] ] },
        ARRAY  => sub { [ @{$_[0]}, @{$_[1]} ] },
        HASH   => sub { [ @{$_[0]}, values %{$_[1]} ] } },
    HASH => {
        SCALAR => sub { $_[0] },
        ARRAY  => sub { $_[0] },
        HASH   => sub { _merge_hashes( $_[0], $_[1] ) } }
);

my %right_precedent = (
    SCALAR => {
        SCALAR => sub { $_[1] },
        ARRAY  => sub { [ $_[0], @{$_[1]} ] },
        HASH   => sub { $_[1] } },
    ARRAY => {
        SCALAR => sub { $_[1] },
        ARRAY  => sub { [ @{$_[0]}, @{$_[1]} ] },
        HASH   => sub { $_[1] } },
    HASH => {
        SCALAR => sub { $_[1] },
        ARRAY  => sub { [ values %{$_[0]}, @{$_[1]} ] },
        HASH   => sub { _merge_hashes( $_[0], $_[1] ) } }
);

my %storage_precedent = (
    SCALAR => {
        SCALAR => sub { $_[0] },
        ARRAY  => sub { [ $_[0], @{$_[1]} ] },
        HASH   => sub { $_[1] } },
    ARRAY => {
        SCALAR => sub { [ @{$_[0]}, $_[1] ] },
        ARRAY  => sub { [ @{$_[0]}, @{$_[1]} ] },
        HASH   => sub { $_[1] } },
    HASH => {
        SCALAR => sub { $_[0] },
        ARRAY  => sub { $_[0] },
        HASH   => sub { _merge_hashes( $_[0], $_[1] ) } }
);

my %retainment_precedent = (
    SCALAR => {
        SCALAR => sub { [ $_[0], $_[1] ] },
        ARRAY  => sub { [ $_[0], @{$_[1]} ] },
        HASH   => sub { _merge_hashes( _hashify( $_[0] ), $_[1] ) } },
    ARRAY => {
        SCALAR => sub { [ @{$_[0]}, $_[1] ] },
        ARRAY  => sub { [ @{$_[0]}, @{$_[1]} ] },
        HASH   => sub { _merge_hashes( _hashify( $_[0] ), $_[1] ) } },
    HASH => {
        SCALAR => sub { _merge_hashes( $_[0], _hashify( $_[1] ) ) },
        ARRAY  => sub { _merge_hashes( $_[0], _hashify( $_[1] ) ) },
        HASH   => sub { _merge_hashes( $_[0], $_[1] ) } }
);

my %behaviors = (
    LEFT_PRECEDENT => \%left_precedent,
    RIGHT_PRECEDENT => \%right_precedent,
    STORAGE_PRECEDENT => \%storage_precedent,
    RETAINMENT_PRECEDENT => \%retainment_precedent 
);

my $merge_behavior = 'LEFT_PRECEDENT';
my $merge_matrix = \%{ $behaviors{ $merge_behavior } };

sub set_behavior {
    my $value = uc(shift);
    die "Behavior must be one of : " , join ' ', keys %behaviors 
        unless exists $behaviors{ $value };
    $merge_behavior = $value;
    $merge_matrix = \%{ $behaviors{ $merge_behavior } };
}

sub get_behavior {
    return $merge_behavior;
}

sub specify_behavior {
    my $matrix = shift;
    my $name = shift || "user defined";
    my @required = qw ( SCALAR ARRAY HASH );

    foreach my $left ( @required ) {
        foreach my $right ( @required ) {
            die "Behavior does not specify action for $left merging wi
+th $right"
                unless exists $matrix->{ $left }->{ $right };
        }
    }

    $merge_behavior = $name;
    $merge_matrix = $matrix;
}

sub merge {
    my ( $left, $right ) = ( shift, shift );

    my ( $lefttype, $righttype );
    if ( UNIVERSAL::isa( $left, 'HASH' ) ) { 
        $lefttype = 'HASH';
    } elsif ( UNIVERSAL::isa( $left, 'ARRAY' ) ) {
        $lefttype = 'ARRAY';
    } else {
        $lefttype = 'SCALAR';
    }

    if ( UNIVERSAL::isa( $right, 'HASH' ) ) { 
        $righttype = 'HASH';
    } elsif ( UNIVERSAL::isa( $right, 'ARRAY' ) ) {
        $righttype = 'ARRAY';
    } else {
        $righttype = 'SCALAR';
    }
    
    return &{ $merge_matrix->{ $lefttype }->{ $righttype }}
        ( $left, $right );
}    

# This does a straight merge of hashes, delegating the merge-specific 
# work to 'merge'

sub _merge_hashes {
    my ( $left, $right ) = ( shift, shift );
    die "Arguments for _merge_hashes must be hash references" unless 
        UNIVERSAL::isa( $left, 'HASH' ) && UNIVERSAL::isa( $right, 'HA
+SH' );
    
    my %newhash;
    foreach my $leftkey ( keys %$left ) {
        if ( exists $right->{ $leftkey } ) {
            $newhash{ $leftkey } = 
                merge ( $left->{ $leftkey }, $right->{ $leftkey } )
        } else {
            $newhash{ $leftkey } = $left->{ $leftkey };
        }
    }
    foreach my $rightkey ( keys %$right ) { 
        if ( !exists $left->{ $rightkey } ) {
            $newhash{ $rightkey } = $right->{ $rightkey }
        }
    }
    return \%newhash;
}

# Given a scalar or an array, creates a new hash where for each item i
+n
# the passed scalar or array, the key is equal to the value.  Returns
# this new hash
 
sub _hashify {
    my $arg = shift;
    die "Arguement for _hashify must not be a HASH ref" if
        UNIVERSAL::isa( $arg, 'HASH' );
    
    my %newhash;
    if ( UNIVERSAL::isa( $arg, 'ARRAY' ) ) {
        foreach my $item ( @$arg ) {
            $newhash{ $item } = $item;
        }
    } else {
        $newhash{ $arg } = $arg;
    }
    return \%newhash;
}

1;
__END__

=head1 NAME

Hash::Merge - Merges artibrarily deep hashes into a single hash

=head1 SYNOPSIS

  use Hash::Merge qw( merge );
  my %a = ( foo => 1,
            bar => [ a, b, e ],
            querty => { bob => alice } );
  my %b = ( foo => 2, 
            bar => [ c, d ],
            querty => { ted => margeret } );

  my %c = merge( \%a, \%b );

  Hash::Merge->set_behavior( RIGHT_PRECEDENCE );

  # This is the same as above

  Hash::Merge->specify_behavior( {
      SCALAR => {
        SCALAR => sub { $_[1] },
        ARRAY  => sub { [ $_[0], @{$_[1]} ] },
        HASH   => sub { $_[1] } },
    ARRAY => {
        SCALAR => sub { $_[1] },
        ARRAY  => sub { [ @{$_[0]}, @{$_[1]} ] },
        HASH   => sub { $_[1] } },
    HASH => {
        SCALAR => sub { $_[1] },
        ARRAY  => sub { [ values %{$_[0]}, @{$_[1]} ] },
        HASH   => sub { Hash::Merge::_merge_hashes( $_[0], $_[1] ) } }
  }, "My Behavior" );

=head1 DESCRIPTION

Hash::Merge merges two arbitrarily deep hashes into a single hash.  Th
+at
is, at any level, it will add non-conflicting key-value pairs from one
hash to the other, and follows a set of specific rules when there are 
+key
value conflicts (as outlined below).  The hash is followed recursively
+,
so that deeply nested hashes that are at the same level will be merged
+ 
when the parent hashes are merged.

Values in hashes are considered to be either ARRAY references, 
HASH references, or otherwise are treated as SCALARs.

Because there are a number of possible ways that one may want to merge
values when keys are conflicting, Hash::Merge provides several preset
methods for your convinence, as well as a way to define you own.  
These are (currently):

=over

=item Left Precedence - The values buried in the left hash will never
be lost; any values that can be added from the right hash will be
attempted.

=item Right Precedence - Same as Left Precedence, but with the right
hash values never being lost

=item Storage Precedence - If conflicting keys have two different 
storage mediums, the 'bigger' medium will win; arrays are preferred ov
+er
scalars, hashes over either.  The other medium will try to be fitted i
+n
the other, but if this isn't possible, the data is dropped.

=item Retainment Precedece - No data will be lost; scalars will be joi
+ned
with arrays, and scalars and arrays will be 'hashified' to fit them in
+to
a hash.

=back

Specific descriptions of how these work are detailed below.

=over 

=item merge ( <hashref>, <hashref> )

Merges two hashes given the rules specified.  Returns a reference to 
the new hash.

=item _hashify( <scalar>|<arrayref> ) -- INTERNAL FUNCTION

Returns a reference to a hash created from the scalar or array referen
+ce, 
where, for the scalar value, or each item in the array, there is a key
and it's value equal to that specific value.  Example, if you pass sca
+lar
'3', the hash will be { 3 => 3 }.

=item _merge_hashes( <hashref>, <hashref> ) -- INTERNAL FUNCTION

Actually does the key-by-key evaluation of two hashes and returns 
the new merged hash.  Note that this recursively calls C<merge>.

=item set_behavior( <scalar> )

Specify which built-in behavior for merging that is desired.  The scal
+ar
must be one of those given below.

=item get_behavior( )

Returns the behavior that is currently in use by Hash::Merge.

=item specify_behavior( <hashref>, [<name>] )

Specify a custom merge behavior for Hash::Merge.  This must be a hashr
+ef
defined with (at least) 3 keys, SCALAR, ARRAY, and HASH; each of those
keys must have another hashref with (at least) the same 3 keys defined
+.
Furthermore, the values in those hashes must be coderefs.  These will 
+be
called with two arguments, the left and right values for the merge.  
Your coderef should return either a scalar or an array or hash referen
+ce
as per your planned behavior.  If necessary, use the functions
_hashify and _merge_hashes as helper functions for these.  For example
+,
if you want to add the left SCALAR to the right ARRAY, you can have yo
+ur
behavior specification include:

   %spec = ( ...SCALAR => { ARRAY => sub { [ $_[0], @$_[1] ] }, ... } 
+} );

=back

=head1 BUILT-IN BEHAVIORS

Here is the specifics on how the current internal behaviors are called
+, 
and what each does.  Assume that the left value is given as $a, and
the right as $b (these are either scalars or appropriate references)

LEFT TYPE   RIGHT TYPE      LEFT_PRECEDENT       RIGHT_PRECEDENT
 SCALAR      SCALAR            $a                   $b
 SCALAR      ARRAY             $a                   ( $a, @$b )
 SCALAR      HASH              $a                   %$b
 ARRAY       SCALAR            ( @$a, $b )          $b
 ARRAY       ARRAY             ( @$a, @$b )         ( @$a, @$b )
 ARRAY       HASH              ( @$a, values %$b )  %$b 
 HASH        SCALAR            %$a                  $b
 HASH        ARRAY             %$a                  ( values %$a, @$b 
+)
 HASH        HASH              merge( %$a, %$b )    merge( %$a, %$b )

LEFT TYPE   RIGHT TYPE  STORAGE_PRECEDENT   RETAINMENT_PRECEDENT
 SCALAR      SCALAR     $a                  ( $a ,$b )
 SCALAR      ARRAY      ( $a, @$b )         ( $a, @$b )
 SCALAR      HASH       %$b                 merge( hashify( $a ), %$b 
+)
 ARRAY       SCALAR     ( @$a, $b )         ( @$a, $b )
 ARRAY       ARRAY      ( @$a, @$b )        ( @$a, @$b )
 ARRAY       HASH       %$b                 merge( hashify( @$a ), %$b
+ )
 HASH        SCALAR     %$a                 merge( %$a, hashify( $b ) 
+)
 HASH        ARRAY      %$a                 merge( %$a, hashify( @$b )
+ )
 HASH        HASH       merge( %$a, %$b )   merge( %$a, %$b )

(*) note that merge calls _merge_hashes, hashify calls _hashify.

=head1 AUTHOR

Michael K. Neylon <lt>mneylon-pm@masemware.com<gt>

=cut
Replies are listed 'Best First'.
Re: Hash::Merge
by princepawn (Parson) on Oct 23, 2001 at 07:36 UTC
    looks good... what drove the necessity for development?
      A while back, jcwren was looking for something that merged hashes for a project of his, but couldn't find anything. I was surprised not to find anything either, so I planned this out and put it together for both his use and the perl community as a whole (Obvious extention: XML::Merge, which also, surprisingly, missing).

      In jc's case, he's taking an existing XML structure, hashified, and adding updates from another XML structure, such that in the code above, he'd use RIGHT_PRECEDENT to have the updates either insert themselves where needed, or replace old values.

      -----------------------------------------------------
      Dr. Michael K. Neylon - mneylon-pm@masemware.com || "You've left the lens cap of your mind on again, Pinky" - The Brain
      "I can see my house from here!"
      It's not what you know, but knowing how to find it if you don't know that's important

Re: Hash::Merge
by demerphq (Chancellor) on Oct 29, 2001 at 18:28 UTC
    Just a thought but handling circular refrences shouldnt be too hard (at least in terms of identification purposes) by using overload::StrVal(). That way even if the value has an object that overloads stringify you can still get access to the underlying implementation type and memory slot.

    Hmmm, but you knew that already didnt you. :-)

    Yves / DeMerphq
    --
    Have you registered your Name Space?

Re: Hash::Merge
by cosmicperl (Chaplain) on Apr 17, 2008 at 09:37 UTC
    Hi Michael,
      I've ran into a situation where I've needed to use your module. The problem I found with it is that it doesn't merge arrays, just replaces one with the other. I had a situation where I had two array references such as:-
    $a1 = [undef, item2]; $a2 = [item1, undef];
    I was ending up with
    $a3 = [item1, undef];
    When I wanted
    $a3 = [item1, item2];
    I've written an update that merges the arrays if items are undef like the example above. So far I've only tested with LEFT_PRECEDENT, but I don't see why it wouldn't work well for the others.

    My changes to LEFT_PRECEDENT:-
    'LEFT_PRECEDENT' => { 'SCALAR' => { 'SCALAR' => sub { $_[0] }, 'ARRAY' => sub { $_[0] }, 'HASH' => sub { $_[0] }, }, 'ARRAY' => { 'SCALAR' => sub { [ @{$_[0]}, $_[1] ] }, 'ARRAY' => sub { [ @{$_[0]}, @{$_[1]} ] }, 'HASH' => sub { [ @{$_[0]}, values %{$_[1]} ] }, }, 'HASH' => { 'SCALAR' => sub { $_[0] }, 'ARRAY' => sub { $_[0] }, 'HASH' => sub { _merge_hashes( $_[0], $_[1] ) }, }, },
    New sub:-
    sub _merge_arrays { my ( $left, $right ) = ( shift, shift ); if( ref $left ne 'ARRAY' || ref $right ne 'ARRAY' ) { carp 'Arguments for _merge_arrays must be array references'; return; } my $length = $#$left; $length = $#$right if ($#$right > $length); my @newarray; for (my $num = 0; $num <= $length; $num++) { if ( defined $left->[$num] && defined $right->[$num] ) { @newarray[ $num ] = merge ( $left->[ $num ], $right->[ $num] ); } elsif ( defined $left->[$num] && !(defined $right->[$num]) ) { @newarray[ $num ] = $left->[ $num ]; } elsif ( !(defined $left->[$num]) && defined $right->[$num] ) { @newarray[ $num ] = $right->[ $num ]; } else { $newarray[$num] = $clone_behavior ? _my_clone( $left->[$num] ) : $left->[$num]; } }#for return \@newarray; }
    I'm guessing that this would make a good option for people. I wont implement it fully as it's your module after all. But if you do it implement it I'd greatly appreciate a mention, love reading my name on CPAN ;)

    By the way I noticed that there is now a Pure Perl Clone module that doesn't need to be required in so I guess that you could use one or the other depending on availability to fix your ActiveState Clone problem.

    Hope this helps.