Re^4: Augmenting and reducing data structures

Replies are listed 'Best First'.
Re^5: Augmenting and reducing data structures by sciurius (Beadle) on Apr 24, 2021 at 13:25 UTC
There are a couple of additional constraints that make the tast easier: The corresponding data types of the old and new structures match; There are no undefined values involved. Nevertheless further research has show that there are a lot of edge cases that make the task complicated. I'm investigating the Struct::Diff approach, as well as several other suggestions from this thread. Thanks a lot for the input!	[reply]
Re^6: Augmenting and reducing data structures by LanX (Saint) on Apr 24, 2021 at 15:16 UTC
I hope you understood my point that writing a validator as a first step would not only constitute the foundation but more than half the work needed to any possible diff-solution. Anything else is doomed to fail. Cheers Rolf _{(addicted to the Perl Programming Language :) Wikisyntax for the Monastery}	[reply]
Re^7: Augmenting and reducing data structures by sciurius (Beadle) on Apr 26, 2021 at 19:22 UTC
Here is a working proof of concept implementation. For an industry quality general purpose implementation some edge cases may need attention. #!/usr/bin/perl use strict; use warnings; use utf8; package Config; use constant DEBUG => 0; use Scalar::Util qw(reftype); use List::Util qw(any); # Create a Config object out of a hash. # # A Config object is a hash with a predefined set of keys and values. # Valid values are hashes, arrays and strings (scalars). Undefined # values do not occur. # The hash keys are fixed (i.e. no new keys, no delete keys). # Arrays can grow and shrink. sub new { my ( $pkg, $init ) = @_; bless { %$init } => $pkg; } # Augmentation. # # Given a Config object, augment its contents from a hash so, that all # values from the hash update the corresponding values in the Config # object. # # For example, if the Config object contains # # { a => { h => 1, i => 1 } } # # and the hash contains # # { a => { i => 2 } } # # then in the Config object the value for key 'i' of key 'a' will be # updated to the value 2. # # When an array starts with value "append" or "prepend", the new # values are appended resp. prepended to the existing values. For # example, if the Config has # # { b => [ "c", "d" ] } # # and the hash contains # # { b => [ "append", "x" ] } # # the result will be # # { b => [ "c", "d", "x" ] } sub augment : method { my ( $self, $hash ) = @_; # my $locked = $self->is_locked; # $self->unlock if $locked; $self->_augment( $hash, "" ); # $self->lock if $locked; $self; } sub _augment { my ( $self, $hash, $path ) = @_; for my $key ( keys(%$hash) ) { # No new keys... warn("Config error: unknown item $path$key\n") unless exists $self->{$key}; # Hash -> Hash. # Hash -> Array. if ( ref($hash->{$key}) eq 'HASH' ) { if ( ref($self->{$key}) eq 'HASH' ) { # Hashes. Recurse. _augment( $self->{$key}, $hash->{$key}, "$path$key." ) +; } elsif ( ref($self->{$key}) eq 'ARRAY' ) { ...; # TODO # Hash -> Array. # Update single array element using a hash index. foreach my $ix ( keys(%{$hash->{$key}}) ) { die unless $ix =~ /^\d+$/; $self->{$key}->[$ix] = $hash->{$key}->{$ix}; } } else { # Overwrite. $self->{$key} = $hash->{$key}; } } # Array -> Array. elsif ( ref($hash->{$key}) eq 'ARRAY' and ref($self->{$key}) eq 'ARRAY' ) { # Arrays. Overwrite or append. if ( @{$hash->{$key}} ) { my @v = @{ $hash->{$key} }; if ( $v[0] eq "append" ) { shift(@v); # Append the rest. push( @{ $self->{$key} }, @v ); } elsif ( $v[0] eq "prepend" ) { shift(@v); # Prepend the rest. unshift( @{ $self->{$key} }, @v ); } else { # Overwrite. $self->{$key} = $hash->{$key}; } } else { # Overwrite. $self->{$key} = $hash->{$key}; } } else { # Overwrite. $self->{$key} = $hash->{$key}; } } return $self; } # Reduction. # # Given two Config objects 'actual' and 'original', derive the # (minimal) hash that can be used to augment 'original' to 'actual'. # # In this implementation, 'self' is the actual Config object, 'orig' # is the original object. Upon completion, 'self' will be the reduced # hash. sub reduce : method { my ( $self, $orig ) = @_; # my $locked = $self->is_locked; warn("O: ", qd($orig,1), "\n") if DEBUG; warn("N: ", qd($self,1), "\n") if DEBUG; my $state = _reduce( $self, $orig, "" ); # $self->lock if $locked; warn("== ", qd($self,1), "\n") if DEBUG; return $self; } sub _ref { reftype($_[0]) // ref($_[0]); } # Note: This implementation is a proof of concept. It is not optimized # and contains edge cases that may need additional attention. sub _reduce { my ( $self, $orig, $path ) = @_; my $state; if ( _ref($self) eq 'HASH' && _ref($orig) eq 'HASH' ) { warn("D: ", qd($self,1), "\n") if DEBUG && !%$orig; return 'D' unless %$orig; my %hh = map { $_ => 1 } keys(%$self), keys(%$orig); for my $key ( sort keys(%hh) ) { warn("Config error: unknown item $path$key\n") unless exists $self->{$key}; unless ( defined $orig->{$key} ) { warn("D: $path$key\n") if DEBUG; delete $self->{$key}; $state //= 'M'; next; } # Hash -> Hash. if ( _ref($orig->{$key}) eq 'HASH' and _ref($self->{$key}) eq 'HASH' or _ref($orig->{$key}) eq 'ARRAY' and _ref($self->{$key}) eq 'ARRAY' ) { # Recurse. my $m = _reduce( $self->{$key}, $orig->{$key}, "$path$ +key." ); delete $self->{$key} if $m eq 'D' \|\| $m eq 'I'; $state //= 'M' if $m ne 'I'; } elsif ( ($self->{$key}//'') eq ($orig->{$key}//'') ) { warn("I: $path$key\n") if DEBUG; delete $self->{$key}; } else { # Overwrite. warn("M: $path$key => $self->{$key}\n") if DEBUG; $state //= 'M'; } } return $state // 'I'; } if ( _ref($self) eq 'ARRAY' && _ref($orig) eq 'ARRAY' ) { # Arrays. if ( any { _ref($_) } @$self ) { # Complex arrays. Recurse. for ( my $key = 0; $key < @$self; $key++ ) { my $m = _reduce( $self->[$key], $orig->[$key], "$path$ +key." ); #delete $self->{$key} if $m eq 'D'; # TODO $state //= 'M' if $m ne 'I'; } return $state // 'I'; } # Simple arrays (only scalar values). if ( my $dd = @$self - @$orig ) { $path =~ s/\.$//; if ( $dd > 0 ) { # New is larger. Check for prepend/append. # Deal with either one, not both. Maybe later. my $t; for ( my $ix = 0; $ix < @$orig; $ix++ ) { next if $orig->[$ix] eq $self->[$ix]; $t++; last; } unless ( $t ) { warn("M: $path append @{$self}[-$dd..-1]\n") if DE +BUG; splice( @$self, 0, $dd, "append" ); return 'M'; } undef $t; for ( my $ix = $dd; $ix < @$self; $ix++ ) { next if $orig->[$ix-$dd] eq $self->[$ix]; $t++; last; } unless ( $t ) { warn("M: $path prepend @{$self}[0..$dd-1]\n") if D +EBUG; splice( @$self, $dd ); unshift( @$self, "prepend" ); return 'M'; } warn("M: $path => @$self\n") if DEBUG; $state = 'M'; } else { warn("M: $path => @$self\n") if DEBUG; $state = 'M'; } return $state // 'I'; } # Equal length arrays with scalar values. my $t; for ( my $ix = 0; $ix < @$orig; $ix++ ) { next if $orig->[$ix] eq $self->[$ix]; warn("M: $path$ix => $self->[$ix]\n") if DEBUG; $t++; last; } if ( $t ) { warn("M: $path\n") if DEBUG; return 'M'; } warn("I: $path\[]\n") if DEBUG; return 'I'; } # Two scalar values. $path =~ s/\.$//; if ( $self eq $orig ) { warn("I: $path\n") if DEBUG; return 'I'; } warn("M $path $self\n") if DEBUG; return 'M'; } # For debugging messages. sub qd { my ( $val, $compact ) = @_; use Data::Dumper qw(); local $Data::Dumper::Sortkeys = 1; local $Data::Dumper::Indent = 1; local $Data::Dumper::Quotekeys = 0; local $Data::Dumper::Deparse = 1; local $Data::Dumper::Terse = 1; local $Data::Dumper::Trailingcomma = !$compact; local $Data::Dumper::Useperl = 1; local $Data::Dumper::Useqq = 0; # I want unicode visible my $x = Data::Dumper::Dumper($val); if ( $compact ) { $x =~ s/^bless\( (.*), '[\w:]+' \)$/$1/s; $x =~ s/\s+/ /gs; } defined wantarray ? $x : warn($x,"\n"); } # Testing code. package main; use Test::More tests => 3; # Original content. my $orig = Config->new ( { a => { b => [ 'c', 'd' ], e => [[ 'f' ]] }, g => { h => 1, i => +1 } } ); # Actual content, initially a copy of original content. my $actual = Config->new ( { a => { b => [ 'c', 'd' ], e => [[ 'f' ]] }, g => { h => 1, i => +1 } } ); # Augmentation hash. my $aug = { a => { b => [ 'prepend', 'x' ], e => [ [ 'g' ] ] }, g => { + i => 2 } }; # Expected new content. my $new = Config->new ( { a => { b => [ 'x', 'c', 'd' ], e => [[ 'g' ]] }, g => { h => 1, +i => 2 } } ); is_deeply( $orig, $actual, "orig = actual" ); $actual->augment($aug); is_deeply( $actual, $new, "augmented" ); $actual->reduce($orig); is_deeply( $actual, $aug, "reduced" ); [download]	[reply] [d/l]

There are a couple of additional constraints that make the tast easier:

The corresponding data types of the old and new structures match;
There are no undefined values involved.

Nevertheless further research has show that there are a lot of edge cases that make the task complicated.

I'm investigating the Struct::Diff approach, as well as several other suggestions from this thread. Thanks a lot for the input!

[reply]

Anything else is doomed to fail.

Cheers Rolf
_{(addicted to the Perl Programming Language :)

Wikisyntax for the Monastery}

[reply]

Here is a working proof of concept implementation. For an industry quality general purpose implementation some edge cases may need attention.

#!/usr/bin/perl

use strict;
use warnings;
use utf8;

package Config;

use constant DEBUG => 0;

use Scalar::Util qw(reftype);
use List::Util qw(any);

# Create a Config object out of a hash.
#
# A Config object is a hash with a predefined set of keys and values.
# Valid values are hashes, arrays and strings (scalars). Undefined
# values do not occur.
# The hash keys are fixed (i.e. no new keys, no delete keys).
# Arrays can grow and shrink.

sub new {
    my ( $pkg, $init ) = @_;
    bless { %$init } => $pkg;
}

# Augmentation.
#
# Given a Config object, augment its contents from a hash so, that all
# values from the hash update the corresponding values in the Config
# object.
#
# For example, if the Config object contains
#
#    { a => { h => 1, i => 1 } }
#
# and the hash contains
#
#    { a => { i => 2 } }
#
# then in the Config object the value for key 'i' of key 'a' will be
# updated to the value 2.
#
# When an array starts with value "append" or "prepend", the new
# values are appended resp. prepended to the existing values. For
# example, if the Config has
#
#    { b => [ "c", "d" ] }
#
# and the hash contains
#
#    { b => [ "append", "x" ] }
#
# the result will be
#
#    { b => [ "c", "d", "x" ] }

sub augment : method {
    my ( $self, $hash ) = @_;

    # my $locked = $self->is_locked;
    # $self->unlock if $locked;

    $self->_augment( $hash, "" );

    # $self->lock if $locked;

    $self;
}

sub _augment {
    my ( $self, $hash, $path ) = @_;

    for my $key ( keys(%$hash) ) {

        # No new keys...
        warn("Config error: unknown item $path$key\n")
          unless exists $self->{$key};

        # Hash -> Hash.
        # Hash -> Array.
        if ( ref($hash->{$key}) eq 'HASH' ) {
            if ( ref($self->{$key}) eq 'HASH' ) {

                # Hashes. Recurse.
                _augment( $self->{$key}, $hash->{$key}, "$path$key." )
+;
            }
            elsif ( ref($self->{$key}) eq 'ARRAY' ) {
                ...; # TODO
                # Hash -> Array.
                # Update single array element using a hash index.
                foreach my $ix ( keys(%{$hash->{$key}}) ) {
                    die unless $ix =~ /^\d+$/;
                    $self->{$key}->[$ix] = $hash->{$key}->{$ix};
                }
            }
            else {
                # Overwrite.
                $self->{$key} = $hash->{$key};
            }
        }

        # Array -> Array.
        elsif ( ref($hash->{$key}) eq 'ARRAY'
                and ref($self->{$key}) eq 'ARRAY' ) {

            # Arrays. Overwrite or append.
            if ( @{$hash->{$key}} ) {
                my @v = @{ $hash->{$key} };
                if ( $v[0] eq "append" ) {
                    shift(@v);
                    # Append the rest.
                    push( @{ $self->{$key} }, @v );
                }
                elsif ( $v[0] eq "prepend" ) {
                    shift(@v);
                    # Prepend the rest.
                    unshift( @{ $self->{$key} }, @v );
                }
                else {
                    # Overwrite.
                    $self->{$key} = $hash->{$key};
                }
            }
            else {
                # Overwrite.
                $self->{$key} = $hash->{$key};
            }
        }

        else {
            # Overwrite.
            $self->{$key} = $hash->{$key};
        }
    }

    return $self;
}

# Reduction.
#
# Given two Config objects 'actual' and 'original', derive the
# (minimal) hash that can be used to augment 'original' to 'actual'.
#
# In this implementation, 'self' is the actual Config object, 'orig'
# is the original object. Upon completion, 'self' will be the reduced
# hash.

sub reduce : method {
    my ( $self, $orig ) = @_;

    # my $locked = $self->is_locked;

    warn("O: ", qd($orig,1), "\n") if DEBUG;
    warn("N: ", qd($self,1), "\n") if DEBUG;
    my $state = _reduce( $self, $orig, "" );

    # $self->lock if $locked;

    warn("== ", qd($self,1), "\n") if DEBUG;
    return $self;
}

sub _ref {
    reftype($_[0]) // ref($_[0]);
}

# Note: This implementation is a proof of concept. It is not optimized
# and contains edge cases that may need additional attention.

sub _reduce {

    my ( $self, $orig, $path ) = @_;
    my $state;

    if ( _ref($self) eq 'HASH' && _ref($orig) eq 'HASH' ) {

        warn("D: ", qd($self,1), "\n")  if DEBUG && !%$orig;
        return 'D' unless %$orig;

        my %hh = map { $_ => 1 } keys(%$self), keys(%$orig);
        for my $key ( sort keys(%hh) ) {

            warn("Config error: unknown item $path$key\n")
              unless exists $self->{$key};

            unless ( defined $orig->{$key} ) {
                warn("D: $path$key\n") if DEBUG;
                delete $self->{$key};
                $state //= 'M';
                next;
            }

            # Hash -> Hash.
            if (     _ref($orig->{$key}) eq 'HASH'
                 and _ref($self->{$key}) eq 'HASH'
                 or
                     _ref($orig->{$key}) eq 'ARRAY'
                 and _ref($self->{$key}) eq 'ARRAY' ) {
                # Recurse.
                my $m = _reduce( $self->{$key}, $orig->{$key}, "$path$
+key." );
                delete $self->{$key} if $m eq 'D' || $m eq 'I';
                $state //= 'M' if $m ne 'I';
            }

            elsif ( ($self->{$key}//'') eq ($orig->{$key}//'') ) {
                warn("I: $path$key\n") if DEBUG;
                delete $self->{$key};
            }
            else {
                # Overwrite.
                warn("M: $path$key => $self->{$key}\n") if DEBUG;
                $state //= 'M';
            }
        }
        return $state // 'I';
    }

    if ( _ref($self) eq 'ARRAY' && _ref($orig) eq 'ARRAY' ) {

        # Arrays.
        if ( any { _ref($_) } @$self ) {
            # Complex arrays. Recurse.
            for ( my $key = 0; $key < @$self; $key++ ) {
                my $m = _reduce( $self->[$key], $orig->[$key], "$path$
+key." );
                #delete $self->{$key} if $m eq 'D'; # TODO
                $state //= 'M' if $m ne 'I';
            }
            return $state // 'I';
        }

        # Simple arrays (only scalar values).
        if ( my $dd = @$self - @$orig ) {
            $path =~ s/\.$//;
            if ( $dd > 0 ) {
                # New is larger. Check for prepend/append.
                # Deal with either one, not both. Maybe later.
                my $t;
                for ( my $ix = 0; $ix < @$orig; $ix++ ) {
                    next if $orig->[$ix] eq $self->[$ix];
                    $t++;
                    last;
                }
                unless ( $t ) {
                    warn("M: $path append @{$self}[-$dd..-1]\n") if DE
+BUG;
                    splice( @$self, 0, $dd, "append" );
                    return 'M';
                }
                undef $t;
                for ( my $ix = $dd; $ix < @$self; $ix++ ) {
                    next if $orig->[$ix-$dd] eq $self->[$ix];
                    $t++;
                    last;
                }
                unless ( $t ) {
                    warn("M: $path prepend @{$self}[0..$dd-1]\n") if D
+EBUG;
                    splice( @$self, $dd );
                    unshift( @$self, "prepend" );
                    return 'M';
                }
                warn("M: $path => @$self\n") if DEBUG;
                $state = 'M';
            }
            else {
                warn("M: $path => @$self\n") if DEBUG;
                $state = 'M';
            }
            return $state // 'I';
        }

        # Equal length arrays with scalar values.
        my $t;
        for ( my $ix = 0; $ix < @$orig; $ix++ ) {
            next if $orig->[$ix] eq $self->[$ix];
            warn("M: $path$ix => $self->[$ix]\n") if DEBUG;
            $t++;
            last;
        }
        if ( $t ) {
            warn("M: $path\n") if DEBUG;
            return 'M';
        }
        warn("I: $path\[]\n") if DEBUG;
        return 'I';
    }

    # Two scalar values.
    $path =~ s/\.$//;
    if ( $self eq $orig ) {
        warn("I: $path\n") if DEBUG;
        return 'I';
    }

    warn("M $path $self\n") if DEBUG;
    return 'M';
}

# For debugging messages.
sub qd {
    my ( $val, $compact ) = @_;
    use Data::Dumper qw();
    local $Data::Dumper::Sortkeys  = 1;
    local $Data::Dumper::Indent    = 1;
    local $Data::Dumper::Quotekeys = 0;
    local $Data::Dumper::Deparse   = 1;
    local $Data::Dumper::Terse     = 1;
    local $Data::Dumper::Trailingcomma = !$compact;
    local $Data::Dumper::Useperl = 1;
    local $Data::Dumper::Useqq     = 0; # I want unicode visible
    my $x = Data::Dumper::Dumper($val);
    if ( $compact ) {
        $x =~ s/^bless\( (.*), '[\w:]+' \)$/$1/s;
        $x =~ s/\s+/ /gs;
    }
    defined wantarray ? $x : warn($x,"\n");
}

# Testing code.

package main;

use Test::More tests => 3;

# Original content.
my $orig = Config->new
  ( { a => { b => [ 'c', 'd' ], e => [[ 'f' ]] }, g => { h => 1, i => 
+1 } } );

# Actual content, initially a copy of original content.
my $actual = Config->new
  ( { a => { b => [ 'c', 'd' ], e => [[ 'f' ]] }, g => { h => 1, i => 
+1 } } );

# Augmentation hash.
my $aug = { a => { b => [ 'prepend', 'x' ], e => [ [ 'g' ] ] }, g => {
+ i => 2 } };

# Expected new content.
my $new = Config->new
  ( { a => { b => [ 'x', 'c', 'd' ], e => [[ 'g' ]] }, g => { h => 1, 
+i => 2 } } );

is_deeply( $orig, $actual, "orig = actual" );

$actual->augment($aug);
is_deeply( $actual, $new, "augmented" );

$actual->reduce($orig);
is_deeply( $actual, $aug, "reduced" );
[download]

[reply]
[d/l]