# Listing One:
# Structure after reading the first file: a key that was seen with a single
# value maps straight to that value, while a key seen with several values
# maps to a hash ref keyed by those values.
%hash = (
    "123" => "abc",
    "456" => {              # I'm choosing a hash ref here because
        "abc" => undef,     # eventually we'll possibly be replacing those
        "xyz" => undef,     # undefs with something else.
    },
    "789" => "abc",
);
####
# Listing Two:
# Structure once the second file has been merged in: every top-level entry
# is a hash ref, and each inner entry holds a single string, an array ref of
# strings, or a placeholder where nothing was supplied.
%hash = (
    '123' => { 'abc' => 'zzz' },
    '456' => {
        'abc' => [ qw(xxx yyy) ],
        'xyz' => undef,    # Or [], or 0, or "", or whatever
    },
    '789' => { 'abc' => 'xxx' },
);
####
use strict;
use warnings;
use Data::Dump 'pp'; # Or Data::Dumper, but I prefer the pp output.
# Note that I'm using scalars here to represent the files.
# I'm using Perl's capability to use references-to-scalars
# as if they were files.
#
# Set one: one "value key" pair per line (the value comes first).
# NOTE(review): the original contents of this heredoc were lost; these lines
# are the ones that produce the structure shown in Listing One.
my $setOne = <<'EOF';
abc 123
abc 456
xyz 456
abc 789
EOF

# Set two: one "newvalue ==> key->oldvalue" update per line. The "yyy" line
# appears twice, which exercises the duplicate check in the second pass.
my $setTwo = <<'EOF';
xxx ==> 456->abc
xxx ==> 789->abc
yyy ==> 456->abc
yyy ==> 456->abc
zzz ==> 123->abc
EOF
# First pass: build %hash from set one.
#
# Resulting shape:
#   - a key seen with exactly one value maps to that value (a plain string);
#   - a key seen with several different values maps to a hash ref whose keys
#     are those values and whose entries are all undef placeholders.
my %hash;

# See? Here I'm opening a file handle to read from $setOne.
# Typically, you'd put "somefilename.txt" in place of \$setOne,
# but this code is for demonstration purposes only.
open my $fileOne, "<", \$setOne or die "Couldn't read set one: $!";
while (my $line = <$fileOne>) {
    chomp $line;    # Goodbye, newline characters.

    # It would seem like the order of values and keys is turned around
    # in the file for set one. That, or I misinterpreted your intention.
    # split ' ' (unlike /\s+/) ignores leading whitespace, so an indented
    # line does not yield an empty first field.
    my ($value, $key) = split ' ', $line;

    # Blank or one-field lines carry no key; skip them instead of filing
    # them under an undefined key.
    next unless defined $key;

    if (ref $hash{$key}) {
        # If $hash{"123"} already is a reference, then we can just add the
        # new key to it.
        # Note that the name '$value' is a little misleading here, since
        # we'll be using it as a key... Well, such is life.
        $hash{$key}->{$value} = undef;
    } elsif (exists $hash{$key}) {
        # The very same value again: nothing new to record, so keep the
        # plain string rather than promoting it to a one-entry hash ref.
        next if $hash{$key} eq $value;

        # $hash{"123"} is already there, but we got another value for it.
        # So we need a hash ref. Let's make one.
        $hash{$key} = { $hash{$key} => undef, $value => undef };
    } else {
        # $hash{"123"} isn't there yet, so here we go rather plainly:
        $hash{$key} = $value;
    }
}
close $fileOne;

# Let's see what we've got so far.
pp \%hash;
# print Dumper \%hash; # Again, I prefer pp, but there are always multiple options.
# Second pass: merge the updates from set two into %hash.
#
# Each line reads "newvalue ==> key->oldvalue" and attaches $newvalue to the
# entry $hash{$key}->{$oldvalue}. An entry holds undef (nothing yet), a
# single string, or an array ref of distinct strings.
#
# Fine then. Let's read that other file.
open my $fileTwo, "<", \$setTwo or die "Couldn't read set two: $!";
while (my $line = <$fileTwo>) {
    chomp $line;
    my ($newvalue, $key, $oldvalue) = $line =~ m/^(.+)\s+==>\s+(.+)->(.+)$/;

    # A line that doesn't match (blank, malformed) leaves all three
    # captures undefined; skip it rather than creating a bogus "" key.
    next unless defined $key;

    if (ref $hash{$key}) {
        # If $hash{"123"} is a reference...
        my $inner = $hash{$key};

        if (ref $inner->{$oldvalue}) {
            # If $hash{"123"}->{"abc"} is also a reference
            # Jump right to the next iteration if we've run into a duplicate.
            # Since your original example didn't specify that it should take
            # duplicates into account.
            next if grep { $_ eq $newvalue } @{ $inner->{$oldvalue} };
            push @{ $inner->{$oldvalue} }, $newvalue;
        } elsif (exists $inner->{$oldvalue}) {
            # $hash{"123"}->{"abc"} is not a reference, even though
            # it already exists.
            if (defined $inner->{$oldvalue}) {
                # Same duplicate rule as for the array case: a value that
                # is already stored must not be stored a second time.
                next if $inner->{$oldvalue} eq $newvalue;

                # This means that $hash{"123"}->{"abc"} already has a value,
                # so what we really need here is to convert it into an array
                # ref so that it can hold multiple values.
                $inner->{$oldvalue} = [ $inner->{$oldvalue}, $newvalue ];
            } else {
                # $hash{"123"}->{"abc"} is undef, so we can simply replace
                # the undef with the new value.
                $inner->{$oldvalue} = $newvalue;
            }
        }
        # NOTE(review): when $oldvalue is not a key of the inner hash the
        # update is ignored -- confirm that this is the intended behaviour.
    } else {
        # $hash{"123"} is not a reference yet. Let's turn it into one.
        # Whatever plain value was stored there (if any) is replaced.
        $hash{$key} = { $oldvalue => $newvalue };
    }
}
close $fileTwo;

# Final results:
pp \%hash;