use strict; use warnings; # Set up data and print it out. # my @array1 = qw(test test2 test2 test3 test4 test4); my @array2 = qw(test test test2.1 test4.1 test4.2 test4.3); print qq{\n}, q {Array @array1}, qq{\n}, qq{ @array1\n}, q {Array @array2}, qq{\n}, qq{ @array2\n\n}; # Pass references to @array1 and @array2 to nonUniqIntersect() # which returns three list references, non-unique intersection, # remainders from @arrau1 and remainders from @array2. # my ($rlNonUniqIntersect, $rlFirstLeftOver, $rlSecondLeftOver) = nonUniqIntersect(\@array1, \@array2); # Print results. # print qq{Non-unique intersection\n}, qq{ @$rlNonUniqIntersect\n\n}, q {Left-over from @array1}, qq{\n}, qq{ @$rlFirstLeftOver\n\n}, q {Left-over from @array2}, qq{\n}, qq{ @$rlSecondLeftOver\n\n}; # Fond non-unique intersection. # sub nonUniqIntersect { # Get first and second lists. Initialise list reference # for intersection and hash tables used to tag which # elements from each list have found a match. # my @first = @{$_[0]}; my @second = @{$_[1]}; my $rlNonUniqInt = []; my %firstTags = map {$_ => 0} 0 .. $#first; my %secondTags = map {$_ => 0} 0 .. $#second; # Iterate over the first list. # FIRST: for my $idx1 (0 .. $#first) { # Look for exact matches by iterating over the # second list. # EXACT_SECOND: for my $idx2 (0 .. $#second) { # Skip if this element already matched elsewhere. # next EXACT_SECOND if $secondTags{$idx2}; # Is this an exact match? # if ($first[$idx1] eq $second[$idx2]) { # Yes, push onto intersection list, tag the # elements in each list and move to next # element of first list. # push @$rlNonUniqInt, $first[$idx1]; $firstTags{$idx1} ++; $secondTags{$idx2} ++; next FIRST; } } # We haven't found an exact match so look for the # shortest possible non-exact match. Make a regex # for this element of first list. Initialise list # of possible matches. # my $rxFirst = qr{^$first[$idx1]}; my @possMatches = (); # Iterate over secondlist again looking for # non-exact matches. # POSS_SECOND: for my $idx2 (0 .. $#second) { # Skip if this element already matched elsewhere. # next POSS_SECOND if $secondTags{$idx2}; # If we find a match, push the length and index # onto the @possMatches list. # next POSS_SECOND unless $second[$idx2] =~ $rxFirst; push @possMatches, [length $second[$idx2], $idx2]; } # Go to next element of first list if no possible # matches found. Otherwise, sort possible matches # by length and take the index of the first (shortest) # and push the corresponding element onto the intersection # list, updating the tags as well. # next FIRST unless @possMatches; my $shortestMatchIdx2 = ( map {$_->[1]} sort {$a->[0] <=> $b->[0]} @possMatches )[0]; push @$rlNonUniqInt, $first[$idx1]; $firstTags{$idx1} ++; $secondTags{$shortestMatchIdx2} ++; } # Splice out elements from each list that have been # paired off leaving those that didn't have matches. # foreach my $idx (sort {$b <=> $a} keys %firstTags) { splice @first, $idx, 1 if $firstTags{$idx}; } foreach my $idx (sort {$b <=> $a} keys %secondTags) { splice @second, $idx, 1 if $secondTags{$idx}; } # Return three list references. # return ($rlNonUniqInt, \@first, \@second); } #### Array @array1 test test2 test2 test3 test4 test4 Array @array2 test test test2.1 test4.1 test4.2 test4.3 Non-unique intersection test test2 test4 test4 Left-over from @array1 test2 test3 Left-over from @array2 test test4.3