use strict;
use warnings;

# Reduce a string to its canonical form so that strings differing only
# in ignorable characters compare equal.
# See http://en.wikipedia.org/wiki/Canonicalization
#
# Takes one string and returns a copy from which every character other
# than A-Z, a-z, 0-9 and the space character has been deleted.  The
# caller's variable is left untouched because the work is done on a copy.
sub canonicalize {
    my $canonical = shift;

    # /c complements the listed set and /d deletes whatever matches, so
    # only letters, digits and spaces survive.
    $canonical =~ tr/A-Za-z0-9 //cd;

    # Optional: fold case to make comparisons case-insensitive.
#    $canonical = lc $canonical;

    return $canonical;
}

# Partition two lists of lines according to whether their canonical
# forms (as computed by canonicalize) occur in both lists or in only one.
#
# Arguments:
#   $lines1_aref - reference to the first array of lines
#   $lines2_aref - reference to the second array of lines
#
# Returns three array references, in this order:
#   matches     - lines from the first list whose canonical form also
#                 occurs in the second list (original text, list-1 order)
#   nonmatches1 - lines from the first list with no counterpart in the second
#   nonmatches2 - lines from the second list with no counterpart in the first
#
# Dies with "Can't happen" if a line's membership flags are inconsistent
# with the list it was read from.
sub match_up_canonically {
    my ( $lines1_aref, $lines2_aref ) = @_;

    # Membership flags keyed by canonical form: 1 is OR-ed in for the
    # first list and 2 for the second, so a value of 3 means "in both".
    my %flags_for;
    foreach my $line ( @{$lines1_aref} ) {
        $flags_for{ canonicalize($line) } |= 1;
    }
    foreach my $line ( @{$lines2_aref} ) {
        $flags_for{ canonicalize($line) } |= 2;
    }

    # First list: every line is either unique to it (1) or shared (3).
    my @matches;
    my @nonmatches1;
    foreach my $line ( @{$lines1_aref} ) {
        my $flags = $flags_for{ canonicalize($line) };

        if ( $flags == 1 ) {
            push @nonmatches1, $line;
            next;
        }

        die "Can't happen" unless $flags == 3;
        push @matches, $line;
    }

    # Second list: every line is either unique to it (2) or shared (3).
    my @nonmatches2;
    foreach my $line ( @{$lines2_aref} ) {
        my $flags = $flags_for{ canonicalize($line) };

        if ( $flags == 3 ) {
            # Nothing to do: the shared lines were already collected
            # while scanning the first list.
            #
            # Alternatively, uncomment the push below to report the
            # second list's text as well, since it may differ from the
            # first list's text in ways canonicalization ignores.
#            push @matches, $line;
            next;
        }

        die "Can't happen" unless $flags == 2;
        push @nonmatches2, $line;
    }

    return ( \@matches, \@nonmatches1, \@nonmatches2 );
}

# Sample input: two lists whose entries partly coincide once punctuation
# is ignored.
my @lines1 = (
    'able baker charlie',
    'roger, fox, dog',
    'Gomez Morticia Cousin-Itt',
    'Wednesday Pugsley Lurch',
);

my @lines2 = (
    'Gomez Morticia Cousin_ITT',
    'roger; fox; dog',
    'Wednesday Pugsley Fester',
    'able baker charlie',
);

my ( $m, $n1, $n2 ) = match_up_canonically( \@lines1, \@lines2 );

# Report each result set as a heading, one entry per line, followed by
# a blank separator line.
foreach my $section (
    [ 'Matched:',      $m  ],
    [ 'Non-matched1:', $n1 ],
    [ 'Non-matched2:', $n2 ],
  )
{
    my ( $heading, $entries_aref ) = @{$section};

    print "$heading\n";
    print "$_\n" for @{$entries_aref};
    print "\n";
}