Input: @FileNames : A list of candidate files Step 1: Build a hash to group potentially similar strings { local $/ = \32; for my $curFile (@FileNames) { open INF, '<', $curFile or die... my $key = ; close INF or die... # list of files matching the first 32 chars push @{$Candidates{$key}{LIST}}, $curFile; # regex for matching similar groups my $tmp = $key; $tmp =~ s/N/./; $Candidates{$key}{REGEX} = qr/$r/; } } Step 2: Merge compatible groups for my $key (keys %Candidates) { $Candidates{$key}{SIMILAR} = grep { /$Candidates{$key}{REGEX}/ && ($_ ne $key) } keys %Candidates; } Step 3: Remove unmatchable items my %UnmatchableKeys; for my $key (keys %Candidates) { next if exists $Candidates{$key}{SIMILAR}; next if scalar @{$Candidates}{$key}{LIST}} > 1; $UnmatchableKeys{$key} = 1; } Step 4: Generate lists of items needing to be compared for my $key (keys %Candidates} { next if exists $UnmatchableKeys{$key}; my @similarFiles = (@{$Candidates}{$key}{LIST}}); for my $others (keys %{$Candidates{$key}{SIMILAR}}) { push @similarFiles, @{$Candidates{$key}{LIST}}; } print "GROUP: ", join(", ", sort @similarFiles), "\n"; }