# Open the aligned file for reading and a scratch "_mod" file for writing.
# Both handles are opened unconditionally (even when no filtering is
# requested), exactly as before, because code outside this section may
# rely on them.
open(ALIGNED, "<:encoding(UTF-8)", "${filename}.txt")
    or die "Can't open aligned file for reading: $!";
open(ALIGNED_MOD, ">:encoding(UTF-8)", "${filename}_mod.txt")
    or die "Can't open file for writing: $!";

# Optional de-duplication pass: keep only the first record for each distinct
# combination of the first two tab-separated fields, then replace the
# original file with the filtered copy.
if ($delete_dupes eq "y") {
    my %seen;                     # unique records (hash lookups are faster than array lookups)
    my $unfiltered_number = 0;    # records read; counted explicitly because $. is not
                                  # reset when a bareword handle is reopened without a close

    while (my $line = <ALIGNED>) {
        $unfiltered_number++;

        # Only the first two fields decide whether a record is a duplicate.
        my ($key) = $line =~ /^([^\t]*\t[^\t]*)/;

        # A line without a tab has no second field; key on the whole line
        # instead of silently reusing the capture from an earlier line.
        $key = $line unless defined $key;

        # The second field may run to end of line; strip the line ending so
        # "a<TAB>b" and "a<TAB>b<TAB>c" produce the same key.
        chomp $key;

        # Add to hash, and if new, print to file.
        print ALIGNED_MOD $line if !$seen{$key}++;
    }

    my $filtered_number = keys %seen;

    print "\n\n-------------------------------------------------";
    print "\n\nSegment numbers before and after filtering out dupes: $unfiltered_number -> $filtered_number\n";
    print LOG "\nFiltered out dupes: $unfiltered_number -> $filtered_number";

    close ALIGNED;

    # Buffered write errors (e.g. disk full) only surface at close; stop here
    # rather than overwrite the original with a truncated file.
    close ALIGNED_MOD or die "Can't close file after writing: $!";

    rename("${filename}_mod.txt", "${filename}.txt")
        or die "Can't rename file: $!";
}