# Sample input rows as pasted together with the script (the second row is cut
# short in the paste).
# NOTE(review): the fields look space-separated here, but the code below splits
# on tabs -- presumably the real input is tab-delimited; confirm against the
# code that fills @parsed_file.
#
# 1374:1-202 gb|AE000516.2| Mycobacterium tuberculosis CDC1551, complete genome 34.5 69 3.6 202 4403837 14/48 29% 25/48 52% 38 181 1895058 1895201
# 1374:1-202 gb|AE000516.2| Mycobacterium tuberculosis CDC1551, complete genome 34.1 68 5.0 2
# 1450:1-202 emb|BX248345.1| Mycobacterium bovis subsp. bovis AF2122/97 complete genome; segment 12/14 70.3 147 6e-11 202 308050 28/59 47% 43/59 72% 17 193 168681 168505
####

# Return the ID of one hit line, i.e. the text before the first tab.
# Returns nothing (undef in scalar context) for an undefined line, an empty
# line, or a line whose first field is empty.
sub _hit_id {
    my ($line) = @_;
    return unless defined $line;

    my ($id) = split /\t/, $line, 2;
    return unless defined $id && length $id;
    return $id;
}

# Return the first line seen for each distinct ID, keeping input order.
#   @_      : hit lines (tab-separated, ID in the first field)
#   returns : list of lines, one per unique ID
sub _first_hit_per_id {
    my @lines = @_;

    my %seen;     # IDs already emitted; string keys, so no numeric comparison
    my @first;
    for my $line (@lines) {
        my $id = _hit_id($line);
        next unless defined $id;
        next if $seen{$id}++;    # a later hit for an ID we already kept
        push @first, $line;
    }
    return @first;
}

# Tally how often each ID occurs and keep every line under its own ID:
#   $freq{ID}{"freq"}  - number of lines carrying that ID
#   $freq{ID}{"value"} - array ref of those lines, in input order
for my $line (@parsed_file) {
    my $id = _hit_id($line);
    next unless defined $id;

    $freq{$id}{"freq"}++;
    push @{ $freq{$id}{"value"} }, $line;
}

# Unique IDs, most frequent first; ties are broken by ID so the order is
# reproducible from run to run.
my @sorted_array = sort {
    $freq{$b}{"freq"} <=> $freq{$a}{"freq"}
        or $a cmp $b
} keys %freq;
##print "$sorted_array[0]\n";

# Print only the first hit found for every unique ID.
print "$_\n" for _first_hit_per_id(@parsed_file);