#### Per line ####
## Read the .ptt file line by line and accumulate in $saved_lines every
## line whose gene id is NOT a key of the $geneids_to_remove hash reference.
## $geneids_to_remove is not defined in this section; it is expected to be
## populated before this code runs.
my $ptt_file = "/g/Viruses/prophage_data/prophage_region.ptt1";

## Lexical filehandle, and fail loudly: an unchecked open would otherwise
## leave $saved_lines silently empty.
open(my $fh, '<', $ptt_file)
    or die "Cannot open '$ptt_file' for reading: $!";

## Precompile a regex to capture the gene id on each line.
## Assumption: the gene id is the first run of word characters on the line.
## The capture is read from the match's return value rather than from an
## embedded (?{ ... }) code block, so a non-matching line can never reuse
## the id captured from an earlier line.
my $rx_find_geneid = qr/^(\w+)/x;

## A named line variable is used instead of $_ for clarity.
my $saved_lines = '';
while (my $line = <$fh>) {
    ## List-context match: $gene_id is undef when the line has no
    ## leading gene id.
    my ($gene_id) = $line =~ $rx_find_geneid;

    ## A line without a gene id cannot be in the removal set, so keep it;
    ## otherwise keep it only if its id is not marked for removal.
    if (!defined $gene_id || !exists $geneids_to_remove->{$gene_id}) {
        $saved_lines .= $line;
    }
}
close($fh);