## Open the gene-id list file and read in its data.
## Single quotes are used for the path because no string interpolation
## (e.g., variables or "\n") is needed.
my $list_file = '/g/Viruses/prophage_data/emptySeqList_aa.txt';

## It is often preferable to store a filehandle in a lexical variable.
## The open is checked so that a missing or unreadable list stops the
## script instead of silently producing an empty set of gene ids.
open(my $fh, '<', $list_file)
    or die "Cannot open '$list_file' for reading: $!";

## Reads the entire file in one go. This is technically bad form, but
## assuming the file isn't too big, it's fine.
my @lines = <$fh>;
close($fh)
    or warn "Cannot close '$list_file': $!";

my $text = join('', @lines);    # combines all lines into one string

## Here is where the file format determines the code.
## Assumption (TODO confirm against the real file): nothing is in the file
## but gene ids, and each id consists of letters, numbers, and underscores.
## Every run of word characters (\w+) is treated as one gene id and stored
## as a hash key. A plain global match in list context is used instead of a
## (?{ ... }) code block inside s///, so $text is left unmodified.
my $geneids_to_remove = {};     # hash reference: gene id => 1
$geneids_to_remove->{$_} = 1 for $text =~ /\w+/g;