#!/usr/bin/perl
#
# Original poster's note: "Here is the final ugly code."
#
# Purpose
#   1. Read protein records from /DATA/proteinfile.txt.  A header line looks
#      like "<tag>|<GI digits>|<db>|<accession>..."; the lines that follow a
#      header are stored as that GI's sequence.  The header containing the
#      word HUMAN identifies the reference record.
#   2. For every variant in /DATA/variantlist.txt (e.g. "P82L" = source
#      residue P, position 82, sink residue L), report the residue each
#      sequence carries at that position and which GIs carry the sink.
#   3. Write one tab-separated row per variant to /DATA/VariantOutput.txt.

use strict;
use warnings;
use Data::Dumper;    # only used by the optional debug dump below

my $protein_file = '/DATA/proteinfile.txt';
my $variant_file = '/DATA/variantlist.txt';
my $output_file  = '/DATA/VariantOutput.txt';

my ($info, $humangi, $accession) = read_sequences($protein_file);
#print Dumper($info);

# Never interpolate undef: fall back to empty strings when no HUMAN header
# (or no accession on it) was found.
my $gi_label  = defined $humangi   ? $humangi   : '';
my $acc_label = defined $accession ? $accession : '';
my $human_seq = '';
if (defined $humangi && defined $info->{$humangi}) {
    $human_seq = $info->{$humangi};
}
if ($human_seq eq '') {
    warn "No HUMAN record with a sequence found in $protein_file; "
       . "source residues cannot be verified.\n";
}

print "$gi_label\n";
print "$acc_label\n";

open my $variants, '<', $variant_file
    or die "Failed at opening $variant_file: $!\n";
open my $out, '>', $output_file
    or die "Failed at opening $output_file: $!\n";

print {$out} "This is [GI: $gi_label] and [Accession: $acc_label]\n"
           . "VARIANT\t\tPOTENTIAL\t\tPD\n";

# Hash order is unspecified; sort once so every row lists the sequences in
# the same, reproducible order.
my @gis = sort { $a <=> $b } keys %{$info};

while (my $variant = <$variants>) {
    $variant =~ s/\s+\z//;    # strips "\n" and a stray "\r" alike
    next if $variant eq '';

    # Split the variant into its three parts (P82L -> P, 82, L).
    my ($source, $position, $sink) = parse_variant($variant);
    if (!defined $position || $position < 1) {
        warn "Skipping malformed variant '$variant' "
           . "($variant_file line $.)\n";
        next;
    }

    # Check whether the human sequence has the source residue at the position.
    my $target = residue_at($human_seq, $position);
    if ($target ne '' && $target eq $source) {
        print "Yep!\n";
    }

    my @VariantList;
    my @PDList;

    # Scan every sequence for the residue it has at the given position and
    # note the ones that already show the sink residue there.
    for my $gi (@gis) {
        my $potential = residue_at($info->{$gi}, $position);
        push @VariantList, $potential;
        if ($potential ne '' && $potential eq $sink) {
            push @PDList, $potential . '{' . $gi . '}';
        }
    }

    print {$out} "$variant\t\t@VariantList\t\t@PDList\n";
}

close $variants;
# Buffered write errors only surface at close, so check it.
close $out or die "Failed at closing $output_file: $!\n";

# read_sequences($path)
#   Parses the protein file and returns ($info_ref, $human_gi, $accession):
#     $info_ref  - hash ref mapping GI => sequence string
#     $human_gi  - GI of the last header containing HUMAN (undef if none)
#     $accession - six characters following the third "|" on that header
#                  (undef if the header does not have that shape)
#   Dies if the file cannot be opened.
sub read_sequences {
    my ($path) = @_;

    open my $in, '<', $path or die "Failed at opening $path: $!\n";

    my %info;
    my ($gi, $human_gi, $acc);

    while (my $line = <$in>) {
        $line =~ s/\s+\z//;

        # NOTE(review): any line containing "END" stops parsing, exactly as
        # before.  A sequence line with the residues E-N-D would also trigger
        # this; tighten the pattern once the real sentinel format is confirmed.
        last if $line =~ m/END/;

        if ($line =~ m/^\S+\|(\d+)/) {
            $gi = $1;
            delete $info{$gi};    # a repeated GI starts over; last record wins

            # Only headers are inspected for HUMAN, so a sequence that happens
            # to contain H-U-M-A-N cannot clobber the reference record.
            if ($line =~ m/HUMAN/) {
                $human_gi = $gi;
                ($acc) = $line =~ m/^\S+\|\d+\|\w+\|(\S{6})/;
            }
            next;
        }

        # Ignore blank lines and anything that precedes the first header.
        next if $line eq '' || !defined $gi;

        # Append rather than overwrite, so a sequence wrapped over several
        # lines is kept whole.
        $info{$gi} .= $line;
    }

    close $in;

    return (\%info, $human_gi, $acc);
}

# parse_variant($text)
#   Returns ($source, $position, $sink) for text such as "P82L", or an empty
#   list when no "<digits><letter>" pair is present.  The sink must be a
#   letter, so a bare "P82" is rejected instead of being read as
#   position 8 / sink "2".
sub parse_variant {
    my ($text) = @_;

    if ($text =~ m/\A(.*?)(\d+)([A-Za-z])/) {
        return ($1, $2, $3);
    }
    return;
}

# residue_at($sequence, $position)
#   Returns the single character at 1-based $position, or '' when the
#   sequence is undefined or the position lies outside it.
sub residue_at {
    my ($sequence, $position) = @_;

    return '' if !defined $sequence;
    return '' if $position < 1 || $position > length($sequence);
    return substr($sequence, $position - 1, 1);
}