The code -- revised version by GrandFather. #!/usr/bin/perl use strict; use warnings; use Data::Dumper; my $data = '/DATA/alignment.fas'; open my $inFile, '<', $data or die "Failed at opening $data: $!\n"; # Populate the info hash with GIs as keys and sequences as values my $humanGi; my $accession; my $gi; # Current gi while reading sequences my %info; while (<$inFile>) { my $line = $_; chomp $line; last if m!END!; if ($line =~ m/(HUMAN|Homo)/) { ($humanGi, $accession) = $line =~ m/^\S+\|(\d+)\|\w+\|(\S{6}?)/; } if ($line =~ m/^\S+\|(\d+)/) { $gi = $1 if defined $1; } else { $info{$gi} = $line; } } print Dumper (\%info); close $inFile; my $data2 = '/DATA/variantList.txt'; open $inFile, '<', $data2 or die "Failed at opening $data2: $!\n"; my $data3 = '/DATA/pathogenList.txt'; open my $outFile, '>', $data3 or die "Failed at opening $data3: $!\n"; print $outFile "This is [GI: $humanGi] and [Accession: $accession]\nVARIANT\t\tPOTENTIAL\t\tPD\n"; while (defined (my $Variant = <$inFile>)) { # Grab a variant from the file (in this example: P82L) chomp $Variant; my ($source, $position, $sink) = split /(\d+)(\w)/, $Variant; # Check whether HS has the source (i.e., P) at the given position (i.e., 82) #my @char = split //, $info{$humanGi}; #my $target = $char[$position - 1]; my @VariantList; my @PDList; # Scan the rest of the sequences to check what amino acid they have at # the given position foreach my $gi (keys %info) { my @char2 = split //, $info{$gi}; my $potential = $char2[$position - 1]; push @VariantList, "${potential}{$gi}"; if ($potential eq $sink) { # Note the cases where we observe the sink (i.e., L) at this position push @PDList, "${potential}{$gi}"; } } print $outFile "$Variant\t@VariantList\t@PDList\n"; } close $inFile; close $outFile;