The code works for some input and not for the rest! I have manipulated the input file to have an insight into what might cause the problem, yet I have nothing. I'll post it to here hoping that someone might have an idea.
Essentially, the problem is that the script takes the first 22 input items (of two lines each) and ignores the rest. All these items have the same format. I could not figure out why there is such limitation on the input while there is no difference at all between various input items.
I hope the files do not violate any space limitation (if there is one).
The code -- revised version by GrandFather. #!/usr/bin/perl use strict; use warnings; use Data::Dumper; my $data = '/DATA/alignment.fas'; open my $inFile, '<', $data or die "Failed at opening $data: $!\n"; # Populate the info hash with GIs as keys and sequences as values my $humanGi; my $accession; my $gi; # Current gi while reading sequences my %info; while (<$inFile>) { my $line = $_; chomp $line; last if m!END!; if ($line =~ m/(HUMAN|Homo)/) { ($humanGi, $accession) = $line =~ m/^\S+\|(\d+)\|\w+\|(\S{6}?) +/; } if ($line =~ m/^\S+\|(\d+)/) { $gi = $1 if defined $1; } else { $info{$gi} = $line; } } print Dumper (\%info); close $inFile; my $data2 = '/DATA/variantList.txt'; open $inFile, '<', $data2 or die "Failed at opening $data2: $!\n"; my $data3 = '/DATA/pathogenList.txt'; open my $outFile, '>', $data3 or die "Failed at opening $data3: $!\n"; print $outFile "This is [GI: $humanGi] and [Accession: $accession]\nVARIANT\t\tPO +TENTIAL\t\tPD\n"; while (defined (my $Variant = <$inFile>)) { # Grab a variant from the file (in this example: P82L) chomp $Variant; my ($source, $position, $sink) = split /(\d+)(\w)/, $Variant; # Check whether HS has the source (i.e., P) at the given position +(i.e., 82) #my @char = split //, $info{$humanGi}; #my $target = $char[$position - 1]; my @VariantList; my @PDList; # Scan the rest of the sequences to check what amino acid they hav +e at # the given position foreach my $gi (keys %info) { my @char2 = split //, $info{$gi}; my $potential = $char2[$position - 1]; push @VariantList, "${potential}{$gi}"; if ($potential eq $sink) { # Note the cases where we observe the sink (i.e., L) at th +is position push @PDList, "${potential}{$gi}"; } } print $outFile "$Variant\t@VariantList\t@PDList\n"; } close $inFile; close $outFile;
First input. Variant list. A5V A5S A5T C7F V8E L9Q L9V G13R V15G V15M G17S F21C E22K E22G Q23L G38R L39R L39V G42D G42S H44R F46C H47R H49R H49Q E50K T55R N66S L68R G73S D77Y H81A L85F L85V G86R N87S V88A A90T A90V D91A D91V G94A G94C G94D G94R G94V V98M E101G E101K D102N D102G I105F S106L L107V G109V I113M I113T I114T G115A R116G V119L D125V D125G D126H L127S S135N N140K L145F L145S A146T C147R G148R V149G V149I I150T I152T
and the second input file. >gi|134611|sp|P00441.2|_Homo_sapiens MATKAVCVLKGDGPVQGIINFEQKESNGPVKVWGSIKGLTEGLHGFHVHEFGDNTAGCTSAGPHFNPLSR +KHGGPKDEERHVGDLGNVTADKDGVADVSIEDSVISLSGDHCIIGRTLVVHEKADDLGKGGNEESTKTG +NAGSRLACGVIGIAQ >gi|112419222|Xenopus_laevis AMVKAVCVLAGSGDVKGVVRFEQQDD-GDVTVEGKIEGLTDGNHGFHIHVFGDNTNGCLSAGPHFNPQNK +NHGSPKDADRHVGDLGNVTA-EGGVAQFKFTDPQISLKGERSIIGRTAVVHEKQDDLGKGGDDESLKTG +NAGGRLACGVIGFCP >gi|62858937|_Xenopus_(Silurana)_tropi... -MVRAVCVLAGSGDVKGVVHFQQQDE-GPVTVEGKIYGLTDGKHGFHIHEFGDNTNGCISAGPHFNPESK +THGAPEDAVRHVGDLGNVTA-KDGVAEFKLTDSLISLKGNHSIIGRCAVVHEKEDDLGKGGNDESLKTG +NAGGRLACGVIGLCQ >gi|226372562|_Rana_catesbeiana --MKAICVLKGSSEVTGVVRFEQEED-GPVTVTGQITGLTDGKHGFHIHTYGDNTDGCVSAGPHFNPQGK +THGGPDDEVRHVGDLGNVTS-AGGVADINIKDKLISLKGEHSIIGRTAVVHEKEDDLGKGGDNESLITG +NAGGRLACGVIGICQ >gi|116048074|_Scyliorhinus_torazame --MKAICVLKGTGEVTGTVQFDQAGG-GPVTVKGSITGLTPGKHGFHVHAFGDNTNGCISAGPHYNPFLK +THGGPGDEERHVGDLGNVEANGDGVATFEIQDNQLHLSGERSIIGRTLVVHEKEDDLGKGEDEESTRTG +NAGSRLACGVIGIAK >gi|216963348|_Ctenopharyngodon_idella -------------------YFEQEGEKSPVTLSGEITGLTAGKHGFHVHAFGDNTNGCISAGPHFNPYSK +NHGGPTDSERHVGDLGNVIAGENGVAKIDIVDKMLTLSGPDSIIGRTMVIHEKEDDLGKGGNEESLKTG +NAGGRLACGVIGITQ >gi|226232347|_Pimephales_promelas ---------------------------------------------------------------HFNPHTQ +NHGGPTDSARHVGDLGNVTAGENGVAKIDIVDKMLTLSGQHSIIGRTMVIHEKEDDLGKGGNE------ +--------------- >gi|238801237|_Hemibarbus_mylodon MAKKAVCVLKGTGEVTGTVFFEQETDGSPVKLSGTISGLTAGKHGFHVHVFGDNTNGCISAGPHFNPHNK +NHGGPTDGDRHVGDLGNVTAGESGVAKIDIVDKMLTLSGQHSIIGRTMVIHEKEDDLGKGGNEESLKTG +NAGGRLACGVIGITG >gi|47227092|_Tetraodon_nigroviridis MVIKAVCVLKGAGETSGTVYFEQQDEKAPVKLTGEIKGLTAGEHGFHVHAFGDNTNGCISAGPHYNPHNK +THAGPNDENRHVGDLGNVTAEADQIAKIDITDSVISLHGKFSIIGRTMVIHEKADDLGKGGNEESLKTG +NAGGRLACGVIGITQ >gi|225706520|_Osmerus_mordax MVLKAVCVLKGTGEVTGTVFFEQEGDNGPVKLTGEISGLTPGEHGFHVHAFGDNTNGCISAGPHFNPHSK +THGGPTDDVRHVGDLGNVTAGQDNVAKISIQDKHLTLNGVHSIIGRTMVIHEKADDLGKGGNEESLKTG +NAGGRLACGVIGITQ >gi|185132317|_Oncorhynchus_mykiss MAMKAVCVLKGTGEVTGTVFFEQEGADGPVKLIGEISGLAPGEHGFHVHAYGDNTNGCMSAGPHFNPHNQ +THGGPTDAVRHVGDLGNVTAGADNVAKINIQDKMLTLTGPDSIIGRTMVIHEKADDLGKGGNEESLKTG +NAGGRQACGVIGIAQ >gi|56790262|_Danio_rerio MVNKAVCVLKGTGEVTGTVYFNQEGEKKPVKVTGEITGLTPGKHGFHVHAFGDNTNGCISAGPHFNPHDK +THGGPTDSVRHVGDLGNVTADASGVAKIEIEDAMLTLSGQHSIIGRTMVIHEKEDDLGKGGNEESLKTG +NAGGRLACGVIGITQ >gi|185135289|_Salmo_salar MALKAVCVLKGTGEVTGTVFFEQEGDGAPVKLTGEIAGLTPGEHGFHVHAFGDNTNGCMSAGPHFNPHNH +THGGPTDTVRHVGDLGNVTAAADSVAKINIQDEILSLAGPHSIIGRTMVIHEKADDLGKGDNEESRKTG +NAGSRLACGVIGIAQ >gi|134284932|_Carassius_auratus ---------------------------------------------FHVHAFGDNTNGCTSAGPHYNPHNQ +THGGPTDSVRHVGDLGNV--------------------------------------------------- +--------------- >gi|110180503|_Oryzias_javanicus ----------------------------------------PGEHGFHVHAFGDNTNGCISAGPHFNPYGK +DHAGPTDEHRHVGDLGNVTANAENVAKLDFTDKVITLAGPHSIIGRTMVIHEKKDDLGKGGNEESLKTG +NA------------- >gi|229365862|_Anoplopoma_fimbria MVVKAVCVLKGAGETSGVVHFEQEGDTAAVKLTGEIIGLTPGEHGFHVHAFGDNTNGCISAGPHFNPHNN +THAGPTDEQRHVGDLGNVTAGGDNIAKIDITDKIITLTGQHSIIGRTMVIHEKADDLGKGGNDESLKTG +NAGARLACGVIGIAQ >gi|226934254|_Dicentrarchus_labrax ---------------------------------------------------------------------- +---------RHVGDLGDVTAGGDNIAKIDITDKMLTLTGPLFIIGRTMVIHEKADDLGKGGNEESLKTG +--------------- >gi|54873355|_Sebastes_schlegelii ---------------------------------GEIKGLTPGEHGFHVHAFGDNTNGCISAGPHFNPHGK +DHAGPTDQERHVGDLGNVTAGAANVAKIDITDKMLTLTGPLSIIRRTMVIHEKKDDLGKGGNEESLKTG +NAGG----------- >gi|62550923|_Sparus_aurata -------------------------------------------------------------------HGK +NHGGPTDAERHVGDLGNVTAGADNVAKIDITDKMLTLSGPLSIIGRTMVIHEKVDDLGKGGNEE----- +--------------- >gi|27462182|_Pagrus_major MVQKAVCVLKGAGETTGVVHFEQESESAPVTLKGEISGLTPDEHGFHVHAFGDNTNGCISAGPHFNPHNK +NHAGPTDAERHVGDLGNVTAGADNVAKIDITDKMLTLNGPFSIIGRTMVIHEKADDLGKGGNEESLKTG +NAGGRLACGVIGICQ >gi|12733941|_Platichthys_flesus -----------------------------------IAGLAPGEHGFHVHSFGDNTNGCMSAGPHFNPHGK +NHAGPTDADRHVGDLGNVTAGADNVAEINISDKMLTLNGPNSIIGRTMVIHEKADDLGKGGNDESLKTG +NA------------- >gi|151549024|Paralichthys_olivaceus ------------------------------------------EHGFHVHAFGDNTNGCISAGPHFNPHGK +NHAGPTDAERHVGDLGNVTAGKDNVAEINISDKIITLFGAHSIIGRTMVIHEKADDLGKGGNEESLKTG +NAGARLACGVIG--- >gi|57908848|_Trematomus_bernacchii ---KAVCVFKGTGEASGTVFFEQENDSAPVKLTGEIKGLTPGEHGFHVHAFGDNTNGCISAGPHFNPHNK +THAGPTDEDRHVGDLGNVTAAADNVAKLNITDKMITLAGQYSIIGRTMVIHEKADDLGKGGNDESLKTG +NAGGRLACGVIGIAQ >gi|57908852|Chionodraco_hamatus ---KAVCVFKGAGEASGTVFFEQETDSCPVKLTGEIKGLTPGEHGFHVHAFGDNTNGCISAGPHFNPHNK +THAGPTDENRHVGDLGNVTAAADNVAKLDITDKMITLAGQYSIIGRTMVIHEKADDLGKGGNDESLKTG +NAGGRLACGVIGIAQ >gi|157152709|_Takifugu_obscurus MAMKAVCVLKGAGDTSGTVYFEQENESAPVKLTGEIKGLTPGEHGFHVHAFGDNTNGCISAGPHYNPHNK +THAGPTDADRHVGDLGNVTAGADNIAKIDIKDSMLTLTGPYSIIGRTMVIHEKADDLGKGGNEESLKTG +NAGGRLACGVIGITQ >gi|67772081|_Siniperca_chuatsi --------------------------------------FTPGEHGSHVHVFGDNTNGCISAGPHYNPHGK +NHAGPNDAERHVGDLGNVTAGADNVAKIDITDKMPSLTGPYSIIGRTMVIHEKADDLGKGGNEESLKTG +NAGGRLACGVIGITQ >gi|40218091|_Oreochromis_mossambicus MVLKAVCVLKGTGDTSGTVYFEQENDSAPVKLTGEIKGLTPGEHGFHVHAFGDNTNGCISAGPHFNPYNK +NHGGPKDAERHVGDLGNVTAGADNVAKIEITDKVITLTGRDSIIGRTMVIHEKVDDLXKGGNEESLKTG +NAGGRLACGVIGITQ >gi|37542151|_Epinephelus_malabaricus MVLKAVCVLKGAGETSGTVYFEQETDSAPVKLTGEIKGLTPGEHGFQVHAFGDNTNGCISAGPHFNPHNK +HHAGPTDAERHVGDLGNVTAGGDNVAKIDITDKIITLNGPYSIIGRTMVIHEKADDLGTGGNEESLKTG +NAGGRLACGVIGISQ >gi|56785775|Epinephelus_coioides MDLKAVCVLKGAGETSGTVYFEQESDSAPVKLTGEIKGLTPGEHGFHVHAFGDNTNGCISAGPHFNPHNK +QHAGPTDADRHVGDLGNVTAGGDNVAKIDITDKMLTLNGPYSIIGRTMVIHEKADDLGRGGNDESLKTG +NAGGRLACGVIGIAQ >gi|47607437|_Oplegnathus_fasciatus MVLKAVCVLKGAGETTGTVYFEQESDSAPVKLTGEIKGLTPGEHGFHVHAFGDNTNGCISAGPHFNPHNK +NHAGPNDAERHVGDLGNVTAGADNVAKIDIKDHIITLTGPDSIIGRTMVIHEKADDLGKGGNEESLKTG +NAGGRLACGVIGITQ >gi|115392225|_Rachycentron_canadum MVLKAVCVLKGAGETTGTVYFEQESDSAPVKVTGEIKGLTPGEHGFHVHAFGDNTNGCISAGPHFNPHNK +NHAGPNDEERHIGDLGNVTAGADNVAKVDITDKMLTLNGPYSIIGRTMVIHEKADDLGKGGNEESLKTG +NAGGRLACGVIGIAQ >gi|224044145|_Taeniopygia_guttata AAMRAVCVMQGEGAVKGVIHFEQQGT-GPVKVTGEITGLADGEHGFHVHEFGDNTNGCTSAGPHFNPEQK +KHGGPSDAERHVGDLGNVTA-KGGVAQVSIQDSVISLSGPHCIIGRTMVVHERRDDLGRGGNDESLLTG +NAGPRLACGVIGIAK >gi|45384218|_Gallus_gallus ATLKAVCVMKGDAPVEGVIHFQQQGS-GPVKVTGKITGLSDGDHGFHVHEFGDNTNGCTSAGAHFNPEGK +QHGGPKDADRHVGDLGNVTA-KGGVAEVEIEDSVISLTGPHCIIGRTMVVHAKSDDLGRGGDNESKLTG +NAGPRLACGVIGIAK >gi|29373121|_Melopsittacus_undulatus ATLKAVCVMKGEGPVQGVIHFQQQGN-GPVKVTGKISGLADGDHGFHVHEFGDNTNGCTSAGPHFNPEGK +QHGGPSDAERHVGDLGNVTA-KGGVAEVAIEDSIISLSGPHSIVGRTMVVHEKCDDLGRGGDNESKLTG +NAGPRLACGVIGIAK >gi|89515076|_Bufo_gargarizans -MVKAICVLKGNGPVHGIVGFNQDG--GEVTVKGTINGLTDGLHGFHIHVYGDNTNGCMSAGPHFNPHGK +SHGAPEDEERHVGDLGNITS-KDGVAEFEFKDKIISLEGEHNIIGRTAVVHEKADDLGKGGDNESKVTG +NAGGRLACGVIGICQ >gi|226844835|_Trachemys_scripta_elegans ---------------------------------------------------------CTSAGAHFNPNGK +NHGGPQDKERHVGDLGNVIANKDGVAEVSIKDSLISLTGPLSIIGRTMVVHEKEDDLGKGNN------- +--------------- >gi|265797|_Caretta_caretta ---------------------------ATVKAVCVLKGEDPVKEPVKGPVKEPVKGIIYFEQQGN-GPVT +LSGSITGLTEGKHGFHVHEFGDNTNGCTSAGAHFNPPGKNHGGPQDNERHVGDLGNVIANKEGVAEVCI +KDSLISLTGSQSIIG >gi|126352669|_Equus_caballus MALKAVCVLKGDGPVHGVIHFEQQQEGGPVVLKGFIEGLTKGDHGFHVHEFGDNTQGCTTAGAHFNPLSK +KHGGPKDEERHVGDLGNVTADENGKADVDMKDSVISLSGKHSIIGRTMVVHEKQDDLGKGGNEESTKTG +NAGSRLACGVIGIAP >gi|126325231|_Monodelphis_domestica MVLKAVCVLKGDGPVQGTIFFEQKQVGEPVELSGSIKGLAEGDHGFHVHEFGDNTQGCTSAGAHFNPHSK +KHGGPTDEERHVGDLGNVTANKDGVATVSIKDSHIELSGPMSIIGRTMVVHEKADDLGKGGNAESEKTG +NAGPRLACGVIGIAK >gi|130497065|_Oryctolagus_cuniculus MATKAVCVLKGDGPVEATIHFEQKGT-GPVVVKGRITGLTEGLHEFHVHQFGDNRQGCTSAGPHFNPLSK +KHGGPKDEERHVGDLGNVTAGSNGVADVLIEDSVISLSGDMSVIGRTLVVHEKEDDLGKGGNDESTKTG +NAGSRLACGVIGISP >gi|74136167|_Macaca_mulatta MAMKAVCVLKGDSPVQGTINFEQKESNGPVKVWGSITGLTEGLHGFHVHQFGDNTQGCTSAGPHFNPLSR +QHGGPKDEERHVGDLGNVTAGKDGVAKVSFEDSVISLSGDHSIIGRTLVVHEKADDLGKGGNEESKKTG +NAGGRLACGVIGIAQ >gi|84579183|_Macaca_fascicularis MAMKAVCVLKGDSPVQGTINFEQKESNGPVKVWGSITGLTEGLHGYHVHQFGDNTQGCTSAGPHFNPLSR +QHGGPKDEERHVGDLGNVTAGKDGVAKVSFEDSVISLSGDHSIIGRTLVVHEKADDLGKGGNEESKKTG +NAGGRLACGVIGIAH >gi|197102620|_Pongo_abelii MATKAVCVLKGDSPVKGIINFEQKERNGPVKVWGSIEGLTEGLHGFHVHEFGDNTVGCTSAGPHFNPLSR +KHGGPKDEERHVGDLGNVTADKDGVVSVSIEDSVISLSGDHCIIGRTLVVHEKADDLGKGGNEESTKTG +NAGSRLACGVIGIAQ >gi|223633904|_Ovis_aries MATKAVCVLKGDGPVQGTIRFEAKGD--KVVVTGSITGLTEGDHGFHVHQFGDNTQGCTSAGPHFNPLSK +KHGGPKDEERHVGDLGNVKADKNGVAIVDIVDPLISLSGEYSIIGRTMVVHERPDDLGRGGNEESTKTG +NAGGRLACGVIGIAP >gi|194672519|_Bos_taurus MATKAVCVLKGDGPVQGTIHFEAKGN--TVVVTGSITGLTEGDHGFHVHQFGDNTQGCTSAGPHFNPLSK +KHSGPKDEERHVGDLGNVTADKNGVAVVDIVDSLISLSGEYSIIGRTMVVHEKPDDLGRGGNEESTKTG +NAGSRLACGVIGIAK >gi|2660692|_Cervus_elaphus MATKAVCVMKGDGPVQGTIRFEAKGN--TVVVTGSITGLTEGDHGFHVHQFGDNTQGCTSAGPHFNPLSK +KHGGPKDEERHVGDLGNVTADKNGVAKVDIVDSLISLSGEHSIIGRTMVVHEKPDDLGRGGNEESTKTG +NARNRLACGVIGIAQ >gi|39578718|_Cavia_porcellus -ATKAVCVLKGDGPVQGIIHFEQKAN-GPVVVKGRITGLVEGKHGFHVHEFGDNTQGCTSAGPHFNPLSK +KHGGPQDEERHVGDLGNVTAGADGVANVSIEDSLISLSGANSIIGRTMVVHEKPDDLGKGGNEESTKTG +NAGSRLACGVIGIAQ >gi|15082144|_Sus_scrofa ---KAVCVLKGDGPVQGTIYFELKGE-KTVLVTGTIKGLAEGDHGFHVHQFGDNTQGCTSAGPHFNPESK +KHGGPKDQERHVGDLGNVTAGKDGVATVYIEDSVIALSGDHSIIGRTMVVHEKPDDLGRGGNEESTKTG +NAGSRLACGVIG--- >gi|281348263|_Ailuropoda_melanoleuca --------------------------------------------------------GCTSAGPHFNPLSK +KHGGPKDEERHVGDLGNVTAGKDGVATVSLEDSLIALSGDHSIIGRTMVVHEKRDDLGKGGNEESTQTG +NAGSRLACGVIGIAK >gi|8394328|_Rattus_norvegicus MAMKAVCVLKGDGPVQGVIHFEQKASGEPVVVSGQITGLTEGEHGFHVHQYGDNTQGCTTAGPHFNPHSK +KHGGPADEERHVGDLGNVAAGKDGVANVSIEDRVISLSGEHSIIGRTMVVHEKQDDLGKGGNEESTKTG +NAGSRLACGVIGIAQ >gi|45597447|_Mus_musculus MAMKAVCVLKGDGPVQGTIHFEQKASGEPVVLSGQITGLTEGQHGFHVHQYGDNTQGCTSAGPHFNPHSK +KHGGPADEERHVGDLGNVTAGKDGVANVSIEDRVISLSGEHSIIGRTMVVHEKQDDLGKGGNEESTKTG +NAGSRLACGVIGIAQ >gi|55925004|_Mus_spretus ------------------------------------------------HQYGDNTQGCTSAGPHFNPHS- +--------------------------------------------------------------------- +--------------- END
In reply to Re^6: Use of uninitialized value in string eq
by sophix
in thread Use of uninitialized value in string eq
by sophix
| For: | Use: | ||
| & | & | ||
| < | < | ||
| > | > | ||
| [ | [ | ||
| ] | ] |