#!/usr/bin/env perl
#
# Split FASTA records into one output file per wanted sequence ID.
#
# For every "NAME.fasta" in the working directory that has a sibling
# "NAME.txt", the whitespace-separated IDs in NAME.txt are read.  Each
# record of NAME.fasta whose ID (the text between the first '|' and the
# following ')') is in that list is written to "ID.fasta" in the same
# directory.  Records for the same ID found in several input files end up
# in the same output file, in the order the inputs are processed.
#
# All I/O built-ins covered by autodie (open, close, opendir, closedir)
# throw on failure, so there are no explicit "or die" checks below.

use strict;
use warnings;
use autodie;

# FASTA records start with '>'.  It is used both as the input record
# separator while reading and as the prefix restored on output.
my $RECORD_SEP = '>';

my $dir = '.';

# Cache of output filehandles, keyed by sequence ID.  The block is placed
# before the main code so the shared hash is set up before its first use.
{
    my %out_fh_for;

    # Return the write handle for "$dir/$id.fasta", opening (and thereby
    # truncating) the file the first time a given ID is requested.
    sub _get_out_fh {
        my ($dir, $id) = @_;

        unless (exists $out_fh_for{$id}) {
            open $out_fh_for{$id}, '>', "$dir/$id.fasta";
        }

        return $out_fh_for{$id};
    }

    # Close every cached output handle.  With autodie, a failed close
    # (e.g. a buffered write error) throws.
    sub _close_all_out_fhs {
        close $_ for values %out_fh_for;
        return;
    }
}

# Snapshot the directory listing up front: output files created below are
# written into the same directory and must not be picked up as input.
opendir(my $dh, $dir);
my @entries = readdir $dh;
closedir $dh;

ENTRY:
for my $entry (@entries) {
    # Anchored at both ends so that names such as "x.fasta.bak" or
    # "x.fasta~" are not mistaken for FASTA input.
    my ($base) = $entry =~ m{\A (.+?) [.]fasta \z}xms;
    next ENTRY unless defined $base;

    my $id_path    = "$dir/$base.txt";
    my $fasta_path = "$dir/$entry";
    next ENTRY unless -e $id_path;

    my %is_wanted = map { ( $_ => 1 ) } split ' ', _slurp($id_path);

    _extract_wanted_records($fasta_path, \%is_wanted, $dir);
}

_close_all_out_fhs();

# Return the entire content of $path as one string ('' for an empty file).
sub _slurp {
    my ($path) = @_;

    open my $fh, '<', $path;
    my $content = do { local $/; <$fh> };
    close $fh;

    return defined $content ? $content : '';
}

# Read $fasta_path record by record and append every record whose ID is a
# key of %$is_wanted to that ID's output file under $dir.
sub _extract_wanted_records {
    my ($fasta_path, $is_wanted, $dir) = @_;

    open my $fasta_fh, '<', $fasta_path;

    # Read up to each '>' so that one read yields one record; chomp then
    # strips that trailing '>'.  The first read yields only the leading
    # '>' of the file, which is empty after chomp and skipped below.
    local $/ = $RECORD_SEP;

    RECORD:
    while (my $record = <$fasta_fh>) {
        chomp $record;

        # Header shape: "source:name(db|ID)" -- capture ID.
        next RECORD unless $record =~ m{\A[^|]+\|([^)]+)};
        my $id = $1;
        next RECORD unless $is_wanted->{$id};

        # Put back the '>' that chomp removed.
        print { _get_out_fh($dir, $id) } $RECORD_SEP . $record;
    }

    close $fasta_fh;

    return;
}

#### ken@titan ~/tmp/pm_11145943 $ ls -l total 8 -rwxr-xr-x 1 ken None 976 Aug 5 08:26 fasta_munge.pl -rw-r--r-- 1 ken None 257 Aug 5 08:28 one.fasta -rw-r--r-- 1 ken None 23 Aug 5 05:32 one.txt -rw-r--r-- 1 ken None 402 Aug 5 08:28 two.fasta -rw-r--r-- 1 ken None 23 Aug 5 05:35 two.txt ken@titan ~/tmp/pm_11145943 $ cat one.fasta >one:VFG000033(gb|WP_002208793) PGRTPGVERVAHRDDDEHSWHLYAIRIHPQAPLKCDDFIVRMTENGIGCSVHYVPLHLQP YWRDRYGLTPDMYPHSQAAFEGMASLPIYSRMTDADVQRVIASVRQLLRP >one:VFG000036(gb|NP_490509) ALLISLMALGVKAGDEVITTSFTFVATAEVIALLGAKPVFVDVEPDTCNIKVSEIEAKIT PRTKAIIPVSLYGQCGDMDEV ken@titan ~/tmp/pm_11145943 $ cat one.txt WP_002208793 NP_490509 ken@titan ~/tmp/pm_11145943 $ cat two.fasta >two:VFG000033(gb|WP_002208793) PGRTPGVERVAHRDDDEHSWHLYAIRIHPQAPLKCDDFIVRMTENGIGCSVHYVPLHLQP YWRDRYGLTPDMYPHSQAAFEGMASLPIYSRMTDADVQRVIASVRQLLRP >two:VFG000032(gb|WP_002208792) PGRTPGVERVAHRDDDEHSWHLYAIRIHPQAPLKCDDFIVRMTENGIGCSVHYVPLHLQP YWRDRYGLTPDMYPHSQAAFEGMASLPIYSRMTDADVQRVIASVRQLLRP >two:VFG000036(gb|NP_490509) ALLISLMALGVKAGDEVITTSFTFVATAEVIALLGAKPVFVDVEPDTCNIKVSEIEAKIT PRTKAIIPVSLYGQCGDMDEV ken@titan ~/tmp/pm_11145943 $ cat two.txt WP_002208792 NP_490509 ken@titan ~/tmp/pm_11145943 $ ./fasta_munge.pl ken@titan ~/tmp/pm_11145943 $ ls -l total 
11 -rwxr-xr-x 1 ken None 976 Aug 5 08:26 fasta_munge.pl -rw-r--r-- 1 ken None 224 Aug 5 08:31 NP_490509.fasta -rw-r--r-- 1 ken None 257 Aug 5 08:28 one.fasta -rw-r--r-- 1 ken None 23 Aug 5 05:32 one.txt -rw-r--r-- 1 ken None 402 Aug 5 08:28 two.fasta -rw-r--r-- 1 ken None 23 Aug 5 05:35 two.txt -rw-r--r-- 1 ken None 145 Aug 5 08:31 WP_002208792.fasta -rw-r--r-- 1 ken None 145 Aug 5 08:31 WP_002208793.fasta ken@titan ~/tmp/pm_11145943 $ cat NP_490509.fasta >one:VFG000036(gb|NP_490509) ALLISLMALGVKAGDEVITTSFTFVATAEVIALLGAKPVFVDVEPDTCNIKVSEIEAKIT PRTKAIIPVSLYGQCGDMDEV >two:VFG000036(gb|NP_490509) ALLISLMALGVKAGDEVITTSFTFVATAEVIALLGAKPVFVDVEPDTCNIKVSEIEAKIT PRTKAIIPVSLYGQCGDMDEV ken@titan ~/tmp/pm_11145943 $ cat WP_002208792.fasta >two:VFG000032(gb|WP_002208792) PGRTPGVERVAHRDDDEHSWHLYAIRIHPQAPLKCDDFIVRMTENGIGCSVHYVPLHLQP YWRDRYGLTPDMYPHSQAAFEGMASLPIYSRMTDADVQRVIASVRQLLRP ken@titan ~/tmp/pm_11145943 $ cat WP_002208793.fasta >one:VFG000033(gb|WP_002208793) PGRTPGVERVAHRDDDEHSWHLYAIRIHPQAPLKCDDFIVRMTENGIGCSVHYVPLHLQP YWRDRYGLTPDMYPHSQAAFEGMASLPIYSRMTDADVQRVIASVRQLLRP ken@titan ~/tmp/pm_11145943 $