file1 >Chr1.ID=PAC:19650373;Name=AT1G11900.1;pacid=19650373;longest=1;Parent=AT1G11900.len361.i1.1_111 GTATGAATTCCAAAAATCCAGAACCGTTTTCGTGATTCATGTTATGCTCTCGTTGTTGTTTTCTGATTGTTACTGCTCAGCGAGTTTCTTCTATCAATGTTTGATTCGATGAAGATGCGAAATTTCGAACCATTGCTGTTCTTCTGAGTTTGATCGTTTTTTAGTTTCGGGGCTTTCACGCTTCAGCTAGTGTTTGATTACGAAGTTTTCTGATTAAATGTGTGAGTTTTTTTGTAGTCATCTCGAAAACTGAGAAATCCATTTTTATAGATTTACATTGTTCATAGTTATATGTGGAAGTTGATGATTGATGGTGATTCTGCAAATTGATGATTTGGTTTTCTGTTTATTGGCATTGCAG >Chr1.ID=PAC:19657430;Name=AT1G76660.1;pacid=19657430;longest=1;Parent=AT1G76660.len490.i1.2_394 GTTTCTGTTTAATTCTCTCTATTTTCGTTTGATTTCGACTTCTTGAGCTTTTACTTCTCTCTGTCTCAGTTCTAACTTCTTCAGATTTTAAAGCTTTCGTTTTTTTGGCAAGTTGTTTTTTTTTCCTACTTAGATCTGACTACTCCGACTCTGTTCACACTAATGTTCGTTAGGGTTTATGTTGAATCTCTCCTTTGATCATTATGTTATTGTAAAAATCCCAGCTTTATGCTAAATCGAGCTAGTGATTCTTGAGAATTGAACAAAAAAGTTTTACATTTTTCTGAATTGCCATTCAATTAGAAGAAGAAAAAATTCAACCTTTTACTGGTTATGATCTAGATTCGATGCGTGTAAGCTATAAGATCACCATTTCGTGCTTTAGATCCATAATCATTGATTCACTATATGGCAATTATCTTCTTGCTTCACAGATCTCTTTTACACTTACATGTCAAGTGTCTGAGTGTGTGTGTGTGTCCTTTTGCAG >Chr1.ID=PAC:19657550;Name=AT1G53750.1;pacid=19657550;longest=1;Parent=AT1G53750.len344.i1.9_229 GTAAGCCTTCCCCTTTTAGAACCCTAAGTTTTATTGGGGTTTTCGATTTTTACTCTTCTGATTCATCGGAGAATTCGGATCTACACTAGATTTTAGTTACTCGAATGTGAGGGTTTCGTCTCTTTGCAAACCAATTTGATGTTTCCTCCTGAGCTAGATATGTTCTTGATGAGCTTGATTTTTCTACTTGGTTCAGTTTTTTTTGCTAATTACTACTTATATGAGTGAATCTGCCTCACTGTTTGAATTTATTCCAAGTGGAAATATTCATAGTCATGCTTTGTTGTATCTGTTATCTCTCCATATGTTGTGTTGCTGACCTTGTTAAATCTCATTCTCTGCAG >Chr1.ID=PAC:19650963;Name=AT1G47740.1;pacid=19650963;longest=1;Parent=AT1G47740.len1091.i1.4_277 GTAAGTTAATCTTCTCTTCTGAAAATTGAATTTGGTGTATCAATTCTTACATTATCTTGAAGATTCATCTCTGAATTTCTCAAATTTATGGGGGTTTTTTGTTTGTCGGAATTGCCGGAGAAATTGGAAAAAACGAGATCTTTGAGTAAAGGGTTTGTTTAATCTTTAGTCTTTATTGCTTTCCTTAAGCTAATTTTGGCAGATCTGGAACATAAACCCTAGAACAAGACCAAATCAGTGTCTCCTTACTCTTAGGGATTTTAGTCTCTGTGATACCTTAAATGTGTTTATAAATTGACTGTGCTTAATGGGTCACATTTGATTTGCAATAAAAGTTTCACAATTCTTCCATTTTCAGTAATGTAAGCTCCAGTTTTCAAGATTTACTATTTTGGGGAACTAGTTAGGTTTGTAGGTTTATTAGATCTTAGAAACACTAATGTAATGCTGTTTGTTTGGTACTCTTTAATATTCACTATTCATCTTAATGTGGAATCAAATTTGCTTTTTTTGGTCAGACTCAATTAGTTGGAATGAGTTGTTGAACCATTGACTTGTCTCCTAAGCTCTTACTTAATCTATCTGTATCATCTTCTCCTGTCTATTTCTCTTTATTTAATACATAACTCGTTCATGATTGCATCTGACATGGTAGCAACTCTTTGTGAACAAGACTTGATTTTATAACACTGTAACATGACCACACTTTTTCCTTTTGATCTCTGTATATTTGGGGAAGTAAGGATTTGGTTTAACAATGATACAAAATCACAGTTTAGATGACTCAGTCTTGTCTTTTATCATTAAGATGGAAAAAATGAGACCAATCTTGTCTTGCTTTATTCTAAGATGCATGCTCTATTATATTTTATGCTTTTGTATATATAACTGATTGAAGCCGGCCAATGGAGATTGGTGCTGACTTTTTAAATGAGCTGTTGTTGTATTTCAGCCAACTAGCATAAGATAAAATAAAATAGGAAAATTTTTCATTTCAGTCTCTTAAGTTCAATGAAAACATAATGTGGCACAGAGGTCTTTGCTTTTGTACCTTTCAGAATCTTTTATTGATTGATGATGTTTACATACAG >Chr1.ID=PAC:19652608;Name=AT1G31420.1;pacid=19652608;longest=1;Parent=AT1G31420.len415.i1.13_217 GTTCTTATAATTCTTACTAATTCTGTTTGCTTTTAAAGCTATAACCTTTGATATTGTTGGAAAGAGTGGGGTTTTGGGTCTTTTGCTGAATCGTTTTTTGGATTTGTTATATTGTTCGAATCTTCAGTTGTTATTGTGTTATAGGATTGGATGGTATCTGGGAATTTCGTAATCTATGTTAAGCTAGAGCTGTTTTTGAGCTCTTTTGTTGATGATGATTTTGAGATTGTTGGCCGAATTTAGCTCTCGTTTTCTGATTTTAGCAATTGGAAAGTGTGTATTGGTTCTTGTGAGGCAATTTCACTGTTTTGAGTACTCAAAATGTAGATGAGAGCATGCATAAGTTGTGTGGAGACTGAGCTTAATGTGTAGTGTAATTGACAATTAGTTTTGTGGGCTTTCCTTTGTTTTTCAG #### file2 >Chr1.ID=PAC:19650373;Name=AT1G11900.1;pacid=19650373;longest=1;Parent=AT1G11900.len361.i1.1_111 100 >Chr1.ID=PAC:19657430;Name=AT1G76660.1;pacid=19657430;longest=1;Parent=AT1G76660.len490.i1.2_394 34 >Chr1.ID=PAC:19652608;Name=AT1G31420.1;pacid=19652608;longest=1;Parent=AT1G31420.len415.i1.13_217 76 #### #!/usr/bin/perl use strict; use warnings; use diagnostics; use FAlite; die "usage: $0 \n" unless @ARGV == 2; open(FASTA, $ARGV[0]) or die; my $fasta = new FAlite(\*FASTA); while (my $entry = $fasta->nextEntry) { my $gseq = $entry->seq; my ($chrom) = $entry->def =~ /(^\S+)/; my @a1 = $chrom; my %hash; $hash{$_}++ for @a1; my $header; open(LIST, $ARGV[1]) or die; while () { next if /^#/; my ($header, $score) = split; my @a2 = $header; for my $item (@a2) { if ($hash{$item}) {print "$chrom\n$gseq\n"}; } } } close FASTA; close LIST; __END__ #### package FAlite; use strict; sub new { my ($class, $fh) = @_; if (ref $fh !~ /GLOB/) {die ref $fh, "\n", "FAlite ERROR: expect a GLOB reference\n"} my $this = bless {}; $this->{FH} = $fh; while(<$fh>) {last if $_ =~ /\S/} # not supposed to have blanks, but... my $firstline = $_; if (not defined $firstline) {warn "FAlite: Empty\n"; return $this} if ($firstline !~ /^>/) {warn "FAlite: Not FASTA formatted\n"; return $this} $this->{LASTLINE} = $firstline; chomp $this->{LASTLINE}; return $this; } sub nextEntry { my ($this) = @_; return 0 if not defined $this->{LASTLINE}; my $fh = $this->{FH}; my $def = $this->{LASTLINE}; my @seq; my $lines_read = 0; while(<$fh>) { $lines_read++; if ($_ =~ /^>/) { $this->{LASTLINE} = $_; chomp $this->{LASTLINE}; last; } push @seq, $_; } return 0 if $lines_read == 0; chomp @seq; my $entry = FAlite::Entry::new($def, \@seq); return $entry; } package FAlite::Entry; use overload '""' => 'all'; sub new { my ($def, $seqarry) = @_; my $this = bless {}; $this->{DEF} = $def; $this->{SEQ} = join("", @$seqarry); $this->{SEQ} =~ s/\s//g; # just in case more spaces return $this; } sub def {shift->{DEF}} sub seq {shift->{SEQ}} sub all {my $e = shift; return $e->{DEF}."\n".$e->{SEQ}."\n"} 1;