file1
>Chr1.ID=PAC:19650373;Name=AT1G11900.1;pacid=19650373;longest=1;Parent=AT1G11900.len361.i1.1_111
GTATGAATTCCAAAAATCCAGAACCGTTTTCGTGATTCATGTTATGCTCTCGTTGTTGTTTTCTGATTGTTACTGCTCAGCGAGTTTCTTCTATCAATGTTTGATTCGATGAAGATGCGAAATTTCGAACCATTGCTGTTCTTCTGAGTTTGATCGTTTTTTAGTTTCGGGGCTTTCACGCTTCAGCTAGTGTTTGATTACGAAGTTTTCTGATTAAATGTGTGAGTTTTTTTGTAGTCATCTCGAAAACTGAGAAATCCATTTTTATAGATTTACATTGTTCATAGTTATATGTGGAAGTTGATGATTGATGGTGATTCTGCAAATTGATGATTTGGTTTTCTGTTTATTGGCATTGCAG
>Chr1.ID=PAC:19657430;Name=AT1G76660.1;pacid=19657430;longest=1;Parent=AT1G76660.len490.i1.2_394
GTTTCTGTTTAATTCTCTCTATTTTCGTTTGATTTCGACTTCTTGAGCTTTTACTTCTCTCTGTCTCAGTTCTAACTTCTTCAGATTTTAAAGCTTTCGTTTTTTTGGCAAGTTGTTTTTTTTTCCTACTTAGATCTGACTACTCCGACTCTGTTCACACTAATGTTCGTTAGGGTTTATGTTGAATCTCTCCTTTGATCATTATGTTATTGTAAAAATCCCAGCTTTATGCTAAATCGAGCTAGTGATTCTTGAGAATTGAACAAAAAAGTTTTACATTTTTCTGAATTGCCATTCAATTAGAAGAAGAAAAAATTCAACCTTTTACTGGTTATGATCTAGATTCGATGCGTGTAAGCTATAAGATCACCATTTCGTGCTTTAGATCCATAATCATTGATTCACTATATGGCAATTATCTTCTTGCTTCACAGATCTCTTTTACACTTACATGTCAAGTGTCTGAGTGTGTGTGTGTGTCCTTTTGCAG
>Chr1.ID=PAC:19657550;Name=AT1G53750.1;pacid=19657550;longest=1;Parent=AT1G53750.len344.i1.9_229
GTAAGCCTTCCCCTTTTAGAACCCTAAGTTTTATTGGGGTTTTCGATTTTTACTCTTCTGATTCATCGGAGAATTCGGATCTACACTAGATTTTAGTTACTCGAATGTGAGGGTTTCGTCTCTTTGCAAACCAATTTGATGTTTCCTCCTGAGCTAGATATGTTCTTGATGAGCTTGATTTTTCTACTTGGTTCAGTTTTTTTTGCTAATTACTACTTATATGAGTGAATCTGCCTCACTGTTTGAATTTATTCCAAGTGGAAATATTCATAGTCATGCTTTGTTGTATCTGTTATCTCTCCATATGTTGTGTTGCTGACCTTGTTAAATCTCATTCTCTGCAG
>Chr1.ID=PAC:19650963;Name=AT1G47740.1;pacid=19650963;longest=1;Parent=AT1G47740.len1091.i1.4_277
GTAAGTTAATCTTCTCTTCTGAAAATTGAATTTGGTGTATCAATTCTTACATTATCTTGAAGATTCATCTCTGAATTTCTCAAATTTATGGGGGTTTTTTGTTTGTCGGAATTGCCGGAGAAATTGGAAAAAACGAGATCTTTGAGTAAAGGGTTTGTTTAATCTTTAGTCTTTATTGCTTTCCTTAAGCTAATTTTGGCAGATCTGGAACATAAACCCTAGAACAAGACCAAATCAGTGTCTCCTTACTCTTAGGGATTTTAGTCTCTGTGATACCTTAAATGTGTTTATAAATTGACTGTGCTTAATGGGTCACATTTGATTTGCAATAAAAGTTTCACAATTCTTCCATTTTCAGTAATGTAAGCTCCAGTTTTCAAGATTTACTATTTTGGGGAACTAGTTAGGTTTGTAGGTTTATTAGATCTTAGAAACACTAATGTAATGCTGTTTGTTTGGTACTCTTTAATATTCACTATTCATCTTAATGTGGAATCAAATTTGCTTTTTTTGGTCAGACTCAATTAGTTGGAATGAGTTGTTGAACCATTGACTTGTCTCCTAAGCTCTTACTTAATCTATCTGTATCATCTTCTCCTGTCTATTTCTCTTTATTTAATACATAACTCGTTCATGATTGCATCTGACATGGTAGCAACTCTTTGTGAACAAGACTTGATTTTATAACACTGTAACATGACCACACTTTTTCCTTTTGATCTCTGTATATTTGGGGAAGTAAGGATTTGGTTTAACAATGATACAAAATCACAGTTTAGATGACTCAGTCTTGTCTTTTATCATTAAGATGGAAAAAATGAGACCAATCTTGTCTTGCTTTATTCTAAGATGCATGCTCTATTATATTTTATGCTTTTGTATATATAACTGATTGAAGCCGGCCAATGGAGATTGGTGCTGACTTTTTAAATGAGCTGTTGTTGTATTTCAGCCAACTAGCATAAGATAAAATAAAATAGGAAAATTTTTCATTTCAGTCTCTTAAGTTCAATGAAAACATAATGTGGCACAGAGGTCTTTGCTTTTGTACCTTTCAGAATCTTTTATTGATTGATGATGTTTACATACAG
>Chr1.ID=PAC:19652608;Name=AT1G31420.1;pacid=19652608;longest=1;Parent=AT1G31420.len415.i1.13_217
GTTCTTATAATTCTTACTAATTCTGTTTGCTTTTAAAGCTATAACCTTTGATATTGTTGGAAAGAGTGGGGTTTTGGGTCTTTTGCTGAATCGTTTTTTGGATTTGTTATATTGTTCGAATCTTCAGTTGTTATTGTGTTATAGGATTGGATGGTATCTGGGAATTTCGTAATCTATGTTAAGCTAGAGCTGTTTTTGAGCTCTTTTGTTGATGATGATTTTGAGATTGTTGGCCGAATTTAGCTCTCGTTTTCTGATTTTAGCAATTGGAAAGTGTGTATTGGTTCTTGTGAGGCAATTTCACTGTTTTGAGTACTCAAAATGTAGATGAGAGCATGCATAAGTTGTGTGGAGACTGAGCTTAATGTGTAGTGTAATTGACAATTAGTTTTGTGGGCTTTCCTTTGTTTTTCAG
####
file2
>Chr1.ID=PAC:19650373;Name=AT1G11900.1;pacid=19650373;longest=1;Parent=AT1G11900.len361.i1.1_111 100
>Chr1.ID=PAC:19657430;Name=AT1G76660.1;pacid=19657430;longest=1;Parent=AT1G76660.len490.i1.2_394 34
>Chr1.ID=PAC:19652608;Name=AT1G31420.1;pacid=19652608;longest=1;Parent=AT1G31420.len415.i1.13_217 76
####
#!/usr/bin/perl
use strict;
use warnings;
use diagnostics;
use FAlite;
# Print every FASTA record whose header appears in a list file.
#
# Usage: <script> <fasta_file> <list_file>
#   fasta_file - FASTA file; a record's identifier is the first
#                whitespace-delimited token of its definition line
#                (including the leading '>').
#   list_file  - one "<header> <score>" pair per line; lines starting with
#                '#' are skipped. Only the header column is used.
#
# Matching records are written to STDOUT as "<header>\n<sequence>\n", in the
# order they occur in the FASTA file. A header that is listed more than once
# is still printed a single time.
die "usage: $0 <fasta_file> <list_file>\n" unless @ARGV == 2;
my ($fasta_path, $list_path) = @ARGV;

# Read the list once into a lookup table instead of reopening and rescanning
# it for every FASTA record.
my %wanted;
open(my $list_fh, '<', $list_path)
    or die "Cannot open list file '$list_path': $!\n";
while (my $line = <$list_fh>) {
    next if $line =~ /^#/;
    my ($header) = split ' ', $line;    # second column (score) is not needed
    next unless defined $header;        # blank line
    $wanted{$header} = 1;
}
close $list_fh;

open(my $fasta_fh, '<', $fasta_path)
    or die "Cannot open FASTA file '$fasta_path': $!\n";
my $fasta = FAlite->new($fasta_fh);
while (my $entry = $fasta->nextEntry) {
    my ($chrom) = $entry->def =~ /(^\S+)/;
    next unless defined $chrom && $wanted{$chrom};
    my $gseq = $entry->seq;
    print "$chrom\n$gseq\n";
}
close $fasta_fh;
__END__
##
##
package FAlite;
use strict;
# Constructor: wrap an already-open filehandle in a FASTA reader.
#
# Arguments:
#   $class - package name to bless into (an existing object is also accepted,
#            in which case its class is reused)
#   $fh    - reference to a readable filehandle (e.g. \*STDIN, a lexical
#            handle from open(my $fh, ...), or a blessed handle object)
#
# Returns the reader object. Leading blank lines are skipped and the first
# definition line is stored (chomped) in LASTLINE for nextEntry. If the input
# is empty or does not start with '>', a warning is issued and the object is
# returned without LASTLINE set.
#
# Dies if $fh is not a handle reference.
sub new {
    my ($class, $fh) = @_;

    # "ref $fh !~ /GLOB/" would parse as ref($fh !~ /GLOB/), which is always
    # false, so the handle type is checked explicitly. reftype() looks through
    # blessing, so handle objects such as IO::File are still accepted.
    require Scalar::Util;
    my $handle_type = Scalar::Util::reftype($fh);
    if (!defined $handle_type
        || ($handle_type ne 'GLOB' && $handle_type ne 'IO')) {
        die ref($fh), "\n", "FAlite ERROR: expect a GLOB reference\n";
    }

    my $this = bless {}, ref($class) || $class;
    $this->{FH} = $fh;

    # Skip leading blank lines (not supposed to have blanks, but...).
    # A lexical is used so the caller's $_ is left untouched.
    my $firstline;
    while (defined(my $line = <$fh>)) {
        if ($line =~ /\S/) {
            $firstline = $line;
            last;
        }
    }

    if (not defined $firstline) {
        warn "FAlite: Empty\n";
        return $this;
    }
    if ($firstline !~ /^>/) {
        warn "FAlite: Not FASTA formatted\n";
        return $this;
    }

    chomp $firstline;
    $this->{LASTLINE} = $firstline;
    return $this;
}
# Return the next FASTA record as a FAlite::Entry, or 0 when none remain.
#
# The definition line of the record was already consumed by the previous call
# (or by the constructor) and is held in LASTLINE. This call reads sequence
# lines until the next line starting with '>' or end of input, and remembers
# that next definition line for the following call.
#
# A record with no sequence lines is returned with an empty sequence, even
# when it is the last record in the input.
sub nextEntry {
    my ($this) = @_;
    return 0 if not defined $this->{LASTLINE};

    my $fh  = $this->{FH};
    my $def = $this->{LASTLINE};
    my @seq;
    my $next_def;

    # Read into a lexical so the caller's $_ is not clobbered.
    while (defined(my $line = <$fh>)) {
        chomp $line;
        if ($line =~ /^>/) {
            $next_def = $line;
            last;
        }
        push @seq, $line;
    }

    # undef at end of input: the guard at the top then ends iteration on the
    # next call, without relying on "zero lines read", which used to drop a
    # trailing header-only record.
    $this->{LASTLINE} = $next_def;

    return FAlite::Entry::new($def, \@seq);
}
package FAlite::Entry;
use overload '""' => 'all';
# Build an entry from a definition line and its sequence lines.
#
# Called as a plain function (not a method):
#   $def     - definition line, stored unchanged
#   $seqarry - array reference of sequence lines; they are concatenated and
#              every whitespace character is removed
# Returns the blessed entry object.
sub new {
    my ($def, $seqarry) = @_;

    my $residues = join '', @{$seqarry};
    $residues =~ s/\s+//g;    # just in case more spaces

    return bless { DEF => $def, SEQ => $residues }, __PACKAGE__;
}
# Accessor: the definition line stored in the entry's DEF slot.
sub def {
    my ($entry) = @_;
    return $entry->{DEF};
}
# Accessor: the sequence string stored in the entry's SEQ slot.
sub seq {
    my ($entry) = @_;
    return $entry->{SEQ};
}
# Render the whole entry as text: definition line, newline, sequence, newline.
sub all {
    my ($entry) = @_;
    return "$entry->{DEF}\n$entry->{SEQ}\n";
}
1;