Hello,
I want to find the sequences in tmp01 that are contained in the sequences in tmp02, and then print out all of the matches. The tmp02 file is tab-separated.
Thanks in advance!
tmp01
ATCCCACCGCTGCCACCA
ACCCTGCTCGCTGCGCCA
TCCCCGGCACCTCCACCA
TCCCCGGCATCTCCACCA
ATCCTGCCGACTACGCCA
TCGATTCCCGGCCCATGCACCA
TCGATTCCCGGCCAACGCACCA
GTCCCACCAGAGTCGCCA
ACCCCACTCCTGGTACCA
GTCCCTTCGTGGTCGCCA
tmp02
AACCCCATCCCACCGCTGCCACCA 1
AACCCCATCCTCGTCGCC 1
AACCCCATGAAATAAGAG 2
AACCCCATGATCAGGACAAG 1
AACCCCATTAAAAAATGG 1
AACTGGATTCTCTGAAATCCCACCGCTGCCACCA 1
AACTGGATTGTCTGTTTGT 1
AACTGGCAAGTTCAGGCATG 1
AACTGGCACACACAACC 1
AACTGGCACACACAACCT 1
# Report every tmp02 record whose sequence contains a tmp01 sequence.
#
# Input:
#   tmp01 - one query sequence per line.
#   tmp02 - one record per line: a sequence followed by further
#           whitespace-separated columns (e.g. a count).
# Output:
#   tmp03 - one line per hit: the query sequence, a tab, then the complete
#           matching tmp02 record. Queries without any hit produce no output.
#
# Dies with the file name and the OS error if a file cannot be opened, or if
# the output file cannot be closed.
use strict;
use warnings;

my $query_file  = 'tmp01';
my $target_file = 'tmp02';
my $out_file    = 'tmp03';

open my $query_fh, '<', $query_file
    or die "Cannot open $query_file: $!";
my @query_lines = <$query_fh>;
close $query_fh;

open my $target_fh, '<', $target_file
    or die "Cannot open $target_file: $!";
my @target_lines = <$target_fh>;
close $target_fh;

# Parse every tmp02 record once, up front, instead of re-splitting it for
# each query. Each entry is [ sequence, full record ].
my @targets;
for my $line (@target_lines) {
    # Strip the line ending (including a stray "\r" from DOS-style files).
    $line =~ s/\s+\z//;

    # split ' ' separates on any run of tabs or spaces and ignores leading
    # whitespace, so the first field is always the sequence.
    my ($sequence) = split ' ', $line;
    next unless defined $sequence;    # blank line
    push @targets, [ $sequence, $line ];
}

open my $out_fh, '>', $out_file
    or die "Cannot open $out_file: $!";

for my $line (@query_lines) {
    my ($query) = split ' ', $line;
    next unless defined $query;       # blank line

    for my $target (@targets) {
        my ($sequence, $record) = @{$target};

        # The tmp02 sequence must CONTAIN the tmp01 sequence. index() does a
        # literal substring search, so no regex metacharacters are involved.
        next if index($sequence, $query) < 0;

        print {$out_fh} "$query\t$record\n";
    }
}

# Buffered write errors only surface at close time, so check it.
close $out_fh or die "Cannot close $out_file: $!";