#!/usr/bin/perl -w
use warnings;
use strict;
use Bio::Seq;
use Bio::SeqIO;

# Split a FASTA file of reads by the presence of a known tag sequence.
#
# Usage: script <input.fasta> <output.fasta> <trash.fasta> [tags.fasta]
#
#   input.fasta   reads to test
#   output.fasta  receives every read that contains a tag (or the reverse
#                 complement of a tag); the read is written with everything
#                 up to and including the tag removed
#   trash.fasta   receives every read in which no tag was found, unchanged
#   tags.fasta    FASTA file of tag sequences (optional 4th argument)
#
# NOTE(review): the tag-file path was hard-coded in an earlier revision, but
# the literal was lost (the statement no longer parsed). It is now taken from
# the optional 4th argument; the default below is a placeholder -- confirm the
# intended path.
my $DEFAULT_TAG_FILE = 'tags.fasta';

die "Usage: $0 <input.fasta> <output.fasta> <trash.fasta> [tags.fasta]\n"
    if @ARGV < 3;

my ($file, $output, $trash) = @ARGV;
my $tag_path = defined $ARGV[3] ? $ARGV[3] : $DEFAULT_TAG_FILE;

# Load the tags once, instead of re-opening and re-parsing the tag file for
# every read.
my @tags;
my $tag_in = Bio::SeqIO->new(-file => $tag_path, -format => "fasta");
while (my $tag_obj = $tag_in->next_seq) {
    my $tag = $tag_obj->seq;

    # An empty tag would "match" every read at offset 0; skip it.
    next unless defined $tag && length $tag;
    print "tag = ", $tag, "\n";

    # Reverse complement: reverse a *copy* first, then complement that copy.
    # Writing `reverse $tag =~ tr/.../.../` would bind tr/// to $tag itself,
    # complementing the tag in place and reversing tr's return value (a
    # character count) rather than the sequence.
    (my $rc_tag = scalar reverse $tag) =~ tr/ACTGactg/TGACtgac/;

    push @tags, {
        name    => $tag_obj->id,
        forward => $tag,
        revcomp => $rc_tag,
    };
}

my $seq_in = Bio::SeqIO->new(-file => $file, -format => "fasta");

open my $out_fh, '>', $output
    or die "Cannot open output file '$output' for writing: $!\n";
open my $trash_fh, '>', $trash
    or die "Cannot open trash file '$trash' for writing: $!\n";

my $temp_seq_counter = 0;    # number of reads examined
my $match_counter    = 0;    # number of reads in which a tag was found

while (my $seq_obj = $seq_in->next_seq) {
    $temp_seq_counter += 1;

    my $temp_seq = $seq_obj->seq;
    $temp_seq = '' unless defined $temp_seq;
    my $temp_seq_name = $seq_obj->id;

    my $found = 0;
    my ($seq, $position, $tag_name);

    # Every tag is tested; if several tags match, the last one in the tag
    # file wins.
    for my $tag (@tags) {

        # Literal substring search. Unlike m//g this keeps no pos() state
        # between tags and does not treat tag text as a regex. The forward
        # tag is tried first, then its reverse complement.
        my $hit     = $tag->{forward};
        my $hit_idx = index($temp_seq, $hit);
        if ($hit_idx < 0) {
            $hit     = $tag->{revcomp};
            $hit_idx = index($temp_seq, $hit);
        }
        next if $hit_idx < 0;

        # 1-based position of the last base of the tag within the read.
        $position = $hit_idx + length $hit;

        # Keep only what follows the tag. substr's 0-based offset
        # $position is the first base *after* the tag, and it yields an
        # empty string (rather than raising an error) when the tag sits at
        # the very end of the read.
        $seq      = substr($temp_seq, $position);
        $tag_name = $tag->{name};
        $found    = 1;
    }

    if ($found) {
        $match_counter += 1;
        print {$out_fh} ">", $temp_seq_name, " Tag: ", $tag_name,
            " ending at ", $position, "\n", $seq, "\n";
    }
    else {
        # desc is undefined for headers that carry only an id.
        my $desc = $seq_obj->desc;
        $desc = '' unless defined $desc;
        print {$trash_fh} ">", $temp_seq_name, " ", $desc, "\n",
            $temp_seq, "\n";
    }
}

print $temp_seq_counter," seqeuences were tested and ",$match_counter," seqeuences have a tag.\n";

# Buffered write errors only surface at close, so check it.
close $out_fh   or die "Error closing output file '$output': $!\n";
close $trash_fh or die "Error closing trash file '$trash': $!\n";
####
>GJVIMO101AUT0H length=45 xy=0234_0223 region=1 run=R_2010_07_01_11_09_50_
ACGACACGTATACGTGCGTGTCGCGTCTCTCAGCACACAGAGTAG
>GJVIMO101ANKZK length=45 xy=0151_1902 region=1 run=R_2010_07_01_11_09_50_
ACGACACGTATCGCGCGCGNGCGCGCGCGCGCGCGCGCGCGCGCG
>GJVIMO101AOIE9 length=41 xy=0162_0179 region=1 run=R_2010_07_01_11_09_50_
ACGACACGTATCTCATTGTGCTCAAGGCCTGAGCACAATGA
>GJVIMO101ALCLG length=100 xy=0126_0114 region=1 run=R_2010_07_01_11_09_50_
ACGACACGTATGCTGCTGGTGCTGCTGTAACAGTTCCTGCTGATGCTGCAAGTGCTGCTG
CTGTAACTGTTGCTGCTGTAATCTCTGCTGCTGCTGCTGT
####
ACGACACGTAT