#!/usr/bin/perl
#
# Split a FASTA file by the presence of known tag sequences.
#
# Every record of the input FASTA is searched for each tag (and for the
# reverse complement of each tag).  Records containing a tag are written to
# the output file, trimmed so that only the bases AFTER the tag remain; all
# other records are written unchanged to the trash file.
#
# Usage:
#   script.pl <input.fasta> <output.fasta> <trash.fasta> [tags.fasta]
#
#   input.fasta   sequences to test
#   output.fasta  receives trimmed sequences that contain a tag
#   trash.fasta   receives sequences in which no tag was found
#   tags.fasta    optional; FASTA file of tag sequences (see
#                 $DEFAULT_TAG_FILE below for the fallback)

use strict;
use warnings;

use Bio::Seq;
use Bio::SeqIO;

# NOTE(review): the original script hard-coded the path of the tag FASTA
# file, but that literal was lost from the source.  Restore it here so the
# three-argument invocation keeps working; until then the path must be
# supplied as the fourth command-line argument.
my $DEFAULT_TAG_FILE = '';

main(@ARGV);

# Entry point: validates the arguments, loads the tags once, then streams the
# input records into either the output or the trash file and prints a summary.
sub main {
    my ($file, $output, $trash, $tag_path) = @_;

    die "Usage: $0 <input.fasta> <output.fasta> <trash.fasta> [tags.fasta]\n"
        unless defined $file && defined $output && defined $trash;

    $tag_path = $DEFAULT_TAG_FILE unless defined $tag_path;
    die "No tag FASTA file given: pass it as the 4th argument or set "
        . "\$DEFAULT_TAG_FILE in $0\n"
        unless length $tag_path;

    # The tags do not change between input records, so parse them only once.
    my @tags = load_tags($tag_path);

    my $seq_in = Bio::SeqIO->new(-file => $file, -format => "fasta");

    open my $out_fh, '>', $output
        or die "Cannot open '$output' for writing: $!\n";
    open my $trash_fh, '>', $trash
        or die "Cannot open '$trash' for writing: $!\n";

    my $temp_seq_counter = 0;    # records read
    my $tagged_counter   = 0;    # records in which at least one tag was found

    while (my $seq_obj = $seq_in->next_seq) {
        $temp_seq_counter += 1;

        my $temp_seq = $seq_obj->seq;
        $temp_seq = '' unless defined $temp_seq;
        my $temp_seq_name = $seq_obj->id;

        my ($tag_name, $position) = find_tag($temp_seq, \@tags);

        if (defined $position) {
            $tagged_counter += 1;

            # $position is the 0-based offset just past the tag, so substr
            # from there yields exactly the bases following the tag (possibly
            # none when the tag sits at the very end of the sequence).
            my $seq = substr $temp_seq, $position;
            print {$out_fh} ">", $temp_seq_name, " Tag: ", $tag_name,
                " ending at ", $position, "\n", $seq, "\n";
        }
        else {
            # FASTA headers without a description yield undef here.
            my $desc = $seq_obj->desc;
            $desc = '' unless defined $desc;
            print {$trash_fh} ">", $temp_seq_name, " ", $desc, "\n",
                $temp_seq, "\n";
        }
    }

    print $temp_seq_counter, " seqeuences were tested and ",
        $tagged_counter, " seqeuences have a tag.\n";

    # Buffered write errors only surface at close, so check it.
    close $out_fh   or die "Error closing '$output': $!\n";
    close $trash_fh or die "Error closing '$trash': $!\n";

    return;
}

# Read every record of the tag FASTA file at $path and return a list of hash
# references with the keys id (tag name), seq (tag as written) and rc (its
# reverse complement).  Empty tag records are skipped because an empty string
# would "match" every sequence.
sub load_tags {
    my ($path) = @_;

    my $tag_file = Bio::SeqIO->new(-file => $path, -format => "fasta");

    my @tags;
    while (my $tag_obj = $tag_file->next_seq) {
        my $tag = $tag_obj->seq;
        next unless defined $tag && length $tag;

        print "tag = ", $tag, "\n";
        push @tags,
            {
            id  => $tag_obj->id,
            seq => $tag,
            rc  => reverse_complement($tag),
            };
    }

    return @tags;
}

# Return the reverse complement of a DNA string.  Only A/C/G/T (either case)
# are complemented; any other character is left as it is.
sub reverse_complement {
    my ($dna) = @_;

    # Work on a reversed copy: tr/// modifies its target in place and returns
    # a character count, so it must not be applied to the caller's string or
    # used as the value.
    my $rc = scalar reverse $dna;
    $rc =~ tr/ACTGactg/TGACtgac/;

    return $rc;
}

# Search $sequence for every tag in @$tags, trying the forward orientation
# first and the reverse complement second.  Returns (tag id, end offset) for
# the LAST tag in file order that matches -- the same precedence the original
# loop had -- or an empty list when no tag is present.
sub find_tag {
    my ($sequence, $tags) = @_;

    my ($tag_name, $position);
    for my $tag (@{$tags}) {
        my $end = match_end($sequence, $tag->{seq});
        $end = match_end($sequence, $tag->{rc}) unless defined $end;
        next unless defined $end;

        ($tag_name, $position) = ($tag->{id}, $end);
    }

    return defined $position ? ($tag_name, $position) : ();
}

# Return the offset just past the first literal, case-sensitive occurrence of
# $motif in $sequence, or undef when it does not occur.  index() is used
# instead of a regex so the tag is never interpreted as a pattern and no
# pos() state is carried over from one search to the next.
sub match_end {
    my ($sequence, $motif) = @_;

    my $start = index $sequence, $motif;
    return undef if $start < 0;    ## no critic (ProhibitExplicitReturnUndef)

    return $start + length($motif);
}