twaddlac has asked for the wisdom of the Perl Monks concerning the following question:
I am having a little trouble getting the match regex to work for whatever reason. I have no idea how this isn't working and my head is spinning about it. If you can, could you look at my code and see if you find anything strange about it? Thank you very much!
Sample input:#! usr/bin/perl -w use warnings; use strict; use Bio::Seq; use Bio::SeqIO; #print "What is the filepath of the input file?\n"; my $file = $ARGV[0]; #print "What is the name of the output file?\n"; my $output = $ARGV[1]; my $trash = $ARGV[2]; my $seq_in = Bio::SeqIO->new(-file => $file, -format => "fasta"); open OUTPUT, ">$output"; open TRASH, ">$trash"; my $temp_seq_counter = 0; my $match_counter = 0; my $seq; my $tag_id; my $position; my $tag_name; while(my $seq_obj = $seq_in->next_seq){ $temp_seq_counter += 1; my $temp_seq = $seq_obj->seq; my $temp_seq_name = $seq_obj->id; my $tag_file = Bio::SeqIO->new(-file => "</home/Alan/Desktop/seque +nce_data/INFLUENZA_01_07_2010/MIDS.fasta", -format => "fasta"); while(my $tag_obj = $tag_file->next_seq){ my $tag = $tag_obj->seq; print "tag = ",$tag,"\n"; my $RC_tag = reverse $tag =~ tr/ACTGactg/TGACtgac/; if($temp_seq =~ m/$tag/g || $temp_seq =~ m/$RC_tag/g){ $position = pos($temp_seq); my $length = length($temp_seq); $seq = $seq_obj->subseq($position,$length); $tag_name = $tag_obj->id; $match_counter = 1; } } if($match_counter == 1){ print OUTPUT ">",$temp_seq_name," Tag: ",$tag_name," ending at + ",$position,"\n",$seq,"\n"; } else{ print TRASH ">",$temp_seq_name," ",$seq_obj->desc,"\n",$temp_s +eq,"\n"; } $match_counter = 0; } print $temp_seq_counter," seqeuences were tested and ",$match_counter, +" seqeuences have a tag.\n"; close OUTPUT; close TRASH;
Pattern to match:>GJVIMO101AUT0H length=45 xy=0234_0223 region=1 run=R_2010_07_01_11_09 +_50_ ACGACACGTATACGTGCGTGTCGCGTCTCTCAGCACACAGAGTAG >GJVIMO101ANKZK length=45 xy=0151_1902 region=1 run=R_2010_07_01_11_09 +_50_ ACGACACGTATCGCGCGCGNGCGCGCGCGCGCGCGCGCGCGCGCG >GJVIMO101AOIE9 length=41 xy=0162_0179 region=1 run=R_2010_07_01_11_09 +_50_ ACGACACGTATCTCATTGTGCTCAAGGCCTGAGCACAATGA >GJVIMO101ALCLG length=100 xy=0126_0114 region=1 run=R_2010_07_01_11_0 +9_50_ ACGACACGTATGCTGCTGGTGCTGCTGTAACAGTTCCTGCTGATGCTGCAAGTGCTGCTG CTGTAACTGTTGCTGCTGTAATCTCTGCTGCTGCTGCTGT
ACGACACGTAT
|
|---|
| Replies are listed 'Best First'. | |
|---|---|
|
Re: Regex Match Problem
by umasuresh (Hermit) on Aug 24, 2010 at 17:55 UTC | |
|
Re: Regex Match Problem
by mwah (Hermit) on Aug 24, 2010 at 19:07 UTC |