#### # This subroutine takes a file containing any number of FASTA sequences, and # by reading the sequences in a line at a time, searches all the sequences for # matches to binding sites entered. The header line of the FASTA sequence is # set to a key in a hash, and each site searched for is set to a key in a sub hash # for each Fasta ID. The position of the 1st base of the matches is set to the # values of each subhash. Only the position of the matches is # saved, no actual text. Any formatting or output of the matches needs to be # done elsewhere. #### sub SEARCHFASTA { my $site = 0; my $fasta_id = shift; my $position = 0; my $buffer = ''; my %matches = (); open(DNA,"<", "$filename") or croak("Cannot open $filename:$!\n"); seek(DNA,$index_hash{$fasta_id},0); GETSLINE: while($newline = ) { # commented out debugging statement: print "\$newline is: $newline"; # If new fasta header or blank line, if($newline =~ /(^>)|(^$)/) { chomp $newline; if ($newline eq $fasta_id) { print "Currently searching sequence:\n $newline\n"; unless ($buffer eq '') { $i = 0; # commented out debugging statement: print "in unless buffer loop\n"; while ($i < $num) { while($buffer =~ /$pattern[$i]/gxsi) { # commented out debugging statement: print " match is $1\n"; push @{$matches{$i}}, $position + $-[0]; } $i++; } $buffer = ''; $position = 0; } } # if the newline is not the fasta_id, then we finished searching it, and # need to exit the loop structure, and close the file. if ($newline ne $fasta_id) { last GETSLINE; } } chomp $newline; $buffer .= $newline; if (length($buffer) < $span ) { next GETSLINE; } # Search for the DNA pattern $i = 0; while ($i < $num) { while($buffer =~ /$pattern[$i]/gsxi) { push @{$matches{$i}}, $position + $-[0]; } $i++; } # Reset the position counter # (will be accurate after you reset the buffer, next) $position += (length($buffer) - $lg[0]+1); # Reset the buffer # Discard the first $max worth of data in the buffer $buffer = substr($buffer, (length($buffer) - $lg[0] + 1)); } # This erases some of the larger variables from the search routine $buffer = ''; $position = 0; for my $tfbs (keys %matches) { foreach (@{$matches{$tfbs}}) { $_ -= length($fasta_id); } } close(DNA) or croak("Cannot close $filename:$!\n"); return %matches; }