#!/usr/bin/perl
use strict;
use warnings;
# Main script: read a FASTA-style file record by record and report, for every
# record, where the query typed on STDIN occurs in that record's sequence.
#
# Usage: perl <script> <fasta-file>
#
# Outside of a sub, 'shift' removes the first item from @ARGV.
my $nomefile = shift;
die "Usage: $0 <fasta-file>\n" unless defined $nomefile;
open my $fh, '<', $nomefile or die "Unable to open $nomefile. $!";

print "Insert query:\n";
my $query = <STDIN>;
die "No query given.\n" unless defined $query;    # STDIN was closed / at EOF
$query =~ s/\r?\n\z//;    # like chomp, but also copes with CRLF input
$query = uc $query;
# An empty query would "match" at every position of every sequence.
die "No query given.\n" if $query eq '';

# The first line is expected to be the ID (header) of the first record.
my $id = <$fh>;
die "$nomefile is empty.\n" unless defined $id;
$id =~ s/\r?\n\z//;

# Start from '' (not undef) so a record without sequence lines is still valid.
my $sequenza = '';

print "\nResults for $nomefile\n\n";
while (my $line = <$fh>) {
    $line =~ s/\r?\n\z//;
    if (substr($line, 0, 1) eq '>') {
        # A new header closes the previous record: report it, then reset.
        find_matches($id, $sequenza, $query);
        $sequenza = '';
        $id = $line;
    }
    else {
        # Sequence data may span several lines; upper-case it so the
        # comparison with the (upper-cased) query is case-insensitive.
        $sequenza .= uc $line;
    }
}
# Report the last record. Doing this once after the loop (instead of testing
# 'eof' on every iteration) also covers a file that holds only a header line.
find_matches($id, $sequenza, $query);
close $fh or die "Unable to close $nomefile. $!";
# find_matches($id, $sequenza, $query)
#
# Prints a one-line report to the currently selected output handle saying
# where $query occurs in $sequenza.
#
#   $id       - record identifier, echoed verbatim in the report
#   $sequenza - sequence text to search (undef is treated as empty)
#   $query    - text to look for; it is matched literally, not as a regex
#
# Positions are 1-based and overlapping occurrences are all reported.
# Returns nothing.
sub find_matches {
    my ($id, $sequenza, $query) = @_;

    # A record without any sequence lines may reach us as undef.
    $sequenza = '' unless defined $sequenza;

    my @pos;
    # \Q...\E escapes regex metacharacters so user input is searched for as
    # plain text and cannot break the pattern. The zero-width lookahead lets
    # the next search start one character later, so overlaps are found too.
    while ($sequenza =~ /(?=\Q$query\E)/g) {
        # $-[0] is the 0-based offset of the match start; report it 1-based.
        push @pos, $-[0] + 1;
    }

    if (@pos) {
        printf "%s compare %d volte in posizione %s with ID %s\n",
            $query, scalar @pos, "@pos", $id;
    }
    else {
        print "$query does not appear in the sequence with ID $id\n";
    }
    return;
}
Here is the output from 3 runs of the program (using the same data file I used in my first post).
C:\Old_Data\perlp>perl t33.pl fasta.txt
Insert query:
ttag
Results for fasta.txt
TTAG compare 1 volte in posizione 108 with ID >chr1
TTAG does not appear in the sequence with ID >chrM
C:\Old_Data\perlp>perl t33.pl fasta.txt
Insert query:
aaaaa
Results for fasta.txt
AAAAA compare 3 volte in posizione 59 130 131 with ID >chr1
AAAAA does not appear in the sequence with ID >chrM
C:\Old_Data\perlp>perl t33.pl fasta.txt
Insert query:
tagcgat
Results for fasta.txt
TAGCGAT does not appear in the sequence with ID >chr1
TAGCGAT does not appear in the sequence with ID >chrM
 
And it seems that the positions of the oligos found in the sequence are shifted by one. I know that we count from zero, but I can't figure out how to change it.
The statement `push @pos, $-[0] + 1;` gives the position as though counting from 1 (instead of 0): it adds 1 to the offset of the beginning of the pattern match.
The `@-` special variable is documented in perlvar, in the section "Variables related to regular expressions". Scroll down to `@LAST_MATCH_START`.
It says: "`$-[0]` is the offset of the start of the last successful match. `$-[n]` is the offset of the start of the substring matched by the n-th subpattern, or undef if the subpattern did not match."