use strict;
use warnings;

# Oligo search in a FASTA sequence.
#
# Usage:  perl script.pl <fasta_file>
#
# The query oligo is read from standard input.  Every window of the query's
# length in the sequence is counted and its 0-based start positions are
# recorded; the full table is printed, followed by a report of where the
# query occurs on the positive strand and (via its reverse complement) on
# the negative strand.
#
# All records of a multi-FASTA file are concatenated into one sequence:
# header lines (starting with ">") are skipped, not treated as separators.

# --- Read and normalise the query ------------------------------------------
print("Insert query:\n");
my $query = <STDIN>;
defined $query or die "No query given on standard input\n";
$query =~ s/\s+\z//;    # strip the newline (and a stray CR / trailing blanks)
$query = uc $query;

my $len = length($query);
$len > 0 or die "Query must not be empty\n";

# --- Load the sequence from the FASTA file ---------------------------------
my $nomefile = $ARGV[0];
defined $nomefile or die "Usage: $0 <fasta_file>\n";

# Three-argument open: the filename can never be interpreted as a mode.
open my $f, '<', $nomefile or die "cannot open $nomefile: $!";

my $line = <$f>;
if (!defined $line || substr($line, 0, 1) ne ">") {
    print STDERR ("Input file $nomefile not in FASTA format\n");
    exit 1;
}

my $sequenza = '';
while ($line = <$f>) {
    # Every sequence line is stripped, including the first one, so that no
    # line terminator ends up embedded in the sequence.
    $line =~ s/\s+\z//;
    next if substr($line, 0, 1) eq ">";
    $sequenza .= $line;
}
close $f;

$sequenza = uc $sequenza;
my $length = length($sequenza);

# --- Count every window of the query's length ------------------------------
my %oligo;    # oligo => number of occurrences
my %pos;      # oligo => [ 0-based start positions on the positive strand ]

# The range is empty when the sequence is shorter than the query.
for my $i (0 .. $length - $len) {
    my $piece = substr($sequenza, $i, $len);
    $oligo{$piece}++;
    push @{ $pos{$piece} }, $i;
}

foreach my $piece (sort keys %oligo) {
    print("$piece\t$oligo{$piece}\t@{$pos{$piece}}\n\n");
}

# --- Report the query on both strands --------------------------------------
my $revcomp = scalar reverse $query;
$revcomp =~ tr/ATCG/TAGC/;

my $found = 0;

if (exists $oligo{$query}) {
    print("$query\t appear $oligo{$query} times\t in position @{$pos{$query}} on positive strand\n");
    $found = 1;
}

if (exists $oligo{$revcomp}) {
    # A reverse-complement hit starting at positive-strand offset i covers
    # i .. i+len-1; read 5'->3' along the negative strand, the query starts
    # at 0-based offset (sequence length - len - i).
    my @neg_positions = map { $length - $len - $_ } @{ $pos{$revcomp} };
    print("$revcomp\t appear $oligo{$revcomp} times\t in position @neg_positions on negative strand\n");
    $found = 1;
}

print("$query\t not found on either strand\n") unless $found;