> >2L type=chromosome_arm; loc=2L:1..23011544; ID=2L; dbxref=REFSEQ:NT_033779,GB:AE014134; MD5=bfdfb99d39fa5174dae1e2ecd8a231cd; length=23011544; release=r5.54; species=Dmel;
CGACAATGCACGACAGAGGAAGCAGAACAGATATTTAGATTGCCTCTCAT
TTTCTCTCCCATATTATAGGGAGAAATATGATCGCGTATGCGAGAGTAGT
GCCAACATATTGTGCTCTTTGATTTTTTGGCAACCCAAAATGGTGGCGGA
TGAACGAGATGATAATATATTCAAGTTGCCGCTAATCAGAAATAAATTCA
TTGCAACGTTAAATACAGCACAATATATGATCGCGTATGCGAGAGTAGTG
CCAACATATTGTGCTAATGAGTGCCTCTCGTTCTCTGTCTTATATTACCG
CAAACCCAAAAAGACAATACACGACAGAGAGAGAGAGCAGCGGAGATATT
TAGATTGCCTATTAAATATGATCGCGTATGCGAGAGTAGTGCCAACATAT
TGTGCTCTCTATATAATGACTGCCTCTCATTCTGTCTTATTTTACCGCAA
ACCCAAATCGACAATGCACGACAGAGGAAGCAGAACAGATATTTAGATTG
CCTCTCATTTTCTCTCCCATATTATAGGGAGAAATATGATCGCGTATGCG
AGAGTAGTGCCAACATATTGTGCTCTTTGATTTTTTGGCAACCCAAAATG
GTGGCGGATGAACGAGATGATAATATATTCAAGTTGCCGCTAATCAGAAA
TAAATTCATTGCAACGTTAAATACAGCACAATATATGATCGCGTATGCGA
GAGTAGTGCCAACATATTGTGCTAATGAGTGCCTCTCGTTCTCTGTCTTA
TATTACCGCAAACCCAAAAAGACAATACACGACAGAGAGAGAGAGCAGCG
GAGATATTTAGATTGCCTATTAAATATGATCGCGTATGCGAGAGTAGTGC
CAACATATTGTGCTCTCTATATAATGACTGCCTCTCATTCTGTCTTATTT
TACCGCAAACCCAAATCGACAATGCACGACAGAGGAAGCAGAACAGATAT
####
>crispr_1
GGGTGGAGCTCCCGAAATGCAGG
>crispr_2
TTAATAAATATTGACACAGCGGG
>crispr_3
ATCGTGGGGCGTTTTGTGAAAGG
>crispr_4
AGTTTTTCACATAATCAGACAGG
>crispr_5
GTGTTGGATGAGTGTCCTCTGGG
>crispr_6
ATAGGTTGGTTGTTTTAAAAGGG
>crispr_7
AAATTTTTGTTGCCACTGAATGG
>crispr_8
AAGTTTCGAACTACGATGGTTGG
>crispr_9
CATGCTTTGTGGAAATAAGTCGG
>crispr_10
CACAGTGGGTGTTTGCACCTCGG
.....and so on
####
#!/usr/bin/perl
use warnings;
use strict;
# Main script: slide a fixed-width window along the sequence loaded from
# input.fasta, count every window whose last two characters are "GG", and
# write one "<window>\t<count>" line per distinct window to output.fasta.
#
# NOTE(review): the sample output shown above the script uses FASTA-style
# ">crispr_N" records, while this script writes tab-separated sequence/count
# lines -- confirm which format is actually wanted.
my $sequenceRef = loadSequence("input.fasta");

# Writing to a new file. A lexical handle with three-argument open is used,
# and the file name is included in the error so a failure can be diagnosed.
my $outputFile = 'output.fasta';
open( my $uniqueKmersFh, '>', $outputFile )
    or die "Cannot open '$outputFile' for writing: $!";

my $windowSize = 23;    # number of characters per window
my $stepSize   = 1;     # distance between consecutive window starts

# Maps each retained window sequence to the number of times it was seen.
my %windowSeqCount = ();

# Last offset at which a full-width window still fits. This is negative when
# the sequence is shorter than one window, in which case the loop never runs.
my $lastWindowStart = length($$sequenceRef) - $windowSize;

for (
    my $windowStart = 0 ;
    $windowStart <= $lastWindowStart ;
    $windowStart += $stepSize
  )
{
    my $windowSeq = substr( $$sequenceRef, $windowStart, $windowSize );

    # Only windows ending in "GG" are counted.
    $windowSeqCount{$windowSeq}++ if $windowSeq =~ /GG$/;
}

# Hash iteration order is unspecified, so the output lines are unordered.
for my $windowSeq ( keys %windowSeqCount ) {
    print {$uniqueKmersFh} $windowSeq, "\t", $windowSeqCount{$windowSeq}, "\n";
}

# Buffered write errors (e.g. a full disk) only surface at close time.
close($uniqueKmersFh)
    or die "Cannot close '$outputFile': $!";
# loadSequence($sequenceFile)
#
# Reads a FASTA-formatted file and concatenates every line that does not
# start with ">" (i.e. every non-header line) into a single string, with line
# endings removed. If the file holds several records they are all joined
# into one sequence.
#
# Parameters:
#   $sequenceFile - path of the file to read.
#
# Returns:
#   A reference to the concatenated sequence string (a reference is returned
#   so a potentially very large sequence is not copied).
#
# Dies:
#   If the file cannot be opened or closed; the message names the file.
sub loadSequence {
    my ($sequenceFile) = @_;
    my $sequence = "";

    # Lexical handle: scoped to this sub instead of a package-global bareword.
    open( my $fastaFh, "<", $sequenceFile )
        or die "Cannot open '$sequenceFile' for reading: $!";

    # Read line by line; "while (my $line = <...>)" stops at end of file.
    while ( my $line = <$fastaFh> ) {

        # Strip the line terminator, tolerating Windows (CRLF) files so a
        # stray "\r" never ends up inside the sequence.
        $line =~ s/\r?\n\z//;

        # Header lines start with ">" and carry no sequence data.
        if ( $line !~ /^>/ ) {
            $sequence .= $line;
        }
    }

    close($fastaFh)
        or die "Cannot close '$sequenceFile': $!";

    return \$sequence;
}