#!/usr/bin/perl
# http://perlmonks.org/?node_id=1188439
#
# Reads the FASTA-style record stored after __DATA__, joins its sequence
# lines into one string, and prints every 23-character window ending in "GG"
# that occurs exactly once in that string.  Each hit is written to STDOUT as
# a two-line FASTA record:
#
#     >crispr_N
#     <23-character site>
#
# where N counts the hits from 1.  At most MAX_SITES hits are reported.
# NOTE(review): "21 characters + GG" presumably models a guide sequence
# followed by an NGG PAM -- confirm with the original author.

use strict;
use warnings;

# Upper bound on the number of sites reported.
use constant MAX_SITES => 1000;

# extract_sequence(@lines)
#
# Keeps only the lines made up entirely of A, C, G, T and whitespace (this
# drops ">" header lines and anything containing other characters), joins
# them, and strips every character that is not A, C, G or T.
# Returns the resulting string (empty when no line qualifies).
sub extract_sequence {
    my @lines = @_;

    my $joined = join '', grep { /^[\sACGT]+$/ } @lines;

    # tr with /c /d /r: delete the complement of ACGT and return the copy.
    return $joined =~ tr/ACGT//cdr;
}

# unique_sites($genome, $limit)
#
# Scans $genome left to right for every (possibly overlapping) window of
# 21 arbitrary characters followed by "GG".  A window is kept only when its
# text occurs exactly once in $genome; text that occurs two or more times
# (overlapping occurrences included) is skipped at every position.
# Returns the kept windows in order of appearance, at most $limit of them.
sub unique_sites {
    my ($genome, $limit) = @_;

    my @sites;

    # The capture sits inside a zero-width lookahead, so each /g iteration
    # advances by a single character and overlapping windows are all seen.
    while (@sites < $limit and $genome =~ /(?=(.{21}GG))/g) {
        my $site = $1;

        # A second occurrence anywhere after the first one means the site
        # is not unique.  index() does not disturb pos($genome).
        my $first = index $genome, $site;
        next if index($genome, $site, $first + 1) >= 0;

        push @sites, $site;
    }

    return @sites;
}

# main()
#
# Script entry point: reads the embedded data and prints the report.
sub main {
    my $genome = extract_sequence(<DATA>);

    my $count = 0;
    for my $site (unique_sites($genome, MAX_SITES)) {
        $count++;
        print ">crispr_$count\n$site\n";
    }

    return;
}

# Run only when executed as a script, so the subs above can be loaded
# (e.g. with require) without producing output.
main() unless caller;

1;

__DATA__
>
>2L type=chromosome_arm; loc=2L:1..23011544; ID=2L; dbxref=REFSEQ:NT_033779,GB:AE014134; MD5=bfdfb99d39fa5174dae1e2ecd8a231cd; length=23011544; release=r5.54; species=Dmel;
CCTCTCATTTTCTCTCCCATATTATAGGGAGAAATATGATCGCGTATGCG
AGAGTAGTGCCAACATATTGTGCTCTTTGATTTTTTGGCAACCCAAAATG
TAAATTCATTGCAACGTTAAATACAGCACAATATATGATCGCGTATGCGA
GAGTAGTGCCAACATATTGTGCTAATGAGTGCCTCTCGTTCTCTGTCTTA
TATTACCGCAAACCCAAAAAGACAATACACGACAGAGAGAGAGAGCAGCG
GAGATATTTAGATTGCCTATTAAATATGATCGCGTATGCGAGAGTAGTGC
CAACATATTGTGCTCTCTATATAATGACTGCCTCTCATTCTGTCTTATTT
TACCGCAAACCCAAATCGACAATGCACGACAGAGGAAGCAGAACAGATAT