#!/usr/bin/perl
use strict;
use warnings;
# Read the FASTA file named below, join its sequence lines into one string,
# and print to STDOUT (one per line) every fragment that starts at each
# position of the sequence and has one of the configured window lengths.
# Only fragments that fit entirely inside the sequence are printed.
my $filename = "NM_014143.3.fasta";

# Window lengths, in bases, extracted at every start position.
my @seq = ( 1 .. 15, 30 .. 40, 50 .. 60 );

my $sequence   = read_sequence($filename);
my $seq_length = length $sequence;

# The range is empty when the sequence is empty, so nothing is printed then.
for my $pos ( 0 .. $seq_length - 1 ) {
    for my $ran (@seq) {

        # substr() silently shortens a window that runs past the end of the
        # string, which would emit fragments of the wrong length. @seq is in
        # ascending order, so once one window overruns, all later ones do.
        last if $pos + $ran > $seq_length;

        print substr( $sequence, $pos, $ran ), "\n";
    }
}

# Return the sequence stored in the FASTA file at $path as a single string
# with all whitespace removed.
#
# Lines starting with '>' (FASTA headers) and lines whose first non-blank
# character is '#' are skipped. Every other line is appended, so a file
# holding several records yields their sequences joined together.
#
# Dies with "Check the <path> <OS error>" if the file cannot be opened.
sub read_sequence {
    my ($path) = @_;

    open( my $infile, "<", $path ) or die "Check the $path $!\n";

    my $residues = '';
    while ( my $line = <$infile> ) {
        next if $line =~ /^>/;
        next if $line =~ /^\s*#/;

        # Removes the line ending (including a stray CR) and any embedded
        # blanks; a blank line becomes empty and contributes nothing.
        $line =~ s/\s+//g;
        $residues .= $line;
    }
    close($infile);

    return $residues;
}

# The text that follows is sample input and expected output, not Perl.
# __END__ stops the parser here so the script compiles with it in place.
__END__
####
Data file:
>gi|292658763|ref|NM_014143.3| Homo sapiens CD274 molecule (CD274), transcript variant 1, mRNA
GGCGCAACGCTGAGCAGCTGGCGCGTCCCGCGCGGCCCCAGTTCTGCGCAGCTTCCCGAGGCTCCGCACC
AGCCGCGCTTCTGTCCGCCTGCAGGGCATTCCAGAAAGATGAGGATATTTGCTGTCTTTATATTCATGAC
CTACTGGCATTTGCTGAACGCATTTACTGTCACGGTTCCCAAGGACCTATATGTGGTAGAGTATGGTAGC
AATATGACAATTGAATGCAAATTCCCAGTAGAAAAACAATTAGACCTGGCTGCACTAATTGTCTATTGGG
AAATGGAGGATAAGAACATTATTCAATTTGTGCATGGAGAGGAAGACCTGAAGGTTCAGCATAGTAGCTA
CAGACAGAGGGCCCGGCTGTTGAAGGACCAGCTCTCCCTGGGAAATGCTGCACTTCAGATCACAGATGTG
AAATTGCAGGATGCAGGGGTGTACCGCTGCATGATCAGCTATGGTGGTGCCGACTACAAGCGAATTACTG
TGAAAGTCAATGCCCCATACAACAAAATCAACCAAAGAATTTTGGTTGTGGATCCAGTCACCTCTGAACA
TGAACTGACATGTCAGGCTGAGGGCTACCCCAAGGCCGAAGTCATCTGGACAAGCAGTGACCATCAAGTC
CTGAGTGGTAAGACCACCACCACCAATTCCAAGAGAGAGGAGAAGCTTTTCAATGTGACCAGCACACTGA
GAATCAACACAACAACTAATGAGATTTTCTACTGCACTTTTAGGAGATTAGATCCTGAGGAAAACCATAC
####
Desired output:
GGCGCAACGCTGAGCAGCTGGCGCGTCCCGCGCGGCCCCAGTTCTGCGCAGCTTCCCGAGGCTCCGCACC
AGCCGCGCTTCTGTCCGCCTGCAGGGCATTCCAGAAAGATGAGGATATTTGCTGTCTTTATATTCATGAC
CTACTGGCATTTGCTGAACGCATTTACTGTCACGGTTCCCAAGGACCTATATGTGGTAGAGTATGGTAGC
AATATGACAATTGAATGCAAATTCCCAGTAGAAAAACAATTAGACCTGGCTGCACTAATTGTCTATTGGG
AAATGGAGGATAAGAACATTATTCAATTTGTGCATGGAGAGGAAGACCTGAAGGTTCAGCATAGTAGCTA
CAGACAGAGGGCCCGGCTGTTGAAGGACCAGCTCTCCCTGGGAAATGCTGCACTTCAGATCACAGATGTG
AAATTGCAGGATGCAGGGGTGTACCGCTGCATGATCAGCTATGGTGGTGCCGACTACAAGCGAATTACTG
TGAAAGTCAATGCCCCATACAACAAAATCAACCAAAGAATTTTGGTTGTGGATCCAGTCACCTCTGAACA
TGAACTGACATGTCAGGCTGAGGGCTACCCCAAGGCCGAAGTCATCTGGACAAGCAGTGACCATCAAGTC
CTGAGTGGTAAGACCACCACCACCAATTCCAAGAGAGAGGAGAAGCTTTTCAATGTGACCAGCACACTGA
GAATCAACACAACAACTAATGAGATTTTCTACTGCACTTTTAGGAGATTAGATCCTGAGGAAAACCATAC