#!/usr/bin/perl
use strict;
use warnings;

## Report every "polytail" found in the sequence data after __DATA__.
## A polytail is a maximal run of 'A' and/or 'N' characters that is at least
## $polytail_min_len characters long.  Line breaks in the data are ignored,
## so a run may span several records.  Each polytail is printed on its own
## line, in order of appearance.

## Minimum length of a run for it to be reported as a polytail.
my $polytail_min_len = 10;

## find_polytails($sequence, $min_len)
##   $sequence - string to scan
##   $min_len  - minimum run length to report (values below 1 are treated as 1)
## Returns the list of maximal runs of 'A'/'N' characters in $sequence that
## are at least $min_len characters long, in order of appearance.
sub find_polytails {
    my ($sequence, $min_len) = @_;

    # A zero/negative minimum would make the pattern match empty strings.
    $min_len = 1 if $min_len < 1;

    # The greedy quantifier makes each match a maximal run; in list context
    # a /g match with one capture group returns every captured run.
    return $sequence =~ /([AN]{$min_len,})/g;
}

## Data read so far that has not been fully scanned yet.  Between records it
## holds only the trailing run of A/N characters, which may continue in the
## next record.
my $workline = '';

## Read the data one record at a time.
while (my $record = <DATA>) {
    ## Remove extraneous (line-ending) characters.
    $record =~ s/[\r\n]//g;

    ## Add the new data to the end of the working string.
    $workline .= $record;

    ## Locate the last character that is not A/N.  Everything up to and
    ## including it can be scanned now, because no run inside that part can
    ## grow any further.  If there is no such character yet, keep collecting.
    next unless $workline =~ /\A.*[^AN]/s;
    my $scan_end = $+[0];    # offset just past the last non-A/N character

    print "$_\n" for find_polytails(substr($workline, 0, $scan_end),
                                    $polytail_min_len);

    ## Keep only the (possibly empty) trailing A/N run for the next record.
    $workline = substr($workline, $scan_end);
}

## End of data: whatever is left is a pure A/N run that nothing terminated;
## report it if it is long enough.
print "$_\n" for find_polytails($workline, $polytail_min_len);

__DATA__
123456789abcdefghijklmnopqrstuvwxyz
ACGGAAAAAAAAAAATCGGATCTGAATGTCTAGAGGGGTTCTCTCCCTTGGTGTGAGTCTAGCCCTGAAAGTTGCANANAN
NANANAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
NTCCCACTCTANCGCGAAGCCAAATTTGTCGAGAGTACTCTGGGGGGAAGAGATCAGAATTGTGCAGACTAATCCGTAACTGC
CAAGTACTATTGGCCCTGTTCCAACCATCTAACCTCCTTATGATAACCATGCCACTAAATGGGTTCCTGGATCTGCACCT
CATTCGCTCGCCTTATGGCCTCGGCTCTCTGCGTATCCACCCTCCTCGTCACCGCCATGCCCTTCGACCTTCAGCGGGGG