#### Each record in the file has the following fields, in this order ####
Exon #
Gene id
Nm_id
snoRNA Key
Alignment text (Query and Sbjct sequence lines)
Gene name and weblink

                      ##Start the records###
3
GI:91982771
NM_001040105.1  
snoRNA 10
Query  4     TGGAGTCAAT  13
             ||||||||||
Sbjct  4854  TGGAGTCAAT  4845
Homo sapiens mucin 17, cell surface associated (MUC17), mRNA.
http://www.ncbi.nlm.nih.gov/sites/entrez?cmd=Retrieve&db=nucleotide&dopt=GenBank&RID=UDU305DZ01N&log%24=nuclalign&blast_rank=97&list_uids=91982771
3
GI:154448895
NM_001100162.1  
snoRNA 25, 26 and 27
Query  2    CCTGGAGTCGAGTG  15
            ||||||||||||||
Sbjct  146  CCTGGAGTCGAGTG  133
Homo sapiens exportin 7 (XPO7), transcript variant 3, mRNA.
http://www.ncbi.nlm.nih.gov/sites/entrez?cmd=Retrieve&db=nucleotide&dopt=GenBank&RID=UDW41RSS01S&log%24=nuclalign&blast_rank=2&list_uids=154448895					
31
4 different hits
GI:153945877
NM_002458.1  
snoRNA 25, 26 and 27
Query  3     CTGGAGTCGAGTG  15
             |||||||||||||
Sbjct  6818  CTGGAGTCGAGTG  6806
Query  3     CTGGAGTCGAGTG  15
             |||||||||||||
Sbjct  8489  CTGGAGTCGAGTG  8477
Query  3      CTGGAGTCGAGTG  15
              |||||||||||||
Sbjct  10589  CTGGAGTCGAGTG  10577
Query  3      CTGGAGTCGAGTG  15
              |||||||||||||
Sbjct  12260  CTGGAGTCGAGTG  12248
Homo sapiens mucin 5B, oligomeric mucus/gel-forming (MUC5B), mRNA.
http://www.ncbi.nlm.nih.gov/sites/entrez?cmd=Retrieve&db=nucleotide&dopt=GenBank&RID=UDW41RSS01S&log%24=nuclalign&blast_rank=9&list_uids=153945877
4
GI:150418008
NM_206862.2  
snoRNA 25, 26 and 27
Query  1     ACCTGGAGTCGAG  13
             |||||||||||||
Sbjct  4775  ACCTGGAGTCGAG  4763
Homo sapiens transforming, acidic coiled-coil containing protein 2 (TACC2), transcript variant 1, mRNA.
http://www.ncbi.nlm.nih.gov/sites/entrez?cmd=Retrieve&db=nucleotide&dopt=GenBank&RID=UDW41RSS01S&log%24=nuclalign&blast_rank=10&list_uids=150418008

##</code><code>##

#!/usr/local/bin/perl
use strict;
use warnings;
# Read the BLAST-hit report line by line, echo every record line to the
# output file, and sort the lines into one array per field type.
#
# A record starts on a line holding only digits (the exon number) and ends
# on the line that carries the NCBI web link.  The field arrays are filled
# in file order; nothing in this part of the script consumes them yet.
my $in_path  = "F:/Bioinformatics_NCBI/20MARCH_10/PERL Analysis/test.txt";
my $out_path = "F:/Bioinformatics_NCBI/20MARCH_10/PERL Analysis/testOut.txt";   #TESTING

open(my $in_fh,  '<', $in_path)  or die("$!\n");
open(my $out_fh, '>', $out_path) or die("$!\n");

# @exonNumbers was previously used without a declaration, which is a
# compile-time error under "use strict".
my (@snoRNA,    @exonNumbers, @geneID,  @productID,
    @geneNames, @references,  @queries, @subjects);

LINE:
while (my $line = <$in_fh>) {
    chomp $line;

    # Flip-flop: true from the exon-number line through the web-link line.
    # The old end pattern (a lookahead for "http:" followed by "\n") could
    # never match, so the range never switched off once it had started.
    if ($line =~ /^\d+$/ .. $line =~ /https?:/) {
        # Skip lines with neither a word character nor an alignment pipe
        # (i.e. blank lines); they carry no field data either.
        next LINE unless $line =~ /\w|\|/;
        print {$out_fh} $line, "\n";
    }

    # snoRNA key, e.g. "snoRNA 10" or "snoRNA 25, 26 and 27".  Requiring a
    # digit keeps the "snoRNA Key" header line out.
    if ($line =~ /snoRNA\s*\d/) {
        push @snoRNA, $line;
    }

    # Exon number: a line consisting solely of digits.
    if ($line =~ /^\d+$/) {
        push @exonNumbers, $line;
    }

    # Gene id, e.g. "GI:91982771".
    if ($line =~ /^GI:\d+/) {
        push @geneID, $line;
    }

    # Gene product id, e.g. "NM_001040105.1" (the version suffix is optional).
    if ($line =~ /^NM_\d+(?:\.\d+)?/) {
        my $product_id = $line;
        $product_id =~ s/\s+$//;    # the data carries trailing blanks
        push @productID, $product_id;
    }

    # Gene name line.  TODO: names spanning several lines are not handled.
    if ($line =~ /\bhomo sapiens\b/i) {
        push @geneNames, $line;
    }

    # Web reference.  TODO: links spanning several lines are not handled.
    if ($line =~ /https?:/) {
        push @references, $line;
    }

    # Alignment rows.  The whitespace run right after the "Query"/"Sbjct"
    # tag is removed, exactly as before, but without interpolating a capture
    # from an earlier match into a new pattern.
    if ($line =~ /^Query\s+\d+\s+[agtc]/i) {
        my $query_row = $line;
        $query_row =~ s/\s+//;
        push @queries, $query_row;
    }
    if ($line =~ /^sbjct\s+\d+\s+[agtc]/i) {
        my $subject_row = $line;
        $subject_row =~ s/\s+//;
        push @subjects, $subject_row;
    }
}

close($in_fh);
# Buffered write errors only surface at close time, so check it.
close($out_fh) or die("$!\n");

##</code><code>##