#!/usr/local/bin/perl
#
# Scans a text report line by line, copies one region of it to an output
# file, and sorts interesting lines into per-category arrays.
#
# NOTE(review): the "Query"/"Sbjct"/"GI:"/"NM_" patterns suggest the input is
# a BLAST-style report -- confirm against the real test.txt.
#
# Collected arrays (file-scoped so later code can use them):
#   @snoRNA      lines mentioning a snoRNA followed by a number/separator
#   @exonNumbers lines that consist solely of digits
#   @geneID      lines starting with "GI:<digits>[.<digits>]"
#   @productID   lines starting with "NM_<digits>[.<digits>]" (right-trimmed)
#   @geneNames   lines containing "homo sapiens" (case-insensitive)
#   @references  lines containing an "http:" URL
#   @queries     "Query" alignment lines, blanks after the label removed
#   @subjects    "Sbjct" alignment lines, blanks after the label removed

use strict;
use warnings;

my $in_path  = "F:/Bioinformatics_NCBI/20MARCH_10/PERL Analysis/test.txt";
my $out_path = "F:/Bioinformatics_NCBI/20MARCH_10/PERL Analysis/testOut.txt";    # TESTING

open(my $in_fh, '<', $in_path)
    or die "Cannot open '$in_path' for reading: $!\n";
open(my $out_fh, '>', $out_path)
    or die "Cannot open '$out_path' for writing: $!\n";

my (@snoRNA,    @exonNumbers, @geneID,  @productID,
    @geneNames, @references,  @queries, @subjects);

while (my $line = <$in_fh>) {
    chomp $line;

    # Flip-flop range: opens on a digits-only line and closes on the first
    # following line that contains a web reference.  The closing pattern must
    # not require "\n" -- the line has already been chomped.
    if (($line =~ /^\d+$/) .. ($line =~ /http:/)) {
        # Only echo lines that carry a word character or a pipe; purely
        # blank/punctuation lines inside the range are dropped.
        print {$out_fh} $line, "\n" if $line =~ /\w|\|/;
    }

    # snoRNA followed by blanks or digits, then a blank, digit, pipe or hyphen.
    # The hyphen is placed last in the class so it is taken literally.
    if ($line =~ /snoRNA(?:\s+|\d+)[\s\d|-]/) {
        push @snoRNA, $line;
    }

    # Exon numbers: the line is nothing but digits.
    if ($line =~ /^\d+$/) {
        push @exonNumbers, $line;
    }

    # Gene IDs, with an optional ".version" suffix.
    if ($line =~ /^GI:\d+(?:\.\d+)?/) {
        push @geneID, $line;
    }

    # Gene product IDs, with an optional ".version" suffix.
    if ($line =~ /^NM_\d+(?:\.\d+)?/) {
        (my $product = $line) =~ s/\s+$//;    # strip trailing blanks
        push @productID, $product;
    }

    # Gene names.  TODO: names that wrap onto following lines are not joined.
    if ($line =~ /homo sapiens/i) {
        push @geneNames, $line;
    }

    # Web references.  TODO: URLs that wrap onto following lines are not joined.
    if ($line =~ /http:/) {
        push @references, $line;
    }

    # Alignment rows: drop the blanks between the label and the start
    # coordinate, so "Query  1  ACGT" is stored as "Query1  ACGT".
    if ($line =~ /^Query\s+\d+\s+[agtc]/i) {
        (my $query = $line) =~ s/^(Query)\s+/$1/i;
        push @queries, $query;
    }

    if ($line =~ /^sbjct\s+\d+\s+[agtc]/i) {
        (my $subject = $line) =~ s/^(sbjct)\s+/$1/i;
        push @subjects, $subject;
    }
}

close $in_fh;

# Buffered write errors only surface at close, so check it.
close $out_fh
    or die "Cannot close '$out_path': $!\n";