#! perl -slw
use strict;
use warnings;
use Data::Dump qw[ pp ];

# parse_records( $fh )
#
# Reads line-oriented hit records from the filehandle $fh and returns a
# hash reference keyed by the first line of each record (the exon id).
# Each value is an array reference of record hashes, in input order,
# because the same exon id may appear in more than one record.
#
# Record layout, one field per line:
#
#   exon id
#   "N different hits"      (optional; when absent a single hit is assumed)
#   gene id
#   NM id
#   snoRNA key
#   N x ( Query line, alignment-bar line, Sbjct line )
#   gene name
#   web link
#
# Each record hash has the keys gene_id, Nm_id, snoRNA_key, gene_name,
# web_link and seqs; seqs is an array reference holding one
# { query line => sbjct line } hash per hit.
#
# Blank lines before or after a record are skipped.  Dies if the input
# ends in the middle of a record.
sub parse_records {
    my ($fh) = @_;
    my %records;

    # Fetch the next line with its line ending removed.  Running out of
    # input inside a record is reported instead of storing undef fields.
    my $next_line = sub {
        my ($what) = @_;
        my $line = <$fh>;
        die "Unexpected end of input while reading $what\n"
            unless defined $line;
        chomp $line;
        return $line;
    };

    RECORD:
    while ( defined( my $exon = <$fh> ) ) {
        chomp $exon;

        # A stray blank line must not start a record keyed by ''.
        next RECORD if $exon !~ /\S/;

        my %record;
        my $hit_count = 1;

        # The line after the exon id is either the optional hit count
        # or already the gene id.
        my $line = $next_line->('gene id');
        if ( $line =~ m[(\d+) different hits] ) {
            $hit_count = $1;
            $line      = $next_line->('gene id');
        }
        $record{gene_id}    = $line;
        $record{Nm_id}      = $next_line->('NM id');
        $record{snoRNA_key} = $next_line->('snoRNA key');

        for ( 1 .. $hit_count ) {
            my $query = $next_line->('Query line');
            $next_line->('alignment bars');    # the "||||" line is discarded
            my $sbjct = $next_line->('Sbjct line');
            push @{ $record{seqs} }, { $query => $sbjct };
        }

        $record{gene_name} = $next_line->('gene name');
        $record{web_link}  = $next_line->('web link');

        push @{ $records{$exon} }, \%record;
    }

    return \%records;
}

# Dump the embedded sample data only when run as a script, so the file
# can also be loaded with require/do just to get parse_records().
# pp is called in void context here, as in the original script.
pp parse_records( \*DATA ) unless caller;

1;

=pod

=head1 SAMPLE OUTPUT

    C:\test>junk55
    {
      3  => [
              {
                Nm_id => "NM_001040105.1",
                gene_id => "GI:91982771",
                gene_name => "Homo sapiens mucin 17, cell surface associated (MUC17), mRNA.",
                seqs => [
                  {
                    "Query 4 TGGAGTCAAT 13" => "Sbjct 4854 TGGAGTCAAT 4845",
                  },
                ],
                snoRNA_key => "snoRNA 10",
                web_link => "http://www.ncbi.nlm.nih.gov/sites/entrez?cmd=Retrieve&db=nucleotide&dopt=GenBank&RID=UDU305DZ01N&log%24=nuclalign&blast_rank=97&list_uids=91982771",
              },
              {
                Nm_id => "NM_001100162.1",
                gene_id => "GI:154448895",
                gene_name => "Homo sapiens exportin 7 (XPO7), transcript variant 3, mRNA.",
                seqs => [
                  {
                    "Query 2 CCTGGAGTCGAGTG 15" => "Sbjct 146 CCTGGAGTCGAGTG 133",
                  },
                ],
                snoRNA_key => "snoRNA 25, 26 and 27",
                web_link => "http://www.ncbi.nlm.nih.gov/sites/entrez?cmd=Retrieve&db=nucleotide&dopt=GenBank&RID=UDW41RSS01S&log%24=nuclalign&blast_rank=2&list_uids=154448895",
              },
            ],
      4  => [
              {
                Nm_id => "NM_206862.2",
                gene_id => "GI:150418008",
                gene_name => "Homo sapiens transforming, acidic coiled-coil containing protein 2 (TACC2), transcript variant 1, mRNA.",
                seqs => [
                  {
                    "Query 1 ACCTGGAGTCGAG 13" => "Sbjct 4775 ACCTGGAGTCGAG 4763",
                  },
                ],
                snoRNA_key => "snoRNA 25, 26 and 27",
                web_link => "http://www.ncbi.nlm.nih.gov/sites/entrez?cmd=Retrieve&db=nucleotide&dopt=GenBank&RID=UDW41RSS01S&log%24=nuclalign&blast_rank=10&list_uids=150418008",
              },
            ],
      31 => [
              {
                Nm_id => "NM_002458.1",
                gene_id => "GI:153945877",
                gene_name => "Homo sapiens mucin 5B, oligomeric mucus/gel-forming (MUC5B), mRNA.",
                seqs => [
                  {
                    "Query 3 CTGGAGTCGAGTG 15" => "Sbjct 6818 CTGGAGTCGAGTG 6806",
                  },
                  {
                    "Query 3 CTGGAGTCGAGTG 15" => "Sbjct 8489 CTGGAGTCGAGTG 8477",
                  },
                  {
                    "Query 3 CTGGAGTCGAGTG 15" => "Sbjct 10589 CTGGAGTCGAGTG 10577",
                  },
                  {
                    "Query 3 CTGGAGTCGAGTG 15" => "Sbjct 12260 CTGGAGTCGAGTG 12248",
                  },
                ],
                snoRNA_key => "snoRNA 25, 26 and 27",
                web_link => "http://www.ncbi.nlm.nih.gov/sites/entrez?cmd=Retrieve&db=nucleotide&dopt=GenBank&RID=UDW41RSS01S&log%24=nuclalign&blast_rank=9&list_uids=153945877",
              },
            ],
    }

=cut

__DATA__
3
GI:91982771
NM_001040105.1
snoRNA 10
Query 4 TGGAGTCAAT 13
||||||||||
Sbjct 4854 TGGAGTCAAT 4845
Homo sapiens mucin 17, cell surface associated (MUC17), mRNA.
http://www.ncbi.nlm.nih.gov/sites/entrez?cmd=Retrieve&db=nucleotide&dopt=GenBank&RID=UDU305DZ01N&log%24=nuclalign&blast_rank=97&list_uids=91982771
3
GI:154448895
NM_001100162.1
snoRNA 25, 26 and 27
Query 2 CCTGGAGTCGAGTG 15
||||||||||||||
Sbjct 146 CCTGGAGTCGAGTG 133
Homo sapiens exportin 7 (XPO7), transcript variant 3, mRNA.
http://www.ncbi.nlm.nih.gov/sites/entrez?cmd=Retrieve&db=nucleotide&dopt=GenBank&RID=UDW41RSS01S&log%24=nuclalign&blast_rank=2&list_uids=154448895
31
4 different hits
GI:153945877
NM_002458.1
snoRNA 25, 26 and 27
Query 3 CTGGAGTCGAGTG 15
|||||||||||||
Sbjct 6818 CTGGAGTCGAGTG 6806
Query 3 CTGGAGTCGAGTG 15
|||||||||||||
Sbjct 8489 CTGGAGTCGAGTG 8477
Query 3 CTGGAGTCGAGTG 15
|||||||||||||
Sbjct 10589 CTGGAGTCGAGTG 10577
Query 3 CTGGAGTCGAGTG 15
|||||||||||||
Sbjct 12260 CTGGAGTCGAGTG 12248
Homo sapiens mucin 5B, oligomeric mucus/gel-forming (MUC5B), mRNA.
http://www.ncbi.nlm.nih.gov/sites/entrez?cmd=Retrieve&db=nucleotide&dopt=GenBank&RID=UDW41RSS01S&log%24=nuclalign&blast_rank=9&list_uids=153945877
4
GI:150418008
NM_206862.2
snoRNA 25, 26 and 27
Query 1 ACCTGGAGTCGAG 13
|||||||||||||
Sbjct 4775 ACCTGGAGTCGAG 4763
Homo sapiens transforming, acidic coiled-coil containing protein 2 (TACC2), transcript variant 1, mRNA.
http://www.ncbi.nlm.nih.gov/sites/entrez?cmd=Retrieve&db=nucleotide&dopt=GenBank&RID=UDW41RSS01S&log%24=nuclalign&blast_rank=10&list_uids=150418008