Okay. You want the snoRNA text used as the primary key. Output like this?
C:\test>junk55
{
  "snoRNA 10" => [
    {
      Nm_id => "NM_001040105",
      exon => 3,
      gene_id => 91982771,
      gene_name => "Homo sapiens mucin 17, cell surface associated (MUC17), mRNA.",
      seqs => [
        { query => [4, "TGGAGTCAAT", 13], sbjct => [4854, "TGGAGTCAAT", 4845] },
      ],
      web_link => "http://www.ncbi.nlm.nih.gov/sites/entrez?cmd=Retrieve&db=nucleotide&dopt=GenBank&RID=UDU305DZ01N&log%24=nuclalign&blast_rank=97&list_uids=91982771",
    },
  ],
  "snoRNA 25, 26 and 27" => [
    {
      Nm_id => "NM_001100162",
      exon => 3,
      gene_id => 154448895,
      gene_name => "Homo sapiens exportin 7 (XPO7), transcript variant 3, mRNA.",
      seqs => [
        {
          query => [2, "CCTGGAGTCGAGTG", 15],
          sbjct => [146, "CCTGGAGTCGAGTG", 133],
        },
      ],
      web_link => "http://www.ncbi.nlm.nih.gov/sites/entrez?cmd=Retrieve&db=nucleotide&dopt=GenBank&RID=UDW41RSS01S&log%24=nuclalign&blast_rank=2&list_uids=154448895",
    },
    {
      Nm_id => "NM_002458",
      exon => 31,
      gene_id => 153945877,
      gene_name => "Homo sapiens mucin 5B, oligomeric mucus/gel-forming (MUC5B), mRNA.",
      seqs => [
        {
          query => [3, "CTGGAGTCGAGTG", 15],
          sbjct => [6818, "CTGGAGTCGAGTG", 6806],
        },
        {
          query => [3, "CTGGAGTCGAGTG", 15],
          sbjct => [8489, "CTGGAGTCGAGTG", 8477],
        },
        {
          query => [3, "CTGGAGTCGAGTG", 15],
          sbjct => [10589, "CTGGAGTCGAGTG", 10577],
        },
        {
          query => [3, "CTGGAGTCGAGTG", 15],
          sbjct => [12260, "CTGGAGTCGAGTG", 12248],
        },
      ],
      web_link => "http://www.ncbi.nlm.nih.gov/sites/entrez?cmd=Retrieve&db=nucleotide&dopt=GenBank&RID=UDW41RSS01S&log%24=nuclalign&blast_rank=9&list_uids=153945877",
    },
    {
      Nm_id => "NM_206862",
      exon => 4,
      gene_id => 150418008,
      gene_name => "Homo sapiens transforming, acidic coiled-coil containing protein 2 (TACC2), transcript variant 1, mRNA.",
      seqs => [
        {
          query => [1, "ACCTGGAGTCGAG", 13],
          sbjct => [4775, "ACCTGGAGTCGAG", 4763],
        },
      ],
      web_link => "http://www.ncbi.nlm.nih.gov/sites/entrez?cmd=Retrieve&db=nucleotide&dopt=GenBank&RID=UDW41RSS01S&log%24=nuclalign&blast_rank=10&list_uids=150418008",
    },
  ],
}
If so, then the changes required from Re: split a file into records and process it are minimal:
#! perl -slw
use strict;
use Data::Dump qw[ pp ];

## Parse the records held in the DATA section into %records, a hash keyed
## by the snoRNA text line of each record. Each key maps to an array of
## record hashes with the fields: exon, gene_id, Nm_id, seqs, gene_name
## and web_link.
##
## Record layout, as consumed below (one item per input line):
##   1. a line containing the exon number
##   2. optionally, a line of the form "N different hits"
##   3. a line containing "GI:<digits>"
##   4. a line containing the NM_ accession
##   5. the snoRNA text (used as the primary key)
##   6. for each hit: a query line, a line that is skipped, a sbjct line
##   7. the gene name
##   8. the web link

## Return the next line of DATA; die if the input ends mid-record rather
## than silently storing undefined fields.
sub next_line {
    my $line = <DATA>;
    die "Unexpected end of DATA in the middle of a record\n"
        unless defined $line;
    return $line;
}

my %records;

RECORD:
until( eof( DATA ) ) {
    my %record;

    ## eof() was false, so this read is defined. Skip blank lines between
    ## (or after) records so they are not mistaken for a record header.
    my $header = <DATA>;
    next RECORD unless $header =~ m[\S];

    ## put the exon number inside the record
    ( $record{ exon } ) = ( $header =~ m[(\d+)] );

    ## A record holds one hit unless an "N different hits" line says otherwise.
    my $seqs = 1;
    my $line = next_line();
    if( $line =~ m[(\d+) different hits] ) {
        $seqs = $1;
        $line = next_line();
    }

    ( $record{ gene_id } ) = ( $line        =~ m[GI:(\d+)] );
    ( $record{ Nm_id   } ) = ( next_line()  =~ m[(NM_\d[\d]+)] );

    ## save the snoRNA text...
    chomp( my $snoRNA_key = next_line() );

    for( 1 .. $seqs ) {
        ## Split on whitespace and drop the leading field of each line.
        my $query = [ split ' ', next_line() ];
        shift @$query;

        next_line();    ## discard the line between query and sbjct

        my $sbjct = [ split ' ', next_line() ];
        shift @$sbjct;

        push @{ $record{ seqs } }, { query => $query, sbjct => $sbjct };
    }

    chomp( $record{ gene_name } = next_line() );
    chomp( $record{ web_link  } = next_line() );

    ## And use it as the primary key in the main hash
    push @{ $records{ $snoRNA_key } }, \%record;
}

pp \%records;

__DATA__
In reply to Re^5: split a file into records and process it
by BrowserUk
in thread split a file into records and process it
by biohisham
| For: | Use: |
| & | &amp; |
| < | &lt; |
| > | &gt; |
| [ | &#91; |
| ] | &#93; |