I admit, I didn't think that was legal. But anyway, the fix (use "\n>" as the record separator, so that only a '>' at the start of a line marks a new record) is quite trivial and has no effect on performance:
#! perl -slw
use strict;
use Data::Dump qw[ pp ];

my %sequences;

local $/ = '>';
(undef) = scalar <DATA>;    ## Discard first delimiter

local $/ = "\n>";
while( my $record = <DATA> ) {
    my @lines = split "\n", $record;
    pop @lines if $lines[-1] eq '>';
    my $id = shift @lines;
    $sequences{ $id } = join '', @lines;
}

pp \%sequences;

__DATA__
>uc002yje.1 > chr21:13973492-13974491
cccctgccccaccgcaccctggattactgcacgccaagaccctcacctga
acgcgccctacactctggcatgggggaacccggccccgcagagccctgga
CTCTGACATTGGAGGACTCCTCGGCTACGTCCTGGACTCCTGCACAAGAG
>uc002yje.2 > chr21:13974492-13975432
cccctgccccaccgcaccctggattactgcacgccaagaccctcacctga
acgcgccctacactctggcatgggggaaaaaacccggccccgcagagccctgga
CTCTGACATTGGAGGACTCCTCGGCTACGTCCTGGACTCCTGCACAAGAG
>uc002yje.3 > chr21:13975431-13976330
cccctgccccaccgcaccctggattactgcacgccaagaccctcacctga
acgcgccctacactctggcatgggggaacccggccccgcagagggccctgga
CTCTGACATTGGAGGACTCCTCGGCTACGTCCTGGACTCCTGCACAAGAG
Produces:
C:\test>fasta
{
  "uc002yje.1 > chr21:13973492-13974491" => "cccctgccccaccgcaccctggattactgcacgccaagaccctcacctgaacgcgccctacactctggcatgggggaacccggccccgcagagccctggaCTCTGACATTGGAGGACTCCTCGGCTACGTCCTGGACTCCTGCACAAGAG",
  "uc002yje.2 > chr21:13974492-13975432" => "cccctgccccaccgcaccctggattactgcacgccaagaccctcacctgaacgcgccctacactctggcatgggggaaaaaacccggccccgcagagccctggaCTCTGACATTGGAGGACTCCTCGGCTACGTCCTGGACTCCTGCACAAGAG",
  "uc002yje.3 > chr21:13975431-13976330" => "cccctgccccaccgcaccctggattactgcacgccaagaccctcacctgaacgcgccctacactctggcatgggggaacccggccccgcagagggccctggaCTCTGACATTGGAGGACTCCTCGGCTACGTCCTGGACTCCTGCACAAGAG",
}
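For anyone who wants to run this against a real FASTA file rather than the __DATA__ block, here is a minimal sketch of the same approach reading from a file handle. The file-name handling is my own assumption and not part of the post above; the record-separator logic is unchanged:

#! perl -slw
use strict;
use Data::Dump qw[ pp ];

## Hypothetical invocation: perl fasta.pl sequences.fasta
my $file = shift @ARGV or die "usage: $0 <file.fasta>\n";
open my $fh, '<', $file or die "Cannot open '$file': $!";

my %sequences;

local $/ = '>';
(undef) = scalar <$fh>;    ## Discard everything up to the first '>'

local $/ = "\n>";          ## Only a '>' at the start of a line ends a record
while( my $record = <$fh> ) {
    my @lines = split "\n", $record;
    pop @lines if $lines[-1] eq '>';    ## Drop trailing delimiter (absent on the last record)
    my $id = shift @lines;
    $sequences{ $id } = join '', @lines;
}
close $fh;

pp \%sequences;

A chomp $record in place of the pop would also work, since with $/ set to "\n>" chomp strips exactly that trailing separator.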
In reply to Re^3: Bioinformatics: Slow Parsing of a Fasta File by BrowserUk
in thread Bioinformatics: Slow Parsing of a Fasta File by Anonymous Monk