in reply to Bioinformatic task

Does the DNA occupy multiple lines between the single header lines? (I.e., is this a standard FASTA-format file?)

If so, this might work for you:

#! perl -slw
use strict;
use Data::Dump qw[ pp ];

$Data::Dump::WIDTH = 80;

## Parse the FASTA text that follows __DATA__ into %sequences:
##   header line (minus its leading '>')  =>  sequence lines joined together.
## The resulting hash is then dumped with pp().
my %sequences;

{
    ## A record ends where the next header begins, i.e. at a '>' that starts
    ## a line. Using "\n>" instead of a bare '>' as the input separator keeps
    ## a '>' inside a description (e.g. "A->B") from splitting a record.
    ## The bare block confines the change to $/ to this loop.
    local $/ = "\n>";

    my $at_start = 1;

    while( my $record = <DATA> ) {
        chomp $record;          ## Drop the trailing "\n>" separator, if any
        $record =~ tr/\r//d;    ## Tolerate CRLF line endings

        ## Only the very first read can still carry a leading '>'. If it
        ## lacks one, it is text preceding the first header: discard it.
        if( $at_start ) {
            $at_start = 0;
            $record =~ s/\A>// or next;
        }

        my( $id, @lines ) = split "\n", $record;
        next unless defined $id;    ## Nothing between two separators

        $sequences{ $id } = join '', @lines;
    }
}

pp \%sequences;

__DATA__
>uc002yje.1 chr21:13973492-13974491
cccctgccccaccgcaccctggattactgcacgccaagaccctcacctga
acgcgccctacactctggcatgggggaacccggccccgcagagccctgga
CTCTGACATTGGAGGACTCCTCGGCTACGTCCTGGACTCCTGCACAAGAG
>uc002yje.2 chr21:13974492-13975432
cccctgccccaccgcaccctggattactgcacgccaagaccctcacctga
acgcgccctacactctggcatgggggaaaaaacccggccccgcagagccctgga
CTCTGACATTGGAGGACTCCTCGGCTACGTCCTGGACTCCTGCACAAGAG
>uc002yje.3 chr21:13975431-13976330
cccctgccccaccgcaccctggattactgcacgccaagaccctcacctga
acgcgccctacactctggcatgggggaacccggccccgcagagggccctgga
CTCTGACATTGGAGGACTCCTCGGCTACGTCCTGGACTCCTGCACAAGAG

Outputs

c:\test>fasta
{
  "uc002yje.1 chr21:13973492-13974491" => "cccctgccccaccgcaccctggattactgcacgccaagaccctcacctgaacgcgccctacactctggcatgggggaacccggccccgcagagccctggaCTCTGACATTGGAGGACTCCTCGGCTACGTCCTGGACTCCTGCACAAGAG",
  "uc002yje.2 chr21:13974492-13975432" => "cccctgccccaccgcaccctggattactgcacgccaagaccctcacctgaacgcgccctacactctggcatgggggaaaaaacccggccccgcagagccctggaCTCTGACATTGGAGGACTCCTCGGCTACGTCCTGGACTCCTGCACAAGAG",
  "uc002yje.3 chr21:13975431-13976330" => "cccctgccccaccgcaccctggattactgcacgccaagaccctcacctgaacgcgccctacactctggcatgggggaacccggccccgcagagggccctggaCTCTGACATTGGAGGACTCCTCGGCTACGTCCTGGACTCCTGCACAAGAG",
}