In reply to Reformatting a large FASTA file...
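See $INPUT_RECORD_SEPARATOR ($/) in perlvar: changing it lets each read from the filehandle return a whole FASTA record rather than a single line.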
#! perl -slw
use strict;
use Data::Dumper;

# Parse the FASTA records stored after __DATA__ into an array of arrays.
# Each entry holds the '|'-separated fields of the header line followed by
# the sequence with its line breaks removed, and the result is dumped to
# STDOUT with Data::Dumper.

# A record starts with '>' at the beginning of a line, so use "\n>" as the
# record separator. A bare '>' would also split on any '>' that happens to
# appear inside a description line.
local $/ = "\n>";

my @sequences;    # AoA: [ header fields ..., sequence ]

while ( my $record = <DATA> ) {
    chomp $record;         # Remove the trailing "\n>" separator, if present
    $record =~ s/\A>//;    # The very first record still has its leading '>'

    # Split to lines; trailing empty fields (final newline) are dropped
    my @lines = split /\n/, $record;

    # Skip blank records, e.g. empty lines before the first header
    next unless @lines;

    # First line is the header, everything after it is sequence data
    my $header = shift @lines;
    push @sequences, [ split( /\|/, $header ), join( '', @lines ) ];
}

print Dumper \@sequences;

__DATA__
>gi|2695846|emb|Y13255.1|ABY13255 Acipenser baeri mRNA for immunoglobulin heavychain, clone ScH 3.3
TGGTTACAACACTTTCTTCTTTCAATAACCACAATACTGCAGTACAATGGGGATTTTAACAGCTCTCTGTATAATAATGA
CAGCTCTATCAAGTGTCCGGTCTGATGTAGTGTTGACTGAGTCCGGACCAGCAGTTATAAAGCCTGGAGAGTCCCATAAA
CTGTCCTGTAAAGCCTCTGGATTCACATTCAGCAGCGCCTACATGAGCTGGGTTCGACAAGCTCCTGGAAAGGGTCTGGA
ATGGGTGGCTTATATTTACTCAGGTGGTAGTAGTACATACTATGCCCAGTCTGTCCAGGGAAGATTCGCCATCTCCAGAG
ACGATTCCAACAGCATGCTGTATTTACAAATGAACAGCCTGAAGACTGAAGACACTGCCGTGTATTACTGTGCTCGGGGC
GGGCTGGGGTGGTCCCTTGACTACTGGGGGAAAGGCACAATGATCACCGTAACTTCTGCTACGCCATCACCACCGACAGT
GTTTCCGCTTATGGAGTCATGTTGTTTGAGCGATATCTCGGGTCCTGTTGCTACGGGCTGCTTAGCAACCGGATTCTGCC
TACCCCCGCGACCTTCTCGTGGACTGATCAATCTGGAAAAGCTTTT
>gi|2695846|emb|Y13255.1|ABY13255 Acipenser baeri mRNA for immunoglobulin heavychain, clone ScH 3.3
TGGTTACAACACTTTCTTCTTTCAATAACCACAATACTGCAGTACAATGGGGATTTTAACAGCTCTCTGTATAATAATGA
CAGCTCTATCAAGTGTCCGGTCTGATGTAGTGTTGACTGAGTCCGGACCAGCAGTTATAAAGCCTGGAGAGTCCCATAAA
CTGTCCTGTAAAGCCTCTGGATTCACATTCAGCAGCGCCTACATGAGCTGGGTTCGACAAGCTCCTGGAAAGGGTCTGGA
ATGGGTGGCTTATATTTACTCAGGTGGTAGTAGTACATACTATGCCCAGTCTGTCCAGGGAAGATTCGCCATCTCCAGAG
ACGATTCCAACAGCATGCTGTATTTACAAATGAACAGCCTGAAGACTGAAGACACTGCCGTGTATTACTGTGCTCGGGGC
GGGCTGGGGTGGTCCCTTGACTACTGGGGGAAAGGCACAATGATCACCGTAACTTCTGCTACGCCATCACCACCGACAGT
GTTTCCGCTTATGGAGTCATGTTGTTTGAGCGATATCTCGGGTCCTGTTGCTACGGGCTGCTTAGCAACCGGATTCTGCC
TACCCCCGCGACCTTCTCGTGGACTGATCAATCTGGAAAAGCTTTT
>gi|2695846|emb|Y13255.1|ABY13255 Acipenser baeri mRNA for immunoglobulin heavychain, clone ScH 3.3
TGGTTACAACACTTTCTTCTTTCAATAACCACAATACTGCAGTACAATGGGGATTTTAACAGCTCTCTGTATAATAATGA
CAGCTCTATCAAGTGTCCGGTCTGATGTAGTGTTGACTGAGTCCGGACCAGCAGTTATAAAGCCTGGAGAGTCCCATAAA
CTGTCCTGTAAAGCCTCTGGATTCACATTCAGCAGCGCCTACATGAGCTGGGTTCGACAAGCTCCTGGAAAGGGTCTGGA
ATGGGTGGCTTATATTTACTCAGGTGGTAGTAGTACATACTATGCCCAGTCTGTCCAGGGAAGATTCGCCATCTCCAGAG
ACGATTCCAACAGCATGCTGTATTTACAAATGAACAGCCTGAAGACTGAAGACACTGCCGTGTATTACTGTGCTCGGGGC
GGGCTGGGGTGGTCCCTTGACTACTGGGGGAAAGGCACAATGATCACCGTAACTTCTGCTACGCCATCACCACCGACAGT
GTTTCCGCTTATGGAGTCATGTTGTTTGAGCGATATCTCGGGTCCTGTTGCTACGGGCTGCTTAGCAACCGGATTCTGCC
TACCCCCGCGACCTTCTCGTGGACTGATCAATCTGGAAAAGCTTTT
>gi|2695846|emb|Y13255.1|ABY13255 Acipenser baeri mRNA for immunoglobulin heavychain, clone ScH 3.3
TGGTTACAACACTTTCTTCTTTCAATAACCACAATACTGCAGTACAATGGGGATTTTAACAGCTCTCTGTATAATAATGA
CAGCTCTATCAAGTGTCCGGTCTGATGTAGTGTTGACTGAGTCCGGACCAGCAGTTATAAAGCCTGGAGAGTCCCATAAA
CTGTCCTGTAAAGCCTCTGGATTCACATTCAGCAGCGCCTACATGAGCTGGGTTCGACAAGCTCCTGGAAAGGGTCTGGA
ATGGGTGGCTTATATTTACTCAGGTGGTAGTAGTACATACTATGCCCAGTCTGTCCAGGGAAGATTCGCCATCTCCAGAG
ACGATTCCAACAGCATGCTGTATTTACAAATGAACAGCCTGAAGACTGAAGACACTGCCGTGTATTACTGTGCTCGGGGC
GGGCTGGGGTGGTCCCTTGACTACTGGGGGAAAGGCACAATGATCACCGTAACTTCTGCTACGCCATCACCACCGACAGT
GTTTCCGCTTATGGAGTCATGTTGTTTGAGCGATATCTCGGGTCCTGTTGCTACGGGCTGCTTAGCAACCGGATTCTGCC
TACCCCCGCGACCTTCTCGTGGACTGATCAATCTGGAAAAGCTTTT
Output
P:\test>junk $VAR1 = [ [ 'gi', '2695846', 'emb', 'Y13255.1', 'ABY13255 Acipenser baeri mRNA for immunoglobulin heavycha +in, clone ScH 3.3', 'TGGTTACAACACTTTCTTCTTTCAATAACCACAATACTGCAGTACAATGGGGATTTT +AACAGCTCTCTGTATAATAATGACAGCTCTATCAAGTGTCCGGTCTGATGTAGTGTTGACTGAGTCCGG +ACCAGCAGTTATAAAGCCTGGAGAGTCCCATAAACTGTCCTGTAAAGCCTCTGGATTCACATTCAGCAG +CGCCTACATGAGCTGGGTTCGACAAGCTCCTGGAAAGGGTCTGGAATGGGTGGCTTATATTTACTCAGG +TGGTAGTAGTACATACTATGCCCAGTCTGTCCAGGGAAGATTCGCCATCTCCAGAGACGATTCCAACAG +CATGCTGTATTTACAAATGAACAGCCTGAAGACTGAAGACACTGCCGTGTATTACTGTGCTCGGGGCGG +GCTGGGGTGGTCCCTTGACTACTGGGGGAAAGGCACAATGATCACCGTAACTTCTGCTACGCCATCACC +ACCGACAGTGTTTCCGCTTATGGAGTCATGTTGTTTGAGCGATATCTCGGGTCCTGTTGCTACGGGCTG +CTTAGCAACCGGATTCTGCCTACCCCCGCGACCTTCTCGTGGACTGATCAATCTGGAAAAGCTTTT' ], [ 'gi', '2695846', 'emb', 'Y13255.1', 'ABY13255 Acipenser baeri mRNA for immunoglobulin heavycha +in, clone ScH 3.3', 'TGGTTACAACACTTTCTTCTTTCAATAACCACAATACTGCAGTACAATGGGGATTTT +AACAGCTCTCTGTATAATAATGACAGCTCTATCAAGTGTCCGGTCTGATGTAGTGTTGACTGAGTCCGG +ACCAGCAGTTATAAAGCCTGGAGAGTCCCATAAACTGTCCTGTAAAGCCTCTGGATTCACATTCAGCAG +CGCCTACATGAGCTGGGTTCGACAAGCTCCTGGAAAGGGTCTGGAATGGGTGGCTTATATTTACTCAGG +TGGTAGTAGTACATACTATGCCCAGTCTGTCCAGGGAAGATTCGCCATCTCCAGAGACGATTCCAACAG +CATGCTGTATTTACAAATGAACAGCCTGAAGACTGAAGACACTGCCGTGTATTACTGTGCTCGGGGCGG +GCTGGGGTGGTCCCTTGACTACTGGGGGAAAGGCACAATGATCACCGTAACTTCTGCTACGCCATCACC +ACCGACAGTGTTTCCGCTTATGGAGTCATGTTGTTTGAGCGATATCTCGGGTCCTGTTGCTACGGGCTG +CTTAGCAACCGGATTCTGCCTACCCCCGCGACCTTCTCGTGGACTGATCAATCTGGAAAAGCTTTT' ], [ 'gi', '2695846', 'emb', 'Y13255.1', 'ABY13255 Acipenser baeri mRNA for immunoglobulin heavycha +in, clone ScH 3.3', 'TGGTTACAACACTTTCTTCTTTCAATAACCACAATACTGCAGTACAATGGGGATTTT +AACAGCTCTCTGTATAATAATGACAGCTCTATCAAGTGTCCGGTCTGATGTAGTGTTGACTGAGTCCGG +ACCAGCAGTTATAAAGCCTGGAGAGTCCCATAAACTGTCCTGTAAAGCCTCTGGATTCACATTCAGCAG +CGCCTACATGAGCTGGGTTCGACAAGCTCCTGGAAAGGGTCTGGAATGGGTGGCTTATATTTACTCAGG +TGGTAGTAGTACATACTATGCCCAGTCTGTCCAGGGAAGATTCGCCATCTCCAGAGACGATTCCAACAG 
+CATGCTGTATTTACAAATGAACAGCCTGAAGACTGAAGACACTGCCGTGTATTACTGTGCTCGGGGCGG +GCTGGGGTGGTCCCTTGACTACTGGGGGAAAGGCACAATGATCACCGTAACTTCTGCTACGCCATCACC +ACCGACAGTGTTTCCGCTTATGGAGTCATGTTGTTTGAGCGATATCTCGGGTCCTGTTGCTACGGGCTG +CTTAGCAACCGGATTCTGCCTACCCCCGCGACCTTCTCGTGGACTGATCAATCTGGAAAAGCTTTT' ], [ 'gi', '2695846', 'emb', 'Y13255.1', 'ABY13255 Acipenser baeri mRNA for immunoglobulin heavycha +in, clone ScH 3.3', 'TGGTTACAACACTTTCTTCTTTCAATAACCACAATACTGCAGTACAATGGGGATTTT +AACAGCTCTCTGTATAATAATGACAGCTCTATCAAGTGTCCGGTCTGATGTAGTGTTGACTGAGTCCGG +ACCAGCAGTTATAAAGCCTGGAGAGTCCCATAAACTGTCCTGTAAAGCCTCTGGATTCACATTCAGCAG +CGCCTACATGAGCTGGGTTCGACAAGCTCCTGGAAAGGGTCTGGAATGGGTGGCTTATATTTACTCAGG +TGGTAGTAGTACATACTATGCCCAGTCTGTCCAGGGAAGATTCGCCATCTCCAGAGACGATTCCAACAG +CATGCTGTATTTACAAATGAACAGCCTGAAGACTGAAGACACTGCCGTGTATTACTGTGCTCGGGGCGG +GCTGGGGTGGTCCCTTGACTACTGGGGGAAAGGCACAATGATCACCGTAACTTCTGCTACGCCATCACC +ACCGACAGTGTTTCCGCTTATGGAGTCATGTTGTTTGAGCGATATCTCGGGTCCTGTTGCTACGGGCTG +CTTAGCAACCGGATTCTGCCTACCCCCGCGACCTTCTCGTGGACTGATCAATCTGGAAAAGCTTTT' ] ];
|
|---|