in reply to doubt in storing a data of 2 lines in an array.
# Split a PIR-style flat file (read from the DATA section below) into
# records, then split each record into its header fields, and print both
# so that fields spanning more than one line can be checked by eye.
#
# Output, per record: the whole record, a row of '+', then every field
# followed by a row of '-', and finally a row of '*'.
use strict;
use warnings;

# Keywords that open a field.  The \b keeps a keyword from matching the
# leading letters of a longer word, e.g. a wrapped sequence line that
# happens to begin "TITLEK...".
my $field_header_rx = qr{(?:ENTRY|TITLE|ORGANISM|ACCESSIONS)\b};

# A record runs from an ENTRY keyword up to and including the newline
# that directly precedes the next ENTRY keyword or the end of the text.
my $record_rx = qr{
    ( ENTRY\b .*? \n )
    (?= ENTRY\b | \z )
}xs;

# A field runs from its keyword up to and including the newline that
# directly precedes the next keyword or the end of the record, so any
# continuation lines stay attached to the field they belong to.
my $field_rx = qr{
    ( $field_header_rx .*? \n )
    (?= $field_header_rx | \z )
}xs;

# Slurp the whole data section in one read.
my $file_text = do { local $/; <DATA> };

# Both patterns need a newline before the end of the text; without this
# a final record that lacks a trailing newline would be dropped.
$file_text .= qq{\n} if $file_text !~ m{\n\z};

for my $record ( $file_text =~ m{$record_rx}g ) {
    print $record, q{+} x 50, qq{\n};

    for my $field ( $record =~ m{$field_rx}g ) {
        print $field, q{-} x 50, qq{\n};
    }

    print q{*} x 50, qq{\n};
}

# NOTE(review): the line breaks in the sample data below were reconstructed
# from a whitespace-collapsed copy of this script.  The position of each
# TITLE continuation line is a best guess -- confirm against the original.
__END__
ENTRY CCHU #type complete
TITLE cytochrome c [validated] - human
Homo sapiens
ORGANISM #formal_name Homo sapiens #common_name man
ACCESSIONS A31764; A05676; I55192; A00001
MGDVEKGKKIFIMKCSQCHTVEMGDVEKGGKHKTGPNLHGMIYARAJLFGRKTSEKGQAPGYSYTAANKNKGIIWGEDTLMEYLENPKKYIP
ENTRY CCCZ #type complete
TITLE cytochrome c - chimpanzee (tentative sequence)
ORGANISM #formal_name Pan troglodytes #common_name chimpanzee
ACCESSIONS A00002
GDVEKGKKIFIMKCSQCHTSEKVEKGSSSKHKSSSTGPNLHGLMIYARAJFGRKTGSEKQAPGYSYTAANKNKGIIWGED
ENTRY CCMQR #type complete
TITLE cytochrome c - rhesus macaque (tentative sequence)
Macaca mulatta
ORGANISM #formal_name Macaca mulatta #common_name rhesus macaque
ACCESSIONS A00003
GDVEKGKKIFIMKCSQSEKCHTVEKGGSSSSKHKTGPNLHGSSEKEMIYARAJKSEKLFGAAAAAAAARKTGQAPGYSYTAANKSSSSNKGITWGEDTLMEYLENPKKYIPGTKMIFVGIKKKEE
ENTRY CCMKP #type complete
TITLE cytochrome c - spider monkey
ORGANISM #formal_name Ateles sp. #common_name spider monkey
ACCESSIONS A00004
GDVFKGKRIFIMKCSQCHTVESSSSKGGKHKTGPNLHGLMIYARAJSEKFGSSSSSSSSSSR
And here is the output, showing for each record the whole record and then each individual field. As you can see, your two-line title is preserved.
ENTRY CCHU #type complete
TITLE cytochrome c [validated] - human
Homo sapiens
ORGANISM #formal_name Homo sapiens #common_name man
ACCESSIONS A31764; A05676; I55192; A00001
MGDVEKGKKIFIMKCSQCHTVEMGDVEKGGKHKTGPNLHGMIYARAJLFGRKTSEKGQAPGYSYTAANKNKGIIWGEDTLMEYLENPKKYIP
++++++++++++++++++++++++++++++++++++++++++++++++++
ENTRY CCHU #type complete
--------------------------------------------------
TITLE cytochrome c [validated] - human
Homo sapiens
--------------------------------------------------
ORGANISM #formal_name Homo sapiens #common_name man
--------------------------------------------------
ACCESSIONS A31764; A05676; I55192; A00001
MGDVEKGKKIFIMKCSQCHTVEMGDVEKGGKHKTGPNLHGMIYARAJLFGRKTSEKGQAPGYSYTAANKNKGIIWGEDTLMEYLENPKKYIP
--------------------------------------------------
**************************************************
ENTRY CCCZ #type complete
TITLE cytochrome c - chimpanzee (tentative sequence)
ORGANISM #formal_name Pan troglodytes #common_name chimpanzee
ACCESSIONS A00002
GDVEKGKKIFIMKCSQCHTSEKVEKGSSSKHKSSSTGPNLHGLMIYARAJFGRKTGSEKQAPGYSYTAANKNKGIIWGED
++++++++++++++++++++++++++++++++++++++++++++++++++
ENTRY CCCZ #type complete
--------------------------------------------------
TITLE cytochrome c - chimpanzee (tentative sequence)
--------------------------------------------------
ORGANISM #formal_name Pan troglodytes #common_name chimpanzee
--------------------------------------------------
ACCESSIONS A00002
GDVEKGKKIFIMKCSQCHTSEKVEKGSSSKHKSSSTGPNLHGLMIYARAJFGRKTGSEKQAPGYSYTAANKNKGIIWGED
--------------------------------------------------
**************************************************
ENTRY CCMQR #type complete
TITLE cytochrome c - rhesus macaque (tentative sequence)
Macaca mulatta
ORGANISM #formal_name Macaca mulatta #common_name rhesus macaque
ACCESSIONS A00003
GDVEKGKKIFIMKCSQSEKCHTVEKGGSSSSKHKTGPNLHGSSEKEMIYARAJKSEKLFGAAAAAAAARKTGQAPGYSYTAANKSSSSNKGITWGEDTLMEYLENPKKYIPGTKMIFVGIKKKEE
++++++++++++++++++++++++++++++++++++++++++++++++++
ENTRY CCMQR #type complete
--------------------------------------------------
TITLE cytochrome c - rhesus macaque (tentative sequence)
Macaca mulatta
--------------------------------------------------
ORGANISM #formal_name Macaca mulatta #common_name rhesus macaque
--------------------------------------------------
ACCESSIONS A00003
GDVEKGKKIFIMKCSQSEKCHTVEKGGSSSSKHKTGPNLHGSSEKEMIYARAJKSEKLFGAAAAAAAARKTGQAPGYSYTAANKSSSSNKGITWGEDTLMEYLENPKKYIPGTKMIFVGIKKKEE
--------------------------------------------------
**************************************************
ENTRY CCMKP #type complete
TITLE cytochrome c - spider monkey
ORGANISM #formal_name Ateles sp. #common_name spider monkey
ACCESSIONS A00004
GDVFKGKRIFIMKCSQCHTVESSSSKGGKHKTGPNLHGLMIYARAJSEKFGSSSSSSSSSSR
++++++++++++++++++++++++++++++++++++++++++++++++++
ENTRY CCMKP #type complete
--------------------------------------------------
TITLE cytochrome c - spider monkey
--------------------------------------------------
ORGANISM #formal_name Ateles sp. #common_name spider monkey
--------------------------------------------------
ACCESSIONS A00004
GDVFKGKRIFIMKCSQCHTVESSSSKGGKHKTGPNLHGLMIYARAJSEKFGSSSSSSSSSSR
--------------------------------------------------
**************************************************
I hope this is of use.
Cheers,
JohnGG
|
|---|
| Replies are listed 'Best First'. | |
|---|---|
|
Re^2: doubt in storing a data of 2 lines in an array.
by Anonymous Monk on Oct 30, 2006 at 14:53 UTC | |
by johngg (Canon) on Oct 30, 2006 at 15:09 UTC | |
by Anonymous Monk on Oct 31, 2006 at 10:22 UTC |