#!/usr/bin/perl
use strict;
use warnings;

# Collapse runs of consecutive lines that share the same two-character tag
# into a single logical entry and print each entry as
#
#     Collected: <tag>, <joined content>
#
# Input is read from the __DATA__ section below.  Each data line is expected
# to look like "<2-char tag><whitespace><content>".  A line consisting only of
# "//" ends the current record: the pending run is flushed so that identical
# tags on either side of the terminator are never merged together.

my $last_tag = '';    # tag of the run being collected ('' means no run yet)
my $buffer   = '';    # content accumulated for $last_tag

# Print one collected run.  Does nothing when no run has been started, which
# avoids emitting a bogus empty entry before the first line or right after a
# record terminator.
sub flush_run {
    my ($tag, $text) = @_;
    return if $tag eq '';
    print "Collected: $tag, $text\n";
    return;
}

LINE:
while (my $line = <DATA>) {
    chomp $line;

    # Tolerate blank lines (e.g. a trailing newline at the end of the data).
    next LINE if $line !~ /\S/;

    # "//" terminates a record: emit what we have and start afresh.
    if ($line =~ m{\A//\s*\z}) {
        flush_run($last_tag, $buffer);
        ($last_tag, $buffer) = ('', '');
        next LINE;
    }

    my ($tag, $content) = $line =~ m{\A(..)\s+(.*)\z};
    if (!defined $tag) {
        warn "Malformed line >$line<";
        next LINE;
    }

    if ($tag ne $last_tag) {
        # Tag changed: the previous run is complete.  Report it under its
        # own tag, then start collecting for the new one.
        flush_run($last_tag, $buffer);
        $last_tag = $tag;
        $buffer   = $content;
    }
    else {
        # Continuation of the same tag.  Join with a single space so that
        # words split across physical lines are not glued together.
        $buffer .= " $content";
    }
}

# Emit the final run in case the data does not end with a "//" terminator.
flush_run($last_tag, $buffer);

__DATA__
AB AAAA_BBBBB
DE AC2-(EC 2.7.00.1) (Adaptor-associated
DE protein 1).
//
ID CCCCC_DDDDD
DE Serine/threonine-protein kinase (EC 2.7.99.1) (Tyrosine
DE binding protein) (p35BP).
PR AAAAAAAAAAAAAAAAAAAAAAA.
//
ID RRRRR_GGGGG
AC Q6Q8;
DE Serine/threonine-aaaaaa kinase (Tyrosine
DE kinase 1) (Apoptosis-associated tyrosine kinase) (AATYK) (Brain
DE binding protein) (p35BP).
PR xxxxxxxxxxxxxx.
CD zzzzzzzzzzzzzz.
//