use strict;
use warnings;

# Split a slurped listing into records.
#
# A record ends with a bracketed marker such as "[SDGT]" that is followed
# (apart from optional trailing blanks) by a newline or by the end of the
# input.  The marker is kept as part of the record.  Bracketed text in the
# middle of a line does not end a record.
#
# Parameters:
#   $stream - the whole input as one string (may be undef or empty)
# Returns:
#   the list of records, in input order, with their internal line breaks
#   still in place.  Trailing text that carries no end marker is returned
#   as a final record so that no input is silently dropped.
sub split_records {
    my ($stream) = @_;
    return if !defined $stream;

    my @records;

    # \x5d is "]".  /gc keeps pos() where the last successful match ended,
    # so the unmatched tail (if any) can be recovered after the loop.
    while (
        $stream =~ m{
            \G \s*                        # skip blank space between records
            ( .+? \[ [^\x5d]+ \] )        # record text up to and including its marker
            [^\S\n]*                      # optional blanks after the marker
            (?: \n | \z )                 # the marker must end the line (or the input)
        }gcxs
      )
    {
        push @records, $1;
    }

    my $leftover = substr $stream, pos($stream) // 0;
    $leftover =~ s{\A\s+}{};
    $leftover =~ s{\s+\z}{};
    push @records, $leftover if length $leftover;

    return @records;
}

# Read the whole input into one scalar variable.
#
# $/ is the input record separator (i.e. $INPUT_RECORD_SEPARATOR if you
# 'use English').  It is usually "\n", which is what makes a read return
# one line at a time; while it is undef, a read returns everything that is
# left.  'local' confines the change to this block instead of altering
# every later read in the program.
#
# You could even set $/ to "[SDGT]\n" and read one whole record at a time,
# if that was going to be a consistent marker of the end of a record.
#
# NOTE(review): the original filehandle was lost from the snippet; STDIN is
# assumed here.  Substitute your own handle if you read from an opened file.
my $stream = do { local $/; <STDIN> };

# Split the stream into records, based on a regexp that consistently marks
# the end of a record: any [...] marker at the end of a line.
my @records = split_records($stream);

foreach my $record (@records) {
    $record =~ s{\n}{ }g;    # Remove line breaks

    # You could also do your matching and marking up in this loop
}

# For the sample data, @records then holds:
#
# @records = (
#     'ABU BAKR, Ibrahim Ali Muhammad (a.k.a. AL-LIBI, Abd al-Muhsin) (individual) [SDGT]',
#     'AFGHAN SUPPORT COMMITTEE (ASC) (a.k.a. AHYA UL TURAS; a.k.a. JAMIAT AYAT-UR-RHAS AL ISLAMIA; a.k.a. JAMIAT IHYA UL TURATH AL ISLAMIA; a.k.a. LAJNAT UL MASA EIDATUL AFGHANIA) Grand Trunk Road, near Pushtoon Garhi Pabbi, Peshawar, Pakistan; Cheprahar Hadda, Mia Omar Sabaqah School, Jalalabad, Afghanistan [SDGT]'
# );