use strict;
use warnings;

# Extract individual course records from a loosely formatted report file.
#
# The file is read in blank-line-delimited chunks. Chunks that do not begin
# with a five-digit number are treated as extraneous (headers, footers, ...)
# and skipped. Each remaining chunk may hold several course records, so it is
# split again in front of every line that starts with a five-digit number.
# Every record found is printed to STDOUT followed by a blank line.

my $file = "c:/webcod_enf.txt";

# Three-arg open with a lexical handle; include the file name in the error so
# a failure is diagnosable.
open my $fh, '<', $file or die "Cannot open $file: $!";

# Break the input up into rough 'records' at blank lines. Often you can get
# good records just by setting $/ -- not with this data though ;-)
local $/ = "\n\n";

CHUNK:
while ( my $chunk = <$fh> ) {

    # Skip the extraneous data; valid chunks will start with ^\s*\d{5}.
    unless ( $chunk =~ m/^\s*\d{5}/ ) {
        #print "Skipping:\n$chunk";
        next CHUNK;
    }

    # Now we have chunks of real data to parse. We split on the unique
    # m/^\s*\d{5}/m numeric feature to break out the individual records.
    # A lookahead assertion is used so the matched number is not consumed
    # (and therefore not lost) by the split.
    for my $course ( split /(?=^\s*\d{5})/m, $chunk ) {

        # We possibly get a null record to start with, so skip it.
        next if $course =~ m/^\s*$/;
        print "$course\n\n";
    }
}

close $fh or warn "Cannot close $file: $!";

# The sample below illustrates the input format; the script reads $file and
# never reads the DATA handle.
# NOTE(review): the line breaks in this sample were reconstructed from a
# whitespace-collapsed copy -- confirm against the real input file.
__DATA__
92861 APMA 109 0001 GI LC CALCULUS I 4.0 0900-0950 M W F OLS 011 OBERHAUSER JP 055 002 O
      0830-0920 T OLS 005
90063 APMA 109 0002 GI LC CALCULUS I 4.0 1000-1050 M W F OLS 120 BECK M 055 004 O
      0830-0920 R OLS 120
91589 APMA 109 0003 GI LC CALCULUS I 4.0 1100-1150 M W F OLS 120 BECK M 055 006 O
      0830-0920 T MEC 205