#!/usr/bin/env perl
use Modern::Perl;

# Extract the last "plain" paragraph from the section of each saved page
# that lies between its first two horizontal rules, and print it.

# A horizontal rule: a newline, any one non-newline character, then a run of
# dashes/underscores filling the rest of the line.  The pattern is compiled
# without /s, so the '.' here never matches a newline even when $hr is
# interpolated into a regex that does use /s.
my $hr = qr{\n.[-_]+\n};    # close enough for (literally) government work

# get_rule($text)
#
# Returns the last paragraph between the first and second horizontal rules
# of $text that does NOT begin with an upper-case label such as "SUMMARY:".
# Paragraphs are separated by one or more blank lines.  Returns undef when
# no such paragraph exists (including for empty input).
sub get_rule {
    my $text = shift;

    $text =~ s/.*?$hr//s;    # drop everything up to and including the first rule

    # Drop the second rule and everything after it.  '.*' (not '.+') so the
    # rule is removed even when it is the very last thing in the text.
    $text =~ s/$hr.*//s;

    my $keep;
    for my $paragraph (split /\n\n+/, $text) {

        # Labelled paragraphs ("DATES: ...", "ACTION: ...") are skipped.
        next if $paragraph =~ /^[A-Z]+:/;
        $keep = $paragraph;    # remember the latest unlabelled paragraph
    }

    return $keep;
}

# read_page($path)
#
# Returns the whole content of the file at $path as a single string, or
# nothing (undef in scalar context) when the file cannot be opened; $! then
# holds the reason.  No shell is involved and no decoding layer is applied.
sub read_page {
    my $path = shift;

    open my $fh, '<', $path or return;
    local $/;    # slurp mode: read the entire file in one go
    my $content = <$fh>;
    close $fh;

    return $content;
}

for my $file (qw(2011-33656.htm 2011-33662.htm 2011-33692.htm 2012-2363.htm)) {
    say $file;

    my $page = read_page("f/$file");
    if (!defined $page) {

        # Report the real problem instead of a misleading "no match".
        warn "Unable to read f/$file: $!\n";
    }
    else {
        my $keep = get_rule($page);

        # Test for defined/non-empty rather than truth so a paragraph
        # consisting of "0" is still printed.
        if (defined $keep && length $keep) {
            say $keep;
        }
        else {
            say "Unable to find a match in $file";
        }
    }

    say '------------------------------------------';
}