use warnings; use strict; my $rx_comma = qr{ \s* , \s* }xms; my $rx_date = qr{ \d{4} - \d\d - \d\d }xms; my $rx_name = qr{ [[:alpha:]] (?: '? [[:alpha:]]+)? }xms; my $rx_hyphenate = qr{ - $rx_name }xms; my $rx_surname = qr{ $rx_name $rx_hyphenate? }xms; my $rx_initial = qr{ [[:alpha:]] \. }xms; my $rx_givenname = qr{ $rx_initial | $rx_surname }xms; my $rx_prof = qr{ $rx_surname (?: $rx_comma (?: \s* $rx_givenname )+ )? }xms; # avoid polluting namespace with a bunch of common variable names. my $rx_facility = do { my $room = qr{ \d{3,4} [[:alpha:]]? }xms; my $range = qr{ $room (?: \s* - \s* $room)? }xms; my $rooms = qr{ $range (?: $rx_comma $range)* }xms; my $function = qr{ \( [^)]+ \) }xms; qr{ $rooms \s* $function }xms; # final regex }; $/ = ""; # paragrep mode while (my $entry = ) { my ($date, $prof, @facilities) = $entry =~ m{ $rx_date | $rx_prof | $rx_facility }xmsg; print <