# ParsePlaybill.pm 20apr19waw package ParsePlaybill; use warnings; use strict; # use Data::Dump qw(dd); # for debug # parsing regexes. # general record format. my $rx_intro = qr{ \A \^ }xms; # start of record my $rx_outro = qr{ \s* \+ \Z }xms; # end of record # index number field and separator from title field. my $rx_index = qr{ \b \d+ \b }xms; # index number my $rx_index_sep = qr{ \s* % \s* }xms; # index number separator # general identifier regex. used extensively: may or may not be # appropriate for all dependent patterns (play titles, role titles, # actor names, etc). my $rx_ident = qr{ [[:alpha:]] [-'.[:alnum:]]* }xms; # play title field and separator from first role. my $rx_title = qr{ \b $rx_ident (?: \s+ $rx_ident)* \b }xms; my $rx_title_sep = qr{ \s* \Q.|\E \s* }xms; # role character name and name of actor in the role. my $rx_char = qr{ \b $rx_ident (?: \s+ $rx_ident)* \b }xms; my $rx_player = qr{ \b $rx_ident (?: \s+ $rx_ident)* \b }xms; # role character/actor compound field. my $rx_role_sep = qr{ \s* - \s* }xms; my $rx_role_term = qr{ \s* ; \s* }xms; my $rx_role = qr{ $rx_char $rx_role_sep $rx_player $rx_role_term }xms; # record parsing and formatting functions. sub rec { my ($record, ) = @_; my $parsed = my ($n_index, $title, $role_list) = $record =~ m{ $rx_intro ($rx_index) $rx_index_sep # must have index number ($rx_title) $rx_title_sep # must have play title ($rx_role+) # must have at least one role $rx_outro }xms; die "bad record '$record'" unless $parsed; my @roles; # character/actor fields as array refs. push @roles, [ $1, $2 ] while $role_list =~ m{ ($rx_char) $rx_role_sep ($rx_player) }xmsg; return $n_index, $title, @roles; } sub fmt { my ($n_index, $title, @roles, # array of array refs.: character/actor pairs ) = @_; my $head = qq{"$n_index","$title"}; # put header on each character/actor role pair. @roles = map qq{$head,"$_->[0]","$_->[1]"}, @roles; # terminate every headed role with a newline. return join "\n", @roles, ''; } 1; # flag successful module inclusion