# ParsePlaybill.pm 20apr19waw
package ParsePlaybill;
use warnings;
use strict;
# use Data::Dump qw(dd); # for debug
# parsing regexes.
# general record format.
my $rx_intro = qr{ \A \^ }xms; # start of record
my $rx_outro = qr{ \s* \+ \Z }xms; # end of record
# index number field and separator from title field.
my $rx_index = qr{ \b \d+ \b }xms; # index number
my $rx_index_sep = qr{ \s* % \s* }xms; # index number separator
# general identifier regex. used extensively: may or may not be
# appropriate for all dependent patterns (play titles, role titles,
# actor names, etc).
my $rx_ident = qr{ [[:alpha:]] [-'.[:alnum:]]* }xms;
# play title field and separator from first role.
my $rx_title = qr{ \b $rx_ident (?: \s+ $rx_ident)* \b }xms;
my $rx_title_sep = qr{ \s* \Q.|\E \s* }xms;
# role character name and name of actor in the role.
my $rx_char = qr{ \b $rx_ident (?: \s+ $rx_ident)* \b }xms;
my $rx_player = qr{ \b $rx_ident (?: \s+ $rx_ident)* \b }xms;
# role character/actor compound field.
my $rx_role_sep = qr{ \s* - \s* }xms;
my $rx_role_term = qr{ \s* ; \s* }xms;
my $rx_role = qr{ $rx_char $rx_role_sep $rx_player $rx_role_term }xms;
# record parsing and formatting functions.
sub rec {
my ($record,
) = @_;
my $parsed =
my ($n_index, $title, $role_list) =
$record =~ m{
$rx_intro
($rx_index) $rx_index_sep # must have index number
($rx_title) $rx_title_sep # must have play title
($rx_role+) # must have at least one role
$rx_outro
}xms;
die "bad record '$record'" unless $parsed;
my @roles; # character/actor fields as array refs.
push @roles, [ $1, $2 ] while
$role_list =~ m{ ($rx_char) $rx_role_sep ($rx_player) }xmsg;
return $n_index, $title, @roles;
}
sub fmt {
my ($n_index,
$title,
@roles, # array of array refs.: character/actor pairs
) = @_;
my $head = qq{"$n_index","$title"};
# put header on each character/actor role pair.
@roles = map qq{$head,"$_->[0]","$_->[1]"}, @roles;
# terminate every headed role with a newline.
return join "\n", @roles, '';
}
1; # flag successful module inclusion
####
# ParsePlaybill.t 20apr19waw
use warnings;
use strict;
use Test::More 'no_plan';
use Test::NoWarnings;
# use Data::Dump qw(dd); # for debug
BEGIN { use_ok 'ParsePlaybill'; }
my @Tests = (
[ "^46004 % Tamerlane.| Tamerlane - Sheridan; Bajazet - Barry; Moneses - A Gentleman; Arpasia - Mrs. Furnival; Selima - Mrs. Elmy; +\n",
<<'EOT',
"46004","Tamerlane","Tamerlane","Sheridan"
"46004","Tamerlane","Bajazet","Barry"
"46004","Tamerlane","Moneses","A Gentleman"
"46004","Tamerlane","Arpasia","Mrs. Furnival"
"46004","Tamerlane","Selima","Mrs. Elmy"
EOT
],
'Polonius "fixed" in this test case',
[ "^46005 % Hamlet.| Hamlet - Sheridan; Polonius - J. Morris; Laertes- Lacy; Ophelia- Mrs. Storer; Queen - Mrs. Furnival; +\n",
<<'EOT',
"46005","Hamlet","Hamlet","Sheridan"
"46005","Hamlet","Polonius","J. Morris"
"46005","Hamlet","Laertes","Lacy"
"46005","Hamlet","Ophelia","Mrs. Storer"
"46005","Hamlet","Queen","Mrs. Furnival"
EOT
],
'one-character play',
[ "^1 % Mark Twain Tonight .| Mark Twain -Hal Holbrook;+\n",
<<'EOT',
"1","Mark Twain Tonight","Mark Twain","Hal Holbrook"
EOT
],
);
VECTOR:
for my $ar_vector (@Tests) {
if (not ref $ar_vector) {
note $ar_vector;
next VECTOR;
}
my ($record, $expected) = @$ar_vector;
my @fields = ParsePlaybill::rec $record;
my $got = ParsePlaybill::fmt @fields;
ok $got eq $expected,
"$record"
;
} # end for VECTOR
####
c:\@Work\Perl\monks\jcg3525>perl ParsePlaybill.t
ok 1 - use ParsePlaybill;
ok 2 - ^46004 % Tamerlane.| Tamerlane - Sheridan; Bajazet - Barry; Moneses - A Gentleman; Arpasia - Mrs. Furnival; Selima - Mrs. Elmy; +
#
# Polonius "fixed" in this test case
ok 3 - ^46005 % Hamlet.| Hamlet - Sheridan; Polonius - J. Morris; Laertes- Lacy; Ophelia- Mrs. Storer; Queen - Mrs. Furnival; +
#
# one-character play
ok 4 - ^1 % Mark Twain Tonight .| Mark Twain -Hal Holbrook;+
#
ok 5 - no warnings
1..5