# ParsePlaybill.pm 20apr19waw package ParsePlaybill; use warnings; use strict; # use Data::Dump qw(dd); # for debug # parsing regexes. # general record format. my $rx_intro = qr{ \A \^ }xms; # start of record my $rx_outro = qr{ \s* \+ \Z }xms; # end of record # index number field and separator from title field. my $rx_index = qr{ \b \d+ \b }xms; # index number my $rx_index_sep = qr{ \s* % \s* }xms; # index number separator # general identifier regex. used extensively: may or may not be # appropriate for all dependent patterns (play titles, role titles, # actor names, etc). my $rx_ident = qr{ [[:alpha:]] [-'.[:alnum:]]* }xms; # play title field and separator from first role. my $rx_title = qr{ \b $rx_ident (?: \s+ $rx_ident)* \b }xms; my $rx_title_sep = qr{ \s* \Q.|\E \s* }xms; # role character name and name of actor in the role. my $rx_char = qr{ \b $rx_ident (?: \s+ $rx_ident)* \b }xms; my $rx_player = qr{ \b $rx_ident (?: \s+ $rx_ident)* \b }xms; # role character/actor compound field. my $rx_role_sep = qr{ \s* - \s* }xms; my $rx_role_term = qr{ \s* ; \s* }xms; my $rx_role = qr{ $rx_char $rx_role_sep $rx_player $rx_role_term }xms; # record parsing and formatting functions. sub rec { my ($record, ) = @_; my $parsed = my ($n_index, $title, $role_list) = $record =~ m{ $rx_intro ($rx_index) $rx_index_sep # must have index number ($rx_title) $rx_title_sep # must have play title ($rx_role+) # must have at least one role $rx_outro }xms; die "bad record '$record'" unless $parsed; my @roles; # character/actor fields as array refs. push @roles, [ $1, $2 ] while $role_list =~ m{ ($rx_char) $rx_role_sep ($rx_player) }xmsg; return $n_index, $title, @roles; } sub fmt { my ($n_index, $title, @roles, # array of array refs.: character/actor pairs ) = @_; my $head = qq{"$n_index","$title"}; # put header on each character/actor role pair. @roles = map qq{$head,"$_->[0]","$_->[1]"}, @roles; # terminate every headed role with a newline. return join "\n", @roles, ''; } 1; # flag successful module inclusion #### # ParsePlaybill.t 20apr19waw use warnings; use strict; use Test::More 'no_plan'; use Test::NoWarnings; # use Data::Dump qw(dd); # for debug BEGIN { use_ok 'ParsePlaybill'; } my @Tests = ( [ "^46004 % Tamerlane.| Tamerlane - Sheridan; Bajazet - Barry; Moneses - A Gentleman; Arpasia - Mrs. Furnival; Selima - Mrs. Elmy; +\n", <<'EOT', "46004","Tamerlane","Tamerlane","Sheridan" "46004","Tamerlane","Bajazet","Barry" "46004","Tamerlane","Moneses","A Gentleman" "46004","Tamerlane","Arpasia","Mrs. Furnival" "46004","Tamerlane","Selima","Mrs. Elmy" EOT ], 'Polonius "fixed" in this test case', [ "^46005 % Hamlet.| Hamlet - Sheridan; Polonius - J. Morris; Laertes- Lacy; Ophelia- Mrs. Storer; Queen - Mrs. Furnival; +\n", <<'EOT', "46005","Hamlet","Hamlet","Sheridan" "46005","Hamlet","Polonius","J. Morris" "46005","Hamlet","Laertes","Lacy" "46005","Hamlet","Ophelia","Mrs. Storer" "46005","Hamlet","Queen","Mrs. Furnival" EOT ], 'one-character play', [ "^1 % Mark Twain Tonight .| Mark Twain -Hal Holbrook;+\n", <<'EOT', "1","Mark Twain Tonight","Mark Twain","Hal Holbrook" EOT ], ); VECTOR: for my $ar_vector (@Tests) { if (not ref $ar_vector) { note $ar_vector; next VECTOR; } my ($record, $expected) = @$ar_vector; my @fields = ParsePlaybill::rec $record; my $got = ParsePlaybill::fmt @fields; ok $got eq $expected, "$record" ; } # end for VECTOR #### c:\@Work\Perl\monks\jcg3525>perl ParsePlaybill.t ok 1 - use ParsePlaybill; ok 2 - ^46004 % Tamerlane.| Tamerlane - Sheridan; Bajazet - Barry; Moneses - A Gentleman; Arpasia - Mrs. Furnival; Selima - Mrs. Elmy; + # # Polonius "fixed" in this test case ok 3 - ^46005 % Hamlet.| Hamlet - Sheridan; Polonius - J. Morris; Laertes- Lacy; Ophelia- Mrs. Storer; Queen - Mrs. Furnival; + # # one-character play ok 4 - ^1 % Mark Twain Tonight .| Mark Twain -Hal Holbrook;+ # ok 5 - no warnings 1..5