use v6;
# use lib $*PROGRAM.IO.parent.add: 'lib';
# use Grammar::Debugger;
# use Grammar::Tracer;
# Parse results. The grammar below fills these as a side effect of matching:
#   @table_header - one string per header cell, in document order
#   @table_data   - one array per body row, each holding that row's cell strings
my @table_header;
my @table_data;

# Line-oriented grammar that pulls the header cells and body rows out of one
# HTML table and stores them in @table_header / @table_data.
#
# NOTE(review): in the reviewed copy every tag-shaped piece of text was missing
# (the calls to head, body, hrow, brow and data, the "$<data>" captures and the
# tag literals inside the tokens), which left bare "*" quantifiers and empty
# string tokens that cannot compile or match.  They are reconstructed here from
# the rule and token names - confirm the literals against the real input,
# in particular if its tags carry attributes.
grammar html_table
{
    # Whole document: at least one leading line, the header section, the body
    # section, then whatever follows.
    # This is a regex rather than a token because the frugal <.rubbish>+? has
    # to be able to grow one line at a time until <head> matches; under the
    # ratcheting of a token it could never take more than a single line.
    # The final \N* accepts a last line that has no trailing newline.
    regex TOP {
        <.rubbish>+?
        <head>
        <body>
        <.rubbish>*
        \N*
    }

    # Header section: exactly one row of header cells.
    rule head {
        <.ws> <.theadl>
        <.ws> <.trl>
        <hrow>*
        <.ws> <.trr>
        <.ws> <.theadr>
    }

    # One header cell; its text is appended to @table_header.
    rule hrow {
        <.ws> <.thl> <data> <.thr>
        { @table_header.push: $<data>.Str.trim }
    }

    # Body section: any number of rows.  A fresh (empty) row array is pushed
    # as soon as a row opens, so brow always appends to the current row.
    rule body {
        <.ws> <.tbodyl>
        [<.ws> <.trl> { @table_data.push: [] } <brow>* <.trr> ]*
        <.ws> <.tbodyr>
    }

    # One body cell; its text is appended to the most recently opened row.
    rule brow {
        <.ws> <.tdl> <data> <.tdr>
        { @table_data[*-1].push: $<data>.Str.trim }
    }

    # Opening ("l") and closing ("r") tag literals.
    token theadl { '<thead>' }
    token theadr { '</thead>' }
    token tbodyl { '<tbody>' }
    token tbodyr { '</tbody>' }
    token trl { '<tr>' }
    token trr { '</tr>' }
    token thl { '<th>' }
    token thr { '</th>' }
    token tdl { '<td>' }
    token tdr { '</td>' }

    # Cell content: everything up to (not including) the next closing cell tag.
    # Written with a negative lookahead instead of a frugal ".*?" because the
    # callers are ratcheting rules and never backtrack into this subrule, so a
    # frugal match would always stay empty.
    token data { [ <!before <.thr> | <.tdr> > . ]* }

    # Any single newline-terminated line that is not part of the table.
    regex rubbish {
        \N* \n
    }
}
# Driver: parse one HTML file and print the table as semicolon-separated
# lines, header line first.
# The input file is the first command-line argument; when none is given the
# original hard-coded name is used, so existing invocations keep working.
my $file_name = @*ARGS[0] // "Z2020_G_004_202202161115.html";
my $file_content = slurp( $file_name );
my $p = html_table.parse( $file_content );
if $p.defined {
    say @table_header.join( ';' );
    # Rows that collected no cells are skipped.
    for @table_data.grep( *.elems ) -> @row {
        say @row.join( ';' );
    }
} else {
    # .parse did not return a match: report it instead of exiting silently
    # with a success status.
    note "$file_name: input did not match grammar html_table";
    exit 1;
}