use v6;

# Convert the HTML table found in each *.html file of the current directory
# into one semicolon-separated .csv file.
#
# NOTE(review): the copy of this file under review had its line breaks
# collapsed and every `<name>`-shaped span removed (leaving `''`, `$>>.made`,
# and quantifiers with nothing to quantify).  The named captures below
# (<header>, <field>, <row>, <data>) are restored from the rule and action
# method names.  The literal tags ('<table>', '<tr>', '<th>', '<td>') are a
# reconstruction -- confirm them against the real input files.  TOP in
# particular showed more empty literals than are used here, so it may
# originally have matched extra wrapper tags (e.g. thead/tbody) -- TODO confirm.

# Grammar for a document containing one table: a header row followed by
# one or more data rows, surrounded by arbitrary other lines.
grammar HTML_table {
    token TOP {
        <.rubbish>+? <.ws> '<table>'
        <.ws> <header>
        <.ws> <row>+
        <.ws> '</table>' <.rubbish>+
    }

    # A header row: zero or more header cells between the row delimiters.
    rule header { '<tr>' ~ '</tr>' <field>* }

    # One header cell; the cell text is positional capture 0.
    regex field { <.ws> '<th>' ~ '</th>' (.*?) }

    # A data row: zero or more data cells between the row delimiters.
    rule row { '<tr>' ~ '</tr>' <data>* }

    # One data cell; the cell text is positional capture 0.
    regex data { <.ws> '<td>' ~ '</td>' (.*?) }

    # Any single line (up to and including its newline) that is skipped.
    regex rubbish { \N* \n }
}

# Actions that turn the parse tree into plain lists of trimmed cell strings.
class HTML_table_actions {
    # A header row is made into the list of its cells' values.
    method header($/) {
        make $<field>>>.made;
    }

    # A header cell is made into its trimmed text, or '' when nothing was captured.
    method field($/) {
        # make ~$/[0];    # verbatim
        make $/[0].defined ?? $/[0].Str.trim !! '';
    }

    # A data row is made into the list of its cells' values.
    method row($/) {
        make $<data>>>.made;
    }

    # A data cell is made into its trimmed text, or '' when nothing was captured.
    method data($/) {
        # make ~$/[0];    # verbatim
        make $/[0].defined ?? $/[0].Str.trim !! '';
    }
}

# All entries of the current directory whose name ends in ".html" (case-insensitive).
my @file_list = dir(test => / :i '.' html $ /);

# Without this guard, @file_list[0].substr fails on an empty list.
unless @file_list {
    note "No .html files found in the current directory";
    exit 1;
}

# The output name is the first 16 characters of the first input file's name.
# NOTE(review): presumably the input files share a 16-character prefix -- confirm.
my $file_name   = @file_list[0].substr: 0, 16;
my $output_file = open $file_name ~ ".csv", :w;

for @file_list -> $html_file {
    my $file_content = slurp($html_file, enc => 'iso-8859-1');
    say "Parsing: $html_file";

    my $parser = HTML_table.parse($file_content, actions => HTML_table_actions.new);
    unless $parser {
        # Stop at the first file that does not parse; rows already written are kept.
        say "Unable to parse: $html_file";
        last;
    }

    # The header line is written only once, taken from the first parsed file.
    once { $output_file.print("{ $parser<header>.made.join: ';' }\n") };

    # One CSV line per table row.
    $output_file.print("{ .join: ';' }\n") for $parser<row>>>.made;
}

$output_file.close;