use v6; my $file_name = "Z2020_G_004_202202161115.html"; my @table_header = (); my @table_data = (); my @file_lines = slurp( $file_name ).split: / \n /; for @file_lines { last if / '
' /; if / '##
Clasification;Descrip;Cod Program;Descrip Program;Clasification Program;Credits;Payment
; ;1360; ;Services;150.000,00;62400
0,00; ;20.504,57;20.504,57;Services;0,00;-20.504,57
0,00; ;59.179,70;59.179,70;Services;6.254,79;-59.179,70
0,00; ;16.518,85;16.518,85;Services;0,00;33.481,15
####
use v6;
# use lib $*PROGRAM.IO.parent.add: 'lib';
# use Grammar::Debugger;
# use Grammar::Tracer;
my @table_header;
my @table_data;
# Grammar that skips leading lines, then matches one <thead> section and one
# <tbody> section, then skips the remaining lines.
#
# Side effects (performed by the embedded code blocks while matching):
#   * every header cell is appended to the outer @table_header array;
#   * every <tr> in the body appends a fresh row to the outer @table_data
#     array, and every body cell is appended to the most recent row.
#
# NOTE(review): the tag literals and the <head>/<body>/<hrow>/<brow>/<data>
# subrule references were empty in the previous revision (the text between
# angle brackets had been lost). They are restored here from the token and
# rule names -- confirm them against the real input markup.
grammar html_table
{
    # Skip as few lines as possible before the table, then the table itself,
    # then everything that is left.
    token TOP {
        <.rubbish>+?
        <head>
        <body>
        <.rubbish>+
    }

    # <thead> <tr> header-cells... </tr> </thead>
    rule head {
        <.ws> <.theadl>
        <.ws> <.trl>
        <hrow>*
        <.ws> <.trr>
        <.ws> <.theadr>
    }

    # One <th>...</th> cell; its text is recorded as a column title.
    rule hrow {
        <.ws> <.thl> <data> <.thr>
        { @table_header.push: ~$<data> }
    }

    # <tbody> followed by any number of <tr>...</tr> rows, then </tbody>.
    # A new (empty) row is pushed as soon as an opening <tr> is seen.
    rule body {
        <.ws> <.tbodyl>
        [<.ws> <.trl> { @table_data.push: [] } <brow>* <.trr> ]*
        <.ws> <.tbodyr>
    }

    # One <td>...</td> cell; its text is appended to the current (last) row.
    rule brow {
        <.ws> <.tdl> <data> <.tdr>
        { @table_data[*-1].push: ~$<data> }
    }

    # Opening ("l") and closing ("r") tag literals.
    token theadl { '<thead>' }
    token theadr { '</thead>' }
    token tbodyl { '<tbody>' }
    token tbodyr { '</tbody>' }
    token trl { '<tr>' }
    token trr { '</tr>' }
    token thl { '<th>' }
    token thr { '</th>' }
    token tdl { '<td>' }
    token tdr { '</td>' }

    # Cell content: as few characters as possible.
    regex data { .*? }

    # One whole line, including its terminating newline.
    regex rubbish {
        \N* \n
    }
}
# Parse one fixed HTML file with the html_table grammar and print the
# collected table as semicolon-separated lines: first the header, then each
# non-empty data row.
my $file_name    = "Z2020_G_004_202202161115.html";
my $file_content = $file_name.IO.slurp;
my $p            = html_table.parse: $file_content;

# Nothing is printed when the parse did not produce a defined result.
with $p {
    print @table_header.join(';') ~ "\n";
    # Rows that collected no cells are skipped.
    print .join(';') ~ "\n" for @table_data.grep: *.elems;
}
## ##
Clasification;Descrip;Cod Program;Descrip Program;Clasification Program;Credits;Payment
;;1360;;Services;150.000,00;62400;0,00;;20.504,57;20.504,57;Services;0,00;-20.504,57;0,00;;59.179,70;59.179,70;Services;6.254,79;-59.179,70;0,00;;16.518,85;16.518,85;Services;0,00;33.481,15
####
use v6;
# Grammar for a document that contains one <thead> section (a single header
# row of <th> cells) followed by one <tbody> section (one or more rows of
# <td> cells). Lines before <thead> and after </tbody> are skipped.
#
# Captures: <header> with its <field> children, and a list of <row> matches
# each with its <data> children. The cell text is positional capture 0 of
# every <field>/<data> match.
#
# NOTE(review): the tag literals and the <header>/<row>/<field>/<data>
# subrule references were empty in the previous revision (the text between
# angle brackets had been lost). They are restored from the rule names and
# the surrounding structure -- confirm them against the real input markup.
grammar HTML_table
{
    token TOP {
        <.rubbish>+?
        <.ws> '<thead>'
        <header>
        <.ws> '</thead>'
        <.ws> '<tbody>'
        <row>+
        <.ws> '</tbody>'
        <.rubbish>+
    }

    # NOTE(review): the original first atom of this rule could not be
    # recovered; <.ws> is used so whitespace before <tr> is accepted.
    rule header {
        <.ws> '<tr>' ~ '</tr>' <field>*
    }

    # One header cell. The closing tag is matched inside this regex so the
    # frugal (.*?) can grow until it is found.
    regex field {
        <.ws> '<th>' ~ '</th>' (.*?)
    }

    # NOTE(review): same remark as for `header` regarding the first atom.
    rule row {
        <.ws> '<tr>' ~ '</tr>' <data>*
    }

    # One body cell; same shape as `field`.
    regex data {
        <.ws> '<td>' ~ '</td>' (.*?)
    }

    # One whole line, including its terminating newline.
    regex rubbish {
        \N* \n
    }
}
# Actions for the HTML_table grammar.
#
# After a successful parse:
#   * a <header> match's .made is the list of its <field> values;
#   * each <row> match's .made is the list of its <data> values;
#   * every <field>/<data> value is the cell text with surrounding
#     whitespace trimmed, or '' when the cell capture is missing.
class HTML_table_actions
{
    # Collect the already-made values of all header cells.
    method header($/) {
        make $<field>>>.made;
    }

    # Cell text is positional capture 0 of the match.
    method field($/) {
        # make ~$/[0]; # verbatim
        make $/[0].defined ?? $/[0].Str.trim !! '';
    }

    # Collect the already-made values of all cells of one body row.
    method row($/) {
        make $<data>>>.made;
    }

    # Cell text is positional capture 0 of the match.
    method data($/) {
        # make ~$/[0]; # verbatim
        make $/[0].defined ?? $/[0].Str.trim !! '';
    }
}
# Convert every *.html file in the current directory to one CSV file.
#
# The output file is named after the first 16 characters of the first
# matching file name, plus ".csv". The header line is written once (from the
# first file that parses); the data rows of every file follow. Processing
# stops at the first file that cannot be parsed.
my $parser;
my @file_list = dir(test => / :i '.' html $ /);

# Without any input there is nothing to name the output file after.
die "No .html files found in the current directory" unless @file_list;

my $file_name = @file_list[0].substr: 0, 16;
my $output_file = open $file_name ~ ".csv", :w;
my $file_content;
for @file_list {
    $file_content = slurp($_, enc => 'iso-8859-1');
    say "Parsing: $_";
    $parser = HTML_table.parse($file_content, actions => HTML_table_actions.new);
    unless $parser {
        say "Unable to parse: $_";
        last;
    };
    # TOP has no action method, so the made values live on the <header> and
    # <row> sub-matches rather than on the top-level match.
    once { $output_file.print("{ $parser<header>.made.join: ';'; }\n") };
    $output_file.print("{ .join: ';'; }\n") for $parser<row>>>.made;
}
$output_file.close;
##
##
Clasification
Descrip
Cod Program
Descrip Program
Clasification Program
Credits
Payment
1360
Services
150.000,00
62400
0,00
20.504,57
20.504,57
Services
0,00
-20.504,57
0,00
59.179,70
59.179,70
Services
6.254,79
-59.179,70
0,00
16.518,85
16.518,85
Services
0,00
33.481,15
Total
89.478.403,32
32.751.626,25
122.230.029,57
102.342.399,26
89.476.722,29
84.657.323,46
4.819.398,83
32.753.307,28