use v6;

# Line-oriented extraction of one HTML table into ';'-separated text:
# the header cells are printed first, then one output line per table row.
#
# NOTE(review): in the copy under review every markup literal in the four
# patterns below had been reduced to an empty string (''). An empty literal
# matches any line, so the `last` guard ended the loop on the very first
# line. The tag names restored here are inferred from the variable names
# and from the recorded output (which has no "Total" row, hence the stop at
# the end of the table body) -- confirm them against the real input file.
# Each pattern expects the tag to sit at the end of its own line.

my $file_name = "Z2020_G_004_202202161115.html";
my @table_header;
my @table_data;

for slurp( $file_name ).lines {
    # Stop before anything that follows the table body.
    last if / '</tbody>' /;

    # Header cell: collect its text.
    if / '<th>' (.+?) '</th>' $ / {
        @table_header.push: ~$0;
        next;
    }

    # Row opener: start a fresh, empty row.
    if / '<tr>' $ / {
        @table_data.push: [];
        next;
    }

    # Data cell: append to the row opened most recently.
    if / '<td>' (.*?) '</td>' $ / {
        @table_data[*-1].push: ~$0;
    }
}

say @table_header.join: ';';

# Rows that never received a cell (e.g. the header's own <tr>) are skipped.
.join( ';' ).say for @table_data.grep: *.elems;
##

##

Clasification;Descrip;Cod Program;Descrip Program;Clasification Program;Credits;Payment
; ;1360; ;Services;150.000,00;62400
0,00; ;20.504,57;20.504,57;Services;0,00;-20.504,57
0,00; ;59.179,70;59.179,70;Services;6.254,79;-59.179,70
0,00; ;16.518,85;16.518,85;Services;0,00;33.481,15

##

##

use v6;
# use lib $*PROGRAM.IO.parent.add: 'lib';
# use Grammar::Debugger;
# use Grammar::Tracer;

# Accumulators filled as a side effect of parsing: the header cell texts,
# and one inner array of cell texts per table row.
my ( @table_header, @table_data );

# Grammar that locates the <thead>/<tbody> pair of an HTML table and, while
# matching, pushes cell texts into the file-level arrays @table_header and
# @table_data from embedded code blocks.
#
# NOTE(review): the copy under review had lost everything that looks like an
# HTML tag: the literals inside the ten tag tokens were empty strings, and
# the capturing subrule calls (<head>, <body>, <hrow>, <brow>, <data>) and
# the `$<data>` lookups were gone, leaving dangling `*` quantifiers and a
# bare `~$`. They are restored here from the token and rule names; confirm
# the exact tag spelling (attributes, case) against the real input.
# NOTE(review): the output recorded after this program shows every data cell
# on a single row; that behaviour is not diagnosed here -- verify the row
# separation before relying on this version.
grammar html_table
{
    # Skip leading lines lazily until a header and a body match, then
    # swallow the remaining lines. Every skipped line must end in "\n".
    token TOP {
        <.rubbish>+?
        <head>
        <body>
        <.rubbish>+
    }

    # <thead> holding a single <tr> of header cells.
    rule head {
        <.ws> <.theadl>
        <.ws> <.trl>
        <hrow>*
        <.ws> <.trr>
        <.ws> <.theadr>
    }

    # One header cell; its text is appended to @table_header.
    rule hrow {
        <.ws> <.thl> <data> <.thr>
        { @table_header.push: ~$<data> }
    }

    # <tbody> holding any number of rows; a new empty row is opened in
    # @table_data each time a <tr> is matched.
    rule body {
        <.ws> <.tbodyl>
        [ <.ws> <.trl> { @table_data.push: [] } <brow>* <.trr> ]*
        <.ws> <.tbodyr>
    }

    # One data cell; its text is appended to the most recently opened row.
    rule brow {
        <.ws> <.tdl> <data> <.tdr>
        { @table_data[*-1].push: ~$<data> }
    }

    # Opening ("l") and closing ("r") tag literals.
    token theadl { '<thead>' }
    token theadr { '</thead>' }
    token tbodyl { '<tbody>' }
    token tbodyr { '</tbody>' }
    token trl { '<tr>' }
    token trr { '</tr>' }
    token thl { '<th>' }
    token thr { '</th>' }
    token tdl { '<td>' }
    token tdr { '</td>' }

    # Cell content: as little text as possible up to the closing tag.
    regex data { .*? }

    # Any single line, including its terminating newline.
    regex rubbish {
        \N* \n
    }
}

# Parse the whole file in one pass. The grammar's embedded code blocks fill
# @table_header and @table_data while matching, so the match object itself
# is only checked for success.
my $file_name = "Z2020_G_004_202202161115.html";
my $file_content = slurp $file_name;
my $p = html_table.parse: $file_content;

if defined $p {
    say @table_header.join: ';';

    # Rows that never received a cell are not printed.
    .join( ';' ).say for @table_data.grep: *.elems;
}

##

##

Clasification;Descrip;Cod Program;Descrip Program;Clasification Program;Credits;Payment
;;1360;;Services;150.000,00;62400;0,00;;20.504,57;20.504,57;Services;0,00;-20.504,57;0,00;;59.179,70;59.179,70;Services;6.254,79;-59.179,70;0,00;;16.518,85;16.518,85;Services;0,00;33.481,15

##

##

use v6;
# Grammar for the <thead>/<tbody> part of an HTML table. It only captures;
# turning the captures into strings is left to an actions class.
#
# Captures: `header` (one match holding a list of `field` matches) and
# `row` (a list of matches, each holding a list of `data` matches).
# Positional capture 0 of every `field`/`data` match is the raw cell text.
#
# NOTE(review): in the copy under review the tag literals were empty strings
# and the capturing calls (<header>, <row>, <field>, <data>) were missing,
# leaving bare `*` / `+` quantifiers. They are restored here from the rule
# names; confirm the exact tag spelling against the real input. Leading
# whitespace is consumed explicitly, and each element is written as a plain
# open/content/close sequence.
grammar HTML_table
{
    # Skip leading lines lazily until the table head is found, match head
    # and body, then swallow the remaining lines. Every skipped line must
    # end in "\n", so the input needs a trailing newline.
    token TOP {
        <.rubbish>+?
        <.ws> '<thead>'
        <.ws> <header>
        <.ws> '</thead>'
        <.ws> '<tbody>'
        [ <.ws> <row> ]+
        <.ws> '</tbody>'
        <.rubbish>+
    }

    # The single header row.
    rule header {
        '<tr>' <field>* '</tr>'
    }

    # One header cell; capture 0 is its text.
    regex field {
        <.ws> '<th>' (.*?) '</th>'
    }

    # One body row.
    rule row {
        '<tr>' <data>* '</tr>'
    }

    # One body cell; capture 0 is its text (possibly empty).
    regex data {
        <.ws> '<td>' (.*?) '</td>'
    }

    # Any single line, including its terminating newline.
    regex rubbish {
        \N* \n
    }
}

# Actions that reduce a table parse to plain data: the header match and
# every row match get a list of trimmed cell strings as their `.made` value.
#
# NOTE(review): in the copy under review `header` and `row` read
# `make $>>.made`; the capture name between `$<` and `>` had been stripped
# along with the HTML-like text. The names restored here follow the
# grammar's rule names (`field` for header cells, `data` for body cells).
class HTML_table_actions
{
    # Header row -> list of header cell strings.
    method header($/) {
        make $<field>>>.made;
    }

    # Header cell -> trimmed text.
    method field($/) {
        make self!cell-text($/);
    }

    # Body row -> list of cell strings.
    method row($/) {
        make $<data>>>.made;
    }

    # Body cell -> trimmed text.
    method data($/) {
        make self!cell-text($/);
    }

    # Text of positional capture 0 with surrounding whitespace removed, or
    # the empty string when that capture is absent.
    method !cell-text($match) {
        $match[0].defined ?? $match[0].Str.trim !! '';
    }
}

# Convert every file in the current directory whose name ends in ".html"
# (case-insensitive) into one combined ';'-separated CSV file.
#
# The output name is the first 16 characters of the first listed file plus
# ".csv". The header line is written once, from the first file that parses;
# processing stops at the first file that does not parse.
#
# NOTE(review): in the copy under review the header expression was split
# over three lines with `<header>` missing, and `$parser>>.made` had lost
# `<row>`; both are restored from the grammar's capture names.
my @file_list = dir(test => / :i '.' html $ /);

# Without this guard @file_list[0] is undefined and the .substr call below
# fails with an unhelpful message.
unless @file_list {
    note "No .html files found in the current directory";
    exit 1;
}

my $parser;
my $file_name = @file_list[0].substr: 0, 16;
my $output_file = open $file_name ~ ".csv", :w;
my $file_content;

for @file_list {
    # The reports are read as Latin-1, not UTF-8.
    $file_content = slurp($_, enc => 'iso-8859-1');
    say "Parsing: $_";
    $parser = HTML_table.parse($file_content, actions => HTML_table_actions.new);
    unless $parser {
        say "Unable to parse: $_";
        last;
    }

    # Header cells come from the first successfully parsed file only.
    once { $output_file.say: $parser<header>.made.join(';') };

    # One output line per table row.
    $output_file.say: .join(';') for $parser<row>>>.made;
}
$output_file.close;

##

##








  

    
    
    
    
    
    
    
    
  
  

    
      
	
	
      
      
       
 	
 	  
	    
	  
	  
	  
	    europeanFormat
	    
	      ,

	      
\.

	    
	    
	      \.

	      
,

	    
	  
		
	
	
	  
	    
              Clasification
              Descrip
              Cod Program
              Descrip Program
              Clasification Program
              Credits
              Payment
	    
	  
	  
	    
              
               
              1360
               
              Services
              150.000,00
              62400
	    
	    
              0,00
               
              20.504,57
              20.504,57
              Services
              0,00
              -20.504,57
	    
	    
              0,00
               
              59.179,70
              59.179,70
              Services
              6.254,79
              -59.179,70
	    
	    
              0,00
               
              16.518,85
              16.518,85
              Services
              0,00
              33.481,15
	    
	  
	  
	    
              Total
              89.478.403,32
              32.751.626,25
              122.230.029,57
              102.342.399,26
              89.476.722,29
              84.657.323,46
              4.819.398,83
              32.753.307,28