#!raku
# #################################################################
#
# Clean classification
#
# Interactively normalises the "<code>.- <description>" values found in
# one column of a CSV file: for every code that appears with more than
# one description, the user picks which description wins and the other
# rows are rewritten accordingly.
#
# last revision: 12.05.2023
#
# $ raku clean_class.raku -t=12 PPP_ING_2014-2021_Liq_20230424.csv
#
# #################################################################

use v6;
use lib 'lib';
use Text::CSV;

# One replacement group of the user's answer: "<targets> : <keeper>".
# <targets> is a comma separated list of menu numbers to be replaced,
# <keeper> is either a menu number or a free replacement text
# (which must not start with a digit). Examples: "1,3:2", "2:New text".
my regex ritchie {
    $<targets>=(\d ** 1..2 \s* [ ',' \s* \d ** 1..2 ]*?) \s* ':' \s*
    [ $<keep_num>=(\d ** 1..2) || $<keep_txt>=(<-[ \d ]> .+) ]
}

# A whole answer in the extended syntax: groups separated by ';'.
my regex valens { ^ \s* <ritchie> [ \s* ';' \s* <ritchie> ]*? $ }

# Translate the user's answer into a replacement plan.
#
#   $answer  trimmed, non-empty text typed by the user
#   %show    menu: number => { txt => description, year => years }
#
# Returns a Hash mapping each description to keep to the Array of
# descriptions it replaces. On invalid input it returns a Failure whose
# exception message is the error text to show to the user.
sub parse_choice (Str $answer, %show) {
    my %replace_this_txt;

    if $answer ~~ / <valens> / {
        # No menu number (or text) may appear twice in the answer.
        # Tokens are trimmed so that "1, 1:2" is caught as a repetition.
        my @check_list = $answer.split(/ <[,;:]> /).map(*.trim);
        fail "ERROR: invalid option { $answer } (repetitions are not allowed)"
            if @check_list.elems != @check_list.unique.elems;

        for $answer.split(';') -> $guy {
            my $m = $guy ~~ / <ritchie> /;
            next unless $m;
            my $group = $m<ritchie>;

            # Work out the description that wins for this group
            my $keep;
            if $group<keep_num>.defined {
                my $num = $group<keep_num>.Str;
                fail "ERROR: invalid option { $num } (specified player value doesn't exist)"
                    unless %show{$num}:exists;
                $keep = %show{$num}<txt>;
            }
            elsif $group<keep_txt>.defined {
                $keep = $group<keep_txt>.Str.trim;
            }
            else {
                fail "ERROR: missing player value: { $guy }";
            }

            # Collect the descriptions to be replaced by $keep
            %replace_this_txt{$keep} = [];
            for $group<targets>.Str.split(',').map(*.trim) -> $j {
                fail "ERROR: invalid option { $j } (specified target value doesn't exist)"
                    unless %show{$j}:exists;
                %replace_this_txt{$keep}.push: %show{$j}<txt>;
            }
        }
    }
    else {
        # Plain menu number: that entry replaces every other entry
        fail "ERROR: invalid option { $answer }"
            unless %show{$answer}:exists;

        my $keep = %show{$answer}<txt>;
        %replace_this_txt{$keep} = [];
        for %show.keys -> $j {
            next if $j eq $answer;
            %replace_this_txt{$keep}.push: %show{$j}<txt>;
        }
    }

    return %replace_this_txt;
}

# Entry point.
#
#   -v  verbose: print the chosen replacements and a final debug dump
#   -t  zero-based index of the column to clean (column 0 is read as the year)
#   -s  CSV field separator
#   -q  CSV quote character
#   @f  input CSV files; each one is processed in turn
#
# The cleaned column is written to "clean_class_<t>.csv" in the current
# directory, one value per line. Answering "q" (or end of input) abandons
# the current file without writing anything.
sub MAIN (Bool :v($verbose) = False,
          Int :t($which_col) = 1,
          Str :s($sep) = ';',
          Str :q($quo) = '"',
          *@f) {

    for @f -> $fn {
        my $csv = Text::CSV.new(:$sep, :$quo);

        # Read the input and collect the data for the specified field (target column)
        my $fh = open $fn, :r, :!chomp;
        # Include year column
        my @target_col = $csv.getline_all($fh).map( *[0, $which_col] );
        say "Read rows: { @target_col.elems }";
        $fh.close;

        my %puf = ();
        ###################################################
        # +---------+   +----------+   +------+
        # | $puf_id |-+->| $puf_txt |-+->| idx  |--->[..]
        # +---------+ | +----------+ | +------+
        #             |              | +------+
        #             |              +->| year |--->[..]
        #             |                +------+
        #             | +----------+
        #             +->| max_year |
        #             | +----------+
        #             | +----------+
        #             +->| sugg_txt |
        #               +----------+
        ###################################################
        my Int $row_count = 0;
        for @target_col -> $my_row {
            $row_count++;
            next unless $my_row;

            my $cell = $my_row[1].Str;
            say "WARNING: unexpected separator: $row_count" unless $cell ~~ / '.- ' /;

            # Split on the first separator only, so that a description
            # which itself contains ".- " is kept whole.
            my ($puf_id, $puf_txt) = $cell.split(".- ", 2);
            say "WARNING: empty id: $row_count" unless $puf_id;
            say "WARNING: empty txt: $row_count" unless $puf_txt;

            next unless $puf_id ~~ / ^\d /;
            $puf_txt //= '';

            unless %puf{$puf_id}:exists {
                %puf{$puf_id}<max_year> = 0;
                %puf{$puf_id}<sugg_txt> = "void";
            }

            # Collect indices (zero-based row positions) and years;
            # the arrays are autovivified on first use.
            %puf{$puf_id}{$puf_txt}<idx>.push: $row_count - 1;
            %puf{$puf_id}{$puf_txt}<year>.push: $my_row[0];

            # For suggestion, record most recent text (based on year)
            my $year = $my_row[0].Numeric;
            if $year > %puf{$puf_id}<max_year> {
                %puf{$puf_id}<max_year> = $year;
                %puf{$puf_id}<sugg_txt> = $puf_txt;
            }
        }

        if ($csv.eof) {
            say "Processed rows: $row_count";
        }
        print "\n";

        # Get rid of year column
        # Prepare for being sliced (see "Normalize..." below)
        @target_col = @target_col»[1];

        # A code needs attention when it has at least two descriptions;
        # the count also includes the 'max_year' and 'sugg_txt' entries.
        my $remaining = (%puf.keys.grep: { %puf{$_}.elems > 3 }).elems;

        # Process options menu
        # Show only values who repeat more than once and record user input
        my Bool $quit = False;

        SKIPPER:
        for %puf.keys.sort -> $puf_id {
            # Count also 'max_year' and 'sugg_txt'
            next unless %puf{$puf_id}.elems > 3;

            my %replace_this_idx;

            MENU:
            loop {
                ########################
                # +------+   +------+
                # |  $i  |-+->| txt  |
                # +------+ | +------+
                #          | +------+
                #          +->| year |
                #            +------+
                ########################
                my %show;
                my $n_options = 0;   # number of menu entries
                my $suggested = 0;   # menu number of the most recent text

                # Build up menu
                for %puf{$puf_id}.keys.sort -> $puf_txt {
                    next if $puf_txt eq 'sugg_txt' | 'max_year';
                    $n_options++;
                    %show{$n_options}<txt>  = $puf_txt;
                    %show{$n_options}<year> =
                        %puf{$puf_id}{$puf_txt}<year>.map(*.Str).unique.sort.join(' ');
                    $suggested = $n_options if %puf{$puf_id}<sugg_txt> eq $puf_txt;
                }

                # Show menu (in numeric order, also beyond nine entries)
                for 1 .. $n_options -> $j {
                    say "{ $j }. { %show{$j}<txt> }";
                    say " " ~ %show{$j}<year>;
                }

                my $reply = prompt "[code: $puf_id remaining: { $remaining }] Which one (1-{ $n_options })[{ $suggested }]: ";

                # End of input: behave like "q" instead of silently
                # accepting the suggestion for every remaining code.
                without $reply {
                    print "\n";
                    $quit = True;
                    last SKIPPER;
                }
                my $player = $reply.trim;

                if $player eq "q" {
                    $quit = True;
                    last SKIPPER;
                }
                if $player eq "n" {
                    $remaining--;
                    print "\n";
                    next SKIPPER;
                }

                # User hit return key (accepting suggestion)
                $player = $suggested.Str if $player eq "";

                # Create text mapping from numeric menu mapping
                # and collect text to replace
                my $plan = parse_choice($player, %show);
                unless $plan.defined {
                    say $plan.exception.message;
                    next MENU;
                }

                # Lookup and recover numeric indexes
                %replace_this_idx = ();
                for $plan.kv -> $player_txt, @victims {
                    %replace_this_idx{$player_txt} =
                        [ @victims.map({ %puf{$puf_id}{$_}<idx>.Slip }) ];
                }

                if $verbose {
                    for %replace_this_idx.keys.sort -> $player_txt {
                        say "$player_txt --> { %replace_this_idx{$player_txt} }";
                    }
                }
                print "\n";

                $remaining--;
                last;
            } # MENU

            # Normalize the column by replacing all the chosen values
            # (code + description strings)
            for %replace_this_idx.kv -> $player_txt, @rows {
                my $normalized = $puf_id ~ ".- " ~ $player_txt;
                @target_col[$_] = $normalized for @rows;
            }
        } # SKIPPER

        unless $quit {
            my $fh_result = open "clean_class_" ~ $which_col ~ ".csv", :w;
            $fh_result.print("$_\n") for @target_col;
            $fh_result.close;
            say "Written out: clean_class_" ~ $which_col ~ ".csv";

            # For debug purpose
            if $verbose {
                print "=" x 12;
                print "\n";
                say "({ %puf.elems })";
                print "\n";
                for %puf.keys.sort -> $puf_id {
                    # count also 'max_year' and 'sugg_txt'
                    next unless %puf{$puf_id}.elems > 3;

                    say "c: $puf_id";
                    say "w: { %puf{$puf_id}<sugg_txt> }";
                    for %puf{$puf_id}.keys.sort -> $puf_txt {
                        # These two entries are scalars, not per-text records
                        next if $puf_txt eq 'sugg_txt' | 'max_year';
                        say "t: $puf_txt";
                        print "i: ";
                        print "$_ " for %puf{$puf_id}{$puf_txt}<idx>.list;
                        print "\n";
                    }
                    print "\n";
                }
            }
        }
    }
}