this multiple missing values happens rarely
. i'd like a solution that is still fast at parsing. i'm using Parse::CSV11004516,0,0,9,9,3,12477,,,4,,0,,,3,38a947a1,b66b7850,6a14f9b9 11006995,1,,-1,,,,,,,,,,,,fbc55dae,9a89b36c,58e67aaf,f600ec0b,my $csv = Parse::CSV->new( file => $xFile[$k], sep_char => ',', names => 1, empty_is_undef => 1, auto_diag => 1, binary => 0, header =>'auto' );
the error is "Argument "" isn't numeric in numeric eq (==)"
i want so substitute '0' for blanks or missing values i've tried code with various comparisons likeif(length($str_check)==0){return ('0');}else{return($str_check);}
if($str_check = undef){do stuff};{return ('0');}else{return($str_check +);}
any help appreciated.
thanks
Here is my full code . Yes, I admit I am new to serious Perl scripts and this code is not optimal.
use 5.12.0; use warnings; use strict; use Carp; use sigtrap 'handler' => \&myhand, 'INT'; use Cwd; use Benchmark; use File::Basename; use Acme::Comment type => 'C++', own_line => 1; use English '-no_match_vars'; ##################################################################### use Parse::CSV; use Text::CSV_XS; ##################################################################### system('clear'); my $dbg_1=0; my $dbg_2=1; my $start=time; my $t0 = new Benchmark; print "\n Current Date and Time -> " . localtime() . "\n"; my $Base='/Users/Documents/matlab/projects/kaggle/criteo'; my $s_DIR=$Base.'/input/tmp'; my $p_DIR=$Base.'/output/data/pass'; my $f_DIR=$Base.'/output/data/fail'; my @xFile = grep {-f $_}glob( "$s_DIR/x*"); # if($dbg_1){ foreach my $f (@xFile) { my $filesize = -s $f; printf "%-25s size is %15d \n", ($f, $filesize); }; }; # #initialize vars my $k=0; my $noLines=9e3; my $count=0; my $result=0; my $temp=0; my $value=0; my $name=""; my $n=@xFile; for ($k = 0; $k <= $n; $k++){ my $indexF=0; my $indexP=0; (my $suffix,my $path,$name)=fileparse($xFile[$k], "\.[^.]*" ); print 'processing '.$name."\n"; my $f_Pass=$p_DIR."/pass_table_".$name.'.txt'; my $f_Fail=$f_DIR."/fail_table_".$name.'.txt'; open(DATA,">".$f_Pass) || die "Can't open output file"; open(DATA2,">".$f_Fail) || die "Can't open output file"; if($dbg_1){ print "xF=> ".$k."\n"; print "xFile[xF]=> ".$xFile[$k]."\n"; print "name=> ".$name." \n"; print "path=> ".$path." \n"; print "suffix=> ".$suffix." \n"; print "f_Pass=> ".$f_Pass."\n"; print "f_Fail=> ".$f_Fail."\n"; }; my $csv = Parse::CSV->new( file => $xFile[$k], sep_char => ',', names => 1, empty_is_undef => 1, blank_is_undef => 1, auto_diag => 1, binary => 1, header =>'auto', callbacks => { after_parse => sub { $_ ||= 0 for @{$_[1] } },} ); my @hash = $csv->names; #returns hash my @vals = values @hash; #hash to array # for ($count = 0; $count <= $noLines; $count++) { # $value = $csv->fetch; while ( $value = $csv->fetch ){ if($value->{$vals[1]}==1){ for $k (2 .. $#vals) { $temp=$value->{$vals[$k]}; $result=check_blank($temp); process_table($k,$result); }; printf DATA "\n"; $indexP=$indexP+1; }else{ for $k (2 .. $#vals) { $temp=$value->{$vals[$k]}; $result=check_blank($temp); process_table2($k,$result); }; printf DATA2 "\n"; $indexF=$indexF+1; }; }; print " totalP $indexP totalF ".($indexF-0)." total ".($indexP+$inde +xF)." \n"; printf "%% totalP/(totalF+totalP)= %.2f %% \n",($indexP/($indexP+$in +dexF)*100); close(DATA) || die "Couldn't close output file properly"; close(DATA2) || die "Couldn't close output file properly"; }; ######################## sub ######################################### +### sub check_blank{ my $str_check= $_[0]; if((length($str_check)==0)) { return ('0'); }else{ return($str_check); }; exit 1; }; sub process_table{ my $kk= $_[0]; my $result= $_[1]; if($kk==1){ #do something here }else{ printf DATA $result." "; }; return; exit 1; }; sub process_table2{ my $kk= $_[0]; my $result= $_[1]; if($kk==1){ #do something here }else{ printf DATA2 $result." "; }; return; exit 1; }; ########################system ####################################### +# my $t1 = new Benchmark; my $td = timediff($t1, $t0); print "Code took:",timestr($td),"\n"; printf "++Finished program in ->\t %5.2f seconds\n",time-$start; print "\n"; ###################################################################### +#### sub myhand { print "\n caught $SIG{INT}", @_; close(DATA) || die "Couldn't close output file properly"; print "\nHey Stop that SIG hurts!"; print "\nCleaning up now..."; exit 1; }; sub pad (){ my ( $num, $len ) = @_; return '0' x ( $len - length $num ) . $num; exit 1; }; ###################################################################### +##### exit 1;
In reply to csv parsing with multiple missing values/multiple commas by f77coder
| For: | Use: | ||
| & | & | ||
| < | < | ||
| > | > | ||
| [ | [ | ||
| ] | ] |