Hello, I'm dealing with a CSV file that has some 'bad' lines. How do I handle multiple missing values, i.e. several consecutive commas? My code works for single missing values.

Lines with multiple missing values occur only rarely.

I'd like a solution that is still fast at parsing. I'm using Parse::CSV:
my $csv = Parse::CSV->new( file => $xFile[$k], sep_char => ',', names => 1, empty_is_undef => 1, auto_diag => 1, binary => 0, header =>'auto' );
11004516,0,0,9,9,3,12477,,,4,,0,,,3,38a947a1,b66b7850,6a14f9b9 11006995,1,,-1,,,,,,,,,,,,fbc55dae,9a89b36c,58e67aaf,f600ec0b,

the error is "Argument "" isn't numeric in numeric eq (==)"

I want to substitute '0' for blanks or missing values. I've tried code with various comparisons, like:
if(length($str_check)==0){return ('0');}else{return($str_check);}
if($str_check = undef){do stuff};{return ('0');}else{return($str_check +);}

any help appreciated.

thanks

Here is my full code. Yes, I admit I am new to serious Perl scripts, and this code is not optimal.

use 5.12.0;
use warnings;
use strict;
use Carp;
use sigtrap 'handler' => \&myhand, 'INT';
use Cwd;
use Benchmark;
use File::Basename;
use Acme::Comment type => 'C++', own_line => 1;
use English '-no_match_vars';
#####################################################################
use Parse::CSV;
use Text::CSV_XS;
#####################################################################
#
# Split every CSV file matching "$s_DIR/x*" into two space-separated text
# tables: rows whose second column equals 1 go to the "pass" table, all
# other rows go to the "fail" table.  The first two columns are dropped
# from the output; every blank or missing field is written as '0'.
#
# NOTE: Acme::Comment installs a C++-style source filter, so this script
# deliberately avoids the defined-or operator and uses explicit defined()
# tests instead.

system('clear');

my $dbg_1 = 0;
my $start = time;
my $t0    = Benchmark->new;
print "\n Current Date and Time -> " . localtime() . "\n";

my $Base  = '/Users/Documents/matlab/projects/kaggle/criteo';
my $s_DIR = $Base . '/input/tmp';
my $p_DIR = $Base . '/output/data/pass';
my $f_DIR = $Base . '/output/data/fail';
my @xFile = grep { -f $_ } glob("$s_DIR/x*");

if ($dbg_1) {
    for my $f (@xFile) {
        my $filesize = -s $f;
        printf "%-25s size is %15d \n", $f, $filesize;
    }
}

# Output handles are file-scoped so that the helper subs and the SIGINT
# handler can reach them.  They are undef whenever no file is open.
my $pass_fh;
my $fail_fh;

# Iterate over the indices that actually exist.
for my $file_idx (0 .. $#xFile) {
    my $file   = $xFile[$file_idx];
    my $indexP = 0;
    my $indexF = 0;

    # fileparse() returns (name, path, suffix) in that order.  The suffix
    # pattern is a compiled regex so the backslash before the dot survives.
    my ($name, $path, $suffix) = fileparse($file, qr/\.[^.]*/);
    print 'processing ' . $name . "\n";

    my $f_Pass = $p_DIR . "/pass_table_" . $name . '.txt';
    my $f_Fail = $f_DIR . "/fail_table_" . $name . '.txt';

    open($pass_fh, '>', $f_Pass)
        or die "Can't open output file $f_Pass: $!";
    open($fail_fh, '>', $f_Fail)
        or die "Can't open output file $f_Fail: $!";

    if ($dbg_1) {
        print "xF=> " . $file_idx . "\n";
        print "xFile[xF]=> " . $file . "\n";
        print "name=> " . $name . " \n";
        print "path=> " . $path . " \n";
        print "suffix=> " . $suffix . " \n";
        print "f_Pass=> " . $f_Pass . "\n";
        print "f_Fail=> " . $f_Fail . "\n";
    }

    # NOTE(review): confirm which of these options Parse::CSV forwards to
    # Text::CSV_XS.  The row handling below does not depend on them:
    # check_blank() normalises undef and empty fields itself.
    my $csv = Parse::CSV->new(
        file           => $file,
        sep_char       => ',',
        names          => 1,
        empty_is_undef => 1,
        blank_is_undef => 1,
        auto_diag      => 1,
        binary         => 1,
        header         => 'auto',
        callbacks      => {
            after_parse => sub { $_ ||= 0 for @{ $_[1] } },
        },
    );

    # Column names from the header line, in file order.
    my @vals = $csv->names;

    if (@vals < 2) {
        carp "Skipping $file: expected at least 2 columns, found "
            . scalar(@vals);
        close($pass_fh) or die "Couldn't close output file properly: $!";
        close($fail_fh) or die "Couldn't close output file properly: $!";
        $pass_fh = undef;
        $fail_fh = undef;
        next;
    }

    my $label_col    = $vals[1];
    my @feature_cols = (2 .. $#vals);

    while (my $value = $csv->fetch) {
        # Normalise the label before the numeric comparison so that a blank
        # or missing label counts as 0 instead of raising a warning.
        if (check_blank($value->{$label_col}) == 1) {
            for my $col (@feature_cols) {
                process_table($col, check_blank($value->{ $vals[$col] }));
            }
            print {$pass_fh} "\n";
            $indexP++;
        }
        else {
            for my $col (@feature_cols) {
                process_table2($col, check_blank($value->{ $vals[$col] }));
            }
            print {$fail_fh} "\n";
            $indexF++;
        }
    }

    # fetch() also returns false on a parse error, so report it.
    if ($csv->errstr) {
        carp "Parse error in $file: " . $csv->errstr;
    }

    my $total = $indexP + $indexF;
    # Guard against division by zero for a file with no data rows.
    my $pct = $total ? ($indexP / $total * 100) : 0;
    print " totalP $indexP totalF " . $indexF . " total " . $total . " \n";
    printf "%% totalP/(totalF+totalP)= %.2f %% \n", $pct;

    close($pass_fh) or die "Couldn't close output file properly: $!";
    close($fail_fh) or die "Couldn't close output file properly: $!";
    $pass_fh = undef;
    $fail_fh = undef;
}

######################## system ######################################
my $t1 = Benchmark->new;
my $td = timediff($t1, $t0);
print "Code took:", timestr($td), "\n";
printf "++Finished program in ->\t %5.2f seconds\n", time - $start;
print "\n";

# Normal completion is success.
exit 0;

######################## sub #########################################

# check_blank($field)
# Returns '0' when $field is undef or the empty string, otherwise returns
# $field unchanged.  Testing defined() first avoids the warnings raised
# when a short row leaves trailing columns undefined.
sub check_blank {
    my ($str_check) = @_;
    return '0' unless defined $str_check && length $str_check;
    return $str_check;
}

# process_table($column_index, $value)
# Writes $value followed by one space to the "pass" table.  Column index 1
# is reserved for special handling and currently writes nothing.
sub process_table {
    my ($kk, $result) = @_;
    if ($kk == 1) {
        # do something here
    }
    else {
        # print, not printf: the value is data and must not be treated
        # as a format string.
        print {$pass_fh} $result . " ";
    }
    return;
}

# process_table2($column_index, $value)
# Same as process_table(), but writes to the "fail" table.
sub process_table2 {
    my ($kk, $result) = @_;
    if ($kk == 1) {
        # do something here
    }
    else {
        print {$fail_fh} $result . " ";
    }
    return;
}

# myhand($signal_name)
# SIGINT handler installed through sigtrap: closes whichever output files
# are currently open and exits with status 1.
sub myhand {
    my ($signal) = @_;
    $signal = 'INT' unless defined $signal;
    print "\n caught SIG$signal";
    for my $fh ($pass_fh, $fail_fh) {
        next unless defined $fh;
        close($fh) or warn "Couldn't close output file properly: $!";
    }
    print "\nHey Stop that SIG hurts!";
    print "\nCleaning up now...";
    exit 1;
}

# pad($num, $len)
# Returns $num left-padded with zeros to at least $len characters; $num is
# returned unchanged when it is already that long.  No prototype, so the
# two arguments can actually be passed.
sub pad {
    my ($num, $len) = @_;
    my $fill = $len - length $num;
    return $num if $fill <= 0;
    return ('0' x $fill) . $num;
}


In reply to csv parsing with multiple missing values/multiple commas by f77coder

Title:
Use:  <p> text here (a paragraph) </p>
and:  <code> code here </code>
to format your post, it's "PerlMonks-approved HTML":



  • Posts are HTML formatted. Put <p> </p> tags around your paragraphs. Put <code> </code> tags around your code and data!
  • Titles consisting of a single word are discouraged, and in most cases are disallowed outright.
  • Read Where should I post X? if you're not absolutely sure you're posting in the right place.
  • Please read these before you post! —
  • Posts may use any of the Perl Monks Approved HTML tags:
    a, abbr, b, big, blockquote, br, caption, center, col, colgroup, dd, del, details, div, dl, dt, em, font, h1, h2, h3, h4, h5, h6, hr, i, ins, li, ol, p, pre, readmore, small, span, spoiler, strike, strong, sub, summary, sup, table, tbody, td, tfoot, th, thead, tr, tt, u, ul, wbr
  • You may need to use entities for some characters, as follows. (Exception: Within code tags, you can put the characters literally.)
            For:     Use:
    & &amp;
    < &lt;
    > &gt;
    [ &#91;
    ] &#93;
  • Link using PerlMonks shortcuts! What shortcuts can I use for linking?
  • See Writeup Formatting Tips and other pages linked from there for more info.