in reply to parsing malformed CSV with per column quote chars
Update. Can reproduce
$ cat test.csv 'PRODUCT CODE','CATEGORY','CATEGORY DESCRIPTION','CODE DESCRIPTION','OPTIONAL CATEGORY','OPTIONAL CATEGORY DESCRIPTION' ' ','0 ','No Item',"INVALID CODE IN USER SUPPLIED DATA",' ',' ' '00100','1 ','Cat',"ORANGE CAT",' ',' ' '82131','94 ','Dog',"GREEN DOG",' ',' ' '82132','94 ','Dog',"'JOHNS' FLYING' DOG (Start 2001)",' ',' ' '82133','94 ','Dog',"MAGENTA DOG (End 2009)",' ',' ' $ perldoc -m CSV | head -7 use Text::CSV_XS qw( csv ); use Data::Peek; BEGIN { *CSV:: = \%Text::CSV_XS::; } $VERSION = "0.02"; 1; $ perl -MCSV -e'DDumper csv(in=>"test.csv",diag_verbose=>9,quote_char=>"\x27",escape_char=>undef)' # CSV_XS ERROR: 2034 - EIF - Loose unescaped quote @ rec 4 pos 24 '82132','94 ','Dog',"'JOHNS' FLYING' DOG (Start 2001)",' ',' ' ' ^ [ [ 'PRODUCT CODE', 'CATEGORY', 'CATEGORY DESCRIPTION', 'CODE DESCRIPTION', 'OPTIONAL CATEGORY', 'OPTIONAL CATEGORY DESCRIPTION' ], [ ' ', '0 ', 'No Item', '"INVALID CODE IN USER SUPPLIED DATA"', ' ', ' ' ], [ '00100', '1 ', 'Cat', '"ORANGE CAT"', ' ', ' ' ], [ 82131, '94 ', 'Dog', '"GREEN DOG"', ' ', ' ' ] ]
You need allow_loose_quotes:
perl -MCSV -e'DDumper csv(in=>"test.csv",diag_verbose=>9,quote_char=>"\x27",escape_char=>undef,allow_loose_quotes=>1)' [ [ 'PRODUCT CODE', 'CATEGORY', 'CATEGORY DESCRIPTION', 'CODE DESCRIPTION', 'OPTIONAL CATEGORY', 'OPTIONAL CATEGORY DESCRIPTION' ], [ ' ', '0 ', 'No Item', '"INVALID CODE IN USER SUPPLIED DATA"', ' ', ' ' ], [ '00100', '1 ', 'Cat', '"ORANGE CAT"', ' ', ' ' ], [ 82131, '94 ', 'Dog', '"GREEN DOG"', ' ', ' ' ], [ 82132, '94 ', 'Dog', '"\'JOHNS\' FLYING\' DOG (Start 2001)"', ' ', ' ' ], [ 82133, '94 ', 'Dog', '"MAGENTA DOG (End 2009)"', ' ', ' ' ] ]
To get (with Text::CSV_XS) hashes, you need headers and a map:
$ perl -MCSV -e'DDumper{map{$_->{"PRODUCT CODE"}=>$_}@{csv(in=>"test.csv",diag_verbose=>9,quote_char=>"\x27",escape_char=>undef,allow_loose_quotes=>1,headers=>"auto")}}' { ' ' => { CATEGORY => '0 ', 'CATEGORY DESCRIPTION' => 'No Item', 'CODE DESCRIPTION' => '"INVALID CODE IN USER SUPPLIED DATA"', 'OPTIONAL CATEGORY' => ' ', 'OPTIONAL CATEGORY DESCRIPTION' => ' ', 'PRODUCT CODE' => ' ' }, '00100' => { CATEGORY => '1 ', 'CATEGORY DESCRIPTION' => 'Cat', 'CODE DESCRIPTION' => '"ORANGE CAT"', 'OPTIONAL CATEGORY' => ' ', 'OPTIONAL CATEGORY DESCRIPTION' => ' ', 'PRODUCT CODE' => '00100' }, 82131 => { CATEGORY => '94 ', 'CATEGORY DESCRIPTION' => 'Dog', 'CODE DESCRIPTION' => '"GREEN DOG"', 'OPTIONAL CATEGORY' => ' ', 'OPTIONAL CATEGORY DESCRIPTION' => ' ', 'PRODUCT CODE' => 82131 }, 82132 => { CATEGORY => '94 ', 'CATEGORY DESCRIPTION' => 'Dog', 'CODE DESCRIPTION' => '"\'JOHNS\' FLYING\' DOG (Start 2001)"', 'OPTIONAL CATEGORY' => ' ', 'OPTIONAL CATEGORY DESCRIPTION' => ' ', 'PRODUCT CODE' => 82132 }, 82133 => { CATEGORY => '94 ', 'CATEGORY DESCRIPTION' => 'Dog', 'CODE DESCRIPTION' => '"MAGENTA DOG (End 2009)"', 'OPTIONAL CATEGORY' => ' ', 'OPTIONAL CATEGORY DESCRIPTION' => ' ', 'PRODUCT CODE' => 82133 } }
|
|---|
| Replies are listed 'Best First'. | |
|---|---|
|
Re^2: parsing malformed CSV with per column quote chars (loosely)
by tye (Sage) on Aug 07, 2014 at 18:49 UTC | |
by Tux (Canon) on Aug 07, 2014 at 19:23 UTC |