in reply to splitting headache
# It's not the best piece of code I've ever written, and I'm not sure if it
# works in all cases, but maybe it could help you... But it's a lot of code
# just for nearly nothing :-)
#!perl -w
use strict;
use warnings;    # -w above is only honoured when the #! line is line 1 of the file

# Run the driver only when this file is executed directly, so the subs below
# can also be loaded via require/do without touching any file.
main() unless caller;

# ------------------------------------------------------------
# Read "anyfile.txt", skip its first (header) line and print the fields of
# every following line joined by ":_:", one output line per input line.
# Dies with "Error: <reason>" if the file cannot be opened.
sub main {
    my $file = "anyfile.txt";
    my $sep  = ';';

    open(my $csv, '<', $file) or die "Error: $!\n";
    while (my $line = <$csv>) {
        next if $. == 1;    # kill headline: dirty
        my @list = ExtractFields($line, $sep);
        print join(":_:", @list), "\n";
    }
    close($csv);
    return;
}    # main

# ------------------------------------------------------------
# Split one line of separated text into its fields.
#
#   $string - the line; one trailing line ending (LF or CRLF) is ignored
#   $sep    - the field separator (the position arithmetic assumes one char)
#
# Returns the list of fields, INCLUDING the one after the last separator
# (so a trailing separator yields a trailing empty field).  A field that is
# completely wrapped in double quotes has that pair of quotes removed, and
# separators inside such a field do not split it.  An undefined or empty
# line yields an empty list.
sub ExtractFields {
    my ($string, $sep) = @_;

    return () unless defined $string;
    $string =~ s/\r?\n\z//;    # the line ending is not part of the last field
    return () if $string eq '';

    my @cuts  = FilterIndexList($string, $sep);
    my @list  = ();
    my $start = 0;
    foreach my $cut (@cuts) {
        push @list, substr($string, $start, $cut - $start);
        $start = $cut + 1;
    }
    # Everything after the last separator is a field too.
    push @list, substr($string, $start);

    # filter leading and trailing "
    s/\A"(.*)"\z/$1/s foreach @list;

    return (@list);
}    # ExtractFields

# ------------------------------------------------------------
# Return the positions of those occurrences of $sep in $string that really
# separate fields, i.e. all separator positions except the ones enclosed in
# a quoted field.
#
# A double quote opens a quoted field only if it sits at the start of the
# string or directly after a separator; the field is closed by the first
# later double quote that sits at the end of the string or directly before
# a separator.  Quotes that do not fit this pattern are ignored.
sub FilterIndexList {
    my ($string, $sep) = @_;

    my @sep    = GetIndexList($string, $sep);
    my @quotes = GetIndexList($string, '"');

    # Look separators up by position.  Using a hash (rather than testing the
    # position itself for truth) keeps a separator at index 0 valid.  The
    # hash is never updated after filtering @sep: every position removed lies
    # before the closing quote, and later checks only look beyond it.
    my %is_sep   = map { $_ => 1 } @sep;
    my $last_pos = length($string) - 1;

    my $i = 0;
    while ($i <= $#quotes) {
        my $open = $quotes[$i];

        # invalid begin; skip this quote
        unless ($open == 0 or $is_sep{ $open - 1 }) {
            $i++;
            next;
        }

        # Search for the first quote after $open that ends a field.
        my $close_idx = $i + 1;
        while ($close_idx <= $#quotes) {
            my $candidate = $quotes[$close_idx];
            last if $candidate == $last_pos or $is_sep{ $candidate + 1 };
            $close_idx++;
        }

        # Unbalanced quote: no later quote can close any field either, so
        # stop here instead of retrying the same opening quote forever.
        last if $close_idx > $#quotes;

        # kill positions in @sep between the two quotes
        my $close = $quotes[$close_idx];
        @sep = grep { $_ < $open or $_ > $close } @sep;

        $i = $close_idx + 1;
    }

    return (@sep);
}    # FilterIndexList

# ------------------------------------------------------------
# Return a list of indices of positions of $subStr in $string
# (ascending, overlapping matches included).  An undefined string or an
# undefined/empty search string yields an empty list.
sub GetIndexList {
    my ($string, $subStr) = @_;

    # index() "finds" an empty string at every offset and clamps the offset
    # to the string length, which would make the loop below run forever.
    return () unless defined $string and defined $subStr and length $subStr;

    my @list = ();
    my $pos  = -1;    # start position
    while (1) {
        # search for the next $subStr behind the previous hit
        $pos = index($string, $subStr, $pos + 1);
        # not found any more: done
        last if $pos == -1;
        # else push found position onto list
        push(@list, $pos);
    }
    return (@list);
}    # GetIndexList
# ------------------------------------------------------------
Best regards,
perl -le "s==*F=e=>y~\*martinF~stronat~=>s~[^\w]~~g=>chop,print"
|
|---|