in reply to split on commas

Considering you're constantly adding more requirements, it's my opinion that it's time to move to a full-fledged parser.

#!/usr/bin/perl
# make_parser.pl
#
# Precompiles the Parse::RecDescent grammar below into a parser class named
# "Grammar" (the module test.pl loads with `use Grammar`).
#
# Shape of the value produced by the top-level "parse" rule, as built by the
# rule actions:
#   parse     -> array ref of items
#   item      -> [ PREFIX, BODIES ]   PREFIX is the integer from "<*N>", or
#                                     undef when the item has no prefix
#   BODIES    -> array ref of bodies (one element for a bare body, one per
#                member for a parenthesised group)
#   body      -> [ NAME ] or [ NAME, SUFFIX ]
#   SUFFIX    -> [ SEPARATOR, INTEGER, ... ] where SEPARATOR is ',' or ':',
#                or '' when the suffix holds a single integer

use strict;
use warnings;

use Parse::RecDescent qw( );

# Name of the parser class to generate.
my $class_name = 'Grammar';

# <skip:''> disables whitespace skipping, so input must match exactly;
# /\Z/ forces the whole line to be consumed.
my $grammar_text = <<'__END_OF_GRAMMAR__';

   {
      use strict;
      use warnings;
   }

   parse : <skip:''> item_list /\Z/ { $item[2] }

   item_list : <leftop: item ',' item> { $item[1] }

   item : prefix bodies { [ $item[1], $item[2] ] }
        | bodies { [ undef, $item[1] ] }

   bodies : group { $item[1] }
          | body { [ $item[1] ] }

   prefix : '<*' INTEGER '>' { $item[2] }

   group : '(' <leftop: body ',' body> ')' { $item[2] }

   body : NAME suffix(?) { [ $item[1], @{ $item[2] } ] }

   suffix : '<' suffix_list '>' { $item[2] }

   suffix_list : INTEGER suffix_list_[ $item[1] ]

   suffix_list_ : ',' <leftop: INTEGER ',' INTEGER> { [ ',' => $arg[0], @{ $item[2] } ] }
                | ':' <leftop: INTEGER ':' INTEGER> { [ ':' => $arg[0], @{ $item[2] } ] }
                | { [ '' => $arg[0] ] }

   INTEGER : /0|[1-9][0-9]*/

   NAME : /[A-Za-z][A-Za-z0-9_]*/

__END_OF_GRAMMAR__

# Precompile is expected to return false when the grammar is rejected.
my $compiled = Parse::RecDescent->Precompile($grammar_text, $class_name);
die("Bad grammar\n") if !$compiled;
#!/usr/bin/perl
# test.pl
#
# Parses every line of the DATA section with the precompiled Grammar class
# and prints the input line followed by text regenerated from the parse
# result. Lines that fail to parse are reported with warn() and skipped.

use strict;
use warnings;

use Data::Dumper qw( Dumper );
use Grammar      qw( );

#$::RD_TRACE = '';

# ----------vvv Example of what you can do vvv----------

# Turns a suffix ([ SEPARATOR, VALUE, ... ]) back into "<V1sepV2...>".
# Returns '' when the body had no suffix (undef).
sub deparse_suffix {
    my ($suffix) = @_;
    return '' if !defined($suffix);

    my ($sep, @rest) = @$suffix;
    return '<' . join($sep, @rest) . '>';
}

# Turns a body ([ NAME ] or [ NAME, SUFFIX ]) back into "NAME" or
# "NAME<...>".
sub deparse_body {
    my ($body) = @_;
    my ($name, $suffix) = @$body;
    return $name . deparse_suffix($suffix);
}

# Joins a list of bodies with commas. Two or more bodies are wrapped in
# parentheses; a single body is emitted bare, so a one-member group is
# printed without its parentheses.
sub deparse_bodies {
    my ($bodies) = @_;
    my $deparsed = join ',', map deparse_body($_), @$bodies;
    return "($deparsed)" if @$bodies > 1;
    return $deparsed;
}

# Turns an item ([ PREFIX, BODIES ]) back into text, emitting "<*PREFIX>"
# only when PREFIX is defined.
sub deparse_item {
    my ($item) = @_;

    # Unpack the argument itself. The original unpacked @$_, which only
    # worked because the caller happened to invoke this sub from map with
    # $_ aliased to the same element.
    my ($prefix, $bodies) = @$item;

    my $deparsed = '';
    $deparsed .= "<*$prefix>" if defined($prefix);
    $deparsed .= deparse_bodies($bodies);
    return $deparsed;
}

# Joins the deparsed items of one parsed line with commas.
sub deparse_items {
    my ($items) = @_;
    return join ',', map deparse_item($_), @$items;
}

# ----------^^^ Example of what you can do ^^^----------

my $parser = Grammar->new();

# A named lexical is used for the line instead of $_ so that nothing called
# inside the loop body can clobber it.
while (my $line = <DATA>) {
    chomp $line;

    my $items = $parser->parse($line)
        or do {
            warn("Bad data at line $.\n");
            next;
        };

    print("in: $line\n");
    #print Dumper $items;
    print("out: ", deparse_items($items), "\n");
    print("\n");
}

__DATA__
<*2>FOO<2,1>,<*3>(SigB<8:0:2>,BAR),<*2>Siga<2:0>,Sigb<8,7,6,5,0>
$ perl make_parser.pl && perl test.pl
in: <*2>FOO<2,1>,<*3>(SigB<8:0:2>,BAR),<*2>Siga<2:0>,Sigb<8,7,6,5,0>
out: <*2>FOO<2,1>,<*3>(SigB<8:0:2>,BAR),<*2>Siga<2:0>,Sigb<8,7,6,5,0>

You didn't specify what data you need from the line, so the parser returns everything. It could be simplified if your requirements are more specific.

Replies are listed 'Best First'.
Re^2: split on commas
by pip9ball (Acolyte) on Jun 05, 2009 at 22:08 UTC
    Wow, thanks for your in-depth reply! I can see how this can be very powerful, but I'm afraid I don't understand
    the Parse::RecDescent module well enough to expand on the grammar.

    I'll see if I can read up on the module :-)

    Thanks again!
      You still didn't specify what data you need from the line. If you really just want to split on the commas, you can use Text::Balanced.
      # Splits each DATA line on the commas that sit outside any bracketed
      # section and prints one piece per line, with a blank line between the
      # output of consecutive input lines.

      use strict;
      use warnings;

      use Text::Balanced qw( extract_bracketed extract_multiple );

      while (my $line = <DATA>) {
          chomp $line;

          # Separate the output of successive input lines.
          print("\n") unless $. == 1;

          # Tokenise into literal commas, bracketed chunks and whatever
          # text lies between them.
          my @tokens = extract_multiple($line, [ ',', \&extract_bracketed ]);

          # Start with one empty piece whenever there is anything to
          # collect; each comma token then opens a new piece and every
          # other token is appended to the current one.
          my @pieces = @tokens ? ('') : ();
          for my $token (@tokens) {
              if ($token ne ',') {
                  $pieces[-1] .= $token;
                  next;
              }
              push @pieces, '';
          }

          for my $piece (@pieces) {
              print("$piece\n");
          }
      }

      __DATA__
<*2>FOO<2,1>,<*3>(SigB<8:0:2>,BAR),<*2>Siga<2:0>,Sigb<8,7,6,5,0>
        Sorry, I apologize; I didn't see this question. Yes, all I want is each token back.
        This solution seems to be working.

        Thanks!