comment on

Here is how that might look with Regexp::Grammars:

#!/usr/bin/perl --

use strict;
use warnings;

# Sample query to parse: bare words, a double-quoted phrase and a
# parenthesised sub-expression, joined by the operators AND / OR.
my $s = q[dogs OR cats OR "flying fish" OR (shrimp AND squid)];

# Build the query grammar as a compiled regex.
# Regexp::Grammars is loaded inside the do-block, next to the qr{} it applies to.
# Grammar summary (see the rules below):
#   top level : zero or more TERMs
#   TERM      : an OP, an IDENT, a STRING, or a LIST
#   STRING    : a double-quoted run of non-quote characters
#   OP        : the literal AND or OR
#   IDENT     : one or more word characters
#   LIST      : zero or more TERMs wrapped in parentheses
# Per the sample dump after __END__, IDENT and STRING terms show up as plain
# strings in the TERM array, while OP and LIST terms show up as hashes.
# NOTE(review): OP is tried before IDENT and has no word-boundary check, so a
# word such as "ORANGE" presumably parses as OP "OR" + IDENT "ANGE" - confirm.
my $parser = do {
    use Regexp::Grammars;
    qr{
# <logfile: - >
<[TERM]>*
<rule: TERM>  <OP> | <MATCH=IDENT> | <MATCH=STRING> | <LIST>
<rule: STRING> "([^"]+?)"
<rule: OP> AND|OR
<rule: IDENT> \w+
<rule: LIST> \( <[TERM]>* \)
    }xs
};

# Parse the sample query and print the tree at three stages:
# raw parse result, after kek() strips the "" keys, and after
# reorder_terms() converts it to a nested prefix list.
if( $s =~ $parser ){
    use Data::Dumper();

    # Take a private copy of the match result hash, then clear the original.
    my %rash = %/;#bah for scite lexer /#
    undef %/;# bah for scite lexer /#

    # One place for the dump format shared by all three stages.
    my $show = sub {
        my( $data ) = @_;
        print Data::Dumper->new([$data])->Indent(1)->Useqq(1)->Dump,"\n";
    };

    $show->( \%rash );                  # stage 1: raw parse tree

    kek( \%rash );                      # kill empty key
    $show->( \%rash );                  # stage 2: "" keys removed

    my $rash = reorder_terms( \%rash ); # consumes %rash
    $show->( $rash );                   # stage 3: prefix list
}

# reorder_terms( $ref )
#
# Convert one level of a parse-tree hash into a nested prefix list.
#
#   $ref - hash ref with a TERM key holding an array ref. Each element is
#          either a plain (non-reference) operand, a hash ref with an OP
#          key (an operator), or a hash ref with a LIST key (a sub-tree of
#          the same shape, converted recursively).
#
# Returns an array ref [ $operator, @operands ].
#   * If the level uses a single kind of operator (or none), the result is
#     [ $first_operator, @operands ]; $first_operator is undef when the
#     level contains no operator at all.
#   * If the level mixes OR with another operator, the operands are split
#     at each OR and every run holding more than one operand is wrapped as
#     [ $run_operator, @run_operands ], so that "a AND b OR c" becomes
#     [ "OR", [ "AND", "a", "b" ], "c" ] instead of losing the OR.
#
# Side effects: consumes its input - OP and LIST keys are deleted while
# walking and the hash behind $ref is emptied before returning.
#
# Dies with "uh oh, no TERM key" when $ref has no true TERM entry, and with
# "uh oh, no OP or LIST key" for a reference element that has neither key.
sub reorder_terms {
    my( $ref ) = @_;

    $$ref{TERM} or die "uh oh, no TERM key";

    my @op;      # every operator, in input order
    my @term;    # every operand, in input order
    # Operands partitioned at each OR; op is the first non-OR operator
    # seen inside that run.
    my @group = ( { op => undef, term => [] } );

    for my $t( @{$$ref{TERM}} ){
        my $operand;
        if( !ref $t ){
            $operand = $t;
        }elsif( $$t{OP} ){
            my $op = delete $$t{OP};
            push @op, $op;
            if( $op eq 'OR' ){
                # OR binds loosest: it closes the current run.
                push @group, { op => undef, term => [] };
            }elsif( !defined $group[-1]{op} ){
                $group[-1]{op} = $op;
            }
            next;
        }elsif( $$t{LIST} ){
            $operand = reorder_terms( delete $$t{LIST} );
        }else{
            die "uh oh, no OP or LIST key";
        }
        push @term, $operand;
        push @{ $group[-1]{term} }, $operand;
    }
    undef %$ref;

    # A single kind of operator (or none) needs no grouping: keep the flat
    # shape [ operator, operands... ].
    my %distinct = map { $_ => 1 } @op;
    my $mixed_with_or = $distinct{OR} && keys(%distinct) > 1;
    return [ $op[0], @term ] unless $mixed_with_or;

    # Mixed operators: each non-empty run between ORs becomes one operand
    # of the top-level OR; runs of one operand are not wrapped.
    my @operand;
    for my $g( @group ){
        my @member = @{ $$g{term} };
        next unless @member;
        push @operand, ( @member == 1 ? $member[0] : [ $$g{op}, @member ] );
    }
    return [ 'OR', @operand ];
}


# kek( $ref )
#
# "Kill empty key": walk a nested structure of hash and array references
# and delete the "" key from every hash found. Works in place; references
# of any other type are left alone and not descended into.
# Always returns nothing (bare return).
sub kek {
    my ($ref) = @_;
    my $kind = ref $ref;

    # Collect the values to descend into; for a hash, drop the "" key first.
    my @children;
    if( $kind eq 'HASH' ){
        delete $ref->{""};
        @children = values %{$ref};
    }elsif( $kind eq 'ARRAY' ){
        @children = @{$ref};
    }

    for my $child( @children ){
        kek($child) if ref $child;
    }
    return;
}

__END__
$VAR1 = {
  "" => "dogs OR cats OR \"flying fish\" OR (shrimp AND squid)",
  "TERM" => [
    "dogs",
    {
      "" => " OR",
      "OP" => "OR"
    },
    "cats",
    {
      "" => " OR",
      "OP" => "OR"
    },
    "\"flying fish\"",
    {
      "" => " OR",
      "OP" => "OR"
    },
    {
      "" => " (shrimp AND squid)",
      "LIST" => {
        "" => "(shrimp AND squid)",
        "TERM" => [
          "shrimp",
          {
            "" => " AND",
            "OP" => "AND"
          },
          "squid"
        ]
      }
    }
  ]
};

$VAR1 = {
  "TERM" => [
    "dogs",
    {
      "OP" => "OR"
    },
    "cats",
    {
      "OP" => "OR"
    },
    "\"flying fish\"",
    {
      "OP" => "OR"
    },
    {
      "LIST" => {
        "TERM" => [
          "shrimp",
          {
            "OP" => "AND"
          },
          "squid"
        ]
      }
    }
  ]
};

$VAR1 = [
  "OR",
  "dogs",
  "cats",
  "\"flying fish\"",
  [
    "AND",
    "shrimp",
    "squid"
  ]
];
[download]

Uncomment the <logfile: - > line in the grammar to get debugging output. See also KinoSearch::Docs::Cookbook::CustomQueryParser and Text::Query.

In reply to Re^2: Break string into array by Anonymous Monk
in thread Break string into array by Anonymous Monk

Posts are HTML formatted. Put <p> </p> tags around your paragraphs. Put <code> </code> tags around your code and data!

Titles consisting of a single word are discouraged, and in most cases are disallowed outright.

Read Where should I post X? if you're not absolutely sure you're posting in the right place.

Please read these before you post! —

Posts may use any of the Perl Monks Approved HTML tags:

a, abbr, b, big, blockquote, br, caption, center, col, colgroup, dd, del, details, div, dl, dt, em, font, h1, h2, h3, h4, h5, h6, hr, i, ins, li, ol, p, pre, readmore, small, span, spoiler, strike, strong, sub, summary, sup, table, tbody, td, tfoot, th, thead, tr, tt, u, ul, wbr

You may need to use entities for some characters, as follows. (Exception: Within code tags, you can put the characters literally.)

	For:		Use:
	&		`&amp;`
	<		`&lt;`
	>		`&gt;`
	[		`&#91;`
	]		`&#93;`

Link using PerlMonks shortcuts! What shortcuts can I use for linking?

See Writeup Formatting Tips and other pages linked from there for more info.