use strict;
use warnings;

# Translates simple boolean search expressions into a prefix-operator form.
#   "a and b"  ->  "+a +b"     (both operands get a leading '+')
#   "a or b"   ->  "a b"       (operands are emitted bare)
# Balanced parenthesised groups are translated recursively and re-emitted
# between " ( " and " ) ".  Sample inputs are read from the DATA section;
# the lines after the "__END__" marker in that section are the expected
# output for reference and are not fed to the parser.

# Matches one token: either a balanced (possibly nested) parenthesised group
# or a run of characters that contains no parentheses.
# $re is declared *before* the assignment on purpose: the (??{ $re }) code
# block must close over this lexical.  Written as "my $re = qr{...}" the
# block would refer to the (undefined) package variable instead, and nested
# groups would never match.
my $re;
$re = qr{
      \(
      (?:
          (?> [^()]+ )      # run of non-parens, no backtracking
        |
          (??{ $re })       # nested balanced group
      )*
      \)
    |
      (?> [^()]+ )          # plain text outside any parentheses
}x;

while ( my $line = <DATA> ) {
    chomp $line;

    # Once __DATA__ has been seen, "__END__" is ordinary data, so stop
    # explicitly instead of parsing the expected-output lines as input.
    last if $line =~ /\A__END__\s*\z/;

    print parse($line) . "\n\n\n";
}

# parse($line)
#   Splits $line into tokens with $re and translates each one.
#   A parenthesised token is unwrapped, translated recursively and wrapped
#   in " ( " ... " ) "; any other token is handed to matchPart().
#   Tokenising stops at the first position where $re cannot match (for
#   example an unbalanced "("), so the remainder of the line is ignored.
#   Returns the translated string; the empty string if nothing matched.
sub parse {
    my ($line) = @_;
    my $result = '';

    while ( $line =~ m/\G($re)/g ) {
        my $part = $1;

        # A group token always starts with "(" and ends with ")"; the
        # anchors plus /s strip exactly that outer pair, even if the
        # group spans several lines.
        if ( $part =~ m/\A\((.*)\)\z/s ) {
            $result .= ' ( ' . parse($1) . ' ) ';
        }
        else {
            my ( $op, $w1, $w2 ) = matchPart($part);
            $result .= ' ' . $op . $w1;

            # length, not truthiness: an operand of "0" must be kept.
            $result .= ' ' . $op . $w2 if length $w2;
        }
    }

    return $result;
}

# matchPart($part)
#   Looks for the first "<left> and|or <right>" in $part (the keyword is
#   matched case-insensitively; <left> may be empty, e.g. in " or word3").
#   Returns ($op, $left, $right), where $op is '+' for "and" and '' for
#   "or".  Returns ('', '', '') when $part contains no such construct.
sub matchPart {
    my ($part) = @_;

    if ( $part =~ m/(-?\w*)\s+(and|or)\s+(-?\w+)/i ) {
        my ( $left, $keyword, $right ) = ( $1, lc($2), $3 );

        # Compare the lower-cased keyword so "AND" behaves like "and".
        my $op = $keyword eq 'and' ? '+' : '';
        return ( $op, $left, $right );
    }

    return ( '', '', '' );
}

__DATA__
(word1 or word2) and (word3 or word4)
word1 or word2
word2 and word3
(word1 and word2) or word3
(word1 or word2) and (word3 or word4
((word1 or word2) and (word3 or word4))
__END__
( word1 word2 ) ( word3 word4 )
word1 word2
+word2 +word3
( +word1 +word2 ) word3
( word1 word2 )
( ( word1 word2 ) ( word3 word4 ) )