Just for fun :)
It only takes one s/// (with a little extra state)
#!/usr/bin/perl
use strict;    # https://perlmonks.org/?node_id=11144309
use warnings;

# Sample input: plain words interleaved with (possibly nested) <tagN> ... </tagN> pairs.
my $str = 'word1 <tag0> word2 <tag1>word3 word4</tag1> word5 </tag0> word6 <tag2>word7 word8</tag2>word9 <tag3>word10</tag3> word11';

print parsestringwithtags( $str, 'tag0', 'tag1' ), "\n";
print parsestringwithtags( $str, 'tag3' ), "\n";
print parsestringwithtags( $str, 'tag1', 'tag2', 'tag3' ), "\n";

# parsestringwithtags( $string, @allowed_tags )
#
# Returns a copy of $string with every <tag> and </tag> marker removed.
# Text enclosed by a tag that is NOT listed in @allowed_tags is dropped,
# including everything nested inside it, even nested tags that are allowed.
# Text outside of any tag is always kept.
#
# Dies on:
#   - a '<' or '>' that is not part of a well-formed <name> or </name> marker,
#   - a closing tag that does not match the innermost open tag,
#   - an opening tag that is never closed.
sub parsestringwithtags {
    my $text = shift;
    my %tags = map { $_ => 1 } @_;

    # $active is 1 while text should be kept and 0 while inside a disallowed tag.
    my $active = 1;

    # Stack of [ tag name, value of $active before that tag was opened ].
    # The sentinel bottom entry has an empty name, which no \w+ tag can match.
    my @state = ( [ '', 1 ] );

    $text =~ s{ <(/?)(\w+)> | ([^<>]+) | ([<>]) }{
        my ( $slash, $tag, $plain, $stray ) = ( $1, $2, $3, $4 );
        my $out = '';

        # Test captures with defined(), not truthiness, so that a tag
        # literally named 0 is still recognised as a tag.
        if ( defined $stray ) {
            die "rogue angle bracket $stray at $-[4]";
        }
        elsif ( !defined $tag ) {
            # Plain text survives only while no disallowed tag is open.
            $out = $plain x $active;
        }
        elsif ( $slash eq '' ) {
            # Opening tag: remember the current state so the close can restore it.
            push @state, [ $tag, $active ];
            $active = 0 unless $tags{$tag};
        }
        elsif ( $state[-1][0] eq $tag ) {
            # Closing tag that matches the innermost open tag.
            $active = ( pop @state )->[1];
        }
        else {
            die "mismatched tags $state[-1][0] vs $tag";
        }

        $out;
    }gex;

    # Anything left above the sentinel is a tag that was never closed.
    @state > 1 and die "missing close tag for $state[-1][0]";

    return $text;
}
Outputs:
word1 word2 word3 word4 word5 word6 word9 word11
word1 word6 word9 word10 word11
word1 word6 word7 word8word9 word10 word11
In reply to Re: Parsing string with tags
by tybalt89
in thread Parsing string with tags
by Dirk80
| For: | Use: | ||
| & | &amp; | ||
| < | &lt; | ||
| > | &gt; | ||
| [ | &#91; | ||
| ] | &#93; |