(.*) # what's between tags, even newline #### #!/usr/bin/env perl5.10.0 use strict; use warnings; use re 'eval'; use Test::More qw(no_plan); use 5.010; use Data::Dumper; my $xml; my $nested_tags; my $cdata = qr{ (?> [^<>&"]+ # any amount of "normal" text | \&\w+; # named chars | \&\#\d+; # numbered codepoints )}x; $xml = qr{ (?> (??{$nested_tags}) | $cdata)+ }x; #$xml = qr/ $single_xml+/; my $name = qr{ (?>\w+(?: [:-]\w+)*) }x; my $attribute = qr{ (?>$name="$cdata*+") }x; { $nested_tags = qr{ (? < ($name) # (?{print "after <$^N: \n"}) # (?{print "match: [$&] (\$2:$2) \n"}) (?>\s+$attribute)*\s* (?: /\s*> # either an empty tag end ... | > # or end-of-tag and (?> (?&nested_tags) | $cdata)*+ # arbitrary XML # and a closing tag containing # the current name ) ) }x; } like "foo bar baz", qr/^$cdata$/, "cdata"; unlike "", qr/^$cdata$/, "cdata"; like 'blerk="foo"', qr/^$attribute$/, "simple attribute"; unlike 'blerk=bar', qr/^$attribute$/, "non-quoted attribute"; like '', qr/^$nested_tags$/, "single, empty XML tag"; unlike '', qr/^$nested_tags$/, "single, non-empty XML tag"; like '', qr/^$nested_tags$/, "single, closed XML tag"; like '', qr/^$nested_tags$/, "nested tags 1"; like 'foo', qr/^$nested_tags$/, "nested tags 2"; unlike '',qr/^$nested_tags$/, "nested tags 3"; like '', qr/^$nested_tags$/, "nested tags 4"; like '', qr/^$xml+$/, 'multiple nested tags'; unlike '', qr/^$xml+$/, "wrongly nested tags"; like 'foo', qr/^$xml+$/, "nested tags with cdata"; like '', qr/^$nested_tags$/, 'Tag with attribute'; like 'foo äblubb', qr/^$nested_tags$/, 'Tags with named entities'; unlike 'foo äblubb', qr/^$nested_tags$/, 'Tags with malformed named entities'; #print Dumper \@names;