# match character data } elsif( $text =~ /(^[^&&>]+)/ ) { print "char data"; my $data = $1; # make sure the data is inside an element return if( $data =~ /\S/ and not( @elements )); $text = $'; #### $rc=is_well_formed (" self Don't forget to mow the car and wash the lawn. "); $report = ( $rc ) ? "okay" : "bad"; print "$report\n"; sub is_well_formed { my $text = shift; # XML text to check print "text to check:\n$text\n"; # match patterns my $ident = '[:_A-Za-z][:A-Za-z0-9\._-]*'; # identifier my $optsp = '\s*'; # optional space my $att1 = "$ident$optsp=$optsp\"[^\"]*\""; # attribute my $att2 = "$ident$optsp=$optsp'[^']*'"; # attr. variant my $att = "($att1|$att2)"; # any attribute my @elements = ( ); # stack of open elems # loop through the string to pull out XML markup objects while( length($text) ) { # match an empty element if( $text =~ /^<($ident)(\s+$att)*\s*\/>/ ) { $text = $'; # match an element start tag } elsif( $text =~ /^<($ident)(\s+$att)*\s*>/ ) { push( @elements, $1 ); $text = $'; # match an element end tag } elsif( $text =~ /^<\/($ident)\s*>/ ) { if ( $1 ne pop @elements ) { warn "close tag $1 lacks open tag\n"; return; } $text = $'; # match a comment } elsif( $text =~ /^/ ) { $text = $'; if( $` =~ /--/ ) { warn "comment can't contain '--'\n"; return; } } else { warn "no end-comment fount after start-comment\n"; return; } # match extra whitespace # (in case there is space outside the root element) } elsif( $text =~ m|^\s+| ) { $text = $'; # match character data } elsif( $text =~ /(^[^<&>]+)/ ) { my $data = $1; $text = $'; # make sure the data is inside an element if( $data =~ /\S/ and not( @elements )) { warn "data found outside root element\n"; return; } # match entity reference } elsif( $text =~ /^&$ident;+/ ) { $text = $'; # something unexpected } else { warn "something unexpected\n"; return; } } if( @elements ) { warn "elements not closed at end of stream: @elements\n"; return; } return 1; }