#!/bin/perl -w
use strict;
use XML::Parser;
# Collected output, one entry per elt element.
# Global, but could be attached to the parser or passed to the handlers.
my @results;
# State shared by the handlers while the document is being parsed:
#   $elt_id      id attribute of the elt currently being parsed
#   $child_seen  true once a child start tag was found in the current elt
#   $in_child    true while the parser is between a child start and end tag
#   $child_text  text accumulated for the current child
my( $elt_id, $child_seen, $in_child, $child_text);
# Direct method call instead of the indirect-object form "new XML::Parser",
# which perl can mis-parse when a sub named "new" is in scope.
my $p= XML::Parser->new( Handlers =>
                           { Start => \&start, # called when a start tag is found
                             End   => \&end,   # called when an end tag is found
                             Char  => \&char,  # called when characters are found
                           },
                       );
$p->parse( \*DATA); # use parsefile to parse... a file
# One result per line; the final newline is printed even when there are no results.
print join "\n", @results;
print "\n";
# Start-tag handler, called by the parser for every opening tag.
#   $parser   the parser object (not used here)
#   $tag      name of the element being opened
#   %attr_of  attributes of that element, name => value
# Only updates the file-level parsing state; elements other than
# elt and child are ignored.
sub start {
    my ($parser, $tag, %attr_of) = @_;

    if ($tag eq 'child') {
        # a child was found and we are now inside it
        ($child_seen, $in_child) = (1, 1);
        # start collecting its text from scratch
        $child_text = '';
    }
    elsif ($tag eq 'elt') {
        # new elt: no child found yet
        $child_seen = 0;
        # remember the id in case the "missing child" message needs it
        $elt_id = $attr_of{id};
    }
}
# End-tag handler, called by the parser for every closing tag.
#   $parser  the parser object (not used here)
#   $tag     name of the element being closed
# Closing a child leaves "in child" mode; closing an elt records one
# entry in @results: the child text if a child was seen, otherwise a
# message naming the elt that lacks one.
sub end {
    my ($parser, $tag) = @_;

    if ($tag eq 'child') {
        # Toto, I guess we are not in the child any more
        $in_child = 0;
    }
    elsif ($tag eq 'elt') {
        push @results, $child_seen
                     ? $child_text
                     : "missing child for elt $elt_id";
    }
}
# Character-data handler, called for all non mark-up text.
#   $parser  the parser object (not used here)
#   $text    the piece of text that was just read
# The text is appended, not assigned: see the XML::Parser docs for why
# a plain "$child_text = $text" would not be enough.
sub char {
    my ($parser, $text) = @_;

    if ($in_child) {
        $child_text .= $text;
    }
}
__DATA__
I am a
child 1
child 2
####
#!/bin/perl -w
use strict;
use XML::Twig;
# Collected output, one entry per elt element.
my @results; # does not have to be global, it's just easier
# create the twig, see the docs for why to use TwigRoots
# Direct method call instead of the indirect-object form "new XML::Twig",
# which perl can mis-parse when a sub named "new" is in scope.
my $t= XML::Twig->new( TwigRoots => { elt => \&check_elt }); # call check_elt every time an element elt is parsed
$t->parse( \*DATA); # parse the XML (use parsefile to parse... a file)
# One result per line; the final newline is printed even when there are no results.
print join "\n", @results;
print "\n";
# Twig handler, called every time an elt element has been parsed.
#   $t    the XML::Twig object
#   $elt  the XML::Twig::Elt object for the elt just parsed
# Pushes one entry on @results: the text of the first child element,
# or a message naming the elt (by its id attribute) when there is none.
sub check_elt
  { my( $t, $elt)= @_;                            # fixed: a stray ')' after this statement was a syntax error
    if( my $child= $elt->first_child( 'child'))   # that's how you navigate the element
      { push @results, $child->text; }            # text includes sub elements of child
    else
      { push @results, "missing child for elt " . $elt->att( 'id'); }
    $t->purge;                                    # call only if your document is huge
  }                                               # to free the memory
__DATA__
I am a
child 1
child 2
####
#!/bin/perl -n -w
use strict;
# This script runs under -n: the code below is the body of an implicit
# while (<>) loop and is executed once per input line, with the line in $_.
# The state therefore lives in package variables ("our"), which keep their
# values from one line to the next.
#
# Recognised line formats (first character selects the kind):
#   (tag        element start tag
#   Aatt value  attribute of the element just opened
#   -text       text
#   )tag        element end tag

# global, but could be attached to the parser or passed to the handlers
our @results;
# we need those to hold info about the parsing
# @in_element is a stack of open elements,
# the current element is $in_element[-1]
our( @in_element, $elt_id, $child_seen, $child_text);

if( m/^\((.*)$/)                          # element start tag  (tag
  { my $tag= $1;
    push @in_element, $tag;
    if( $tag eq 'elt')                    # elt start tag
      { $child_seen= 0; }                 # reset the flag
    elsif( $tag eq 'child')               # child start tag
      { $child_seen= 1;                   # set the flag
        $child_text= '';                  # reset the text
      }
  }
elsif( m/^A(\S*) (.*)$/)                  # attribute  Aatt value
  { my( $att, $value)= ( $1, $2);
    # store the id for elt elements; the stack is checked first so that an
    # attribute line seen before any start tag does not read $in_element[-1]
    $elt_id= $value
      if( @in_element && ($in_element[-1] eq 'elt') && ($att eq 'id'));
  }
elsif( m/^-(.*)$/)                        # text  -text
  { # $ also matches at end of input, so a last line without a trailing
    # newline is still recognised as text
    $child_text.= $1
      if( @in_element && ($in_element[-1] eq 'child'));
  }
elsif( m/^\)(.*)$/)                       # end tag  )tag (anchored: must start the line)
  { my $tag= $1;
    # close the element: without this the stack only ever grows and text
    # following a closed child would still be added to $child_text
    pop @in_element;
    if( $tag eq 'elt')
      { if( $child_seen)
          { push @results, $child_text; }
        else
          { push @results, "missing child for elt $elt_id"; }
      }
  }

# runs once, after the last input line has been processed
END
  { print join "\n", @results;
    print "\n";
  }
####
#!/bin/perl -w
use strict;
use XML::XPath;
# Collected output, one entry per elt element.
my @results;
# create the xpath object from the DATA filehandle
my $xp = XML::XPath->new( ioref => \*DATA);
# find all elt elements under the doc root
my $elts = $xp->find('/doc/elt');
foreach my $elt ($elts->get_nodelist)
  { # Select the child elements with a relative XPath evaluated in the
    # context of $elt. The previous version grepped every child node on
    # getName, which also ran the comparison on non-element nodes such as
    # text (NOTE(review): those presumably have no name, hence warnings
    # under -w -- confirm).
    my @children= $xp->findnodes( 'child', $elt);
    if( @children)
      { push @results, $children[0]->string_value; } # that's how you get the text
    else
      { push @results, "missing child for elt " . $elt->getAttribute( 'id'); }
  }
# One result per line; the final newline is printed even when there are no results.
print join "\n", @results;
print "\n";
__DATA__
I am a
child 1
child 2