#!/bin/perl -w
# NOTE: this file holds four independent example scripts separated by "####"
# lines.  Each one reports, for every <elt> in a document, either the text of
# its <child> or "missing child for elt <id>".  Split them into separate files
# before running: everything after a script's __DATA__ marker is read as data.

# ---------------------------------------------------------------------------
# Example 1: XML::Parser (event/handler based)
# ---------------------------------------------------------------------------
use strict;
use warnings;
use XML::Parser;

# global, but could be attached to the parser or passed to the handlers
my @results;

# parsing state shared between the handlers
my $elt_id;              # id attribute of the elt currently being parsed
my $child_seen = 0;      # true once a child was found in the current elt
my $in_child   = 0;      # true while we are between <child> and </child>
my $child_text = '';     # text accumulated for the current child

my $parser = XML::Parser->new(
    Handlers => {
        Start => \&start,    # called when a start tag is found
        End   => \&end,      # called when an end tag is found
        Char  => \&char,     # called when characters are found
    },
);
$parser->parse( \*DATA );    # use parsefile to parse... a file

print join "\n", @results;
print "\n";

# Start handler: receives the Expat object, the element name and the
# attributes as a flat name => value list.
sub start {
    my ( $p, $elt, %atts ) = @_;    # set by XML::Parser::Expat

    if ( $elt eq 'elt' ) {          # we found an elt start tag
        $child_seen = 0;            # reset the flag, no child found yet
        $elt_id     = $atts{id};    # store it in case we need it
    }
    elsif ( $elt eq 'child' ) {     # found a child start tag
        $child_seen = 1;            # we've seen a child
        $in_child   = 1;            # we are in the child
        $child_text = '';           # reset the child text
    }
}

# End handler: on </elt> record either the child text or a "missing" message;
# on </child> stop collecting text.
sub end {
    my ( $p, $elt ) = @_;

    if ( $elt eq 'elt' ) {          # found an elt end tag
        if ($child_seen) {
            push @results, $child_text;
        }
        else {
            push @results, "missing child for elt $elt_id";
        }
    }
    elsif ( $elt eq 'child' ) {     # found a child end tag
        $in_child = 0;              # Toto, I guess we are not in the child any more
    }
}

# Char handler: called for all non mark-up text.  The text is appended rather
# than assigned because a single run of text may be delivered in several calls.
sub char {
    my ( $p, $string ) = @_;

    $child_text .= $string if $in_child;
}

# NOTE(review): the markup of the sample document was lost in this copy of the
# file (only the text "I am a child 1 child 2" survived).  The document below
# is reconstructed from the element/attribute names the code uses - confirm
# against the original.
__DATA__
<doc>
  <elt id="elt1"><child>I am a child 1</child></elt>
  <elt id="elt2"></elt>
  <elt id="elt3"><child>child 2</child></elt>
</doc>
####
#!/bin/perl -w
# ---------------------------------------------------------------------------
# Example 2: XML::Twig (tree chunks with per-element handlers)
# ---------------------------------------------------------------------------
use strict;
use warnings;
use XML::Twig;

my @results;    # does not have to be global, it's just easier

# create the twig; see the docs for why to use TwigRoots.
# check_elt is called every time an element elt is parsed
my $t = XML::Twig->new( TwigRoots => { elt => \&check_elt } );

$t->parse( \*DATA );    # parse the XML (use parsefile to parse... a file)

print join "\n", @results;
print "\n";

# Handler for elt elements.
#   $t   is the XML::Twig object
#   $elt is an XML::Twig::Elt object
sub check_elt {
    my ( $t, $elt ) = @_;

    # that's how you navigate the element
    if ( my $child = $elt->first_child('child') ) {
        push @results, $child->text;    # text includes sub elements of child
    }
    else {
        push @results, "missing child for elt " . $elt->att('id');
    }

    $t->purge;    # frees the memory; call only if your document is huge
}

# NOTE(review): sample document reconstructed, the original markup was lost in
# this copy of the file - confirm against the original.
__DATA__
<doc>
  <elt id="elt1"><child>I am a child 1</child></elt>
  <elt id="elt2"></elt>
  <elt id="elt3"><child>child 2</child></elt>
</doc>
####
#!/bin/perl -n -w
# ---------------------------------------------------------------------------
# Example 3: PYX filter.  Because of -n this code runs once per input line,
# with the line in $_; the state therefore lives in package variables.
# ---------------------------------------------------------------------------
use strict;

# global, but could be attached to the parser or passed to the handlers
our @results;

# we need those to hold info about the parsing
# @in_element is a stack of open elements,
# the current element is $in_element[-1]
our ( @in_element, $elt_id, $child_seen, $child_text );

if ( m/^\((.*)$/ ) {                    # element start tag: (tag
    my $tag = $1;
    push @in_element, $tag;
    if ( $tag eq 'elt' ) {              # elt start tag
        $child_seen = 0;                # reset the flag
    }
    elsif ( $tag eq 'child' ) {         # child start tag
        $child_seen = 1;                # set the flag
        $child_text = '';               # reset the text
    }
}
elsif ( m/^A(\S*) (.*)$/ ) {            # attribute: Aatt value
    my ( $att, $value ) = ( $1, $2 );

    # store the id for elt elements
    $elt_id = $value
      if @in_element && $in_element[-1] eq 'elt' && $att eq 'id';
}
elsif ( m/^-(.*)$/ ) {                  # text: -text
    my $text = $1;
    $child_text .= $text
      if @in_element && $in_element[-1] eq 'child';
}
elsif ( m/^\)(.*)$/ ) {                 # end tag: )tag
    my $tag = $1;

    # the element is closed: drop it from the stack so that text following
    # </child> is no longer attributed to the child
    pop @in_element;

    if ( $tag eq 'elt' ) {
        if ($child_seen) {
            push @results, $child_text;
        }
        else {
            push @results, "missing child for elt $elt_id";
        }
    }
}

# runs once, after the last input line has been processed
END {
    print join "\n", @results;
    print "\n";
}
####
#!/bin/perl -w
# ---------------------------------------------------------------------------
# Example 4: XML::XPath
# ---------------------------------------------------------------------------
use strict;
use warnings;
use XML::XPath;

my @results;

# create the xpath object from the DATA filehandle
my $xp = XML::XPath->new( ioref => \*DATA );

# find all elt elements directly under the doc root
my $elts = $xp->find('/doc/elt');

foreach my $elt ( $elts->get_nodelist ) {

    # there is probably a more elegant way to get the child children
    # but I don't know XML::XPath enough
    my $children = $elt->getChildNodes;    # get all children

    # grep only the relevant ones; the defined check skips nodes for which
    # getName returns nothing instead of comparing undef with eq
    my @children = grep {
        my $name = $_->getName;
        defined $name && $name eq 'child'
    } @$children;

    if (@children) {
        push @results, $children[0]->string_value;    # that's how you get the text
    }
    else {
        push @results, "missing child for elt " . $elt->getAttribute('id');
    }
}

print join "\n", @results;
print "\n";

# NOTE(review): sample document reconstructed, the original markup was lost in
# this copy of the file - confirm against the original.
__DATA__
<doc>
  <elt id="elt1"><child>I am a child 1</child></elt>
  <elt id="elt2"></elt>
  <elt id="elt3"><child>child 2</child></elt>
</doc>