To answer my own question regarding merging/expanding elements, here is a snippet on how to do it with sax:
#!/usr/bin/perl
package MyHandler;
use strict;
use warnings;
use base qw(XML::SAX::Base);
use Data::Dumper;
use XML::SAX::ParserFactory;
use XML::SAX::Writer;
sub start_element {
my $self = shift;
my $data = shift;
if( $data->{LocalName} eq 'expand') {
$self->{in_include}++;
my $p = XML::SAX::ParserFactory->parser( Handler => $self
+);
$p->parse_string( "<expanded>This is my expanded tag</expa
+nded>" );
return;
}
#$data->{Attributes} = undef;
$self->SUPER::start_element($data);
#print Dumper($data);
}
sub characters {
my $self = shift;
my $data = shift;
#print "Data is $data->{Data}" if defined $data->{Data};
$self->SUPER::characters($data);
}
sub end_element {
my ($self, $element) = @_;
if ($element->{LocalName} eq "expand") {
$self->{in_include}--;
} else {
$self->SUPER::end_element($element);
}
}
sub start_document { # same for end_document
my($self, $data) = @_;
return if($self->{in_include});
$self->SUPER::start_document($data);
}
sub end_document { # same for end_document
my($self, $data) = @_;
return if($self->{in_include});
$self->SUPER::end_document($data);
}
1;
#============================
#Main programm
#============================
use strict;
use warnings;
use XML::SAX::ParserFactory;
use XML::SAX::Writer;
my $out;
my $o = XML::SAX::Writer->new( Output => \$out );
my $h = MyHandler->new( Handler => $o );
my $p = XML::SAX::ParserFactory->parser(Handler => $h);
my $data;
{ local undef $/ }; $data = <DATA>;
$p->parse_string( $data );
print $out;
__DATA__
<?xml version="1.0" encoding="UTF-8"?>
<html xmlns="http://www.w3.org/1999/xhtml" xmlns:wicket="http://wi
+cket.apache.org/dtds.data/wicket-xhtml1.4-strict.dtd">
<body>
<wicket:panel>
<form wicket:id="mvpForm">
<span>Edit Information: </span>
<input type="checkbox" wicket:id="editForm"/>
<span>Name: </span>
<span wicket:id="name"></span>
<input type="text" wicket:id="nameEdit"/>
<span>Last Name: </span>
<span wicket:id="lastName"></span>
<input type="text" wicket:id="lastNameEdit"/>
<span>DOB: </span>
<span wicket:id="dob"></span>
<input type="text" wicket:id="dobEdit"/>
<span>Occupation: </span>
<span wicket:id="occupation"></span>
<input type="text" wicket:id="occupationEdit"/>
<span>Gender: </span>
<span wicket:id="gender"></span>
<span wicket:id="genderEdit"/>
<input type="submit" wicket:id="submit"/>
<expand/>
</form>
</wicket:panel>
</body>
</html>
The `<expand/>` tag will be replaced by `<expanded>This is my expanded tag</expanded>`.
Basically all is needed is to create a new parser and hand it a file/string to be parsed. However, note that there are a couple of gotchas. The first one is to stop propagating the event where you have intercepted the tag to be expanded. In other words don't call $self->SUPER::start/end_element whenever expanding/nesting tags, that will prevent the replaced tag to end up in the output. Second, it's required to intercept start_document/end_document and skip calling the parent for those ones, otherwise the following error will be produced:
>Trying to pop context without push context at /usr/share/perl5/XML/NamespaceSupport.pm line 79, <DATA> chunk 1.
In other words some clean up fails:
>This message is being triggered because XML::NamespaceSupport does some initialisation on a start_document event and some cleanup on an end_document event. The problem is that with your code there will be a pair of these events for the main document and a nested pair for each included document. When the second end_document event occurs, there is nothing to clean up - hence the message.
Taken from here |