After xml_split, here is xml_merge. The tool is quite simple: just feed it the foo-00.xml file created by xml_split and it will happily output the entire file.
Note that, just like xml_split, it respects the encoding and indentation of the original XML file.
#!/usr/bin/perl -w
# $Id: xml_merge,v 1.2 2005/02/10 11:45:07 mrodrigu Exp $

# xml_merge: rebuilds a single XML document from the files produced by
# xml_split. The main file is parsed with XML::Twig; everything outside
# the <?merge ...?> processing instructions is printed as is, and each
# merge PI is replaced by the content of the sub-document it names.

use strict;

use XML::Twig;
use FindBin qw( $RealBin $RealScript);
use Getopt::Std;

$Getopt::Std::STANDARD_HELP_VERSION=1; # to stop processing after --help or --version

use vars qw( $VERSION $USAGE);

$VERSION= "0.01";
$USAGE= "xml_merge [-o <output_file>] [-v] [-h] [-m] [-V] [file]\n";

{ # main block

  my $opt={};
  # 'o:' declares -o as an option that takes a value (the output file)
  getopts('o:vhmV', $opt);

  if( $opt->{h}) { die $USAGE, "\n"; }

  # list form of exec: no shell involved, so a script path containing
  # spaces or shell metacharacters is passed to pod2text untouched
  if( $opt->{m}) { exec( 'pod2text', "$RealBin/$RealScript") or die "cannot run pod2text: $!"; }

  if( $opt->{V}) { print "xml_merge version $VERSION\n"; exit; }

  my $out; # output filehandle, only defined when -o is used
  if( $opt->{o})
    { open( $out, '>', $opt->{o}) or die "cannot create $opt->{o}: $!";
      $opt->{fh}= $out; # used to set twig_print_outside_roots
    }
  else
    { $opt->{fh}= 1; }  # this way twig_print_outside_roots outputs to STDOUT

  $opt->{subdocs} = 1;
  $opt->{file}    = $ARGV[0];

  merge( $opt);

  # buffered write errors only show up when the handle is closed
  if( $out) { close( $out) or die "error writing $opt->{o}: $!"; }

  if( $opt->{v}) { warn "done\n"; }
}

# merge( $opt)
# Parses $opt->{file} (or STDIN when no file is set), printing everything
# outside of the merge processing instructions to $opt->{fh}. Each
# <?merge ...?> PI updates $opt through parse(), then the sub-document is
# either merged recursively (subdocs is true) or copied verbatim by spit().
sub merge
  { my( $opt)= @_;

    my $t= XML::Twig->new( keep_encoding => 1, keep_spaces => 1,
                           twig_roots => { '?merge' => sub { $opt= parse( $_->data, $opt);
                                                             if( $opt->{subdocs}) { merge( $opt); }
                                                             else                 { spit( $opt);  }
                                                           }
                                         },
                           twig_print_outside_roots => $opt->{fh},
                         );

    if( $opt->{v} && $opt->{file}) { warn "merging $opt->{file} (parsing)\n"; }

    if( $opt->{file}) { $t->parsefile( $opt->{file}); }
    else              { $t->parse( \*STDIN);          }
  }

# spit( $opt)
# Copies $opt->{file} to the output line by line, without parsing it.
# Dies if the sub-document cannot be opened.
sub spit
  { my( $opt)= @_;

    if( $opt->{v} && $opt->{file}) { warn "merging $opt->{file} (no parsing)\n"; }

    open( my $in, '<', $opt->{file}) or die "cannot open sub document '$opt->{file}': $!";
    while( my $line= <$in>)
      { # $opt->{fh} is only a real filehandle when -o was given,
        # otherwise it is the flag value 1 and output goes to STDOUT
        if( $opt->{o}) { print {$opt->{fh}} $line; }
        else           { print $line;              }
      }
    close $in;
  }

# parse( $data, $opt)
# data is the pi data,
# (ugly) format is keyword1 = val1 : keyword2 = val2 ... : filename
# ex: subdocs = 1 : file-01.xml
# Every keyword/value pair is stored in $opt, what remains of the data is
# stored as $opt->{file}. Returns the (updated) $opt hash reference.
sub parse
  { my( $data, $opt)= @_;
    while( $data=~ s{^\s*(\S+)\s*=\s*(\S+)\s*:\s*}{}) { $opt->{$1}= $2; }
    $opt->{file}= $data;
    return $opt;
  }

# for Getopt::Std
# Getopt::Std calls these with the output filehandle as first argument
# when --help or --version is used, and expects them to print the message.
# The message is also returned, as the subs originally did.
sub HELP_MESSAGE
  { my( $fh)= @_;
    $fh ||= \*STDOUT;
    print {$fh} $USAGE;
    return $USAGE;
  }

sub VERSION_MESSAGE
  { my( $fh)= @_;
    $fh ||= \*STDOUT;
    print {$fh} "xml_merge version $VERSION\n";
    return $VERSION;
  }

__END__

=head1 NAME

xml_merge - merge back XML files split with C<xml_split>

=head1 DESCRIPTION

C<xml_merge> takes several xml files that have been split using
C<xml_split> and recreates a single file.

=head1 OPTIONS

=over 4

=item -o <output_file>

unless this option is used the program output goes to STDOUT

=item -v

verbose output

=item -V

outputs version and exits

=item -h

short help

=item -m

man (requires pod2text to be in the path)

=back

=head1 EXAMPLES

  xml_merge foo-00.xml             # output to stdout
  xml_merge -o foo.xml foo-00.xml  # output to foo.xml

=head1 SEE ALSO

XML::Twig, xml_split

=head1 TODO/BUGS

=head1 AUTHOR

Michel Rodriguez <mirod@cpan.org>

=head1 LICENSE

This tool is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.
|
|---|
| Replies are listed 'Best First'. | |
|---|---|
|
Re: xml_merge
by gellyfish (Monsignor) on Feb 10, 2005 at 15:05 UTC | |
by mirod (Canon) on Feb 10, 2005 at 15:09 UTC | |
|
Re: xml_merge
by morfeas (Novice) on Feb 11, 2005 at 10:40 UTC | |
by mirod (Canon) on Feb 11, 2005 at 11:05 UTC | |
by morfeas (Novice) on Feb 11, 2005 at 11:34 UTC | |
by mirod (Canon) on Feb 11, 2005 at 12:07 UTC | |
by morfeas (Novice) on Feb 11, 2005 at 12:54 UTC | |
|