#!/usr/bin/perl -w use strict; use XML::Twig; use FindBin qw( $RealBin $RealScript); use Getopt::Std; $Getopt::Std::STANDARD_HELP_VERSION=1; use vars qw( $VERSION $USAGE); my $VERSION= "0.01"; $USAGE= "xml_split [-l | -c ] [-b ] [-n ] [-e ] [-d] [-v] [-h] [-m] [-V] \n"; { my $opt={}; getopts('l:c:b:n:e:dvhmV', $opt); $opt->{n} ||= 3; if( $opt->{h}) { die $USAGE, "\n"; } if( $opt->{m}) { exec "pod2text $RealBin/$RealScript"; } if( $opt->{V}) { print "xml_split version $VERSION\n"; exit; } if( $opt->{c}) { die "cannot use --level and --condition at the same time\n" if( $opt->{l}); } else { $opt->{l} ||= 1; $opt->{c}= "level( $opt->{l})"; } my $options= { cond => $opt->{c}, base => $opt->{b}, nb_digits => $opt->{n}, ext => $opt->{e}, verbose => $opt->{v}, no_pi => $opt->{d} }; my $state; $state->{seq_nb}=0; if( !@ARGV) { $options->{base} ||= 'out'; $options->{ext} ||= '.xml'; my $twig_options= twig_options( $options); my $t= XML::Twig->new( %$twig_options); $t->parse( \*STDIN); end_file( $t, $options, $state); } else { foreach my $file (@ARGV) { unless( $options->{base}) { $state->{seq_nb}=0; } my( $base, $ext)= ($file=~ m{^(.*?)(\.\w+)?$}); $options->{base} ||= $base; $options->{ext} ||= $ext || '.xml'; my $twig_options= twig_options( $options, $state); my $t= XML::Twig->new( %$twig_options); $t->parsefile( $file); end_file( $t, $options, $state); } } } sub twig_options { my( $tool_options, $state)= @_; my $twig_options= { keep_encoding => 1, keep_spaces => 1 }; unless( $tool_options->{no_pi}) { my $file_name= file_name( $tool_options, { %$state, seq_nb => 0} ); # main file name warn "generating main file $file_name\n" if( $tool_options->{verbose}); open( my $out, '>', $file_name) or die "cannot create main file '$file_name': $!"; $state->{out}= $out; $twig_options->{twig_print_outside_roots}= $out; $twig_options->{start_tag_handlers}= { $tool_options->{cond} => sub { $_->set_att( '#in_fragment' => 1); } }; } $twig_options->{twig_roots}= { $tool_options->{cond} => sub { dump_elt( @_, $tool_options, $state); } }; return $twig_options; } sub dump_elt { my( $t, $elt, $options, $state)= @_; $state->{seq_nb}++; my $file_name= file_name( $options, $state); warn "generating $file_name\n" if( $options->{verbose}); my $fragment= XML::Twig->new(); $fragment->{twig_xmldecl} = $t->{twig_xmldecl}; $fragment->{twig_doctype} = $t->{twig_doctype}; $fragment->{twig_dtd} = $t->{twig_dtd}; if( !$options->{no_pis}) { my $subdocs= $elt->att( '#has_subdocs') || 0; my $pi= XML::Twig::Elt->new( '#PI') ->set_pi( merge => " subdocs = $subdocs+:$file_name"); $elt->del_att( '#in_fragment'); if( $elt->inherited_att( '#in_fragment')) { $elt->parent( '*[@#in_fragment="1"]')->set_att( '#+has_subdocs' => 1); $pi->replace( $elt); } else { $elt->cut; $pi->print( $state->{out}); } } else { $elt->cut; } $fragment->set_root( $elt); open( my $out, '>', $file_name) or die "cannot create output file '$file_name': $!"; $fragment->print( $out); close $out; } sub end_file { my( $t, $options, $state)= @_; unless( $options->{no_pi}) { close $state->{out}; } } sub file_name { my( $options, $state)= @_; my $nb= sprintf( "%0$options->{nb_digits}d", $state->{seq_nb}); my $file_name= "$options->{base}-$nb$options->{ext}"; return $file_name; } sub HELP_MESSAGE { return $USAGE; } sub VERSION_MESSAGE { return $VERSION; }