in reply to How can I keep or discard certain blocks of an XML file based on first line of block?
I've added some attribute names and corrected the last tag in your example to get valid XML.
#!perl
# Filter the <label_a> blocks of an XML document.
#
# Every <label_a> element is handed to label_a(), which either writes the
# block to the output file or drops it, based on its 'id' and 'timea'
# attributes. Everything outside <label_a> elements is copied to the output
# file by XML::Twig itself (twig_print_outside_roots).
use strict;
use warnings;
use XML::Twig;
use Time::Piece;

my $out_file = 'newfile.xml';

#open my $IN, '<', 'origfile.xml' or die "Cannot open origfile.xml: $!";
open my $OUT, '>', $out_file
    or die "Cannot open $out_file for writing: $!";

my $twig = XML::Twig->new(
    twig_roots               => { 'label_a' => \&label_a },  # process label_a blocks
    twig_print_outside_roots => $OUT,                        # print rest
);
$twig->set_pretty_print('indented');
$twig->parse(\*DATA);    # or $IN

# Buffered write errors only surface at close, so check it explicitly.
close $OUT or die "Cannot close $out_file: $!";

# label_a($twig, $element)
#
# XML::Twig handler called once per complete <label_a> element.
#   $twig - the XML::Twig object driving the parse
#   $e    - the <label_a> element that was just parsed
#
# Reads the 'timea' and 'id' attributes, decides whether the block is kept,
# then either flushes it to $OUT or purges it and reports the skip on STDOUT.
# Dies (via Time::Piece) if 'timea' does not match the expected pattern.
# Returns 1.
sub label_a {
    my ($twig, $e) = @_;

    my $timea = $e->att('timea');
    my $id    = $e->att('id');

    # NOTE(review): the pattern includes %z. Confirm whether hour/day below
    # are meant in the attribute's own UTC offset or in UTC; Time::Piece may
    # normalise the parsed time to UTC.
    my $t   = Time::Piece->strptime($timea, '%Y%m%d%H%M%S %z');
    my $hr  = $t->hour;
    my $day = $t->day;     # weekday name, matched against Mon/Tue/Wed below

    my $keep = 0;
    $keep = 1
        if (   ( $id =~ /match this/ )
            && ( $hr >= 18 && $hr <= 20 )
            && ( $day =~ /Mon|Tue|Wed/i ) );
    # $keep=1 if ... another condition

    if ($keep) {
        $twig->flush($OUT);    # save
    }
    else {
        $twig->purge();        # discard
        print STDOUT $t->strftime . " $id $hr $day skipped\n";
    }

    return 1;
}

__DATA__
<?xml version="1.0" encoding="ISO-8859-1"?>
<label_x data1="somevalue" data2="someothervalue" data3="anothervalue">
  <label_y y="somevalue">
    <label_z>a value</label_z>
    <label_z>a value</label_z>
    <label_z>a value</label_z>
    <label_z>a value</label_z>
  </label_y>
  <label_y y="somevalue">
    <label_z>a value</label_z>
    <label_z>a value</label_z>
    <label_z>a value</label_z>
    <label_z>a value</label_z>
  </label_y>
  <label_a timea="20140623203000 -0400" timeb="20140623210000 -0400" id="must_match_this">
    <label_b b="data">data_of_variable_number_of_lines_and_indentations</label_b>
    <label_b b="more_data">data_of_variable_number_of_lines_and_indentations</label_b>
    <label_c>
      <label_d>Some_data_may_be_indented_further</label_d>
    </label_c>
    <label_b b="still_more_data">data_of_variable_number_of_lines_and_indentations</label_b>
  </label_a>
  <label_a timea="20140623210000 -0400" timeb="20140623220000 -0400" id="must_match_this">
    <label_b b="data">data_of_variable_number_of_lines_and_indentations</label_b>
    <label_b b="more_data">data_of_variable_number_of_lines_and_indentations</label_b>
    <label_c>
      <label_d>Some_data_may_be_indented_further</label_d>
    </label_c>
    <label_b b="still_more_data">data_of_variable_number_of_lines_and_indentations</label_b>
  </label_a>
</label_x>
|
|---|