#!/usr/bin/perl
#
# Merge sibling <text> elements inside each <font> element.
#
# For every <font> node in the document, the direct <text> children are
# inspected.  When all of them carry the same "y" and "page" attribute
# values, their text content is concatenated (each piece followed by a
# single space) into the first <text> element and the remaining <text>
# elements are removed.  <font> nodes whose <text> children disagree on
# either attribute are left untouched.  The resulting document is printed
# to STDOUT.
use strict;
use warnings;

use XML::LibXML;

# Slurp the whole embedded document in one read; $/ is localised so the
# change to the input record separator does not leak out of the block.
my $xmlstring = do { local $/; <DATA> };

my $xml = XML::LibXML->new;
my $doc = $xml->parse_string($xmlstring);
# you can do $xml->parse_file( "filename" ) instead

FONT:
for my $font_node ( $doc->findnodes("//font") ) {
    my @text_nodes = $font_node->findnodes("./text");

    # Nothing to merge (and no first node to write into) without children.
    next FONT unless @text_nodes;

    # The first <text> child supplies the reference attribute values and
    # later receives the merged content.
    my $first_node = $text_nodes[0];
    my $wanted_y   = _attribute_or_empty( $first_node, 'y' );
    my $wanted_p   = _attribute_or_empty( $first_node, 'page' );

    my $matched_content = '';
    for my $tnode (@text_nodes) {

        # Look the attributes up by name.  (Using grep in scalar context
        # to find an "index" yields the match count, not a position.)
        next FONT
            if _attribute_or_empty( $tnode, 'y' ) ne $wanted_y
            or _attribute_or_empty( $tnode, 'page' ) ne $wanted_p;

        # Every piece is followed by one space, including the last one.
        $matched_content .= $tnode->textContent . " ";
    }

    # Replace whatever the first <text> holds with the merged string.
    # Clearing and appending works even when the element is empty or its
    # first child is not a plain text node.
    $first_node->removeChildNodes;
    $first_node->appendText($matched_content);

    $font_node->removeChild($_) for @text_nodes[ 1 .. $#text_nodes ];
}

print $doc->toString;

# Return the value of attribute $name on element $node, or the empty
# string when the attribute is absent, so comparisons never see undef.
sub _attribute_or_empty {
    my ( $node, $name ) = @_;
    my $value = $node->getAttribute($name);
    return defined $value ? $value : '';
}

# NOTE(review): the sample input below appears to have lost its XML markup
# (only the text content survives), so parse_string will reject it as-is.
# Restore the original <font>/<text y="..." page="..."> sample before
# running -- TODO confirm against the original data.
__DATA__
Part I Introduction 2 Chapter 1 Managers and Management 2 History Module Part II Planning 56