#!/usr/bin/perl
use HTML::TreeBuilder::XPath;
use warnings;
use strict;

# Call with explicit parentheses: the old `&readfile;` form silently forwards
# the caller's @_ to the sub.
readfile();
exit(0);

# readfile([$file])
#
# Parses an HTML document, replaces every <p> that is a direct child of a
# <blockquote> and whose text contains a blank line with one sibling <p> per
# text line, then prints the resulting <body> as indented XML on STDOUT.
# Debugging traces describing each split are printed along the way.
#
# Parameters:
#   $file - optional file name or file handle to parse. When omitted, the
#           script's own __DATA__ section is parsed.
#
# Returns 1 on success. Dies when the input cannot be parsed or when the
# parsed tree has no <body> element.
sub readfile {
    my ($file) = @_;
    my $source = defined $file ? $file : \*DATA;

    my $xhtml = HTML::TreeBuilder::XPath->new;
    $xhtml->implicit_tags(1);
    # Keep the original whitespace: the blank-line detection below relies on
    # the newlines surviving the parse.
    $xhtml->no_space_compacting(1);
    $xhtml->parse_file($source)
        or die "readfile: unable to parse input: $!\n";

    # Find double-spaced paragraphs inside blockquotes and expand them.
    # NOTE(review): the __DATA__ section as visible in this file holds plain
    # text without <blockquote>/<p> markup, so this XPath matches nothing for
    # it -- confirm whether the sample HTML was lost.
    for my $p ($xhtml->findnodes('//blockquote/p')) {
        # as_text() flattens the element to plain text, so any inline markup
        # inside the paragraph is not carried over to the new paragraphs.
        my $text = $p->as_text();
        $text =~ s/^\s+//;
        $text =~ s/\s+$//;

        # Only paragraphs containing at least one blank line are expanded.
        next unless $text =~ /\n\s*\n\s*/;

        # NOTE(review): once a blank line is present, the text is split on
        # every newline run, not only on blank lines -- confirm this is the
        # intended granularity.
        my @paragraphs = split /\s*\n\s*/, $text;
        print qq(\t\@paragraphs=), join(',', @paragraphs), qq(\n);
        next unless @paragraphs;

        # Build all replacement elements up front. new_from_lol is invoked on
        # $p so the new nodes get the same class as the node they replace.
        my @replacements;
        for my $index (0 .. $#paragraphs) {
            my $pp    = $paragraphs[$index];
            my $label = $index == 0 ? 'pp1' : 'pp2';
            print qq(\t\t$label=$pp\n);
            push @replacements, $p->new_from_lol(['p', $pp]);
        }

        # Swap the original <p> for the new ones in a single step while it is
        # still attached to its parent. Calling replace_with_content() first
        # would detach $p, after which postinsert() croaks because the node no
        # longer has a parent.
        $p->replace_with(@replacements)->delete;

        print qq(Identified :\n);
        print qq(«), join('', map { $_->as_XML_indented } @replacements), qq(»\n);
    }

    print qq(\n), qq(-) x 30, qq(\n);

    my ($body) = $xhtml->findnodes('//body');
    die "readfile: parsed document has no <body> element\n"
        unless defined $body;

    print qq(\n);
    print $body->as_XML_indented;

    $xhtml->delete;
    return (1);
}

__DATA__
aaa bbb ccc
ddd eee fff
ggg
hhh
iii
jjj