use strict; use warnings; use diagnostics; use HTML::TreeBuilder; use HTML::Entities; use HTML::Element; sub traverse ; foreach my $file_name (@ARGV) { my $tree = HTML::TreeBuilder->new ; $tree->parse_file($file_name); $tree->dump ; print "\n\nWhere would you like to put the output file? " ; my $output = ; open OUTPUT_FILE, "> $output" or die $! ; select OUTPUT_FILE ; traverse ($tree); $tree = $tree->delete ; close OUTPUT_FILE or die $!; } sub traverse { foreach (@_) { if ($_) { if (ref $_) { print STDERR $_->tag(), "\n\n" ; if ($_->tag() ne "head" && $_->tag() ne "script" && $_->tag() ne "img" && $_->tag() ne "object" && $_->tag() ne "applet") { my @contents = $_->content_list() ; traverse (@contents) ; } if (!$_->parent) { my $s = $_->as_HTML ("",{}) ; $s =~ s/>\n/>/g ; $_ =~ s/'em\s/’em /g ; $_ =~ s/'tis\s/’tis /g ; $_ =~ s/'twas\s/’twas /g ; $_ =~ s/'Twas\s/’Twas /g ; $_ =~ s/'Tis\s/’Tis / ; $_ =~ s/'\s/’ /g ; $_ =~ s/^'/‘/g ; $_ =~ s/(\s)'/$1‘/g ; $_ =~ s/"'/“lsquo;/g ; $_ =~ s/'"/’”/g ; $_ =~ s/\s"/ “/g ; $_ =~ s/^'/‘/g ; $_ =~ s/^"/“/g ; $_ =~ s/"\s/” /g ; $_ =~ s/'$/’/g ; $_ =~ s/"$/”/g ; $_ =~ s/(,|\.)'/$1’/g ; $_ =~ s/(,|\.)"/$1”/g ; $_ =~ s/(\S)'(\S)/$1’$2/g ; print STDERR ($_ , "\n\n"); } } } } #### foreach my $file_name (@ARGV) { my $tree = HTML::TreeBuilder->new ; $tree->parse_file($file_name); print "\n\nWhere would you like to put the output file for $file_name? " ; my $output = ; open OUTPUT_FILE, "> $output" or die $! ; $tree = traverse ($tree); print OUTPUT_FILE $tree->as_HTML (""," ",{}) ; $tree = $tree->delete ; close OUTPUT_FILE or die $!; } #### sub traverse ($) { my $element = $_[0] ; if ($element) { if (ref $element) { print $element->tag(), "\n\n" ; if (go_ahead ($element)) { for my $child ($element->content_list()) { $child = traverse ($child) ; } } } else { print "Processing a string element...\n" ; $element = curly_quotes ($element) ; print ($element , "\n\n"); } } return $element ; } #### C:\ ... \Programs\HTMLify>htmlify3.pl test.html Where would you like to put the output file for test.html? output.txt html head body h1 Processing a string element... Testing...’ p Processing a string element... “lsquo;I’d rather ne’er have been here,’she said,” I said. “So what?” I ‘wondered.’ pre p Processing a string element... Jack & Jill went < the hill to > a pail of water. & <> p Processing a string element... “I’m really happy!” #### HTMLify - Convert text to HTML paragraphs

Testing...'

"'I'd rather ne'er have been here,'she said," I said. "So what?" I 'wondered.'

Jack & Jill went < the hill to > a pail of water. & <>

"I'm really happy!"

##
## sub traverse ($) { my $element = $_[0] ; if ($element) { if (ref $element) { print $element->tag(), "\n\n" ; if (go_ahead ($element)) { for my $child ($element->content_list()) { $child = traverse ($child) ; } } } else { print "Processing a string element...\n" ; ${$element} = curly_quotes (${element}) ; print ($element , "\n\n"); } } return $element ; } #### { my @contents = $_->content_list() ; print STDERR "before: @contents\n"; traverse (@contents) ; print STDERR "after: @contents\n"; $_->detach_content(); $_->push_content(@contents); } #### sub traverse { for my $element (@_) { if (Scalar::Util::blessed ($element)) { if (go_ahead($element)) { my @contents = $element->content_list() ; print "Before: ", @contents, "\n\n" ; traverse(@contents) ; print "After: ", @contents, "\n\n" ; $element->detach_content() ; $element->push_content (@contents) ; } } else { print "Processing a string: " ; $element = curly_quotes($element) ; print $element, "\n\n" ; } } } #### sub traverse { my $element = $_[0] ; if (Scalar::Util::blessed ($element)) { if (go_ahead($element)) { my @contents = $element->content_list() ; print "Before: ", @contents, "\n\n" ; for my $child (@contents) { traverse ($child) ; print "Middle: ", $child, "\n\n" ; } print "After: ", @contents, "\n\n" ; } } else { print "Processing a string: " ; $element = curly_quotes($element) ; print $element, "\n\n" ; } } #### sub traverse { my $element = $_[0] ; if (Scalar::Util::blessed (${element})) { if (go_ahead(${element})) { my @contents = ${element}->content_refs_list() ; print "Before: ", @{contents}, "\n\n" ; for my $child (@contents) { traverse (${${child}}) ; print "Middle: ", ${${child}}, "\n\n" ; } print "After: ", @{contents}, "\n\n" ; } } else { print "Processing a string: " ; ${element} = curly_quotes(${element}) ; print ${element}, "\n\n" ; } }