in reply to Editing HTML files

Here's my go with HTML::TreeBuilder.

The docs discuss inline editing under the $h->content_refs_list method.

You may have to adjust depending on how "unpredictable" the HTML is.

#!/usr/local/bin/perl
use strict;
use warnings;
use HTML::TreeBuilder;

# Parse the sample document that follows the __DATA__ marker.
my $tree = HTML::TreeBuilder->new_from_file(*DATA)
    or die qq{cant build tree\n};

# Locate the first <div class="noclass"> element in the tree.
my $target = $tree->look_down(_tag => q{div}, class => q{noclass});
if (!$target) {
    die qq{noclass not found\n};
}

# content_refs_list yields a reference to each child slot, so assigning
# through the reference edits the tree in place.
my $count = 0;
foreach my $child_ref ($target->content_refs_list) {
    if (!ref $$child_ref) {
        # Plain (non-reference) children are text segments: swap them out.
        $$child_ref = lookup_replacement($$child_ref);
        $count++;
    }
}
die qq{no replace\n} if !$count;

# Serialise with a one-space indent and an empty optional-end-tag map.
print $tree->as_HTML(undef, qq{ }, {}), qq{\n};

# Return the replacement for a text segment. This stub ignores the text it
# is given and always answers with the same fixed string.
sub lookup_replacement {
    my ($text) = @_;

    # find replacement
    return q{something};
}

__DATA__
<html>
<head>
<title>tb_test</title>
</head>
<body>
<div align="center" class="noclass">nothing</div>
</body>
</html>
output
<html> <head> <title>tb_test</title> </head> <body> <div align="center" class="noclass">something</div> </body> </html>

Replies are listed 'Best First'.
Re^2: Editing HTML files
by spivey49 (Monk) on Jul 10, 2008 at 21:41 UTC
    Thanks wfsp! Works perfectly.
Re^2: Editing HTML files
by spivey49 (Monk) on Jul 14, 2008 at 16:32 UTC

    So this code works perfectly until I come across a blank tag. Any suggestions how to handle a situation where the tag exists, but has no text? Currently when the script comes across this situation it dies instead of inserting the new text. Here's the code with sample html:

    Code:

    # Find the index.txt config files
    # and edit the index.htm files one dir up from the config file.
    #
    # Usage: script <directory>
    #
    # For every index.htm(l) found under <directory>, the settings in
    # <that dir>/index/index.txt are read and the text of the divs with
    # class "class1", "class2" and "class3" is replaced by the values
    # configured under the same names. The file is rewritten in place.
    use strict;
    use warnings;
    use HTML::TreeBuilder;
    use File::Find;
    use File::Spec;

    die qq{usage: $0 <directory>\n} unless defined $ARGV[0];

    # File::Find changes into each directory it visits while
    # $File::Find::name stays relative to the start directory, so a relative
    # start directory would make the later open() calls fail. Work with an
    # absolute path instead.
    my $dir = File::Spec->rel2abs($ARGV[0]);

    # div classes to rewrite; each is also the key looked up in the config.
    my @classes = qw(class1 class2 class3);

    find(\&file_finds, $dir);

    # File::Find callback. $_ holds the name of the current file.
    sub file_finds {
        # The original test was "$_=/index.htm/", which assigned the match
        # result to $_ instead of matching against it, and also matched any
        # name merely containing "index" + any char + "htm".
        return unless /\Aindex\.html?\z/;

        my $html_file  = $File::Find::name;
        my $cfg_path   = $File::Find::dir . "/index/index.txt";
        my %index_pref = config($cfg_path);
        edit($html_file, \%index_pref);
        return;
    }

    # Read "name = value" pairs from $cfg_path and return them as a hash.
    # Text after '#' and surrounding whitespace are ignored, as are blank
    # lines. A fresh hash is built on every call so settings from one config
    # file cannot leak into the next. Dies if the file cannot be opened.
    sub config {
        my ($cfg_path) = @_;

        open my $cfg_fh, '<', $cfg_path
            or die "Can't open $cfg_path $!";

        my %index_pref;
        while (my $line = <$cfg_fh>) {
            chomp $line;                   # no newline
            $line =~ s/#.*//;              # no comments
            $line =~ s/^\s+//;             # no leading white
            $line =~ s/\s+$//;             # no trailing white
            next unless length $line;      # anything left?
            my ($var, $value) = split /\s*=\s*/, $line, 2;
            $index_pref{$var} = $value;
        }
        close $cfg_fh;

        return %index_pref;
    }

    # Replace the text of each configured div in $html_file and write the
    # document back to the same file.
    #
    #   $html_file  - path of the HTML file to edit
    #   $index_pref - hash ref of class name => replacement text
    #
    # Dies when a div is missing, when its config value is missing, or when
    # a div has no text child to replace (an empty div has no text child, so
    # it ends up here too).
    sub edit {
        my ($html_file, $index_pref) = @_;

        my $root = HTML::TreeBuilder->new_from_file($html_file)
            or die qq{cant build tree\n};

        # Locate every div first so nothing is modified if one is missing.
        my %div_for;
        for my $class (@classes) {
            $div_for{$class} = $root->look_down(
                _tag  => q{div},
                class => $class,
            ) or die qq{$class not found\n$html_file\n};

            die qq{$class missing from config\n$html_file\n}
                unless defined $index_pref->{$class};
        }

        for my $class (@classes) {
            my $replaced = 0;
            for my $item_r ($div_for{$class}->content_refs_list) {
                next if ref ${$item_r};    # skip child elements, keep text
                ${$item_r} = $index_pref->{$class};
                $replaced++;
            }
            die ucfirst($class) . qq{ not replaced\n$html_file\n}
                unless $replaced;
        }

        my $html = $root->as_HTML(undef, qq{ }, {});
        $root->delete;

        # Three-arg open with a lexical handle; check close as well, since
        # buffered write errors only surface there.
        open my $out_fh, '>', $html_file or die $!;
        print {$out_fh} $html;
        close $out_fh or die $!;
        return;
    }

    HTML: <!--Class 3 has no text--> <div align="center" class="class1">something</div> <div align="center" class="class2">something else</div> <div align="center" class="class3"></div>

      Have a look at $h->splice_content(...).

      The cut down example below inserts new text immediately after the opening div tag (if a text element is not found).

      #!/usr/local/bin/perl
      use strict;
      use warnings;
      use HTML::TreeBuilder;

      # Slurp the sample HTML that follows the __DATA__ marker.
      my $html = do { local $/; <DATA> };
      my $replace = q{replaced};

      my $edited = edit($html, $replace);
      print $edited;

      # Replace the text of the first <div class="class3"> in $html with
      # $replace and return the re-serialised document.
      #
      # If the div has no text child (for example, it is empty), $replace is
      # inserted as its first child instead. Dies if the document cannot be
      # parsed or the div is not found.
      sub edit {
          my ($html, $replace) = @_;

          # my $root = HTML::TreeBuilder->new_from_file($html_file)
          #     or die qq{cant build tree\n};
          my $root = HTML::TreeBuilder->new_from_content($html)
              or die qq{cant build tree\n};

          my $class3 = $root->look_down(
              _tag  => q{div},
              class => q{class3},
          );
          die qq{class3 not found\n} unless $class3;

          my $rep_class3 = 0;
          for my $item_r ($class3->content_refs_list) {
              next if ref ${$item_r};    # skip child elements, keep text
              ${$item_r} = $replace;
              $rep_class3++;
          }

          if (not $rep_class3) {
              # Offset 0 puts the text immediately after the opening tag.
              # The earlier offset of 1 pointed past the end of an empty
              # content list, and after the first child element otherwise.
              $class3->splice_content(0, 0, $replace);
          }
          #die qq{Class3 not replaced\n} unless $rep_class3;

          my $edited_html = $root->as_HTML(undef, qq{ }, {});
          $root->delete;
          return $edited_html;
      }

      __DATA__
      <div align="center" class="class1">something</div>
      <div align="center" class="class2">something else</div>
      <div align="center" class="class3"></div>
      output
      <html> <head> </head> <body> <div align="center" class="class1">something</div> <div align="center" class="class2">something else</div> <div align="center" class="class3">replaced</div> </body> </html>
        Thanks again wfsp