svenXY has asked for the wisdom of the Perl Monks concerning the following question:
#!/usr/bin/perl -w use strict; use HTML::TreeBuilder; use HTML::PrettyPrinter; use Data::Dumper; my $html_code = ' <html> <head> <title>Glossary</title> <h1>Glossary</h1> <dl> <dt><b>E Definition</b></dt> <dd>E - data</dd> <p></p> <dt><b>B Definition</b></dt> <dd>B - data</dd> <p></p> <dt><b>A_definition</b></dt> <dd>A data.</dd> <p></p> <dt><b>C definition</b></dt> <dd>C - data</dd> <p></p> </dl> </body> </html> '; my %glossar; my $tree = HTML::TreeBuilder->new; $tree->parse($html_code); my ($dl) = $tree->look_down('_tag', 'dl'); my %data; # looping trough the dt tags, # spawning a hash with the text of dt as key # and the HTML of dt and dd as values for my $dt ($dl->look_down("_tag", "dt")) { my $key = lc($dt->as_text); $data{$key}{'dt'} = $dt->as_HTML; my $dd = $dt->right; $data{$key}{'dd'} = $dd->as_HTML; } # create a string my $output; foreach (sort {lc($a) cmp lc($b)} keys %data) { $output .= $data{$_}{'dt'} . $data{$_}{'dd'} . "<p></p>"; } # feed the string to a new Parser Object my $new_dl = HTML::TreeBuilder->new; $new_dl->parse($output); my $nu_aber = (); # remove unneccesary tags $nu_aber = $new_dl->guts(); # replace old dl with new dl $dl->delete_content(); $dl->push_content($nu_aber); my $hpp = new HTML::PrettyPrinter ( 'linelength' => 130, 'quote_attr' => 1, 'allow_forced_nl' => 1, 'entities' => "&<>äöüßÄÖÜ"); $hpp->set_force_nl(1,qw(body head table tr td)); $hpp->nl_before(2,qw(tr td p)); my $linearray_ref = $hpp->format($tree); print @{$linearray_ref}; $tree = $tree->destroy;
|
|---|
| Replies are listed 'Best First'. | |
|---|---|
|
Re: HTML::TreeBuilder: sort a Definition List (<dl>)
by Tanktalus (Canon) on Sep 12, 2005 at 18:59 UTC | |
|
Re: HTML::TreeBuilder: sort a Definition List (<dl>)
by skillet-thief (Friar) on Sep 12, 2005 at 19:25 UTC | |
by Util (Priest) on Sep 13, 2005 at 01:54 UTC | |
by svenXY (Deacon) on Sep 13, 2005 at 09:03 UTC |