in reply to HTML::TreeBuilder: sort a Definition List (<dl>)
For playing with HTML, I prefer enforcing XHTML and working with that instead, using XML::Twig. For example:
# Sort the entries of every <dl> in an XHTML document by the text of each <dt>.
#
# Tested. The only caveat is input containing funky characters that aren't
# part of standard XML, e.g., "©" (presumably written as an HTML entity such
# as &copy; -- TODO confirm). Handling those is a bit more work: still
# doable, but more work.
use strict;
use warnings;
use XML::Twig;

# Sample document. The pieces are joined with single spaces (including one
# leading and one trailing space) so the resulting string is exactly the
# markup being demonstrated.
my $html_code = join ' ', '',
    '<html>',
    '<head>',
    '<title>Glossary</title>',
    '</head><body>',
    '<!-- had to close head, open body -->',
    '<h1>Glossary</h1>',
    '<dl>',
    '<dt><b>E Definition</b></dt>', '<dd>E - data</dd>', '<p></p>',
    '<dt><b>B Definition</b></dt>', '<dd>B - data</dd>', '<p></p>',
    '<dt><b>A_definition</b></dt>', '<dd>A data.</dd>',  '<p></p>',
    '<dt><b>C definition</b></dt>', '<dd>C - data</dd>', '<p></p>',
    '</dl>',
    '</body>',
    '</html>',
    '';

my $twig = XML::Twig->new(pretty_print => 'indented');
$twig->parse($html_code);

for my $dl ($twig->root()->get_xpath('//dl')) {
    sort_definition_list($dl);
    print $dl->sprint(), "\n";
}

# Reorder the children of one <dl> element in place.
#
# Each <dt> starts a new entry; every following non-<dt> child (here <dd> and
# <p>) travels with the <dt> that precedes it. Entries are ordered by a
# case-sensitive string comparison of the <dt> text.
#
# Children that appear before the first <dt> belong to no entry. They are
# kept, in their original order, at the top of the list. (Appending them to
# $entries[-1] while @entries is still empty would die with "Modification of
# non-creatable array value attempted".)
sub sort_definition_list {
    my ($dl) = @_;

    my @leading;    # children seen before the first <dt>
    my @entries;    # one array ref per entry: [ $dt, @followers ]

    # children() returns a plain list, so cutting elements while walking it
    # does not disturb the iteration.
    for my $el ($dl->children()) {
        $el->cut();
        if ($el->gi() eq 'dt') {
            push @entries, [$el];
        }
        elsif (@entries) {
            push @{ $entries[-1] }, $el;
        }
        else {
            push @leading, $el;
        }
    }

    # Compute each entry's sort key once rather than on every comparison.
    my @sorted =
        map  { $_->[1] }
        sort { $a->[0] cmp $b->[0] }
        map  { [ $_->[0]->text(), $_ ] } @entries;

    # Re-attach everything in its new order.
    for my $el (@leading, map { @{$_} } @sorted) {
        $el->paste(last_child => $dl);
    }

    return;
}
|
|---|