#!/usr/bin/perl -w
use strict;
use HTML::TreeBuilder;
use HTML::PrettyPrinter;
use Data::Dumper;
# Sample input document whose first <dl> is to be sorted alphabetically.
# NOTE(review): as written, this literal contains no <dl>/<dt>/<dd> markup,
# so the <dl> lookup below cannot succeed. Presumably the original tags were
# lost somewhere along the way -- restore the real markup before relying on
# this script.
my $html_code = '
Glossary
Glossary
- E Definition
- E - data
- B Definition
- B - data
- A_definition
- A data.
- C definition
- C - data
';
my %glossar;    # NOTE(review): not used anywhere in the visible script.

# Parse the document. eof() signals the end of input so the parser can
# finish building the tree.
my $tree = HTML::TreeBuilder->new;
$tree->parse($html_code);
$tree->eof;

# Only the first <dl> found in the document is processed.
my $dl = $tree->look_down('_tag', 'dl');
if (!$dl) {
    $tree->destroy;
    die "No <dl> element found in the HTML input\n";
}

# Walk the direct children of the <dl> and group them into entries:
# every <dt> opens a new entry, and every node up to the next <dt>
# (normally one or more <dd> elements) belongs to that entry.  Working on
# the element objects themselves avoids serialising to HTML and re-parsing,
# keeps entries with identical terms, and keeps multiple <dd>s per term.
my @leading_nodes;    # anything that appears before the first <dt>
my @entries;          # each: { key => lower-cased term, nodes => [...] }
for my $node ($dl->content_list) {
    # Text children are plain strings, element children are objects.
    if (ref $node && $node->tag eq 'dt') {
        push @entries, { key => lc($node->as_text), nodes => [$node] };
    }
    elsif (@entries) {
        push @{ $entries[-1]{nodes} }, $node;
    }
    else {
        push @leading_nodes, $node;
    }
}

# Case-insensitive ordering: the keys were lower-cased when collected.
my @sorted_entries = sort { $a->{key} cmp $b->{key} } @entries;

# Re-attach the very same nodes in sorted order.  detach_content() empties
# the <dl> without destroying its former children.
$dl->detach_content;
$dl->push_content(@leading_nodes, map { @{ $_->{nodes} } } @sorted_entries);

# Pretty-print the whole (modified) document.
my $hpp = HTML::PrettyPrinter->new(
    'linelength'      => 130,
    'quote_attr'      => 1,
    'allow_forced_nl' => 1,
    'entities'        => "&<>äöüßÄÖÜ",
);
$hpp->set_force_nl(1, qw(body head table tr td));
# NOTE(review): confirm that nl_before() with a leading count actually sets
# the number of newlines; the setter may be set_nl_before() -- verify
# against the HTML::PrettyPrinter documentation.
$hpp->nl_before(2, qw(tr td p));
my $linearray_ref = $hpp->format($tree);
print @{$linearray_ref};

# Explicitly free the tree.
$tree = $tree->destroy;