#!/usr/bin/perl
# Sort the entries of the first <dl> in an HTML document alphabetically
# (case-insensitively) by the text of their <dt>, then pretty-print the
# whole document to STDOUT.

use strict;
use warnings;

use HTML::TreeBuilder;
use HTML::PrettyPrinter;
use Data::Dumper;

# NOTE(review): this sample contains no markup at all, so no <dl> can be
# found in it -- presumably the tags were lost when the script was pasted.
# Restore the original HTML (a <dl> with <dt>/<dd> pairs) before running.
my $html_code = ' Glossary

Glossary

E Definition
E - data

B Definition
B - data

A_definition
A data.

C definition
C - data

';

my $tree = HTML::TreeBuilder->new;
$tree->parse($html_code);
$tree->eof;    # parse() only feeds a chunk; eof() finalises the tree

# Only the first <dl> of the document is sorted.
my $dl = $tree->look_down( _tag => 'dl' )
    or die "No <dl> element found in the input HTML\n";

# Group the direct children of the <dl>: every <dt> opens a new entry and
# each element that follows it (normally its <dd>) travels with that entry.
# An array is used instead of a hash keyed by term so that two entries with
# the same term are both kept, and only direct children are examined so
# that a list nested inside a <dd> is left intact.
my @entries;
for my $node ( $dl->content_list ) {
    next unless ref $node;    # plain text between entries is not an element

    if ( $node->tag eq 'dt' ) {
        push @entries, { key => lc $node->as_text, nodes => [$node] };
    }
    elsif (@entries) {
        push @{ $entries[-1]{nodes} }, $node;
    }
}

my @sorted_nodes = map { @{ $_->{nodes} } }
    sort { $a->{key} cmp $b->{key} } @entries;

# Take the nodes we keep out of the list first, so that delete_content()
# destroys only the leftovers, then put them back in sorted order. Moving
# the existing nodes avoids serialising them to a string and re-parsing.
$_->detach for @sorted_nodes;
$dl->delete_content;
$dl->push_content(@sorted_nodes);

# NOTE(review): the 'entities' literal contains non-ASCII characters but the
# script has no "use utf8" -- confirm the file encoding gives the intended
# characters.
my $hpp = HTML::PrettyPrinter->new(
    'linelength'      => 130,
    'quote_attr'      => 1,
    'allow_forced_nl' => 1,
    'entities'        => "&<>äöüßÄÖÜ",
);
$hpp->set_force_nl( 1, qw(body head table tr td) );

# NOTE(review): nl_before() looks like the getter; set_nl_before() may be
# what was intended here -- confirm against the HTML::PrettyPrinter docs.
$hpp->nl_before( 2, qw(tr td p) );

my $linearray_ref = $hpp->format($tree);
print @{$linearray_ref};

# Explicitly release the tree.
$tree = $tree->delete;