#!/usr/bin/perl
use strict;
use warnings;
use HTML::Parser ();

# Build a simple nested tree from an HTML file using HTML::Parser events.
#
# Every element becomes a hash of the form
#     { tag => NAME, ATTR => VALUE, ..., content => [ ... ] }
# where content holds child element hashes and plain text strings in
# document order. The root of the tree is a synthetic 'document' node.
#
# NOTE: attributes share the hash with the 'tag' and 'content' keys, so an
# attribute that is literally named "tag" or "content" cannot be stored.
my $htmltree = [ { tag => 'document', content => [] } ];

# Content array that newly seen children are appended to.
my $node = $htmltree->[0]->{content};

# Stack of enclosing content arrays. The first entry is a sentinel for the
# root; end() never pops it.
my @prevnodes = ($htmltree);

# Names of the currently open elements, innermost last. Always holds exactly
# one entry fewer than @prevnodes (the sentinel has no name).
my @open_tags;

# Elements that never have content or an end tag. They are stored as leaves;
# descending into them would nest the rest of the document inside them.
my %void_element = map { $_ => 1 } qw(
    area base br col embed hr img input link meta param source track wbr
);

# Start-tag handler: append a new element to the current content array and,
# unless it is a void element, make it the current insertion point.
#   $tagname - element name as reported by the parser
#   $attr    - hash reference of attribute name => value
sub start {
    my ($tagname, $attr) = @_;

    my $newnode = { %{ $attr || {} } };

    # Assigned after the attributes are copied so that an attribute named
    # "tag" or "content" cannot replace the structural keys.
    $newnode->{tag}     = $tagname;
    $newnode->{content} = [];

    push @{$node}, $newnode;
    return if $void_element{ lc $tagname };

    push @prevnodes, $node;
    push @open_tags, $tagname;
    $node = $newnode->{content};
    return;
}

# End-tag handler: close the innermost open element named $tagname, together
# with any elements still open inside it. An end tag that matches no open
# element (including end tags of void elements) is ignored, so the stack can
# never be popped past the root.
sub end {
    my $tagname = shift;

    my $match;
    for my $i (reverse 0 .. $#open_tags) {
        if ($open_tags[$i] eq $tagname) {
            $match = $i;
            last;
        }
    }
    return unless defined $match;

    while (@open_tags > $match) {
        pop @open_tags;
        $node = pop @prevnodes;
    }
    return;
}

# Text handler: append the text to the current content array, minus one
# trailing newline; text that is empty after that is dropped.
sub text {
    my $text = shift;

    chomp $text;
    push @{$node}, $text if $text ne '';
    return;
}

my $p = HTML::Parser->new(
    api_version => 3,
    start_h     => [\&start, "tagname, attr"],
    end_h       => [\&end,   "tagname"],
    text_h      => [\&text,  "dtext"],
);

# parse_file() returns undef when the file cannot be opened.
$p->parse_file("test.html")
    or die "Can't parse test.html: $!";

####
some content more content
##
##
# Serialise the tree stored in $htmltree back into HTML text.
#
# Both subs below use $htmltree (the parsed tree) and $htmloutput (the output
# accumulator). Neither is declared in this section; they are expected to be
# declared earlier in the file.

# True when $tag names an HTML void element, which must not be given a
# closing tag.
sub _is_void_element {
    my $tag = shift;
    return $tag =~ /\A(?:area|base|br|col|embed|hr|img|input|link|meta|param|source|track|wbr)\z/i;
}

# Return $value with the HTML-significant characters escaped. Double quotes
# are escaped as well when $in_attribute is true. An undefined value is
# treated as the empty string.
sub _escape_html {
    my ($value, $in_attribute) = @_;

    $value = '' unless defined $value;
    $value =~ s/&/&amp;/g;
    $value =~ s/</&lt;/g;
    $value =~ s/>/&gt;/g;
    $value =~ s/"/&quot;/g if $in_attribute;
    return $value;
}

# Append the HTML for every entry of a content array to $htmloutput.
#   $node           - array reference holding element hashes and text strings
#   $withclickiness - optional flag; when true each element is surrounded by
#                     the "clickiness" wrapper strings
#   $raw_text       - optional flag; when true text entries are emitted
#                     without escaping (set for the content of <script> and
#                     <style> elements)
# Text and attribute values are assumed to be stored unescaped
# (NOTE(review): confirm against whatever builds the tree).
sub descend_htmltree {
    my $node           = shift;
    my $withclickiness = shift || 0;
    my $raw_text       = shift || 0;

    foreach my $tmpnode (@{$node}) {
        if (ref($tmpnode) ne 'HASH') {
            $htmloutput .= $raw_text ? $tmpnode : _escape_html($tmpnode);
            next;
        }

        my $nodeid = "";    # Magic code to generate node's position in tree
        my $tag    = $tmpnode->{tag};

        # NOTE(review): the wrapper below is only a newline and never uses
        # $nodeid; the original wrapper markup looks to have been lost.
        # Restore it here if so.
        $htmloutput .= "\n" if ($withclickiness);

        $htmloutput .= "<$tag";
        # Sorted so that the generated markup is the same on every run.
        foreach my $attr (sort keys %{$tmpnode}) {
            next if $attr eq 'tag' || $attr eq 'content';
            $htmloutput .= " $attr=\"" . _escape_html($tmpnode->{$attr}, 1) . "\"";
        }
        $htmloutput .= ">";

        # Pass the wrapper flag down so nested elements are wrapped too.
        my $child_raw = ($tag =~ /\A(?:script|style)\z/i) ? 1 : 0;
        descend_htmltree($tmpnode->{content}, $withclickiness, $child_raw);

        $htmloutput .= "</$tag>" unless _is_void_element($tag);
        $htmloutput .= "\n" if ($withclickiness);
    }
    return;
}

# Render the whole tree to HTML.
#   $filename       - optional; when non-empty the HTML is also written to
#                     this file (dies if the file cannot be written)
#   $withclickiness - optional flag handed on to descend_htmltree()
# Returns the generated HTML as a string.
sub htmltree_to_html {
    my $filename       = shift || '';
    my $withclickiness = shift || 0;

    # Start from an empty buffer so repeated calls do not accumulate output.
    $htmloutput = '';
    descend_htmltree($htmltree->[0]->{content}, $withclickiness);

    if ($filename ne '') {
        open my $fh, '>', $filename
            or die "Can't open $filename for HTML output: $!";
        print {$fh} $htmloutput
            or die "Can't write HTML output to $filename: $!";
        close $fh
            or die "Can't close $filename after HTML output: $!";
    }

    return $htmloutput;
}