#!/usr/bin/perl
use strict;
use warnings;
use HTML::Parser ();

# Builds a nested tree from test.html using HTML::Parser's event handlers.
#
# Every element becomes a hash of the form
#   { tag => NAME, ATTRIBUTE => VALUE, ..., content => [ ... ] }
# where 'content' lists the element's children in document order: child
# element hashes and plain text strings.
#
# NOTE: attributes live in the same hash as the reserved 'tag' and 'content'
# keys, so an attribute with either of those names (e.g. <meta content="...">)
# cannot be represented and is skipped instead of corrupting the node.

# Root of the tree: a one-element list holding the synthetic 'document' node.
my $htmltree = [ { tag => 'document', content => [] } ];

# Content list currently being filled (children of the innermost open element).
my $node = $htmltree->[0]->{content};

# Content lists of the enclosing elements, innermost last. The first entry is
# a sentinel that end() never pops.
my @prevnodes = ($htmltree);

# Names of the currently open elements, innermost last. $opentags[$i] was
# pushed together with $prevnodes[$i + 1].
my @opentags;

# Elements that have no end tag, so the parser never reports an end event
# for them. They are stored as leaves rather than opened.
my %is_void = map { $_ => 1 } qw(
  area base br col embed hr img input link meta param source track wbr
);

# start_h handler: appends a new element node to the current content list and,
# unless the element is void, makes its content list the current one.
#
#   $tagname - element name as reported by the parser
#   $attr    - hash ref of attribute name => value
sub start {
  my ($tagname, $attr) = @_;

  my $newnode = { tag => $tagname, content => [] };
  foreach my $key (keys %{$attr}) {
    # Reserved keys: see the NOTE at the top of the file.
    next if $key eq 'tag' || $key eq 'content';
    $newnode->{$key} = $attr->{$key};
  }
  push @{$node}, $newnode;

  # A void element will never be closed, so descending into it would nest all
  # following siblings inside it.
  return if $is_void{$tagname};

  push @prevnodes, $node;
  push @opentags, $tagname;
  $node = $newnode->{content};
  return;
}

# end_h handler: closes the innermost open element named $tagname, implicitly
# closing any elements still open inside it. An end tag with no matching open
# element is ignored, so the sentinel in @prevnodes is never popped.
sub end {
  my $tagname = shift;

  my $depth;
  foreach my $i (reverse 0 .. $#opentags) {
    if ($opentags[$i] eq $tagname) {
      $depth = $i;
      last;
    }
  }
  return unless defined $depth;

  while (@opentags > $depth) {
    pop @opentags;
    $node = pop @prevnodes;
  }
  return;
}

# text_h handler: appends a text chunk to the current content list. A single
# trailing newline is removed and chunks that are then empty are dropped.
sub text {
  my $text = shift;

  chomp $text;
  push @{$node}, $text if $text ne '';
  return;
}

my $p = HTML::Parser->new(
  api_version => 3,
  start_h     => [\&start, "tagname, attr"],
  end_h       => [\&end, "tagname"],
  text_h      => [\&text, "dtext"],
);

# parse_file returns undef (with the reason in $!) when the file can't be
# opened; without this check the script would carry on with an empty tree.
$p->parse_file("test.html")
  or die "Can't parse test.html: $!";
##

##



# (presumably the text of the sample test.html; its markup is missing here)
# some content
# more content



##

##

# Appends the HTML serialization of a content list to the file-level
# $htmloutput buffer, recursing into child elements.
#
#   $node           - array ref of children; each entry is either an element
#                     hash ('tag' and 'content' keys, every other key being an
#                     attribute) or a plain text string
#   $withclickiness - optional flag (default 0) that enables the per-element
#                     wrapper hooks; it is passed on to every nesting level
#
# The result is accumulated in $htmloutput; the return value is not meaningful.
#
# Attributes are written in sorted order so the output is deterministic, and
# their values are escaped so a quote or ampersand cannot break the markup.
# Text entries are written out verbatim.
sub descend_htmltree {
  my $node = shift;
  my $withclickiness = shift || 0;

  foreach my $tmpnode (@{$node}) {
    if(ref($tmpnode) ne 'HASH') {
      $htmloutput .= "$tmpnode";
      next;
    }

    my $nodeid = ""; # Magic code to generate node's position in tree

    # NOTE(review): the opening/closing wrapper strings below are empty, so
    # the flag currently has no visible effect; presumably the wrapper markup
    # was lost -- confirm against the original source.
    $htmloutput .= "" if($withclickiness);

    $htmloutput .= "<$tmpnode->{tag}";
    foreach my $attrname (sort keys %{$tmpnode}) {
      next if($attrname eq 'tag' || $attrname eq 'content');

      # Escape a copy so the tree itself is left untouched.
      my $value = defined $tmpnode->{$attrname} ? $tmpnode->{$attrname} : '';
      $value =~ s/&/&amp;/g;
      $value =~ s/"/&quot;/g;
      $value =~ s/</&lt;/g;
      $value =~ s/>/&gt;/g;
      $htmloutput .= " $attrname=\"$value\"";
    }
    $htmloutput .= ">";

    my $children = $tmpnode->{content};
    my $has_children = ref($children) eq 'ARRAY' && @{$children};
    descend_htmltree($children, $withclickiness);

    # Void elements have no end tag in HTML; only emit one for them when they
    # unexpectedly carry children, so that nesting stays balanced.
    my $is_void = $tmpnode->{tag} =~
      /\A(?:area|base|br|col|embed|hr|img|input|link|meta|param|source|track|wbr)\z/;
    $htmloutput .= "</$tmpnode->{tag}>" unless($is_void && !$has_children);

    $htmloutput .= "" if($withclickiness);
  }
}

# Serializes the children of the root node of $htmltree to HTML.
#
#   $filename       - optional path; when non-empty the HTML is also written
#                     to this file, replacing any existing content
#   $withclickiness - optional flag (default 0) handed to descend_htmltree
#
# Returns the generated HTML string. Dies if the output file cannot be
# opened, written, or closed.
sub htmltree_to_html {
  my $filename = shift || '';
  my $withclickiness = shift || 0;

  # descend_htmltree only ever appends to the shared buffer, so start from an
  # empty one; otherwise a second call would return the document twice.
  $htmloutput = '';
  descend_htmltree($htmltree->[0]->{content}, $withclickiness);

  if($filename ne '') {
    # Three-argument open: the filename is never interpreted as part of the
    # open mode, whatever characters it starts or ends with.
    open my $fh, '>', $filename
      or die "Can't open $filename for HTML output: $!";
    print {$fh} $htmloutput
      or die "Can't write HTML output to $filename: $!";
    # Buffered write errors only surface at close time.
    close $fh
      or die "Can't close $filename after HTML output: $!";
  }

  return $htmloutput;
}