#!/usr/bin/perl -w
use HTML::Tree;
use LWP::Simple;
use strict;
# Download the Guardian front page into a local file.
# getstore() returns the HTTP status code, which is always a true value
# (e.g. 200 or 404), so `getstore(...) or die` can never fire; the code has
# to be checked with is_success() (exported by LWP::Simple).
my $status = getstore("http://www.guardian.co.uk", "guardian.htm");
die "Cannot get the page.\n" unless is_success($status);

# Build the parse tree from the saved file.  parse_file() is a method on the
# tree object, and returns undef (with $! set) if the file cannot be opened.
my $tree = HTML::TreeBuilder->new();
$tree->parse_file("guardian.htm")
    or die "Cannot parse guardian.htm: $!\n";
####
{
    # Next id to hand out.  Kept in this enclosing block so it persists
    # across give_id() calls but is invisible to the rest of the file.
    # Perl's string auto-increment steps 'x0000' -> 'x0001' -> ...
    my $counter = 'x0000';

    # give_id($node)
    #   Walks $node and everything beneath it, assigning a fresh 'id'
    #   attribute to every element that does not already have one.
    #   $node must provide attr() and content_list() (HTML::Element API).
    sub give_id {
        my ($node) = @_;

        # Leave an existing id untouched; only fill in the missing ones.
        $node->attr('id', $counter++) unless defined $node->attr('id');

        foreach my $child ($node->content_list) {
            give_id($child) if ref $child;    # ignore text nodes
        }
        return;
    }

    # Start at the root of the parsed document.  The original passed
    # $start_node, which is not declared anywhere in this file and therefore
    # fails under `use strict`; $tree is the only tree in scope.
    give_id($tree);
}
####
# Write the text of an <h1> or <h2> element, wrapped in "====" markers.
# NOTE(review): $element and the `outfile` handle are not defined in the
# visible part of this file -- presumably $element comes from an enclosing
# traversal of the tree and `outfile` is opened elsewhere; confirm.
#
# tag() is called with no argument so it only reads the tag name: calling
# tag('h1') would *set* the tag, and ('h1' or 'h2') evaluates to just 'h1'.
if ($element->tag =~ m{\A h[12] \z}x) {
    my $content = $element->as_text();
    print outfile "====$content====\n";
}