#!/usr/bin/perl -w
use strict;
use HTML::TokeParser;
# Collect the text of every <li> element, grouped by <ul> nesting depth.
# $list[0] receives the items of an outermost <ul>, $list[1] the items of a
# <ul> nested one level deeper, and so on.
my @list;
my $level = -1;    # -1 means "not inside any <ul>"
my $file = "c:/test.htm";
my $p = HTML::TokeParser->new($file) || die "Can't open $file: $!";
LOOP:
while (my $token = $p->get_token ) {
    # Slot 0 is the token type: 'S' for a start tag, 'E' for an end tag.
    # Only for those two types does slot 1 hold a tag name; for text,
    # comment and declaration tokens it holds the raw text instead.
    my ($type, $tag) = @$token;
    next LOOP unless $type eq 'S' or $type eq 'E';
    next LOOP unless $tag eq 'ul' or $tag eq 'li';
    if ( $tag eq 'ul' ) {
        # this will be either a <ul> or a </ul>
        if ( $type eq 'S' ) {
            $level++;    # increase level in response to <ul>
        }
        elsif ( $level >= 0 ) {
            # decrease level in response to </ul>; a stray </ul> with no
            # matching <ul> is ignored so the depth never drops below -1
            $level--;
        }
        next LOOP;
    }
    # Only an opening <li> introduces an item. Reading text after a </li>
    # would add a spurious (usually empty) entry for every closing tag.
    next LOOP if $type eq 'E';
    # An <li> outside any <ul> has no level to be filed under; indexing
    # @list with -1 here would die on an empty array.
    next LOOP if $level < 0;
    # Text from here up to the next tag, with whitespace trimmed.
    my $text = $p->get_trimmed_text();
    push @{$list[$level]}, $text;
}
# The data is now in a two-dimensional structure (an array of array
# references); read up on these (perllol, perldsc) to understand the syntax.
# @{$list[0]} holds the items of the outermost <ul> (printed as "Level 0"),
# @{$list[1]} holds the items nested one level deeper ("Level 1"), etc.
for my $i (0 .. $#list) {
    # A level that received no items of its own (e.g. a <ul> that only
    # wraps another <ul>) is undef; dereferencing undef as an array is fatal
    # under "use strict", so fall back to an empty list for that level.
    my @items = sort @{ $list[$i] || [] };
    print "Level $i\n";
    print " $_\n" for @items;
}