#!/usr/bin/perl
use strict;
use warnings;

use HTML::TokeParser;

# Collect the text of every <li> element in an HTML file, grouped by the
# nesting depth of the enclosing <ul> lists, then print each depth's items
# in sorted order.

my @list;          # $list[$depth] is an array ref of the item texts at that depth
my $level = -1;    # current <ul> nesting depth; -1 means "not inside any <ul>"
my $file  = "c:/test.htm";

my $p = HTML::TokeParser->new($file)
    or die "Can't open $file: $!";

TOKEN:
while ( my $token = $p->get_token ) {
    my ( $type, $tag ) = @{$token}[ 0, 1 ];

    # Only start ('S') and end ('E') tokens carry a tag name in slot 1.
    # For text, comment and other tokens slot 1 holds content, which must
    # not be mistaken for a tag name.
    next TOKEN unless $type eq 'S' or $type eq 'E';
    next TOKEN unless $tag eq 'ul' or $tag eq 'li';

    if ( $tag eq 'ul' ) {
        if ( $type eq 'S' ) {
            $level++;    # entering a <ul>: one level deeper
        }
        elsif ( $level > -1 ) {
            $level--;    # leaving a </ul>: back out one level
        }
        next TOKEN;
    }

    # From here on the tag is 'li'. A closing </li> has no item text of its
    # own, so only opening tags contribute an entry.
    next TOKEN unless $type eq 'S';

    # An <li> outside of any <ul> has no level to be filed under.
    next TOKEN if $level < 0;

    my $text = $p->get_trimmed_text();
    push @{ $list[$level] }, $text;
}

# The data is now in a 2D structure (an array of array refs):
#   @{ $list[0] } holds the items of the outermost lists,
#   @{ $list[1] } holds the items nested one level deeper, and so on.
for my $i ( 0 .. $#list ) {

    # A depth that was entered but never held an <li> leaves an undef slot.
    my @items = sort @{ $list[$i] || [] };

    print "Level $i\n";
    print " $_\n" for @items;
}