#!/usr/local/bin/perl -w

###########################################################
# includes ################################################
###########################################################
use strict;
use HTML::TokeParser;

#################
### Variables ###
#################
# Input document: the first command-line argument when one is given,
# otherwise the historical default 'test.html' in the current directory.
my $file_in = @ARGV ? $ARGV[0] : 'test.html';

##################
### Parse HTML ###
##################
# The constructor yields a false value when the file cannot be opened;
# include the file name so the failure is actionable.
my $p = HTML::TokeParser->new($file_in)
	or die "Can't open $file_in: $!";

# Every token is an array reference whose first element is a type code.
# Field positions used below follow the HTML::TokeParser documentation:
#   S (start tag):   [1] tag name, [4] original tag text
#   E (end tag):     [1] tag name, [2] original tag text
#   T (text):        [1] text
#   C (comment):     [1] comment text
#   D (declaration): [1] declaration text
# Any other token type (e.g. processing instructions) is ignored.
# Type codes are compared exactly rather than with an unanchored,
# case-insensitive regex, so a type can never trigger the wrong handler.
while (my $token = $p->get_token) {
	my $token_type = $token->[0];
	if ($token_type eq 'S') {        # Start Tag
		start($token->[1], $token->[4]);
	}
	elsif ($token_type eq 'E') {     # End Tag
		end($token->[1], $token->[2]);
	}
	elsif ($token_type eq 'T') {     # Text
		text($token->[1]);
	}
	elsif ($token_type eq 'C') {     # Comment
		comment($token->[1]);
	}
	elsif ($token_type eq 'D') {     # Declaration
		declaration($token->[1]);
	}
}

###########################################################
# Subroutines #############################################
###########################################################

####################
### Declarations ###
####################
# Print a declaration token's text on its own line, prefixed with "DEC: ".
#   $_[0] - the declaration text to report
sub declaration {
	my $decl_text = shift;
	my $line = 'DEC: ' . $decl_text . "\n";
	print $line;
}

################
### Comments ###
################
# Print a comment token's text on its own line, prefixed with "CMT: ".
#   $_[0] - the comment text to report
sub comment {
	my $comment_text = shift;
	my $line = 'CMT: ' . $comment_text . "\n";
	print $line;
}

#####################
### Text Entities ###
#####################
# Print a text token on a single line, prefixed with "TEXT: ".
#   $_[0] - the raw text of the token
# Nothing is printed when the text is undefined, empty, or consists only
# of whitespace. Otherwise every run of whitespace (spaces, tabs, newlines)
# is collapsed to one space, which also joins text that was split across
# several source lines.
sub text {
	my ($text) = @_;
	return if !defined $text;          # nothing to report
	return if $text =~ /\A\s*\z/;      # skip empty and blank-only text
	# \s+ already matches newlines, so no separate newline removal is needed.
	$text =~ s/\s+/ /g;
	print "TEXT: $text\n";
}

##################
### Start Tags ###
##################
# Print a start tag on its own line in the form "ST: <tag> = <original text>".
#   $_[0] - the tag name
#   $_[1] - the tag's original source text
sub start {
	my $tag      = shift;
	my $origtext = shift;

	# Strip a trailing record separator (a newline by default) so the
	# report stays on one line.
	chomp $origtext;

	my $line = 'ST: ' . $tag . ' = ' . $origtext . "\n";
	print $line;
}

################
### End Tags ###
################
# Print an end tag on its own line in the form "ET: <tag> = <original text>".
#   $_[0] - the tag name
#   $_[1] - the tag's original source text
sub end {
	my $tag      = shift;
	my $origtext = shift;

	# Strip a trailing record separator (a newline by default) so the
	# report stays on one line.
	chomp $origtext;

	my $line = 'ET: ' . $tag . ' = ' . $origtext . "\n";
	print $line;
}