#!c:/perl/bin/perl
# This version allows the use of a filename specified on the command line.
#
# Reads HTML from the file(s) named on the command line (or STDIN when none
# are given), strips the markup, and writes a plain-text rendering to the
# file named in $output. Tables are delimited with banner lines, every <tr>
# starts a new output line, every <td> is preceded by a tab, and cells whose
# class attribute contains "alarmClear"/"alarmSet" are tagged "OK"/"ALARM".
use strict;
use warnings;

package HTMLStrip;
use base "HTML::Parser";

#system("cls");

my $output = "c:/perl/bin/parseOutput.txt";

# Opening with '>' creates the file or truncates an existing one, so no
# separate existence check / unlink is needed beforehand.
open my $out_fh, '>', $output
    or die "Cannot open '$output' for writing: $!";

# Constructing the parser without handler arguments makes HTML::Parser
# dispatch events to the text/start/end methods defined below.
my $p = HTMLStrip->new;

# Parse line-by-line, rather than the whole file at once.
while (my $line = <>) {
    $p->parse($line);
}

# Flush and parse remaining unparsed HTML.
$p->eof;

# Buffered write errors only surface at close, so check it.
close $out_fh or die "Cannot close '$output': $!";

# Handle a run of plain text reported by the parser.
#
#   $self - the HTMLStrip parser object
#   $text - the text chunk as reported by the parser
#
# Removes everything from the first '#' onward, trims surrounding whitespace,
# and prints the remainder on its own line when it is non-empty. Text lying
# between a line consisting solely of '<!--' and one consisting solely of
# '-->' (markers included) is suppressed; this is how comment-wrapped
# <style> content reaches this handler.
sub text {
    my ($self, $text) = @_;

    chomp $text;
    $text =~ s/#.*//;     # comments
    $text =~ s/^\s+//;    # leading whitespace
    $text =~ s/\s+$//;    # trailing whitespace

    # Opening marker: start suppressing. The original used "next" here,
    # which is loop control and would jump out of this callback into the
    # caller's read loop; a flag plus a plain return is used instead.
    if ($text eq '<!--') {
        $self->{_in_style_comment} = 1;
        return;
    }

    # Inside a suppressed region: drop the text, and stop suppressing once
    # the closing marker has been seen.
    if ($self->{_in_style_comment}) {
        $self->{_in_style_comment} = 0 if $text eq '-->';
        return;
    }

    # Print non-blank lines.
    print {$out_fh} "$text\n" if length $text;
    return;
}

# Process OPENING/STARTING HTML tags.
#
#   $self     - the HTMLStrip parser object
#   $tag      - tag name (compared against lowercase names below)
#   $attr     - hash reference of the tag's attributes
#   $attrseq  - unused here
#   $origtext - unused here
#
# Only table-related tags produce output.
sub start {
    my ($self, $tag, $attr, $attrseq, $origtext) = @_;

    if ($tag eq 'table') {
        print {$out_fh} "\n************* BEGIN TABLE ****************\n";
    }
    elsif ($tag eq 'tr') {
        print {$out_fh} "\n";
    }
    elsif ($tag eq 'td') {
        print {$out_fh} "\t";

        my $class = $attr->{'class'};
        if (defined $class) {
            # Two independent checks: a class value containing both names
            # emits both labels, as before.
            print {$out_fh} "OK"    if $class =~ /alarmClear/;
            print {$out_fh} "ALARM" if $class =~ /alarmSet/;
        }
    }
    return;
}

# Process CLOSING/ENDING HTML tags.
#
#   $self     - the HTMLStrip parser object
#   $tag      - tag name
#   $origtext - unused here
sub end {
    my ($self, $tag, $origtext) = @_;

    # Safety net: if a '<!--' marker was seen but no stand-alone '-->' line
    # followed, stop suppressing text once any element is closed so the rest
    # of the document is not silently dropped.
    $self->{_in_style_comment} = 0;

    if ($tag eq 'table') {
        print {$out_fh} "\n************* END TABLE ****************\n";
    }
    return;
}