#!/usr/bin/perl
use strict;
# define the subclass
package ProcExternal;
use base "HTML::Parser";
# Command-line handling and driver.
#   file_name - HTML file to read
#   ordering  - where the product number sits inside each link text:
#               "name"   (number is the last word),
#               "number" (number is the first word) or
#               "none"   (no number; a fixed ID is used)
# The argument count is tested first so $ARGV[0] is only read when present.
if ((@ARGV != 2) || ($ARGV[0] eq "?")) {
    die "usage $0 file_name ordering (name/number/none)\n";
}
my $file_name = $ARGV[0];
my $order     = $ARGV[1];

# State shared with the parser callbacks defined further down.
my $skip = 1;    # true until start() sees an <a> tag; text() sets it back
my ($product_url, $product_bigurl, $product_id, $product_name, $dir_name);
my (@tarray, @sarray);
my ($orig_text, $orig_self, $product_line);

# Directory part of a backslash-separated input path.
# NOTE(review): $dir_name is not read anywhere in the visible code.
@sarray = split /\\/, $file_name;
pop @sarray;
$dir_name = join('\\', @sarray);

# The output always goes to index.xml in the current directory.  The handle
# stays a bareword because the text() callback prints to OFILE by name.
open(OFILE, '>', 'index.xml') or die "Can't open index.xml: $!";
print OFILE "\n";
proc_html($file_name);
print OFILE "\n";
# Buffered write errors only surface when the handle is closed.
close(OFILE) or die "Can't close index.xml: $!";
# Text callback: turns the first text chunk seen after an <a> start tag into
# one product record in OFILE.
#
# Parameters:
#   $self - the parser object
#   $text - the text chunk as found in the document
#
# The chunk is split into words; depending on the file-level $order the
# product ID is the last word ("name"), the first word ("number") or the
# fixed value "999" ("none").  The remaining words form the product name.
# Dies with "invalid ordering parameter" for any other $order value.
sub text {
    my ($self, $text) = @_;
    $orig_self = $self;
    $orig_text = $text;

    # start() clears $skip on every <a>; anything else is not link text.
    return if $skip;

    # The special ' ' pattern splits on runs of any whitespace and drops
    # leading empties, so blanks, tabs and newlines can never end up inside
    # the ID or the name (splitting on a single space let "123\n" through
    # as the ID and produced empty words for doubled or leading spaces).
    @tarray = split ' ', $text;

    if ($order eq "name") {
        $product_id = pop @tarray;
    }
    elsif ($order eq "number") {
        $product_id = shift @tarray;
    }
    elsif ($order eq "none") {
        $product_id = "999";
    }
    else {
        die "invalid ordering parameter\n";
    }

    # Report an ID without any digit on the console; the record is still
    # written below as long as ID and name are both non-empty.
    if ($product_id !~ /[0-9]/) {
        print "number format error: $text\n";
    }

    $product_name = join(' ', @tarray);

    # Plain truth test: an empty (or "0") ID or name suppresses the record.
    if ($product_id && $product_name) {
        print OFILE "\n";

        # Names of at most two words additionally get the raw text written
        # to the file and a console notice, presumably so they can be
        # checked by hand.
        if ($#tarray < 2) {
            print OFILE "\t$orig_text\n";
            print "EditLine #: $product_id $product_name\n";
        }

        print OFILE "\t$product_name\n";
        print OFILE "\t$product_url\n";
        print OFILE "\t$product_id\n";
        print OFILE "\n\n";
    }

    # Ignore further text until the next <a> start tag.
    $skip = 1;
}
# Comment callback: comments carry nothing this script needs, so the
# arguments are unpacked and nothing else happens.
sub comment {
    my ($parser, $comment_text) = @_;
}
# Start-tag callback: every <a> tag arms the text handler and records the
# link target.
#
# Parameters:
#   $parser     - the parser object
#   $tag_name   - tag name; compared against "a"
#   $attrs      - hash reference of the tag's attributes
#   $attr_order - unused here
#   $raw_tag    - the tag as it appeared in the document
sub start {
    my ($parser, $tag_name, $attrs, $attr_order, $raw_tag) = @_;
    return unless $tag_name eq "a";

    $skip         = 0;           # let text() process the next text chunk
    $product_line = $raw_tag;

    # Keep the full href, then upper-case its last '/'-separated segment.
    $product_bigurl = $attrs->{href};
    @sarray         = split('/', $product_bigurl);
    my $last_segment = pop @sarray;
    $product_url = uc($last_segment);
}
# End-tag callback: currently a no-op.  The only branch, for </a>, holds
# nothing but a disabled debugging print.
sub end {
    my ($parser, $tag_name, $raw_tag) = @_;
    return unless $tag_name eq "a";
    # print $raw_tag;
}
# Parse one HTML file with a fresh ProcExternal parser; the start/text/end
# callbacks of this package do the actual work while the file is parsed.
#
# Parameters:
#   $html_file - path of the HTML file to read
#
# Returns nothing.  Dies if the file cannot be opened for parsing.
sub proc_html {
    my ($html_file) = @_;

    my $parser = ProcExternal->new;

    # Ask for each run of text in one piece.  By default the parser may hand
    # a single stretch of text to text() in several chunks, and text() only
    # looks at the first chunk after an <a> tag, so a link text could lose
    # its trailing words.
    $parser->unbroken_text(1);

    # parse_file() returns undef, with $! set, when the file can't be opened;
    # without this check an unreadable input silently gave an empty result.
    defined $parser->parse_file($html_file)
        or die "Can't parse $html_file: $!\n";
    $parser->eof;
    return;
}