#!/usr/bin/perl
#
# Extract product entries from the <a> links of an HTML page and write them
# to index.xml in the current directory.
#
# Usage: <script> file_name ordering
#   file_name  HTML file to parse
#   ordering   where the product number sits in the link text:
#                name    - number is the last space-separated word
#                number  - number is the first space-separated word
#                none    - no number in the text; the id is set to "999"
#
# NOTE(review): every record is written as bare tab-indented values with no
# XML element tags around them. The strings are reproduced exactly as found,
# but it looks like markup may have been lost at some point -- confirm against
# a known-good index.xml before relying on the output format.

use strict;
use warnings;

# Define the HTML::Parser subclass. The parser is created without arguments,
# so HTML::Parser dispatches to the text/comment/start/end methods below.
package ProcExternal;
use base "HTML::Parser";

# Check the argument count first so $ARGV[0] is never read when it is absent.
if ((@ARGV != 2) || ($ARGV[0] eq "?")) {
    die "usage $0 file_name ordering (name/number/none)\n";
}

my $file_name = $ARGV[0];
my $order     = $ARGV[1];

# Reject a bad ordering up front, before index.xml is truncated by open().
my %is_valid_order = map { $_ => 1 } qw(name number none);
die "invalid ordering parameter\n" unless $is_valid_order{$order};

# State shared between the parser callbacks.
my $skip        = 1;     # true while the next text chunk must be ignored
my $product_url = '';    # upper-cased last "/" segment of the latest href
my $out_fh;              # handle for index.xml, written to by text()

# Directory part of a backslash-separated input path.
# NOTE(review): nothing visible in this file reads $dir_name -- confirm
# whether it can be dropped.
my @path_parts = split /\\/, $file_name;
pop @path_parts;
my $dir_name = join('\\', @path_parts);

my $out_name = "index.xml";
open($out_fh, '>', $out_name) or die "Can't open $out_name: $!";

print {$out_fh} "\n";
proc_html($file_name);
print {$out_fh} "\n";

# Buffered write errors only surface at close, so check it.
close($out_fh) or die "Can't close $out_name: $!";

# Text handler: turns the first text chunk seen after an <a> start tag into
# one product record.
#
# Parameters:
#   $self - the parser object
#   $text - the text chunk reported by the parser
#
# The chunk is split on single spaces; the product id is taken from the end
# or the start of the words according to $order, and the remaining words
# (with all whitespace stripped from each) form the product name. A record
# is written only when both id and name are true values.
sub text {
    my ($self, $text) = @_;

    return if $skip;

    my @words = split(/ /, $text);

    my $product_id;
    if ($order eq "name") {
        $product_id = pop @words;
    }
    elsif ($order eq "number") {
        $product_id = shift @words;
    }
    elsif ($order eq "none") {
        $product_id = "999";
    }
    else {
        # Unreachable after the start-up validation; kept as a safety net.
        die "invalid ordering parameter\n";
    }

    # pop/shift return undef when the chunk held no words at all.
    $product_id = '' unless defined $product_id;

    if ($product_id !~ /[0-9]/) {
        print "number format error: $text\n";
    }

    foreach my $word (@words) {
        $word =~ s/\s+//g;
    }
    my $product_name = join(' ', @words);
    $product_name =~ s/^\s//;

    if ($product_id && $product_name) {
        print {$out_fh} "\n";

        # Fewer than three name words: also emit the raw text and report the
        # entry on STDOUT so it can be checked by hand.
        if (@words < 3) {
            print {$out_fh} "\t$text\n";
            print "EditLine #: $product_id $product_name\n";
        }

        print {$out_fh} "\t$product_name\n";
        print {$out_fh} "\t$product_url\n";
        print {$out_fh} "\t$product_id\n";
        print {$out_fh} "\n\n";
    }

    # Only the first text chunk after each <a> is consumed.
    $skip = 1;
    return;
}

# Comment handler: HTML comments are ignored.
sub comment {
    my ($self, $comment) = @_;
    return;
}

# Start-tag handler: on <a>, arm text() for the next text chunk and remember
# the upper-cased last "/"-separated segment of the href attribute.
#
# Parameters:
#   $self     - the parser object
#   $tag      - tag name
#   $attr     - hash reference of the tag's attributes
#   $attrseq  - attribute order (unused)
#   $origtext - original tag text (unused)
sub start {
    my ($self, $tag, $attr, $attrseq, $origtext) = @_;

    return unless $tag eq "a";

    $skip = 0;

    # An <a> without href yields an empty URL rather than an undef warning.
    my $href     = defined $attr->{href} ? $attr->{href} : '';
    my @segments = split('/', $href);
    my $last     = pop @segments;
    $product_url = defined $last ? uc($last) : '';
    return;
}

# End-tag handler: nothing to do.
#
# NOTE(review): $skip is not reset on </a>, so when an anchor contains no
# text the first text chunk after it is still consumed as the product text.
# Confirm that this is intended before changing it.
sub end {
    my ($self, $tag, $origtext) = @_;
    return;
}

# Parse the HTML file named by the first argument with a fresh ProcExternal
# parser. Dies if the file cannot be opened for reading.
sub proc_html {
    my ($html_file) = @_;

    my $parser = ProcExternal->new;

    # parse_file returns undef (with $! set) when the file cannot be opened.
    defined $parser->parse_file($html_file)
        or die "Can't parse $html_file: $!";
    $parser->eof;
    return;
}