#!/usr/bin/perl
#
# Scan a product XML feed with XML::Parser and summarise the <name> values
# found inside each <product_data> element.
#
# Usage: perl SCRIPT FEED.xml
#
# Files written in the current directory:
#   tag.desc    - one line per distinct ':'-joined list of names seen in a
#                 single product (overwritten on every run)
#   dumper1.txt - appended to; details of every product whose joined name
#                 list matches /age_quantity/i
#   dump.txt    - for every name matching /age_quantity/i: the current
#                 product id immediately followed by that name, one pair
#                 per line (overwritten on every run)
#
# Progress ("<count>...") is printed to STDOUT after every 1000th product.

use strict;
use warnings;

use XML::Parser;

# --- parser state shared by the handlers -----------------------------------

my $products_seen   = 0;     # number of <product_data> start tags so far
my $in_name         = 0;     # true while inside a <name> element
my $in_product_id   = 0;     # true while inside a <product_id> element
my $name_text       = '';    # text collected for the current <name>
my $product_id_text = '';    # text collected for the current <product_id>
my $last_prodid     = '';    # most recent non-empty <product_id> text
my @tagdesc;                 # names seen in the current product
my %tags;                    # joined name list => number of products with it
my @struct;                  # flat (product id, name) pairs for dump.txt

# --- main ------------------------------------------------------------------

my $xml_file = shift @ARGV;
die "Usage: $0 FILE.xml\n" unless defined $xml_file;

my $parser = XML::Parser->new(
    ErrorContext => 4,
    Handlers     => {
        Start => \&handle_start,
        End   => \&handle_end,
        Char  => \&handle_char,
    },
);

# Parse the file named on the command line, then write the summaries.
$parser->parsefile($xml_file);
write_out();

open my $dump_fh, '>', 'dump.txt'
    or die "No debug file can be opened: $!";

# @struct is always filled two entries at a time (product id, name); each
# pair is written on its own line with nothing between the two values.
for (my $i = 0; $i < @struct; $i += 2) {
    print {$dump_fh} @struct[ $i, $i + 1 ], "\n";
}
close $dump_fh or die "Cannot close dump.txt: $!";

# --- XML::Parser handlers --------------------------------------------------

# Start handler: count products and begin collecting text for the elements
# we care about.  Receives (expat object, element name, attribute pairs...);
# the attributes are not used.
sub handle_start {
    my ($expat, $el) = @_;

    if ($el eq 'product_data') {
        $products_seen++;
    }
    elsif ($el eq 'product_id') {
        $in_product_id   = 1;
        $product_id_text = '';
    }
    elsif ($el eq 'name') {
        $in_name   = 1;
        $name_text = '';
    }
    return;
}

# Char handler: receives (expat object, text).  XML::Parser may deliver one
# run of character data in several calls, so the text is only buffered here
# and acted upon once the closing tag arrives in handle_end.
sub handle_char {
    my ($expat, $data) = @_;

    $name_text       .= $data if $in_name;
    $product_id_text .= $data if $in_product_id;
    return;
}

# End handler: receives (expat object, element name).  Commits the buffered
# text of <name> / <product_id> and finalises a product on </product_data>.
sub handle_end {
    my ($expat, $el) = @_;

    if ($el eq 'name') {
        $in_name = 0;
        # An element without any text produces no entry.
        _record_name($name_text) if length $name_text;
        $name_text = '';
    }
    elsif ($el eq 'product_id') {
        $in_product_id = 0;
        # An empty <product_id> leaves the previous id in place.
        $last_prodid = $product_id_text if length $product_id_text;
        $product_id_text = '';
    }
    elsif ($el eq 'product_data') {
        print "$products_seen...\n" if $products_seen % 1000 == 0;
        _finish_product();
    }
    return;
}

# Remember one complete <name> value for the current product; names that
# match /age_quantity/i are also queued, with the current product id, for
# dump.txt.
sub _record_name {
    my ($name) = @_;

    push @tagdesc, $name;
    push @struct, $last_prodid, $name if $name =~ m/age_quantity/i;
    return;
}

# Close out the current product: log it to dumper1.txt when its joined name
# list matches /age_quantity/i, count the joined list in %tags, and reset
# @tagdesc for the next product.
sub _finish_product {
    my $str = join ':', @tagdesc;

    if ($str =~ m/age_quantity/i) {
        open my $dumper_fh, '>>', 'dumper1.txt'
            or die "No dumper open: $!";
        print {$dumper_fh} "$last_prodid : ";
        for my $idx (0 .. $#tagdesc) {
            my $element = $tagdesc[$idx];
            print {$dumper_fh} "$idx: $element ";
            # Products carrying a name that is exactly 'PACK' are also
            # reported on STDOUT.
            print $last_prodid, "\n" if $element eq 'PACK';
        }
        print {$dumper_fh} "\n";
        print {$dumper_fh} "$str \n";
        close $dumper_fh or die "Cannot close dumper1.txt: $!";
    }

    @tagdesc = ();
    $tags{$str}++;
    return;
}

# Write every distinct joined name list to tag.desc, one per line.  Only the
# keys are written; the per-key counts kept in %tags are not output.  Keys
# are sorted so the file is reproducible from run to run.
sub write_out {
    open my $out_fh, '>', 'tag.desc' or die "No open: $!";
    print {$out_fh} "$_\n" for sort keys %tags;
    close $out_fh or die "Cannot close tag.desc: $!";
    return;
}

# Perl ignores everything after __END__.  The text that follows appears to
# be a sample record kept for reference -- TODO confirm.
__END__
####
100000 Star Donal McCann|Saskia Reeves|Ciaran Hinds|Patrick Malahide|Brenda Bruce Street Date 970506 Year Released 94 Run Time 90 min Director Thaddeus O'Sullivan Originally Released 1993 Rating Not Rated Items 1 MuzeID 1060749 Muze PRelRefNum 1 Categories Dramas, Love, Triangle, Romance, Drama|Dramas|Love Triangle|Romance|Drama Title December Bride Format VHS First Star Donal McCann RUNTIME 0090 STREET_DATE 970506 LAST_UPDATE 990701 ATTRIBUTES C YEAR_RELEASED 94 PACKAGE_QUANTITY 1 PREORDER_DATE 970415 MANUFACTURER_PARTNO 1166 UPC 720917011660 SUMMARY SASKIA REEVES GENRE DRAMA PREBOOK_DATE 1997/04/15 RELEASE_DATE 1997/05/06 ITEM_TYPE S STAR1 SASKIA REEVES STAR2 DONAL MC CANN SUBTITLE N COLORIZED N ISBN 1572520205 CLASS_CODE 11120 SETUP_DATE 1995/07/17 LAST_MODIFY 1997/11/25 ITEM_NO FLV 1166V