Anonymous Monk has asked for the wisdom of the Perl Monks concerning the following question:
my input is an xml file and my output is a pairs of extracted items per line. any idea to improve? thanks,.use strict; use XML::Parser; use XML::XPath; use Lingua::StopWords qw( getStopWords ); my $stopwords = getStopWords('en'); my $file = $ARGV[0]; my $xp = XML::XPath->new(filename=>$file); for (my $n = 1; $n <= 600; $n++) { my $textnodeset = $xp->find('//pair[@id = '.$n.']/tAnnotation/tree/nod +e/word/attribute[@name="token"]'); my @texts; if (my @textnodelist = $textnodeset->get_nodelist) { @texts = map($_->string_value, @textnodelist);} my %seent; my @uniqt = grep !$seent{$_}++, @texts; my $hyponodeset = $xp->find('//pair[@id = '.$n.']/hAnnotation/tree/nod +e/word/attribute[@name="token"]'); my @hypos; if (my @hyponodelist = $hyponodeset->get_nodelist) { @hypos = map($_->string_value, @hyponodelist);} my %seenh; my @uniqh = grep !$seenh{$_}++, @hypos; my @termst = grep { ! $stopwords->{ $_ } } @uniqt; my @termsh = grep { ! $stopwords->{ $_ } } @uniqh; for my $i (0 .. $#termst) { for my $j ( 0 .. $#termsh) { print "$termst[$i] $termsh[$j]\n"; }} }
|
|---|
| Replies are listed 'Best First'. | |
|---|---|
|
Re: my xml xpath is too slow.
by mirod (Canon) on Aug 26, 2009 at 09:22 UTC | |
by Anonymous Monk on Aug 26, 2009 at 09:38 UTC | |
by mirod (Canon) on Aug 26, 2009 at 09:47 UTC | |
|
Re: my xml xpath is too slow.
by grizzley (Chaplain) on Aug 26, 2009 at 14:27 UTC | |
|
Re: my xml xpath is too slow.
by Jenda (Abbot) on Sep 03, 2009 at 21:44 UTC |