D'Femstar has asked for the wisdom of the Perl Monks concerning the following question:
use strict;
use warnings;
use XML::TreeBuilder;

# tokenizefast($line)
#
# Splits $line into whitespace-separated words and returns a flat list made
# of every single word, followed by every run of 2 consecutive words, then
# every run of 3 consecutive words. Each multi-word entry is joined with a
# single space. Leading and trailing whitespace is ignored, so an empty or
# all-whitespace (or undefined) line yields an empty list.
#
# The former ($) prototype was dropped: prototypes change how calls are
# parsed rather than validating arguments, and every call passes one scalar.
sub tokenizefast {
    my ($line) = @_;
    return unless defined $line;

    # Trim first so split never produces a leading empty field.
    $line =~ s/^\s+//;
    $line =~ s/\s+$//;

    # Split into words so n-grams respect word boundaries.
    my @words  = split /\s+/, $line;
    my @ngrams = @words;    # the unigrams come first

    for my $length (2, 3) {
        # The range is empty when there are fewer words than $length.
        for my $start (0 .. @words - $length) {
            push @ngrams, join ' ', @words[ $start .. $start + $length - 1 ];
        }
    }

    return @ngrams;
}

# --- main script ----------------------------------------------------------
# Collect the n-grams of every utterance, grouped by the utterance's 'da'
# attribute, then print one line per 'da' value.

my $file = 'swbd_50k_42tags.xml';
my $tree = XML::TreeBuilder->new();
$tree->parse_file($file);

my %textHash;    # 'da' value => reference to an array of n-grams

for my $dialogue ($tree->find_by_tag_name('dialogue')) {
    for my $turn ($dialogue->find_by_tag_name('turn')) {

        # Search below the current turn, not the whole dialogue; searching
        # from $dialogue here re-processed every utterance once per turn.
        # NOTE(review): assumes <utt> elements are nested inside <turn>;
        # confirm against the input file.
        for my $utt ($turn->find_by_tag_name('utt')) {
            my $da = $utt->attr_get_i('da');
            $da = '' unless defined $da;    # same key an undef produced before

            # Push through the reference stored in the hash. Copying the
            # array out first (@copy = @{ $textHash{$da} }) and pushing onto
            # the copy silently discards the new items. Autovivification
            # creates the array the first time a key is seen.
            push @{ $textHash{$da} }, tokenizefast($utt->as_text);
        }
    }
}

# Write the final result; keys are sorted so the output order is stable.
for my $key (sort keys %textHash) {
    print " the key: $key", " text {", join(", ", @{ $textHash{$key} }), "}\n";
}
20050404 Edit by ysth: code tags
Retitled by BazB from 'Cry for Help'.
|
|---|
| Replies are listed 'Best First'. | |
|---|---|
|
Re: Help needed to understand hashes and hashes of arrays
by tall_man (Parson) on Apr 04, 2005 at 15:04 UTC | |
|
Re: Help needed to understand hashes and hashes of arrays
by Nevtlathiel (Friar) on Apr 04, 2005 at 13:35 UTC | |
|
Re: Help needed to understand hashes and hashes of arrays
by Jaap (Curate) on Apr 04, 2005 at 12:51 UTC |