jonc has asked for the wisdom of the Perl Monks concerning the following question:
I have been trying to get this to work, without success: http://stackoverflow.com/questions/6541701/help-returning-2-arrays-from-subroutine-depending-on-stop-list
THIS HAS BEEN CHANGED TO A TEST CASE:
For original, look at stackoverflow, or davido's excellent reproduction below.
This produces the same errors MAINLY an uninitialized value. The length is mainly from my trying to add a test file, and some comments. Sorry ,don't know how else to replicate opening a file.
#!usr/bin/perl use strict; use warnings; my @parsed = ( '[sent. 1 len. 27]: [Others, ,, the, sea, butterflies, and, heteropods +, ,, have, a, modified, foot, that, functions, as, a, swimming, organ +, with, which, they, move, through, open, ocean, waters, .] (ROOT(S(NP(NP (NNS Others))(, ,)(NP (DT the) (NN sea) (NNS butterflies +)(CC and)(NNS heteropods))(, ,))(VP (VBP have)(NP(NP (DT a) (VBN modi +fied) (NN foot))(SBAR(WHNP (WDT that))(S(VP (VBZ functions)(PP (IN as +)(NP(NP (DT a) (VBG swimming) (NN organ))(SBAR(WHPP (IN with)(WHNP (W +DT which)))(S(NP (PRP they))(VP (VBP move)(PP (IN through)(NP (JJ ope +n) (NN ocean) (NNS waters))))))))))))) (. .))) nsubj(have-9, Others-1) conj_and(butterflies-5, heteropods-7) dobj(have-9, foot-12) nsubj(functions-14, foot-12) nsubj(move-22, they-21) prep_through(move-22, waters-26)' , '[sent. 2 len. 10]: [Radially, symmetrical, animals, move, slowly, or, + not, at, all, .] (ROOT(S(NP(ADJP (RB Radially) (JJ symmetrical))(NNS animals))(VP (VBP +move)(ADVP (RB slowly)(CC or)(RB not))(ADVP (IN at) (DT all))) (. +.))) nsubj(move-4, animals-3) advmod(move-4, at-8) pobj(at-8, all-9)' , 'Parsing [sent. 155 len. 31]: [Flatworms, -LRB-, phylum, Platyhelminth +es, -RRB-, have, no, body, cavity, ,, lack, organs, for, oxygen, tran +sport, ,, have, only, one, entrance, to, the, gut, ,, and, move, by, +beating, their, cilia, .] (ROOT(S(NP(NP (NNS Flatworms))(PRN (-LRB- -LRB-)(NP (NNP phylum) (NNP +Platyhelminthes))(-RRB- -RRB-)))(VP (VBP have)(S(NP(NP (DT no) (NN bo +dy) (NN cavity))(, ,)(NP(NP (NN lack) (NNS organs))(PP (IN for)(NP (N +N oxygen) (NN transport))))(, ,))(VP(VP (VB have)(NP (RB only) (CD on +e) (NN entrance))(PP (TO to)(NP (DT the) (NN gut))))(, ,)(CC and)(VP +(VB move)(PP (IN by)(S(VP (VBG beating)(NP (PRP$ their) (NN cilia)))) +))))) (. .))) nsubj(have-6, Flatworms-1) nsubj(move-26, cavity-9)' , 'Parsing [sent. 27 len. 20]: [Because, fluids, are, relatively, incomp +ressible, ,, they, move, to, another, part, of, the, cavity, when, mu +scles, surrounding, them, contract, .] (ROOT(S(SBAR (IN Because)(S(NP (NNS fluids))(VP (VBP are)(ADJP (RB rel +atively) (JJ incompressible)))))(, ,)(NP (PRP they))(VP (VBP move)(PP + (TO to)(NP(NP (DT another) (NN part))(PP (IN of)(NP (DT the) (NN cav +ity)))(SBAR(WHADVP (WRB when))(S(NP (NNS muscles))(VP (JJ surrounding +)(NP (PRP them))(NP (NN contract)))))))) (. .))) advcl(move-8, incompressible-5) nsubj(move-8, they-7)' , 'Parsing [sent. 18 len. 27]: [Cnidarians, also, have, epithelial, cell +s, with, muscle, fibers, whose, contractions, enable, the, animals, t +o, move, ,, as, well, as, nerve, nets, that, integrate, their, body, +activities, .] (ROOT(S(NP (NNS Cnidarians))(ADVP (RB also))(VP (VBP have)(NP(NP(NP (J +J epithelial) (NNS cells))(PP (IN with)(NP (NN muscle) (NNS fibers))) +(SBAR(WHNP (WP$ whose) (NNS contractions))(S(VP (VBP enable)(S(NP (DT + the) (NNS animals))(VP (TO to)(VP (VB move))))))))(, ,)(CONJP (RB as +) (RB well) (IN as))(NP(NP (NN nerve) (NNS nets))(SBAR(WHNP (WDT that +))(S(VP (VB integrate)(NP (PRP$ their) (NN body) (NNS activities))))) +))) (. .))) advmod(have-3, also-2) nsubj(move-15, animals-13)' ); # --- If qq is the same as reading in a file. : # local $/ = 'Parsing'; # open(my $parse_corpus, '<', "/Users/jon/Desktop/stanford-postagger-f +ull-2011-04-20/parsedLife2.txt") or die "Couldn't open directory $!"; # # Note: split at Parsed, which is before each [sent. ...] my @stopListNoun = ("theirs", "they"); # --- Unsure if same as real version: -- # # open my $stop_list_noun, '<', $stopListNounFile or die "could not op +en 'stoplist_noun.txt' $!"; ##Just open, no writing or reading? # my @stopListNoun = <$stop_list_noun>; # chomp @stopListNoun; # close $stop_list_noun or die "could not close 'stoplist_noun.txt' $! +"; # # the file has a word each line. </readmore> my $search_key = "move"; my (@all_matches, @all_pronoun_matches); my ($chapternumber, $sentencenumber, $sentence, $grammar_relation, $argument1, $argument2); foreach my $sentblock (@parsed) { chomp $sentblock; next unless ($sentblock =~ /\[sent. (\d+) len. \d+\]: \[(.+)\]/); $sentencenumber = $1; $sentence = $2; $sentence =~ s/, / /g; $chapternumber = "1_1"; #From regex next unless ($sentblock =~ /\b$search_key\b/i); ##Ensure the sente +nce contains the searchkey next unless ($sentblock =~ /\(VB\w*\s+\b$search_key\b[\)\s]+/i); # +#Ensure searchkey is a verb my ($arg1, $arg2, $goodmatch); my @lines = split ("\n",$sentblock); ##Split by a newline for my $l (0..$#lines) { if (($lines[$l] =~ /subj\w*\(/) && ($lines[$l] =~ /\b$ +search_key\b/i)) { next unless ($lines[$l] =~ /\w+\(\w+\-\d+\,\s(\w+)\-\d ++\)/); my ($matches, $pronoun_matches) = &dependency_checks($ +lines[$l], $search_key, $chapternumber, $sentencenumber, $sentence); push @all_matches, $matches if ($matches); push @all_pronoun_matches, $pronoun_matches if ($prono +un_matches); } } } my %counts; foreach my $rowref (@all_matches) { $counts{lc($rowref->[5])}++; } my %pronouncounts; foreach my $pronounrowref (@all_pronoun_matches) { $pronouncounts{lc($pronounrowref->[5])}++; } @all_matches = sort { $counts{lc($b->[5])} <=> $counts{lc($a->[5])} || lc($a->[5]) cmp lc($b->[5]) } @all_matches; # for pronoun_matches, same sort, then concatenate to all_matches @all_pronoun_matches = sort { $pronouncounts{lc($b->[5])} <=> $pronoun +counts{lc($a->[5])} || lc($a->[5]) cmp lc($b->[5]) } @all_pronoun_matches; @all_matches = (@all_matches, @all_pronoun_matches); my %seen_header; my %seen_subheader; foreach my $match (@all_matches) { $match->[3] = "Subject"; my $header = $counts{lc $match->[5]}." match(es) in which the ".$m +atch->[3]." of ".$match->[4]." is ".$match->[5]." :\n\n"; print $header unless $seen_subheader{lc $match->[5]}++; print "Section ".$match->[0].": ".$match->[2]."\n\n"; ##Section an +d sentence (formatted) } #Foreach match sub dependency_checks { my ($line, $verbform, $chapternumber, $sentencenumber, $sentence) += @_; my @matches; my @pronoun_matches; return unless ($line =~ /(\w+)\((\w+)\-\d+\,\s(\w+)\-\d+\)/); #Cou +ld pass this in $grammar_relation = $1; $argument1 = $2; $argument2 = $3; foreach my $pronoun (@stopListNoun) { if ((lc $pronoun eq lc $argument1) || (lc $pronoun eq lc $argu +ment2)) { push (@pronoun_matches, $chapternumber, $sentencenumber, $ +sentence, $grammar_relation, $argument2, $argument1) if ($argument2 = +~ /$verbform/i); push (@pronoun_matches, $chapternumber, $sentencenumber, $ +sentence, $grammar_relation, $argument1, $argument2) if ($argument1 = +~ /$verbform/i); return; } } #Make sure searchkey is 1st arg: push (@matches, $chapternumber, $sentencenumber, $sentence, $gramm +ar_relation, $argument2, $argument1) if ($argument2 =~ /$verbform/i); push (@matches, $chapternumber, $sentencenumber, $sentence, $gramm +ar_relation, $argument1, $argument2) if ($argument1 =~ /$verbform/i); return (\@matches, \@pronoun_matches); }
|
|---|