in reply to Re^4: Return 2 arrays, sort the same, and concatenate them
in thread Return 2 arrays, sort the same, and concatenate them
Here it is, 99% of what you wanted.
2 match(es) in which the Subject of move is animals :
Section 1_1: Radially symmetrical animals move slowly or not at all .
Section 1_1: Cnidarians also have epithelial cells with muscle fibers whose contractions enable the animals to move , as well as nerve nets that integrate their body activities .
1 match(es) in which the Subject of move is cavity :
Section 1_1: Flatworms -LRB- phylum Platyhelminthes -RRB- have no body cavity , lack organs for oxygen transport , have only one entrance to the gut , and move by beating their cilia .
2 match(es) in which the Subject of move is they :
Section 1_1: Others , the sea butterflies and heteropods , have a modified foot that functions as a swimming organ with which they move through open ocean waters .
Section 1_1: Because fluids are relatively incompressible , they move to another part of the cavity when muscles surrounding them contract .
Here is the code with my debugging comments, in chronological order from top to bottom. I merely tweaked it instead of modularizing it as I outlined in Re^2: Return 2 arrays, sort the same, and concatenate them, which I would definitely do if this were my project. Heck, I didn't even run perltidy until preparing this node.
#!/usr/bin/perl
use strict;
use warnings;

# Sample Stanford-parser output, one element per sentence.  Each element is a
# newline-separated record: the "[sent. N len. M]: [tokens]" header line, the
# bracketed parse tree, then one typed dependency per line.
my @parsed = (
    join( "\n",
        '[sent. 1 len. 27]: [Others, ,, the, sea, butterflies, and, heteropods, ,, have, a, modified, foot, that, functions, as, a, swimming, organ, with, which, they, move, through, open, ocean, waters, .]',
        '(ROOT(S(NP(NP (NNS Others))(, ,)(NP (DT the) (NN sea) (NNS butterflies)(CC and)(NNS heteropods))(, ,))(VP (VBP have)(NP(NP (DT a) (VBN modified) (NN foot))(SBAR(WHNP (WDT that))(S(VP (VBZ functions)(PP (IN as)(NP(NP (DT a) (VBG swimming) (NN organ))(SBAR(WHPP (IN with)(WHNP (WDT which)))(S(NP (PRP they))(VP (VBP move)(PP (IN through)(NP (JJ open) (NN ocean) (NNS waters))))))))))))) (. .)))',
        'nsubj(have-9, Others-1)',
        'conj_and(butterflies-5, heteropods-7)',
        'dobj(have-9, foot-12)',
        'nsubj(functions-14, foot-12)',
        'nsubj(move-22, they-21)',
        'prep_through(move-22, waters-26)',
    ),
    join( "\n",
        '[sent. 2 len. 10]: [Radially, symmetrical, animals, move, slowly, or, not, at, all, .]',
        '(ROOT(S(NP(ADJP (RB Radially) (JJ symmetrical))(NNS animals))(VP (VBP move)(ADVP (RB slowly)(CC or)(RB not))(ADVP (IN at) (DT all))) (. .)))',
        'nsubj(move-4, animals-3)',
        'advmod(move-4, at-8)',
        'pobj(at-8, all-9)',
    ),
    join( "\n",
        'Parsing [sent. 155 len. 31]: [Flatworms, -LRB-, phylum, Platyhelminthes, -RRB-, have, no, body, cavity, ,, lack, organs, for, oxygen, transport, ,, have, only, one, entrance, to, the, gut, ,, and, move, by, beating, their, cilia, .]',
        '(ROOT(S(NP(NP (NNS Flatworms))(PRN (-LRB- -LRB-)(NP (NNP phylum) (NNP Platyhelminthes))(-RRB- -RRB-)))(VP (VBP have)(S(NP(NP (DT no) (NN body) (NN cavity))(, ,)(NP(NP (NN lack) (NNS organs))(PP (IN for)(NP (NN oxygen) (NN transport))))(, ,))(VP(VP (VB have)(NP (RB only) (CD one) (NN entrance))(PP (TO to)(NP (DT the) (NN gut))))(, ,)(CC and)(VP (VB move)(PP (IN by)(S(VP (VBG beating)(NP (PRP$ their) (NN cilia))))))))) (. .)))',
        'nsubj(have-6, Flatworms-1)',
        'nsubj(move-26, cavity-9)',
    ),
    join( "\n",
        'Parsing [sent. 27 len. 20]: [Because, fluids, are, relatively, incompressible, ,, they, move, to, another, part, of, the, cavity, when, muscles, surrounding, them, contract, .]',
        '(ROOT(S(SBAR (IN Because)(S(NP (NNS fluids))(VP (VBP are)(ADJP (RB relatively) (JJ incompressible)))))(, ,)(NP (PRP they))(VP (VBP move)(PP (TO to)(NP(NP (DT another) (NN part))(PP (IN of)(NP (DT the) (NN cavity)))(SBAR(WHADVP (WRB when))(S(NP (NNS muscles))(VP (JJ surrounding)(NP (PRP them))(NP (NN contract)))))))) (. .)))',
        'advcl(move-8, incompressible-5)',
        'nsubj(move-8, they-7)',
    ),
    join( "\n",
        'Parsing [sent. 18 len. 27]: [Cnidarians, also, have, epithelial, cells, with, muscle, fibers, whose, contractions, enable, the, animals, to, move, ,, as, well, as, nerve, nets, that, integrate, their, body, activities, .]',
        '(ROOT(S(NP (NNS Cnidarians))(ADVP (RB also))(VP (VBP have)(NP(NP(NP (JJ epithelial) (NNS cells))(PP (IN with)(NP (NN muscle) (NNS fibers)))(SBAR(WHNP (WP$ whose) (NNS contractions))(S(VP (VBP enable)(S(NP (DT the) (NNS animals))(VP (TO to)(VP (VB move))))))))(, ,)(CONJP (RB as) (RB well) (IN as))(NP(NP (NN nerve) (NNS nets))(SBAR(WHNP (WDT that))(S(VP (VB integrate)(NP (PRP$ their) (NN body) (NNS activities)))))))) (. .)))',
        'advmod(have-3, also-2)',
        'nsubj(move-15, animals-13)',
    ),
);

# --- If qq is the same as reading in a file. :
# local $/ = 'Parsing';
# open(my $parse_corpus, '<', "/Users/jon/Desktop/stanford-postagger-full-2011-04-20/parsedLife2.txt") or die "Couldn't open directory $!";
#
# Note: split at Parsed, which is before each [sent. ...]

# Subjects listed here are reported in the separate "pronoun" result list.
my @stopListNoun = ( "theirs", "they" );

# --- Unsure if same as real version: -- #
# open my $stop_list_noun, '<', $stopListNounFile or die "could not open 'stoplist_noun.txt' $!"; ##Just open, no writing or reading?
# my @stopListNoun = <$stop_list_noun>;
# chomp @stopListNoun;
# close $stop_list_noun or die "could not close 'stoplist_noun.txt' $!";
#
# the file has a word each line.

my $search_key = "move";
my ( @all_matches, @all_pronoun_matches );

foreach my $sentblock (@parsed) {
    chomp $sentblock;

    # Header line: sentence number plus the comma-separated token list.
    next unless $sentblock =~ /\[sent\. (\d+) len\. \d+\]: \[(.+)\]/;
    my $sentencenumber = $1;
    my $sentence       = $2;
    $sentence =~ s/, / /g;    # "a, b, ,, c" -> "a b , c"
    my $chapternumber = "1_1";    #From regex

    ##Ensure the sentence contains the searchkey
    next unless $sentblock =~ /\b\Q$search_key\E\b/i;

    ##Ensure searchkey is a verb
    next unless $sentblock =~ /\(VB\w*\s+\b\Q$search_key\E\b[\)\s]+/i;

    ##Split by a newline; only *subj dependencies naming the search key count
    foreach my $line ( split /\n/, $sentblock ) {
        next unless $line =~ /subj\w*\(/;
        next unless $line =~ /\b\Q$search_key\E\b/i;

        my ( $matches, $pronoun_matches ) =
          dependency_checks( $line, $search_key, $chapternumber,
            $sentencenumber, $sentence );

        # dependency_checks returns nothing for an unparsable line and empty
        # array refs for "no row", so both definedness and size are tested.
        push @all_matches, $matches
          if $matches and @$matches;
        push @all_pronoun_matches, $pronoun_matches
          if $pronoun_matches and @$pronoun_matches;
    }
}

# Debugging history (dumps of \@all_matches, \@all_pronoun_matches at this point):
#~ 1. With the original `return` inside the stop-list loop: @all_matches held
#~    the rows for sentences 2, 155 and 18; @all_pronoun_matches was [ [], [], [] ].
#~ 2. After replacing return/last: @all_matches held all five rows
#~    (1, 2, 155, 27, 18), including the two 'they' rows; @all_pronoun_matches
#~    was [ row 1, [], [], row 27, [] ].
#~ 3. 2011-07-04-00:38:35 add `and @$matches` / `and @$pronoun_matches`:
#~    @all_matches still held five rows; @all_pronoun_matches held rows 1 and 27.

# How often each subject (field 5 of a row) occurs, case-insensitively.
my %counts;
$counts{ lc $_->[5] }++ for @all_matches;

my %pronouncounts;
$pronouncounts{ lc $_->[5] }++ for @all_pronoun_matches;

#~ At checkpoint 3: %counts = ( animals => 2, cavity => 1, they => 2 ),
#~                  %pronouncounts = ( they => 2 ).

@all_matches = _sort_by_subject_count( \%counts, @all_matches );

# for pronoun_matches, same sort, then concatenate to all_matches
@all_pronoun_matches =
  _sort_by_subject_count( \%pronouncounts, @all_pronoun_matches );

#~ After sorting at checkpoint 3: @all_matches order was 2, 18, 1, 27, 155;
#~ @all_pronoun_matches order was 1, 27.
#~ 4. 2011-07-04-00:51:27 after $foundPronouns: @all_matches = rows 2, 18, 155;
#~    @all_pronoun_matches = rows 1, 27;
#~    %counts = ( animals => 2, cavity => 1 ); %pronouncounts = ( they => 2 ).

@all_matches = ( @all_matches, @all_pronoun_matches );

my %seen_subheader;
foreach my $match (@all_matches) {
    my $subject_key    = lc $match->[5];
    my $relation_label = "Subject";

    # A subject lives in exactly one of the two tallies.
    my $header =
        ( $counts{$subject_key} || $pronouncounts{$subject_key} )
      . " match(es) in which the "
      . $relation_label . " of "
      . $match->[4] . " is "
      . $match->[5]
      . " :\n\n";

    # Print the header once per subject, then every sentence under it.
    print $header unless $seen_subheader{$subject_key}++;
    print "Section " . $match->[0] . ": " . $match->[2] . "\n\n";    ##Section and sentence (formatted)
}

# Order rows by how often their subject occurs (most frequent first), breaking
# ties alphabetically by subject.
#   $count_for - hash ref: lower-cased subject => occurrence count
#   @rows      - array refs whose element 5 is the subject
# Returns the sorted list of rows.
sub _sort_by_subject_count {
    my ( $count_for, @rows ) = @_;
    return sort {
        $count_for->{ lc $b->[5] } <=> $count_for->{ lc $a->[5] }
          || lc( $a->[5] ) cmp lc( $b->[5] )
    } @rows;
}

# Turn one typed-dependency line such as "nsubj(move-22, they-21)" into a
# result row, classified by whether either argument is on the stop list.
#
# Parameters:
#   $line           - the dependency line
#   $verbform       - the search verb (matched literally, case-insensitively,
#                     anywhere inside an argument)
#   $chapternumber, $sentencenumber, $sentence - copied into the row as-is
#
# Returns nothing when $line is not of the form rel(word-N, word-N).
# Otherwise returns ( \@matches, \@pronoun_matches ); at most one of them
# holds the flat six-field row
#   ( chapter, sentence number, sentence, relation, verb, other argument )
# and the other is empty.  Both are empty when neither argument contains
# the verb.  Reads the file-level @stopListNoun.
sub dependency_checks {
    my ( $line, $verbform, $chapternumber, $sentencenumber, $sentence ) = @_;

    return unless $line =~ /(\w+)\((\w+)\-\d+\,\s(\w+)\-\d+\)/;    #Could pass this in
    my ( $grammar_relation, $argument1, $argument2 ) = ( $1, $2, $3 );

    #Make sure searchkey is 1st arg: the verb goes in field 4, the other
    #argument in field 5.  if/elsif keeps the row at exactly six fields even
    #when both arguments contain the verb.
    my @row;
    if ( $argument2 =~ /\Q$verbform\E/i ) {
        @row = (
            $chapternumber, $sentencenumber, $sentence,
            $grammar_relation, $argument2, $argument1
        );
    }
    elsif ( $argument1 =~ /\Q$verbform\E/i ) {
        @row = (
            $chapternumber, $sentencenumber, $sentence,
            $grammar_relation, $argument1, $argument2
        );
    }

    my $found_pronoun = grep {
        lc $_ eq lc $argument1 or lc $_ eq lc $argument2
    } @stopListNoun;

    my ( @matches, @pronoun_matches );
    if ($found_pronoun) {
        @pronoun_matches = @row;
    }
    else {
        @matches = @row;
    }

    return ( \@matches, \@pronoun_matches );
}

__END__
|
|---|
| Replies are listed 'Best First'. | |
|---|---|
|
Re^6: Return 2 arrays, sort the same, and concatenate them
by Anonymous Monk on Jul 04, 2011 at 08:28 UTC | |
by jonc (Beadle) on Jul 04, 2011 at 14:13 UTC | |
by Anonymous Monk on Jul 05, 2011 at 11:51 UTC |