2 match(es) in which the Subject of move is animals : Section 1_1: Radially symmetrical animals move slowly or not at all . Section 1_1: Cnidarians also have epithelial cells with muscle fibers whose contractions enable the animals to move , as well as nerve nets that integrate their body activities . 1 match(es) in which the Subject of move is cavity : Section 1_1: Flatworms -LRB- phylum Platyhelminthes -RRB- have no body cavity , lack organs for oxygen transport , have only one entrance to the gut , and move by beating their cilia . 2 match(es) in which the Subject of move is they : Section 1_1: Others , the sea butterflies and heteropods , have a modified foot that functions as a swimming organ with which they move through open ocean waters . Section 1_1: Because fluids are relatively incompressible , they move to another part of the cavity when muscles surrounding them contract . #### #!usr/bin/perl use strict; use warnings; my @parsed = ( '[sent. 1 len. 27]: [Others, ,, the, sea, butterflies, and, heteropods, ,, have, a, modified, foot, that, functions, as, a, swimming, organ, with, which, they, move, through, open, ocean, waters, .] (ROOT(S(NP(NP (NNS Others))(, ,)(NP (DT the) (NN sea) (NNS butterflies)(CC and)(NNS heteropods))(, ,))(VP (VBP have)(NP(NP (DT a) (VBN modified) (NN foot))(SBAR(WHNP (WDT that))(S(VP (VBZ functions)(PP (IN as)(NP(NP (DT a) (VBG swimming) (NN organ))(SBAR(WHPP (IN with)(WHNP (WDT which)))(S(NP (PRP they))(VP (VBP move)(PP (IN through)(NP (JJ open) (NN ocean) (NNS waters))))))))))))) (. .))) nsubj(have-9, Others-1) conj_and(butterflies-5, heteropods-7) dobj(have-9, foot-12) nsubj(functions-14, foot-12) nsubj(move-22, they-21) prep_through(move-22, waters-26)', '[sent. 2 len. 10]: [Radially, symmetrical, animals, move, slowly, or, not, at, all, .] (ROOT(S(NP(ADJP (RB Radially) (JJ symmetrical))(NNS animals))(VP (VBP move)(ADVP (RB slowly)(CC or)(RB not))(ADVP (IN at) (DT all))) (. .))) nsubj(move-4, animals-3) advmod(move-4, at-8) pobj(at-8, all-9)', 'Parsing [sent. 155 len. 31]: [Flatworms, -LRB-, phylum, Platyhelminthes, -RRB-, have, no, body, cavity, ,, lack, organs, for, oxygen, transport, ,, have, only, one, entrance, to, the, gut, ,, and, move, by, beating, their, cilia, .] (ROOT(S(NP(NP (NNS Flatworms))(PRN (-LRB- -LRB-)(NP (NNP phylum) (NNP Platyhelminthes))(-RRB- -RRB-)))(VP (VBP have)(S(NP(NP (DT no) (NN body) (NN cavity))(, ,)(NP(NP (NN lack) (NNS organs))(PP (IN for)(NP (NN oxygen) (NN transport))))(, ,))(VP(VP (VB have)(NP (RB only) (CD one) (NN entrance))(PP (TO to)(NP (DT the) (NN gut))))(, ,)(CC and)(VP (VB move)(PP (IN by)(S(VP (VBG beating)(NP (PRP$ their) (NN cilia))))))))) (. .))) nsubj(have-6, Flatworms-1) nsubj(move-26, cavity-9)', 'Parsing [sent. 27 len. 20]: [Because, fluids, are, relatively, incompressible, ,, they, move, to, another, part, of, the, cavity, when, muscles, surrounding, them, contract, .] (ROOT(S(SBAR (IN Because)(S(NP (NNS fluids))(VP (VBP are)(ADJP (RB relatively) (JJ incompressible)))))(, ,)(NP (PRP they))(VP (VBP move)(PP (TO to)(NP(NP (DT another) (NN part))(PP (IN of)(NP (DT the) (NN cavity)))(SBAR(WHADVP (WRB when))(S(NP (NNS muscles))(VP (JJ surrounding)(NP (PRP them))(NP (NN contract)))))))) (. .))) advcl(move-8, incompressible-5) nsubj(move-8, they-7)', 'Parsing [sent. 18 len. 27]: [Cnidarians, also, have, epithelial, cells, with, muscle, fibers, whose, contractions, enable, the, animals, to, move, ,, as, well, as, nerve, nets, that, integrate, their, body, activities, .] (ROOT(S(NP (NNS Cnidarians))(ADVP (RB also))(VP (VBP have)(NP(NP(NP (JJ epithelial) (NNS cells))(PP (IN with)(NP (NN muscle) (NNS fibers)))(SBAR(WHNP (WP$ whose) (NNS contractions))(S(VP (VBP enable)(S(NP (DT the) (NNS animals))(VP (TO to)(VP (VB move))))))))(, ,)(CONJP (RB as) (RB well) (IN as))(NP(NP (NN nerve) (NNS nets))(SBAR(WHNP (WDT that))(S(VP (VB integrate)(NP (PRP$ their) (NN body) (NNS activities)))))))) (. .))) advmod(have-3, also-2) nsubj(move-15, animals-13)' ); # --- If qq is the same as reading in a file. : # local $/ = 'Parsing'; # open(my $parse_corpus, '<', "/Users/jon/Desktop/stanford-postagger-full-2011-04-20/parsedLife2.txt") or die "Couldn't open directory $!"; # # Note: split at Parsed, which is before each [sent. ...] my @stopListNoun = ( "theirs", "they" ); # --- Unsure if same as real version: -- # # open my $stop_list_noun, '<', $stopListNounFile or die "could not open 'stoplist_noun.txt' $!"; ##Just open, no writing or reading? # my @stopListNoun = <$stop_list_noun>; # chomp @stopListNoun; # close $stop_list_noun or die "could not close 'stoplist_noun.txt' $!"; # # the file has a word each line. my $search_key = "move"; my ( @all_matches, @all_pronoun_matches ); my ( $chapternumber, $sentencenumber, $sentence, $grammar_relation, $argument1, $argument2 ); foreach my $sentblock (@parsed) { chomp $sentblock; next unless ( $sentblock =~ /\[sent. (\d+) len. \d+\]: \[(.+)\]/ ); $sentencenumber = $1; $sentence = $2; $sentence =~ s/, / /g; $chapternumber = "1_1"; #From regex next unless ( $sentblock =~ /\b\Q$search_key\E\b/i ) ; ##Ensure the sentence contains the searchkey next unless ( $sentblock =~ /\(VB\w*\s+\b\Q$search_key\E\b[\)\s]+/i ) ; ##Ensure searchkey is a verb my ( $arg1, $arg2, $goodmatch ); my @lines = split( "\n", $sentblock ); ##Split by a newline for my $l ( 0 .. $#lines ) { if ( ( $lines[$l] =~ /subj\w*\(/ ) && ( $lines[$l] =~ /\b\Q$search_key\E\b/i ) ) { next unless ( $lines[$l] =~ /\w+\(\w+\-\d+\,\s(\w+)\-\d+\)/ ); my ( $matches, $pronoun_matches ) = &dependency_checks( $lines[$l], $search_key, $chapternumber, $sentencenumber, $sentence ); push @all_matches, $matches if ( $matches and @$matches ); push @all_pronoun_matches, $pronoun_matches if ( $pronoun_matches and @$pronoun_matches ); } ## end if ( ( $lines[$l] =~ /subj\w*\(/...))) } ## end for my $l ( 0 .. $#lines) } ## end foreach my $sentblock (@parsed) #~ use DDS; die Dump( \@all_matches, \@all_pronoun_matches ); #~ $ARRAY1 = [ #~ [ #~ '1_1', #~ 2, #~ 'Radially symmetrical animals move slowly or not at all .', #~ 'nsubj', #~ 'move', #~ 'animals' #~ ], #~ [ #~ '1_1', #~ 155, #~ 'Flatworms -LRB- phylum Platyhelminthes -RRB- have no body cavity , lack organs for oxygen transport , have only one entrance to the gut , and move by beating their cilia .', #~ 'nsubj', #~ 'move', #~ 'cavity' #~ ], #~ [ #~ '1_1', #~ 18, #~ 'Cnidarians also have epithelial cells with muscle fibers whose contractions enable the animals to move , #~ as well as nerve nets that integrate their body activities .', #~ 'nsubj', #~ 'move', #~ 'animals' #~ ] #~ ]; #~ $ARRAY2 = [ #~ [], #~ [], #~ [] #~ ]; ## after replacing return/last #~ use DDS; die Dump( \@all_matches, \@all_pronoun_matches ); #~ $ARRAY1 = [ #~ [ #~ '1_1', #~ 1, #~ 'Others , the sea butterflies and heteropods , have a modified foot that functions as a swimming organ with which they move through open ocean waters .', #~ 'nsubj', #~ 'move', #~ 'they' #~ ], #~ [ #~ '1_1', #~ 2, #~ 'Radially symmetrical animals move slowly or not at all .', #~ 'nsubj', #~ 'move', #~ 'animals' #~ ], #~ [ #~ '1_1', #~ 155, #~ 'Flatworms -LRB- phylum Platyhelminthes -RRB- have no body cavity , lack organs for oxygen transport , have only one entrance to the gut , and move by beating their cilia .', #~ 'nsubj', #~ 'move', #~ 'cavity' #~ ], #~ [ #~ '1_1', #~ 27, #~ 'Because fluids are relatively incompressible , they move to another part of the cavity when muscles surrounding them contract .', #~ 'nsubj', #~ 'move', #~ 'they' #~ ], #~ [ #~ '1_1', #~ 18, #~ 'Cnidarians also have epithelial cells with muscle fibers whose contractions enable the animals to move , as well as nerve nets that integrate their body activities .', #~ 'nsubj', #~ 'move', #~ 'animals' #~ ] #~ ]; #~ $ARRAY2 = [ #~ [ #~ '1_1', #~ 1, #~ 'Others , the sea butterflies and heteropods , have a modified foot that functions as a swimming organ with which they move through open ocean waters .', #~ 'nsubj', #~ 'move', #~ 'they' #~ ], #~ [], #~ [], #~ [ #~ '1_1', #~ 27, #~ 'Because fluids are relatively incompressible , they move to another part of the cavity when muscles surrounding them contract .', #~ 'nsubj', #~ 'move', #~ 'they' #~ ], #~ [] #~ ]; #~ 2011-07-04-00:38:35 add #~ and @$matches #~ and @$pronoun_matches); #~ use DDS; die Dump( \@all_matches, \@all_pronoun_matches ); #~ $ARRAY1 = [ #~ [ #~ '1_1', #~ 1, #~ 'Others , the sea butterflies and heteropods , have a modified foot that functions as a swimming organ with which they move through open ocean waters .', #~ 'nsubj', #~ 'move', #~ 'they' #~ ], #~ [ #~ '1_1', #~ 2, #~ 'Radially symmetrical animals move slowly or not at all .', #~ 'nsubj', #~ 'move', #~ 'animals' #~ ], #~ [ #~ '1_1', #~ 155, #~ 'Flatworms -LRB- phylum Platyhelminthes -RRB- have no body cavity , lack organs for oxygen transport , have only one entrance to the gut , and move by beating their cilia .', #~ 'nsubj', #~ 'move', #~ 'cavity' #~ ], #~ [ #~ '1_1', #~ 27, #~ 'Because fluids are relatively incompressible , they move to another part of the cavity when muscles surrounding them contract .', #~ 'nsubj', #~ 'move', #~ 'they' #~ ], #~ [ #~ '1_1', #~ 18, #~ 'Cnidarians also have epithelial cells with muscle fibers whose contractions enable the animals to move , as well as nerve nets that integrate their body activities .', #~ 'nsubj', #~ 'move', #~ 'animals' #~ ] #~ ]; #~ $ARRAY2 = [ #~ [ #~ '1_1', #~ 1, #~ 'Others , the sea butterflies and heteropods , have a modified foot that functions as a swimming organ with which they move through open ocean waters .', #~ 'nsubj', #~ 'move', #~ 'they' #~ ], #~ [ #~ '1_1', #~ 27, #~ 'Because fluids are relatively incompressible , they move to another part of the cavity when muscles surrounding them contract .', #~ 'nsubj', #~ 'move', #~ 'they' #~ ] #~ ]; my %counts; foreach my $rowref (@all_matches) { $counts{ lc( $rowref->[5] ) }++; } my %pronouncounts; foreach my $pronounrowref (@all_pronoun_matches) { $pronouncounts{ lc( $pronounrowref->[5] ) }++; } #~ use DDS; die Dump( \@all_matches, \@all_pronoun_matches, \%counts, \%pronouncounts ); #~ $HASH1 = { #~ animals => 2, #~ cavity => 1, #~ they => 2 #~ }; #~ $HASH2 = { they => 2 }; @all_matches = sort { $counts{ lc( $b->[5] ) } <=> $counts{ lc( $a->[5] ) } || lc( $a->[5] ) cmp lc( $b->[5] ) } @all_matches; # for pronoun_matches, same sort, then concatenate to all_matches @all_pronoun_matches = sort { $pronouncounts{ lc( $b->[5] ) } <=> $pronouncounts{ lc( $a->[5] ) } || lc( $a->[5] ) cmp lc( $b->[5] ) } @all_pronoun_matches; #~ use DDS; die Dump( \@all_matches, \@all_pronoun_matches ); #~ $ARRAY1 = [ #~ [ #~ '1_1', #~ 2, #~ 'Radially symmetrical animals move slowly or not at all .', #~ 'nsubj', #~ 'move', #~ 'animals' #~ ], #~ [ #~ '1_1', #~ 18, #~ 'Cnidarians also have epithelial cells with muscle fibers whose contractions enable the animals to move , as well as nerve nets that integrate their body activities .', #~ 'nsubj', #~ 'move', #~ 'animals' #~ ], #~ [ #~ '1_1', #~ 1, #~ 'Others , the sea butterflies and heteropods , have a modified foot that functions as a swimming organ with which they move through open ocean waters .', #~ 'nsubj', #~ 'move', #~ 'they' #~ ], #~ [ #~ '1_1', #~ 27, #~ 'Because fluids are relatively incompressible , they move to another part of the cavity when muscles surrounding them contract .', #~ 'nsubj', #~ 'move', #~ 'they' #~ ], #~ [ #~ '1_1', #~ 155, #~ 'Flatworms -LRB- phylum Platyhelminthes -RRB- have no body cavity , lack organs for oxygen transport , have only one entrance to the gut , and move by beating their cilia .', #~ 'nsubj', #~ 'move', #~ 'cavity' #~ ] #~ ]; #~ $ARRAY2 = [ #~ [ #~ '1_1', #~ 1, #~ 'Others , the sea butterflies and heteropods , have a modified foot that functions as a swimming organ with which they move through open ocean waters .', #~ 'nsubj', #~ 'move', #~ 'they' #~ ], #~ [ #~ '1_1', #~ 27, #~ 'Because fluids are relatively incompressible , they move to another part of the cavity when muscles surrounding them contract .', #~ 'nsubj', #~ 'move', #~ 'they' #~ ] #~ ]; #~ 2011-07-04-00:51:27 after $foundPronouns #~ use DDS; die Dump( \@all_matches, \@all_pronoun_matches, \%counts, \%pronouncounts ); #~ $ARRAY1 = [ #~ [ #~ '1_1', #~ 2, #~ 'Radially symmetrical animals move slowly or not at all .', #~ 'nsubj', #~ 'move', #~ 'animals' #~ ], #~ [ #~ '1_1', #~ 18, #~ 'Cnidarians also have epithelial cells with muscle fibers whose contractions enable the animals to move , as well as nerve nets that integrate their body activities .', #~ 'nsubj', #~ 'move', #~ 'animals' #~ ], #~ [ #~ '1_1', #~ 155, #~ 'Flatworms -LRB- phylum Platyhelminthes -RRB- have no body cavity , lack organs for oxygen transport , have only one entrance to the gut , and move by beating their cilia .', #~ 'nsubj', #~ 'move', #~ 'cavity' #~ ] #~ ]; #~ $ARRAY2 = [ #~ [ #~ '1_1', #~ 1, #~ 'Others , the sea butterflies and heteropods , have a modified foot that functions as a swimming organ with which they move through open ocean waters .', #~ 'nsubj', #~ 'move', #~ 'they' #~ ], #~ [ #~ '1_1', #~ 27, #~ 'Because fluids are relatively incompressible , they move to another part of the cavity when muscles surrounding them contract .', #~ 'nsubj', #~ 'move', #~ 'they' #~ ] #~ ]; #~ $HASH1 = { #~ animals => 2, #~ cavity => 1 #~ }; #~ $HASH2 = { they => 2 }; @all_matches = ( @all_matches, @all_pronoun_matches ); my %seen_header; my %seen_subheader; foreach my $match (@all_matches) { $match->[3] = "Subject"; my $header = ( do { my $key = lc $match->[5]; $counts{$key} || $pronouncounts{$key}; } ) . " match(es) in which the " . $match->[3] . " of " . $match->[4] . " is " . $match->[5] . " :\n\n"; print $header unless $seen_subheader{ lc $match->[5] }++; print "Section " . $match->[0] . ": " . $match->[2] . "\n\n"; ##Section and sentence (formatted) } #Foreach match sub dependency_checks { my ( $line, $verbform, $chapternumber, $sentencenumber, $sentence ) = @_; my @matches; my @pronoun_matches; return unless ( $line =~ /(\w+)\((\w+)\-\d+\,\s(\w+)\-\d+\)/ ) ; #Could pass this in $grammar_relation = $1; $argument1 = $2; $argument2 = $3; my $foundPronouns = 0; foreach my $pronoun (@stopListNoun) { if ( ( lc $pronoun eq lc $argument1 ) || ( lc $pronoun eq lc $argument2 ) ) { push( @pronoun_matches, $chapternumber, $sentencenumber, $sentence, $grammar_relation, $argument2, $argument1 ) if ( $argument2 =~ /$verbform/i ); push( @pronoun_matches, $chapternumber, $sentencenumber, $sentence, $grammar_relation, $argument1, $argument2 ) if ( $argument1 =~ /$verbform/i ); # return; $foundPronouns++; last; } ## end if ( ( lc $pronoun eq ...)) } ## end foreach my $pronoun (@stopListNoun) #Make sure searchkey is 1st arg: if ( not $foundPronouns ) { push( @matches, $chapternumber, $sentencenumber, $sentence, $grammar_relation, $argument2, $argument1 ) if ( $argument2 =~ /\Q$verbform\E/i ); push( @matches, $chapternumber, $sentencenumber, $sentence, $grammar_relation, $argument1, $argument2 ) if ( $argument1 =~ /\Q$verbform\E/i ); } ## end if ( not $foundPronouns) return ( \@matches, \@pronoun_matches ); } ## end sub dependency_checks __END__