#!usr/bin/perl use strict; use warnings; my @parsed = ( '[sent. 1 len. 27]: [Others, ,, the, sea, butterflies, and, heteropods, ,, have, a, modified, foot, that, functions, as, a, swimming, organ, with, which, they, move, through, open, ocean, waters, .] (ROOT(S(NP(NP (NNS Others))(, ,)(NP (DT the) (NN sea) (NNS butterflies)(CC and)(NNS heteropods))(, ,))(VP (VBP have)(NP(NP (DT a) (VBN modified) (NN foot))(SBAR(WHNP (WDT that))(S(VP (VBZ functions)(PP (IN as)(NP(NP (DT a) (VBG swimming) (NN organ))(SBAR(WHPP (IN with)(WHNP (WDT which)))(S(NP (PRP they))(VP (VBP move)(PP (IN through)(NP (JJ open) (NN ocean) (NNS waters))))))))))))) (. .))) nsubj(have-9, Others-1) conj_and(butterflies-5, heteropods-7) dobj(have-9, foot-12) nsubj(functions-14, foot-12) nsubj(move-22, they-21) prep_through(move-22, waters-26)' , '[sent. 2 len. 10]: [Radially, symmetrical, animals, move, slowly, or, not, at, all, .] (ROOT(S(NP(ADJP (RB Radially) (JJ symmetrical))(NNS animals))(VP (VBP move)(ADVP (RB slowly)(CC or)(RB not))(ADVP (IN at) (DT all))) (. .))) nsubj(move-4, animals-3) advmod(move-4, at-8) pobj(at-8, all-9)' , 'Parsing [sent. 155 len. 31]: [Flatworms, -LRB-, phylum, Platyhelminthes, -RRB-, have, no, body, cavity, ,, lack, organs, for, oxygen, transport, ,, have, only, one, entrance, to, the, gut, ,, and, move, by, beating, their, cilia, .] (ROOT(S(NP(NP (NNS Flatworms))(PRN (-LRB- -LRB-)(NP (NNP phylum) (NNP Platyhelminthes))(-RRB- -RRB-)))(VP (VBP have)(S(NP(NP (DT no) (NN body) (NN cavity))(, ,)(NP(NP (NN lack) (NNS organs))(PP (IN for)(NP (NN oxygen) (NN transport))))(, ,))(VP(VP (VB have)(NP (RB only) (CD one) (NN entrance))(PP (TO to)(NP (DT the) (NN gut))))(, ,)(CC and)(VP (VB move)(PP (IN by)(S(VP (VBG beating)(NP (PRP$ their) (NN cilia))))))))) (. .))) nsubj(have-6, Flatworms-1) nsubj(move-26, cavity-9)' , 'Parsing [sent. 27 len. 20]: [Because, fluids, are, relatively, incompressible, ,, they, move, to, another, part, of, the, cavity, when, muscles, surrounding, them, contract, .] (ROOT(S(SBAR (IN Because)(S(NP (NNS fluids))(VP (VBP are)(ADJP (RB relatively) (JJ incompressible)))))(, ,)(NP (PRP they))(VP (VBP move)(PP (TO to)(NP(NP (DT another) (NN part))(PP (IN of)(NP (DT the) (NN cavity)))(SBAR(WHADVP (WRB when))(S(NP (NNS muscles))(VP (JJ surrounding)(NP (PRP them))(NP (NN contract)))))))) (. .))) advcl(move-8, incompressible-5) nsubj(move-8, they-7)' , 'Parsing [sent. 18 len. 27]: [Cnidarians, also, have, epithelial, cells, with, muscle, fibers, whose, contractions, enable, the, animals, to, move, ,, as, well, as, nerve, nets, that, integrate, their, body, activities, .] (ROOT(S(NP (NNS Cnidarians))(ADVP (RB also))(VP (VBP have)(NP(NP(NP (JJ epithelial) (NNS cells))(PP (IN with)(NP (NN muscle) (NNS fibers)))(SBAR(WHNP (WP$ whose) (NNS contractions))(S(VP (VBP enable)(S(NP (DT the) (NNS animals))(VP (TO to)(VP (VB move))))))))(, ,)(CONJP (RB as) (RB well) (IN as))(NP(NP (NN nerve) (NNS nets))(SBAR(WHNP (WDT that))(S(VP (VB integrate)(NP (PRP$ their) (NN body) (NNS activities)))))))) (. .))) advmod(have-3, also-2) nsubj(move-15, animals-13)' ); # --- If qq is the same as reading in a file. : # local $/ = 'Parsing'; # open(my $parse_corpus, '<', "/Users/jon/Desktop/stanford-postagger-full-2011-04-20/parsedLife2.txt") or die "Couldn't open directory $!"; # # Note: split at Parsed, which is before each [sent. ...] my @stopListNoun = ("theirs", "they"); # --- Unsure if same as real version: -- # # open my $stop_list_noun, '<', $stopListNounFile or die "could not open 'stoplist_noun.txt' $!"; ##Just open, no writing or reading? # my @stopListNoun = <$stop_list_noun>; # chomp @stopListNoun; # close $stop_list_noun or die "could not close 'stoplist_noun.txt' $!"; # # the file has a word each line. my $search_key = "move"; my (@all_matches, @all_pronoun_matches); my ($chapternumber, $sentencenumber, $sentence, $grammar_relation, $argument1, $argument2); foreach my $sentblock (@parsed) { chomp $sentblock; next unless ($sentblock =~ /\[sent. (\d+) len. \d+\]: \[(.+)\]/); $sentencenumber = $1; $sentence = $2; $sentence =~ s/, / /g; $chapternumber = "1_1"; #From regex next unless ($sentblock =~ /\b$search_key\b/i); ##Ensure the sentence contains the searchkey next unless ($sentblock =~ /\(VB\w*\s+\b$search_key\b[\)\s]+/i); ##Ensure searchkey is a verb my ($arg1, $arg2, $goodmatch); my @lines = split ("\n",$sentblock); ##Split by a newline for my $l (0..$#lines) { if (($lines[$l] =~ /subj\w*\(/) && ($lines[$l] =~ /\b$search_key\b/i)) { next unless ($lines[$l] =~ /\w+\(\w+\-\d+\,\s(\w+)\-\d+\)/); my ($matches, $pronoun_matches) = &dependency_checks($lines[$l], $search_key, $chapternumber, $sentencenumber, $sentence); push @all_matches, $matches if ($matches); push @all_pronoun_matches, $pronoun_matches if ($pronoun_matches); } } } my %counts; foreach my $rowref (@all_matches) { $counts{lc($rowref->[5])}++; } my %pronouncounts; foreach my $pronounrowref (@all_pronoun_matches) { $pronouncounts{lc($pronounrowref->[5])}++; } @all_matches = sort { $counts{lc($b->[5])} <=> $counts{lc($a->[5])} || lc($a->[5]) cmp lc($b->[5]) } @all_matches; # for pronoun_matches, same sort, then concatenate to all_matches @all_pronoun_matches = sort { $pronouncounts{lc($b->[5])} <=> $pronouncounts{lc($a->[5])} || lc($a->[5]) cmp lc($b->[5]) } @all_pronoun_matches; @all_matches = (@all_matches, @all_pronoun_matches); my %seen_header; my %seen_subheader; foreach my $match (@all_matches) { $match->[3] = "Subject"; my $header = $counts{lc $match->[5]}." match(es) in which the ".$match->[3]." of ".$match->[4]." is ".$match->[5]." :\n\n"; print $header unless $seen_subheader{lc $match->[5]}++; print "Section ".$match->[0].": ".$match->[2]."\n\n"; ##Section and sentence (formatted) } #Foreach match sub dependency_checks { my ($line, $verbform, $chapternumber, $sentencenumber, $sentence) = @_; my @matches; my @pronoun_matches; return unless ($line =~ /(\w+)\((\w+)\-\d+\,\s(\w+)\-\d+\)/); #Could pass this in $grammar_relation = $1; $argument1 = $2; $argument2 = $3; foreach my $pronoun (@stopListNoun) { if ((lc $pronoun eq lc $argument1) || (lc $pronoun eq lc $argument2)) { push (@pronoun_matches, $chapternumber, $sentencenumber, $sentence, $grammar_relation, $argument2, $argument1) if ($argument2 =~ /$verbform/i); push (@pronoun_matches, $chapternumber, $sentencenumber, $sentence, $grammar_relation, $argument1, $argument2) if ($argument1 =~ /$verbform/i); return; } } #Make sure searchkey is 1st arg: push (@matches, $chapternumber, $sentencenumber, $sentence, $grammar_relation, $argument2, $argument1) if ($argument2 =~ /$verbform/i); push (@matches, $chapternumber, $sentencenumber, $sentence, $grammar_relation, $argument1, $argument2) if ($argument1 =~ /$verbform/i); return (\@matches, \@pronoun_matches); }