use strict; # driveBase drives single tokens to baseform. Its returned value is # true for success. It will always inscribe $wordCache{tok}{group} if # it isn't already done. sub driveBase{ my($tok, $Args) =@_; return $wordCache{$tok} if $wordCache{$tok}{inscript}; my $checkPrefix # Check the prefix unless explicitly told not to. = defined($Args->{checkPrefix}) ? $Args->{checkPrefix} : 1; my $skipPrefix = ! $checkPrefix; my($base,$atail,$ntail) = &split_token($tok); my($sing, $unprefBase, $savgrp, $savbase); if($atail and $ntail ne ''){ # If there's both an atail and an ntail. &driveBase("$base,$atail", $Args); # The base,tail can be a different group than the base. &equivWord($tok, "$base,$atail", {ntail => $ntail})} # Treat GYROSCOPE,CONTROL MOMENT,3 same as GYROSCOPE,CONTROL MOMENT # Assume that if $wordCache{$base,,$ntail}{group}, it's the same as $wordCache{$base}{group} # E.g. if SOFTGOODS LAB is a type of LAB, then it has the same group as LAB, unless we're told otherwise. elsif(!$atail and $ntail ne ''){ # Has a numeric tail but no alpha tail. &driveBase($base, $Args); &equivWord($tok, $base, {ntail => $ntail})} elsif($wordCache{$base} and $wordCache{$base}{group} and $wordCache{$base}{group} ne 'UNKNOWN' and $base ne $tok){ unless(member($wordCache{$base}{group}, qw(TIMEPERIOD PARTNO RANGE EXTENSION)) or $Args -> {nocomplain}){ warn_once('driveBase', "WordCache should have been filled in when we asserted '$atail' isa type of '$base' ")}; &equivWord($tok, $base, {atail => $atail}); 1} elsif(not ($atail eq '' and $ntail eq '')){ # There's either an atail or ntail but the form hasn't been seen before. &driveBase($base, $Args); &equivWord($tok, $base, {atail =>$atail, ntail=>$ntail})} elsif($checkPrefix and # This is the main check for prefixed forms &is_prefixed_partspeechAnnotated($base)){ # wordCache{base} gets filled in. unless($tok eq $base){ #$prefixLessFm Mostly, base WILL eq tok, but catch the odd case. &equivWord($tok, $base, {atail => $atail})}; 1} # If $checkPrefix is false, then we've already stripped a prefix, # so it can't be an abbreviation, a number or an uncannical form. elsif($checkPrefix and $canon{$tok} and $canon{$tok} ne $tok){ &driveBase($canon{$tok}, {%$Args, checkPrefix=>0}); &equivWord($tok, $canon{$tok}, {})} elsif(($sing) = $tok =~ /(.+)S$/ and $canon{$sing} and $canon{$sing} ne $tok){ # So REQTS => REQT => REQUIREMENT &equivWord($tok, $canon{$sing}, {plural=>1})} elsif($checkPrefix and $abbrev{$tok}){ # Not normally invoked, as abbrevs have already been expanded usually. &equivWord($tok, $abbrev{$tok}, {abbreviation =>1})} elsif($checkPrefix and (&part_number_p($base) or $atail eq 'PARTNO')){ # P/N and PN: get transformed in immediate_transforms; see the improves file &baseInscript($tok, {group => 'PARTNO', root => $tok})} elsif($checkPrefix and &power_loc_p($tok)){ &baseInscript($tok, {group => 'POWERLOC', root => $tok})} elsif($checkPrefix and &is_loc_code($tok)){ &baseInscript($tok, {group => 'LOC_CODE', root => $tok})} elsif($checkPrefix and $savbase = &is_measure_physics_frac($tok)){ $num_item{$tok} = 1 unless defined($num_item{$tok}); &baseInscript($tok, {group => $savbase, num_item =>1, root => $tok})} elsif($savbase = &is_gerund($base, 1)){ warn_once('GroupOf2', "Gerund $tok with base $base and tails '$atail', '$ntail'") if $atail or $ntail; &equivWord($tok, $savbase, {group=> 'GERUND', atail => $atail})} elsif($savbase = &is_pastverb($base, 1)){ &equivWord($tok, $savbase, {group=> 'VERB', # Even if the baseform is NVRB. atail => $atail, verbtense => 'PAST'})} elsif(&is_prefixed_verb($base, 1)){ warn_once('GroupOf2.b', "Why wasn't $base of $tok found earlier?")} elsif($savbase = &is_ize_verb($base, 1)){ &driveBase($savbase, {checkPrefix => 0}); # usually a no-op, but every once in a while... &equivWord($tok, $savbase, {group=> 'VERB', # Even if the baseform is NVRB. atail => $atail})} elsif($sing = &is_3rdpersonverb($base, 1)){ # BOXES Could be either the plural or the 3rd person sing. &equivWord($tok, $sing, {s_ending => 1, atail => $atail})} elsif($savbase = &is_derived_noun($base, 1)){ # '-TION' words etc., REFUSAL, REBUTTAL, GOODNESS, BUSINESS &equivWord($tok, $savbase, {group => 'NOUN', # A few NVRBS like POSITION and CONDITION will be called out explicitly. atail => $atail}, {no_use =>[qw(actorform)]})} elsif($savbase = &is_derived_adj($base, 1)){ &equivWord($tok, $savbase, {group => 'ADJECTIVE', atail => $atail}, {no_use =>[qw(actorform)]})} elsif($savbase = &is_adverb($base, 1)){ &equivWord($tok, $savbase, {group => 'ADVB', atail => $atail})} elsif($types{$base}){ # presumably this token will get gobbled warn_once('GroupOf2.c', # by one of the neighboring tokens. "Why does modifier $base have a tail $atail") if $atail; &baseInscript($tok, {group => 'MODIFIER', root => $tok})} elsif($sing = &singularOfNoun($tok, 1)){ &equivWord($tok, $sing, {plural=>1})} elsif($base =~ /^[a-z,A-Z]$/){ &baseInscript($tok, {group => 'LETTER', root => $tok})} # if $checkPrefix is false, then we've already stripped a prefix off the tok, so it can't be a roman numeral elsif($checkPrefix and (&number_p($tok) or &roman_number_p($tok))){ &baseInscript($tok, {group => 'NUMBER', root => $tok})} elsif(is_alphnum($base)){ &baseInscript($tok, {group => 'ALPHNUM', root => $tok})} elsif($base =~ /^$punctuation_tokens$/){ &baseInscript($tok, {group => 'PUNCTUATION', root => $tok})} # So COUNTERDOFFABLE gets tested as DOFFABLE and returns ADJECTIVE elsif($checkPrefix and $unprefBase and # Set in an earlier test, above $savgrp = &group_of($unprefBase, 1) and # This time, try to derive a group for the base. &member($metagroup{$savgrp}, 'NVRB', 'NOUN','VERB','ADJECTIVE','ADVB', 'GERUND')){} else{ &baseInscript($tok, {group => 'UNKNOWN', root => $tok})}; $wordCache{$tok}}