# Sample rows that preceded the script in the original text (presumably taken
# from the listing being scraped).  The column boundaries were lost when the
# table was flattened; the split below is a best-effort reading
# (index | company | exchange | ticker | date filed | court) -- TODO confirm.
#
#   1 | 21st Century Holding Co. | NASDAQ | TCHC  | 06/27/2000 | S.D. New York
#   2 | 2TheMart.com             | OTC-BB | TMRT  | 09/13/1999 | C.D. California
#   3 | 360Networks, Inc.        | OTC-BB | TSIXQ | 06/21/2002 | S.D. New York
#   4 | 3Com Corporation 97      | NASDAQ | COMS  | 12/05/1997 | N.D. California
#
####

use strict;
use warnings;
use utf8;

use LWP;
use HTML::LinkExtor;

# The mother function.
#
# Fetches a URL with an HTTP GET and returns the raw response body.
#
#   Parameters: $thelink - the URL to fetch.
#   Returns:    the response content as returned by HTTP::Response->content.
#   Dies:       with "<url>: <status message>\n" if the request is not
#               successful.
sub GetLink {
    my ($thelink) = @_;

    my $agent    = LWP::UserAgent->new;
    my $request  = HTTP::Request->new(GET => $thelink);
    my $response = $agent->request($request);

    $response->is_success
        or die "$thelink: ", $response->message, "\n";

    return $response->content;
}

# Retrieve each entry into the Stanford database.
# Read in a preferences and/or file with a firm code (ticker, permno, etc..)

# NOTE(review): the original open() call was truncated in this copy of the
# script (the file name and the loop header are missing), so the hard-coded
# input file name is unknown.  It is taken from the command line here --
# TODO restore the original name if it is known.
my $firmsFile = $ARGV[0];
defined $firmsFile
    or die "Usage: $0 <file containing securities.stanford.edu URLs>\n";

my %firmArray;          # Not used yet.
my $tempIndex = 0;      # Number of URLs fetched so far; also names the output files.
my $debugFetchLimit = 3;    # Stop once more than this many pages were fetched.

open my $firms_fh, '<', $firmsFile
    or die "Cannot open $firmsFile: $!";

while (my $line = <$firms_fh>) {

    # LAYOUT NEEDED VARIABLES
    # None of these are populated yet.  Each list is initialised element by
    # element: assigning a single scalar to a my(...) list only sets the
    # first variable and leaves the others undefined.
    my ($ticker, $name, $court, $docketNumber)               = ('') x 4;
    my ($dateFiled, $classStart, $classEnd, $plaintiffFirms) = (0) x 4;
    my ($tenB5, $SEA1933, $SEA1934)                          = (0) x 3;
    my ($settle, $insurance, $fAndM)                         = (0) x 3;

    # FIND AND HIT EACH URL
    # The dots in the host name are escaped so they only match literal dots.
    next
        unless $line =~ m%(http://securities\.stanford\.edu/\w*/\w*\-?\w*\-?\w*)%;

    my $theFirmObs = $1;
    $tempIndex++;

    my $theFile = GetLink($theFirmObs);    # Use the mother function
    print "$tempIndex: $theFirmObs\n";     # DEBUG

    # Kill the header and left column to make it easier?

    # Output file: including name based on code given, date, etc. name it .htm
    my $pageName = "$tempIndex.htm";
    open my $page_fh, '>', $pageName
        or die "The outfile didn't work: $!";
    print {$page_fh} $theFile
        or die "Cannot write $pageName: $!";
    close $page_fh
        or die "Cannot close $pageName: $!";

    # The second file is created (and truncated) but nothing is written to it
    # yet; the prints that would fill it are still commented out below.
    my $notesName = $tempIndex . "z.htm";
    open my $notes_fh, '>', $notesName
        or die "The second outfile didn't work: $!";

    if ($theFile =~ m%Conclusion:%) {
        print "went in A\n";
        # print {$notes_fh} "conclusion ";
    }
    if ($theFile =~ m%Summary:%) {
        print "went in B\n";
        # print {$notes_fh} "Summary ";
    }
    if ($theFile =~ m%the%) {
        print "went in 1\n";
        # print {$notes_fh} "$tempIndex: the\n";
    }

    close $notes_fh
        or die "Cannot close $notesName: $!";

    # Debug cap: abort the run after the first few pages.
    if ($tempIndex > $debugFetchLimit) { die "done now.\n"; }
}

close $firms_fh
    or die "Cannot close $firmsFile: $!";