<tr bgcolor="#cccccc">
<th align="center"><font size="2" color="#330066" FACE="Garmond,Helvet
+ica,Times">1</font></th><td align="left"><a href="http://securities.s
+tanford.edu/1014/TCHC00"><font size="2" color="#330066" FACE="Garmond
+,Helvetica,Times">21st Century Holding Co.</font></a></td><td align="
+left"><font size="2" color="#330066" FACE="Garmond,Helvetica,Times"><
+a href="http://biz.yahoo.com/p/T/TCHC.html">NASDAQ</a></font></td><td
+ align="left"><font size="2" color="#330066" FACE="Garmond,Helvetica,
+Times"><a href="http://finance.yahoo.com/q?s=TCHC&d=t">TCHC</a></font
+></td><td align="left"><font size="2" color="#330066" FACE="Garmond,H
+elvetica,Times">06/27/2000</font></td><td align="left"><font size="2"
+ color="#330066" FACE="Garmond,Helvetica,Times">S.D. New York</font><
+/td>
</tr>
<tr>
<th align="center"><font size="2" color="#330066" FACE="Garmond,Helvet
+ica,Times">2</font></th><td align="left"><a href="http://securities.s
+tanford.edu/1009/TMRT99"><font size="2" color="#330066" FACE="Garmond
+,Helvetica,Times">2TheMart.com</font></a></td><td align="left"><font
+size="2" color="#330066" FACE="Garmond,Helvetica,Times"><a href="http
+://biz.yahoo.com/p/T/TMRT.html">OTC-BB</a></font></td><td align="left
+"><font size="2" color="#330066" FACE="Garmond,Helvetica,Times"><a hr
+ef="http://finance.yahoo.com/q?s=TMRT&d=t">TMRT</a></font></td><td al
+ign="left"><font size="2" color="#330066" FACE="Garmond,Helvetica,Tim
+es">09/13/1999</font></td><td align="left"><font size="2" color="#330
+066" FACE="Garmond,Helvetica,Times">C.D. California</font></td>
</tr>
<tr bgcolor="#cccccc">
<th align="center"><font size="2" color="#330066" FACE="Garmond,Helvet
+ica,Times">3</font></th><td align="left"><a href="http://securities.s
+tanford.edu/1024/SIXQPK02-01"><font size="2" color="#330066" FACE="Ga
+rmond,Helvetica,Times">360Networks, Inc.</font></a></td><td align="le
+ft"><font size="2" color="#330066" FACE="Garmond,Helvetica,Times"><a
+href="http://biz.yahoo.com/p/T/TSIXQ.html">OTC-BB</a></font></td><td
+align="left"><font size="2" color="#330066" FACE="Garmond,Helvetica,T
+imes"><a href="http://finance.yahoo.com/q?s=TSIXQ&d=t">TSIXQ</a></fon
+t></td><td align="left"><font size="2" color="#330066" FACE="Garmond,
+Helvetica,Times">06/21/2002</font></td><td align="left"><font size="2
+" color="#330066" FACE="Garmond,Helvetica,Times">S.D. New York</font>
+</td>
</tr>
<tr>
<th align="center"><font size="2" color="#330066" FACE="Garmond,Helvet
+ica,Times">4</font></th><td align="left"><a href="http://securities.s
+tanford.edu/1009/COMS97"><font size="2" color="#330066" FACE="Garmond
+,Helvetica,Times">3Com Corporation 97</font></a></td><td align="left"
+><font size="2" color="#330066" FACE="Garmond,Helvetica,Times"><a hre
+f="http://biz.yahoo.com/p/C/COMS.html">NASDAQ</a></font></td><td alig
+n="left"><font size="2" color="#330066" FACE="Garmond,Helvetica,Times
+"><a href="http://finance.yahoo.com/q?s=COMS&d=t">COMS</a></font></td
+><td align="left"><font size="2" color="#330066" FACE="Garmond,Helvet
+ica,Times">12/05/1997</font></td><td align="left"><font size="2" colo
+r="#330066" FACE="Garmond,Helvetica,Times">N.D. California</font></td
+>
</tr>
Code:
use LWP;
use HTML::LinkExtor;
use utf8;
# The mother function
# Fetch a URL with an HTTP GET and hand back the response body.
#
# Arguments:
#   $thelink - the URL to request.
# Returns:
#   The response content exactly as HTTP::Response->content yields it.
# Dies:
#   With "<url>: <status message>\n" when the response is not a success.
sub GetLink {
    my ($thelink) = @_;

    my $theagent    = LWP::UserAgent->new;
    my $therequest  = HTTP::Request->new(GET => $thelink);
    my $theresponse = $theagent->request($therequest);

    # The trailing "\n" stops Perl from appending " at FILE line N." to the
    # message, so the caller sees just the URL and the server's complaint.
    $theresponse->is_success
        or die "$thelink: ", $theresponse->message, "\n";

    return $theresponse->content;
}
# Retrieve each entry in the Stanford database.
# Reads the master list (firms.htm), which holds one firm per table row,
# picks the securities.stanford.edu case URL out of each line, downloads
# that page with GetLink and saves it locally as "<index>.htm".
# (Eventually: read in a preferences file and/or a file with a firm code
# such as ticker, permno, etc.)
my ($theFirmObs, $theFile);
my $line = "";
my %firmArray;    # declared for later use; nothing in this section fills it

open my $firms_fh, '<', 'firms.htm'
    or die "The master list file didn't open: $!";

# Running count of case URLs found so far; also used to name output files.
my $tempIndex = 0;

while ($line = <$firms_fh>) {

    # LAYOUT NEEDED VARIABLES
    # Placeholders for the fields to be scraped from each case page. Each
    # list is initialised element by element: a plain `my (...) = ""`
    # would set only the first variable and leave the rest undefined.
    my ($ticker, $name, $court, $docketNumber)               = ("") x 4;
    my ($dateFiled, $classStart, $classEnd, $plaintiffFirms) = (0) x 4;
    my ($tenB5, $SEA1933, $SEA1934)                          = (0) x 3;
    my ($settle, $insurance, $fAndM)                         = (0) x 3;

    # FIND AND HIT EACH URL
    # A case URL looks like .../1014/TCHC00 or .../1024/SIXQPK02-01: a
    # numeric-looking directory, then a code with up to two optional
    # hyphenated suffixes. Only the first URL on a line is used.
    next
        unless $line
        =~ m{(http://securities\.stanford\.edu/\w*/\w*-?\w*-?\w*)};

    $tempIndex++;
    $theFirmObs = $1;
    $theFile    = GetLink($theFirmObs);    # Use the mother function
    print "$tempIndex: $theFirmObs\n";     # DEBUG

    # Kill the header and left column to make it easier?
    # Output file: including name based on code given, date, etc.;
    # name it .htm
    open my $page_fh, '>', "$tempIndex.htm"
        or die "The outfile didn't work: $!";
    print {$page_fh} $theFile;
    close $page_fh
        or die "The outfile didn't work: $!";

    # Second output file, reserved for the extracted summary. Nothing is
    # written to it yet, but it is still created (empty) as before.
    open my $summary_fh, '>', "${tempIndex}z.htm"
        or die "The second outfile didn't work: $!";

    # DEBUG: report which markers appear in the downloaded page.
    if ($theFile =~ m%Conclusion:%) {
        print "went in A\n";
        # print {$summary_fh} "conclusion ";
    }
    if ($theFile =~ m%Summary:%) {
        print "went in B\n";
        # print {$summary_fh} "Summary ";
    }
    if ($theFile =~ m%the%) {
        print "went in 1\n";
        # print {$summary_fh} "$tempIndex: the\n";
    }

    close $summary_fh
        or die "The second outfile didn't work: $!";

    # DEBUG cap: stop after the fourth link while the scraper is being
    # developed. Still a die, so the exit status is unchanged.
    if ($tempIndex > 3) { die "done now.\n"; }
}

close $firms_fh;
|