The main problem is that the script is only finding the first match (pam anderson). I started out just trying to match in the first text document, in the table row, but I actually want to record all the information between the text documents, so my log reads:
Pam Anderson|Stacked|nc|#1|images/hof.gif|295
Paris Hilton|Nicole Out|nc|#2|images/hof.gif|65
#!/perl/bin/perl use LWP::UserAgent; use LWP::Simple; $getURL = "http://localhost/~owner/50.htm"; $script_name = "Fetcher"; print "Content-type: text/html\n\n"; $page_data; $content_type; $request; $response; $fiftypath = "data/fifty.dat"; $ua = LWP::UserAgent->new; $ua->timeout(10); $ua->agent("$script_name"); $request = HTTP::Request->new('GET',$getURL); $response = $ua->request($request); #@page_data = $response->content; @page_data = <DATA>; foreach $line (@page_data) { if ($line =~ /class=\"fifty\">([^<]+)<\/a>/) { open (LOG,">>$fiftypath"); flock(LOG, 2); print LOG "$1"; flock(LOG, 8); close(LOG); print "$1"; } } __DATA__ <table> <tr> <td valign="middle"><b><font size="2">1</font></b> </td> <td valign="middle"><a href="http://localhost/~owner/pam.htm" class="t +itle">Pam Anderson</a><br>Stacked</td> <td align="center" width="50">nc</td> <td align="center" width="50">#1</td> <td valign="middle" width="50%"><img src="images/hof.gif" width="15" h +eight="14" border="0" hspace="2"></td> <td valign="middle" width="50%">295</td> </tr> <tr> <td valign="middle"><b><font size="2">1</font></b> </td> <td valign="middle"><a href="http://localhost/~owner/paris.htm" class= +"title">Paris Hilton</a><br>Nicole Out</td> <td align="center" width="50">nc</td> <td align="center" width="50">#2</td> <td valign="middle" width="50%"><img src="images/hof.gif" width="15" h +eight="14" border="0" hspace="2"></td> <td valign="middle" width="50%">65</td> </tr> </table>
In reply to Searching HTML by Anonymous Monk
| For: | Use: | ||
| & | & | ||
| < | < | ||
| > | > | ||
| [ | [ | ||
| ] | ] |