When I run this:
#!/usr/bin/perl
# Scan a saved copy of a web page line by line and print one comma-joined
# row for every line that mentions a ".txt", ".pdf" or ".html" resource.
#
# Row layout:  <filename>,<txt line>,<pdf line>,<html line>
# Exactly one of the last three columns is filled per row; the first
# matching extension wins, checked in the order .txt, .pdf, .html.
#
# NOTE: in the forum post this script was taken from, the words
# "i get this" were fused in front of the shebang line. They are post
# text introducing the program output, not part of the code.
use strict;
use warnings;
#use WWW::Mechanize ;
#my $mech = WWW::Mechanize->new();

#my $file_content = "D:/Perl/Perl output/garbage_content.csv" ;
my $file_content = "1189436input.txt" ;
# my $get_file = "http://securities.stanford.edu/filings-case.html?id=101092";
my $filename = "file_output_101092" ;
# my $response = $mech->get($get_file,
#                           ':content_file'=> $file_content,);

# $! holds the OS error of a failed open; $? (used previously) is the exit
# status of the last child process and says nothing about why open failed.
open my $fh, '<', $file_content
    or die "Cannot open '$file_content': $!";

my @output;
while (my $line = <$fh>) {
    chomp $line;

    # Per-line state: at most one of these ends up non-empty.
    my ($txt_url, $pdf_url, $html_url) = ('', '', '');

    if (index($line, '.txt') >= 0) {
        $txt_url = $line;
    }
    elsif (index($line, '.pdf') >= 0) {
        $pdf_url = $line;
    }
    elsif (index($line, '.html') >= 0) {
        $html_url = $line;
    }

    # Logical OR, not the bitwise string operator '|'.
    next unless $txt_url || $pdf_url || $html_url;

    # NOTE(review): the whole input line is stored, not just the URL, and
    # fields are not quoted, so a line containing commas will produce extra
    # columns. Left unchanged to keep the output format as posted.
    push @output, join ',', $filename, $txt_url, $pdf_url, $html_url;
}
close $fh;

print "$_\n" for @output;
I assume that the few "duplicates" there are from multiple references in the page or "#" references.
file_output_101092,,, <a href="index.html">Home</a>
file_output_101092,,, <a href="filings.html">Filings Database</a>
file_output_101092,,, <a href="resources.html">Resources</a>
file_output_101092,,, <a href="litigation-activity-indices.html">Litigation Activity Indices</a>
file_output_101092,,, <a href="clearinghouse-research.html">Clearinghouse Research</a>
file_output_101092,,, <a href="about-the-scac.html">About</a>
file_output_101092,,, <!-- <div style="float: left;position: relative;top: 5px;"><a href="filings.html">Browse Filings Database</a></div> -->
file_output_101092,,, <!-- <div style="float: left;position: relative;top: 5px;"><a href="filings.html">Browse Filings Database</a></div> -->
file_output_101092,,, <tr class="table-link" onclick="window.location='filings-documents/1010/ALTSE98/001.html'" target="_blank">
file_output_101092, <tr class="table-link" onclick="window.location='filings-documents/1010/ALTSE98/000.txt'" target="_blank">,,
file_output_101092,, <tr class="table-link" onclick="window.location='filings-documents/1010/ALTSE98/19981029_r04c_9800528.pdf'" target="_blank">,
file_output_101092,, <tr class="table-link" onclick="window.location='filings-documents/1010/ALTSE98/1999730_r07o_98CV00528.pdf'" target="_blank">,
file_output_101092,, <tr class="table-link" onclick="window.location='filings-documents/1010/ALTSE98/1999830_r04c_98CV00528.pdf'" target="_blank">,
file_output_101092,, <tr class="table-link" onclick="window.location='filings-documents/1010/ALTSE98/2002726_r03k_9800528.pdf'" target="_blank">,
file_output_101092,,, <div class="span12"><p><a href="filings-case-pages-url-map.html" style="color: #ffffff"><strong>Cases</strong></a></p></div>
file_output_101092,,, <legend style="margin-left: 0px;margin-bottom: 0px"><a href="about-the-scac.html" style="color: #565656">About</a></legend>
file_output_101092,,, <dd><a href="about-the-scac.html#about" style="color: #151616">About Us</a></dd>
file_output_101092,,, <dd><a href="about-the-scac.html#methodology" style="color: #151616">Methodology</a></dd>
file_output_101092,,, <dd><a href="about-the-scac.html#faq" style="color: #151616">FAQ</a></dd>
file_output_101092,,, <dd style="color: #565656"><a href="about-the-scac.html#sponsors" style="color: #151616">Sponsors & Partners</a></dd>
file_output_101092,,, <dd><a href="about-the-scac.html#register" style="color: #151616">Register</a></dd>
file_output_101092,,, <dd><a href="about-the-scac.html#contacts" style="color: #151616">Contact Us</a></dd>
file_output_101092,,, <dd><a href="about-the-scac.html#legal" style="color: #151616">Legal Notices</a></dd>
file_output_101092,,, <!-- <dd><a href="about-the-scac.html#sitemap" style="color: #151616">Site Map</a></dd> -->
In reply to Re^5: Why does this code continue to loop?
by huck
in thread Why does this code continue to loop?
by rachard11
| For: | Use: |
| & | &amp; |
| < | &lt; |
| > | &gt; |
| [ | &#91; |
| ] | &#93; |