#!/usr/bin/perl
use strict;
use warnings;

# Reads $file_content line by line and collects every line that contains
# ".txt", ".pdf" or ".html".  Each such line is emitted as one CSV-style row:
#
#     <label>,<txt line>,<pdf line>,<html line>
#
# Exactly one of the three trailing columns is filled per row; the check order
# (.txt, then .pdf, then .html) decides the column when a line contains more
# than one of the extensions.  Lines containing none of them are skipped.
# NOTE(review): the whole input line is stored, not an extracted URL, and the
# line is joined without CSV quoting -- confirm that is what consumers expect.

# Disabled download step, kept from the original for reference:
#use WWW::Mechanize ;
#my $mech = WWW::Mechanize->new();
#my $file_content = "D:/Perl/Perl output/garbage_content.csv" ;
# my $get_file = "http://securities.stanford.edu/filings-case.html?id=101092";
# my $response = $mech->get($get_file,
#                           ':content_file'=> $file_content,);

my $file_content = "1189436input.txt";      # input file to scan
my $filename     = "file_output_101092";    # label written in the first column

# $! carries the OS error for a failed open; $? is the status of the last
# child process and says nothing about why open failed.
open my $fh, '<', $file_content
    or die "Cannot open '$file_content' for reading: $!";

my @output;
while (my $line = <$fh>) {
    chomp $line;

    my ($txt_url, $pdf_url, $html_url) = ('', '', '');

    if (index($line, ".txt") >= 0) {
        $txt_url = $line;
    }
    elsif (index($line, ".pdf") >= 0) {
        $pdf_url = $line;
    }
    elsif (index($line, ".html") >= 0) {
        $html_url = $line;
    }
    else {
        next;    # no recognised extension on this line
    }

    push @output, join ",", $filename, $txt_url, $pdf_url, $html_url;
}
close $fh;

print "$_\n" for @output;

__END__

Text that followed the script in the original paste (apparently captured
output); kept verbatim below and ignored by perl:

#### file_output_101092,,, Home file_output_101092,,, Filings Database file_output_101092,,, Resources file_output_101092,,, Litigation Activity Indices file_output_101092,,, Clearinghouse Research file_output_101092,,, About file_output_101092,,, file_output_101092,,, file_output_101092,,, file_output_101092, ,, file_output_101092,, , file_output_101092,, , file_output_101092,, , file_output_101092,, , file_output_101092,,,

Cases

file_output_101092,,, About file_output_101092,,,
About Us
file_output_101092,,,
Methodology
file_output_101092,,,
FAQ
file_output_101092,,,
Sponsors & Partners
file_output_101092,,,
Register
file_output_101092,,,
Contact Us
file_output_101092,,,
Legal Notices
file_output_101092,,,