#!/usr/bin/perl
# Scan a saved text file line by line and report every line that mentions
# a ".txt", ".pdf" or ".html" resource as one CSV row on STDOUT:
#
#     <label>,<txt line>,<pdf line>,<html line>
#
# Exactly one of the last three columns is filled per row.  The extensions
# are checked in the order .txt, .pdf, .html, so a line that mentions more
# than one of them lands in the first matching column.  Lines that mention
# none of them produce no row.
use strict;
use warnings;

# Disabled download step, kept for reference.
# NOTE(review): presumably this is how the input file was originally
# produced -- confirm before re-enabling.
#use WWW::Mechanize ;
#my $mech = WWW::Mechanize->new();
#my $file_content = "D:/Perl/Perl output/garbage_content.csv" ;
# my $get_file = "http://securities.stanford.edu/filings-case.html?id=101092";
# my $response = $mech->get($get_file,
#                           ':content_file'=> $file_content,);

my $file_content = "1189436input.txt";      # input file to scan
my $filename     = "file_output_101092";    # label written in the first column

# $! carries the OS error of the failed open(); $? is the status of the last
# child process and says nothing about why the file could not be opened.
open my $fh, '<', $file_content
    or die "Cannot open '$file_content' for reading: $!";

my @output;
while (my $line = <$fh>) {
    chomp $line;

    # NOTE(review): the WHOLE matching line is stored, including any leading
    # whitespace or surrounding markup -- not just the URL it contains.
    my ($txt_url, $pdf_url, $html_url) = ('', '', '');
    if (index($line, ".txt") >= 0) {
        $txt_url = $line;
    }
    elsif (index($line, ".pdf") >= 0) {
        $pdf_url = $line;
    }
    elsif (index($line, ".html") >= 0) {
        $html_url = $line;
    }
    else {
        next;    # no recognised extension on this line
    }

    push @output, join ",", $filename, $txt_url, $pdf_url, $html_url;
}
close $fh;

print "$_\n" for @output;
##

##

file_output_101092,,,                Home
file_output_101092,,,                Filings Database
file_output_101092,,,                Resources
file_output_101092,,,                Litigation Activity Indices
file_output_101092,,,                Clearinghouse  Research
file_output_101092,,,                About
file_output_101092,,,              
file_output_101092,,,              
file_output_101092,,,                                                                                                             
file_output_101092,                                                       ,,
file_output_101092,,                                                                                                              ,
file_output_101092,,                                                      ,
file_output_101092,,                                                      ,
file_output_101092,,                                                      ,
file_output_101092,,,   Cases
file_output_101092,,,                           About
file_output_101092,,,                                           About Us
file_output_101092,,,                                           Methodology
file_output_101092,,,                                           FAQ
file_output_101092,,,                                           Sponsors & Partners
file_output_101092,,,                                           Register
file_output_101092,,,                                           Contact Us
file_output_101092,,,                                           Legal Notices
file_output_101092,,,