arvin has asked for the wisdom of the Perl Monks concerning the following question:

I am trying to write a perl script that will log into a site and download the latest file. So far, with the script I have written I can login but cannot download. Below is the source code of the site connecting too and then my script

Thank you in advance !

Source Code

<head><script type="text/javascript" src="/CFIDE/scripts/cfform.js"></ +script> <script type="text/javascript" src="/CFIDE/scripts/masks.js"></script> <title>TNSC Home Page</title> <link rel="stylesheet" href="tnsc.css" type="text/css"> <style Type="text/css"> <!-- a { text-decoration: underline; color: blue; font-size: 12pt;} //--> </style> <script type="text/javascript"> <!-- _CF_checkCFForm_1 = function(_CF_this) { //reset on submit _CF_error_exists = false; _CF_error_messages = new Array(); _CF_error_fields = new Object(); _CF_FirstErrorField = null; //display error messages and return success if( _CF_error_exists ) { if( _CF_error_messages.length > 0 ) { // show alert() message _CF_onErrorAlert(_CF_error_messages); // set focus to first form error, if the field support +s js focus(). if( _CF_this[_CF_FirstErrorField].type == "text" ) { _CF_this[_CF_FirstErrorField].focus(); } } return false; }else { return true; } } //--> </script> </head><body> <table width = '700px' cellspacing="0" cellpadding="0" border="0" alig +n = 'center'> <tr> <td width = '600px' align ='left'> <div id="content"> <div "header_logo"><img src="images/logo.gif" width="147" heig +ht="30" alt="iconectiv"></div><P>&nbsp;</P> <form name="CFForm_1" id="CFForm_1" action="download.cfm" meth +od="post" onsubmit="return _CF_checkCFForm_1(this)"> <p>&nbsp;</P> <hr noshade><center> <b>Files for Download</b> </center><hr noshade> <table width = "90%" border = "0" align = "center" cellpad +ding = "3" cellspacing = "0"> <tr> <th width = "40%" align = "left">File</th> <th width = "30%" align = "left">Size</th> <th width = "30%" align = "left">Date</th> </tr> <tr bgcolor ="#ffffff"> <td><a href = "fdown.cfm?flname=NDCACT_Interim_201 +30403.zip&prod=GDD">NDCACT_Interim_20130403.zip</a></td> <td> 1,425 KB</td> <td>04/03/13</td> </tr> <tr bgcolor ="#cccccc"> <td><a href = "fdown.cfm?flname=NDCACT_Interim_201 +30326.zip&prod=GDD">NDCACT_Interim_20130326.zip</a></td> <td> 590 KB</td> <td>03/26/13</td> </tr> <tr bgcolor ="#ffffff"> <td><a href = "fdown.cfm?flname=NDCACT_Interim_201 +30322.zip&prod=GDD">NDCACT_Interim_20130322.zip</a></td> <td> 473 KB</td> <td>03/22/13</td> </tr> <tr bgcolor ="#cccccc"> <td><a href = "fdown.cfm?flname=NDCACT_Interim_201 +30320.zip&prod=GDD">NDCACT_Interim_20130320.zip</a></td> <td> 3,263 KB</td> <td>03/20/13</td> </tr> <tr bgcolor ="#ffffff"> <td><a href = "fdown.cfm?flname=GDDS0313.zip&prod= +GDD">GDDS0313.zip</a></td> <td>79,343,310 KB</td> <td>03/08/13</td> </tr> <tr bgcolor ="#cccccc"> <td><a href = "fdown.cfm?flname=GDDS_0213.zip&prod +=GDD">GDDS_0213.zip</a></td> <td>47,531,855 KB</td> <td>02/08/13</td> </tr> <tr bgcolor ="#ffffff"> <td><a href = "fdown.cfm?flname=GDDS_0113.zip&prod +=GDD">GDDS_0113.zip</a></td> <td>48,008,616 KB</td> <td>01/11/13</td> </tr> </table> <p>&nbsp;</P> <hr noshade> <p>&nbsp;</P> <center><a href = "index.cfm">Main Menu</a></center> </form>

My Script

#!/usr/local/bin/perl use strict; use warnings; use LWP; use Crypt::SSLeay; use HTTP::Cookies; use WWW::Mechanize; use HTML::Parser; use HTTP::Request; use HTML::TableExtract; use HTML::Query; use HTML::Form; use File::Path qw{mkpath}; $ENV{'PERL_LWP_SSL_VERIFY_HOSTNAME'} = 0; my $HOME = $ENV{HOME} . '/tmp'; mkpath $HOME unless -d $HOME; # create a cookie jar on disk my $cookies = HTTP::Cookies->new( file => $HOME.'/cookies.txt', autosave => 1, ); # create an user-agent and assign the cookie jar to it my $http = LWP::UserAgent->new(); $http->cookie_jar($cookies); # Get Main page my $browser = LWP::UserAgent->new; my $response = $browser->post( 'https://prism.telcordia.com/tnsc/index.cfm', [ 'LOGIN' => 'GDD',] ); # check if log in succeeded if($response->is_success){ print "- Main page passed"."\n"; print "- try to log on now"."\n"; } # try to log in my $login = $http->post( 'https://prism.telcordia.com/tnsc/login.cfm', [ userName => 'XXXX', password => 'XXXX', prod => 'GDD', LOGIN => 'action',] ); # check if log in succeeded if($login->is_success){ print "- logged in successfully"."\n"; print "- requesting file, might take a while"."\n"; } # Get to download page my $url = 'https://prism.telcordia.com/tnsc/confirm_login.cfm'; $browser = LWP::UserAgent->new; $response = $browser->get($url); # check if log in succeeded if($response->is_success){ print "- Download page passed"."\n"; print "- try to download now"."\n"; } # make request to download the file # my $url = 'https://prism.telcordia.com/tnsc/confirm_login.c +fm'; #my $file_req = HTTP::Request->new('GET', $url); #my $get_file = $http->request( $file_req ); my $te = HTML::TableExtract->new( headers => [qw(File Size Date)] +); $te->parse($url); # Examine all matching tables foreach my $ts ($te->tables) { print "Table (", join(',', $ts->coords), "):\n"; foreach my $row ($ts->rows) { print join(',', @$row), "\n"; } } # check request status if($get_file->is_success){ print "--> downloaded $url, saving it to file"; # save the file content to disk open my $fh, '>', $HOME.'/*.zip' or die "ERROR: $!n"; print $fh $get_file->decoded_content; close $fh; print "saved file"."\n"; print "------------"."\n"; print "filename: ".$get_file->filename."\n"; print "size: ".(-s $HOME.'/*.zip')."\n"; } else { die "ERROR: download of $url failed: " . $get_file->status_lin +e . "\n"; } } else { die "ERROR: login failed: " .$login->status_line . "\n"; }

Replies are listed 'Best First'.
Re: Download files from website
by LanX (Saint) on Apr 04, 2013 at 18:18 UTC

    this page is full of JavaScript, I suppose you need to look at WWW::Mechanize::Firefox or Selenium for web-browser automation.

    and please use <readmore>-tags when posting such long chunks of code.

    Cheers Rolf

    ( addicted to the Perl Programming Language)