Here is the code. I'm reading zipcodes from a spreadsheet and scraping the data related to each zipcode from a website.
use strict;
use warnings;
use Spreadsheet::ParseExcel;
use WWW::Mechanize;
use HTML::Form;
#Variable declaration
# Every script-level variable is declared here; the steps below assign to them.
my ( $zicodes_file, $website, $xls_parser, $xls_workbook,
    $xls_worksheet, $row_min, $row_max, $col_min, $col_max, @zip_codes,
    $mech, @zip_inputs, $input, @dealer_links );

#Variable Initialization
# Both command-line parameters are mandatory; the script aborts if either
# one is missing.
# 1st parameter: path to the zipcodes spreadsheet.
$zicodes_file = shift
    or die "1st Cmd Param(zipcodes spreadsheet) Missing..Exiting!!";
# 2nd parameter: URL of the dealer website.
$website = shift
    or die "2nd Cmd Param(honda dealer website) Missing..Exiting!!";
#STEP - 1 - Read in zipcodes from Zipcodes Spreadsheet
# Parse the workbook named on the command line; parse() returns undef on
# failure, in which case the parser's own error text is reported.
$xls_parser   = Spreadsheet::ParseExcel->new();
$xls_workbook = $xls_parser->parse( $zicodes_file );
die $xls_parser->error(), ".\n" if ( !defined $xls_workbook );

##ZipCodes are in 2nd worksheet (index 1)
$xls_worksheet = $xls_workbook->worksheet(1);
# Fail with a clear message instead of calling a method on undef when the
# workbook has fewer than two worksheets.
die "No 2nd worksheet found in $zicodes_file.\n"
    if ( !defined $xls_worksheet );

( $row_min, $row_max ) = $xls_worksheet->row_range();

# Iteration starts at row 1, so row 0 is never read
# (presumably a header row - TODO confirm).
for my $row ( 1 .. $row_max ) {
    my $col  = 0;    #Zipcodes are in first column
    my $cell = $xls_worksheet->get_cell( $row, $col );
    next unless $cell;

    # Skip cells that exist but hold no text, so that an empty string is
    # never submitted as a zipcode later on.
    my $zip_code = $cell->value();
    next unless defined $zip_code && length $zip_code;

    push @zip_codes, $zip_code;
}

# The following step needs at least one zipcode to search with.
die "No zipcodes found in 2nd worksheet of $zicodes_file.\n"
    unless @zip_codes;
#STEP - 2 - Read in related data for zipcodes using the website
$mech = WWW::Mechanize->new();
$mech->get( $website );
# Explicit status check, kept as a safeguard in case the request failed
# without get() itself raising an error.
die "Could not fetch $website ",
    $mech->status, " \n" if ( !$mech->success );

# Select the dealer search form; abort if the page does not contain it.
$mech->form_name( 'searchdealer' )
    or die "Form 'searchdealer' not found on $website\n";

# Locate the zipcode text field inside the selected form.
@zip_inputs = $mech->find_all_inputs(
    type => 'text',
    id   => 'searchform_txt_zip',
);
die "Zipcode input 'searchform_txt_zip' not found on $website\n"
    unless @zip_inputs;

#testing with only one zip code
die "No zipcode available to search with\n"
    unless defined $zip_codes[0];
$input = $zip_inputs[0];
$input->value( $zip_codes[0] );
$mech->submit;

# Collect the links on the result page whose URL ends in "dealer=<digits>".
@dealer_links = $mech->find_all_links(
    url_regex => qr/results.+?dealer\=\d+$/i,
);
die "No dealer links found for zipcode $zip_codes[0]\n"
    unless @dealer_links;

# Follow the first dealer link (via the link object's url accessor rather
# than its internal array slot) and dump the raw HTML of that page.
$mech->get( $dealer_links[0]->url );
print $mech->content;
I don't see any method in WWW::Mechanize that can help me parse the data inside the HTML div tags.