#!/usr/bin/perl
#
# Scrape the CommVault job-postings table and report every listed position.
#
# The script fetches the job-search page at $url, extracts the table whose
# header cells match the names in $cols, prints one "positions: ..." line per
# row on STDOUT and writes the same rows (preceded by a header row) as CSV to
# "$directory$outfile".

use strict;
use warnings;
use WWW::Mechanize;
use Data::Dumper;
use HTML::TableExtract;
use XML::FeedPP;
use utf8;    # the pragma is lower-case; "use UTF8" is not a valid module name

# --- configuration ---------------------------------------------------------

# Comma-separated column names; they are used both as the table-header
# selectors and as the CSV header row.
my $cols = 'tracking_code,job_title,location,date_posted';

# Page to scrape.  No command-line options are parsed, so edit the value here
# to point the script at a different listing.
my $url =
  "https://commvault.silkroad.com/epostings/index.cfm?fuseaction=app.jobsearch";

# Only consulted when $cols is undefined (table selected by position instead
# of by header).  Both are left undefined here.
my $depth;
my $count;

my $directory = "/Users/coblem/testing/";    # must end with a slash
my $outfile   = "cvlt_jobs.csv";
my $out_path  = $directory . $outfile;

# --- open the output file --------------------------------------------------

# Report the *path* on failure; the handle is still undefined at that point.
open( my $out_fh, '>:encoding(UTF-8)', $out_path )
  or die("Unable to create output file \"$out_path\": $!\n");

# NOTE(review): the page text is expected to arrive as decoded characters, so
# give STDOUT an encoding layer as well to avoid "Wide character" warnings.
binmode( STDOUT, ':encoding(UTF-8)' );

# --- fetch the page --------------------------------------------------------

my $m = WWW::Mechanize->new();
$m->get($url);
my $input = $m->content;

# --- build the table parser ------------------------------------------------

my @headers;
my $te;
if ( defined($cols) ) {
    print "columns ", $cols, "\n";
    @headers = split( /,/, $cols );

    # NOTE(review): HTML::TableExtract matches these strings against the text
    # of the table's header cells -- confirm the page's labels really match
    # these underscore names, otherwise no rows are returned.
    $te = HTML::TableExtract->new( headers => \@headers )
      or die "Unable to construct HTML::TableExtract parser\n";

    print Dumper($te);    # debugging aid: shows the parser configuration
}
else {
    $te = HTML::TableExtract->new( depth => $depth, count => $count );
}

$te->parse($input);

# --- emit the rows ---------------------------------------------------------

print {$out_fh} csv_record(@headers) if @headers;

foreach my $row ( $te->rows ) {

    # Empty table cells come back undefined; treat them as empty strings so
    # neither the console line nor the CSV record triggers a warning.
    my ( $tracking_code, $job_title, $location, $date_posted ) =
      map { defined $_ ? $_ : '' } @{$row}[ 0 .. 3 ];

    print "positions: $tracking_code $job_title $location $date_posted \n";

    print {$out_fh}
      csv_record( $tracking_code, $job_title, $location, $date_posted );
}

# Buffered write errors only surface at close time, so check it.
close($out_fh)
  or die("Unable to close output file \"$out_path\": $!\n");

# Render a list of fields as one CSV record terminated by a newline.
# Undefined fields become empty; fields containing a comma, double quote or
# line break are wrapped in double quotes with embedded quotes doubled.
sub csv_record {
    my @fields = @_;

    my @cells;
    for my $field (@fields) {
        my $cell = defined $field ? $field : '';
        if ( $cell =~ /[",\r\n]/ ) {
            $cell =~ s/"/""/g;
            $cell = qq{"$cell"};
        }
        push @cells, $cell;
    }

    return join( ',', @cells ) . "\n";
}