#!/usr/bin/perl
#
# Download the sailwx.info cruise-ship tracking page, extract the ship table
# from the HTML, echo each ship's name and position to STDOUT, and save all
# extracted rows as a CSV file.
#
use strict;
use warnings;
use WWW::Mechanize;
use Data::Dumper;
use HTML::TableExtract;
use HTML::TreeBuilder::XPath;
use utf8;    # the pragma name is lower-case; "use UTF8" is not a valid spelling

# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------

# Header row written to the CSV file, in output column order.
my $cols = 'Ship,last reported (UTC),position,Callsign';

# Page to scrape.  Typically this is:
#   http://www.sailwx.info/shiptrack/cruiseships.phtml
# NOTE: this script does no command-line parsing, so there is no --url
# option; edit the value here to scrape a different page.
my $url = "http://www.sailwx.info/shiptrack/cruiseships.phtml";

# Destination CSV file.
my $out_fn = 'C:\\Program Files\\cron\\Cruise Ships\\ship_data.csv';

# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------

# Normalise one table cell: undef becomes '', runs of whitespace collapse to
# a single space, and leading/trailing whitespace is removed.
sub _clean_cell {
    my ($text) = @_;
    return '' unless defined $text;
    $text =~ s/\s+/ /g;
    $text =~ s/\A //;
    $text =~ s/ \z//;
    return $text;
}

# Join fields into one newline-terminated CSV record.  A field containing a
# comma, double quote or line break is wrapped in double quotes, with any
# embedded double quotes doubled.
sub _csv_line {
    my @fields = @_;
    for my $field (@fields) {
        if ( $field =~ /[",\r\n]/ ) {
            $field =~ s/"/""/g;
            $field = qq{"$field"};
        }
    }
    return join( ',', @fields ) . "\n";
}

# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------

# Cell text may contain non-ASCII characters; encode STDOUT explicitly so
# printing them does not trigger "Wide character" warnings.
binmode( STDOUT, ':encoding(UTF-8)' );

# Fetch the page.  With autocheck enabled, a failed request dies here,
# before the output file has been touched.
my $mech = WWW::Mechanize->new( autocheck => 1 );
$mech->get($url);
my $html = $mech->content;

# Select the table by its column headers.  Each header is a pattern matched
# against the header cell text, and the cells of every row come back in the
# order the headers are listed here: Ship, position, last, Callsign.
my $te = HTML::TableExtract->new(
    headers => [qw( Ship position last Callsign )],
);
$te->parse($html);

my $table = $te->first_table_found
    or die("No table with the expected column headers found at $url\n");

my @records;
for my $row ( $table->rows ) {

    # Read the cells straight from the extracted row.
    my ( $name, $position, $reported, $callsign )
        = map { _clean_cell( $row->[$_] ) } 0 .. 3;

    # Skip rows in which every selected cell is empty.
    next unless grep { length } $name, $position, $reported, $callsign;

    print $name,     "\n";
    print $position, "\n";

    # Disabled draft, kept for reference: convert the textual position into
    # signed decimal degrees.  It is not valid as written (it uses "return"
    # outside a sub), so it stays commented out.
    # my $re = qr/([NS]?)\s*(\d+)(?:\D*)(\d*).*?,\s*([EW]?)\s*(\d+)(?:\D*)(\d*)/;
    # unless ($position =~ /$re/) {
    #     die "unable to parse position\n";
    # }
    # my $lat  = $2 + $3/60;
    # my $long = $5 + $6/60;
    # if ($1 eq 'S') { $lat  = -$lat; }
    # if ($4 eq 'W') { $long = -$long; }
    # return sprintf("%.2f,%.2f", $lat, $long);

    # Store in the column order announced by $cols.
    push @records, [ $name, $reported, $position, $callsign ];
}

# Open the output only after a successful fetch and parse, so that a failed
# run does not truncate the data left by a previous one.
open( my $out_fh, '>:encoding(UTF-8)', $out_fn )
    or die("Unable to create output file \"$out_fn\": $!\n");

print {$out_fh} $cols, "\n";
for my $record (@records) {
    print {$out_fh} _csv_line( @{$record} );
}

# Buffered write errors only surface at close, so check it.
close($out_fh)
    or die("Unable to finish writing output file \"$out_fn\": $!\n");