#!/usr/bin/perl -w
# based on: extract-table.pl,v 24.1 2006/10/21 01:19:37 from Raman @ Koders.com
# Accepts a URI and table spec; returns a csv file

use strict;
use FileHandle;
use HTML::TableExtract;
use Getopt::Long;
use WWW::Mechanize;

my ($url, $file, $task, $depth, $count, $cols);
my %options = (task => \$task, url => \$url, file => \$file,
               depth => \$depth, count => \$count, headers => \$cols);
GetOptions(\%options, 'file=s', 'url=s', 'task=s', 'depth=i', 'count=i', 'headers=s');

# Get the data from the web. Typically this is
# http://www.sailwx.info/shiptrack/cruiseships.phtml -- either pass it in
# as --url when invoking, or fall back to the defaults below.
# HTML::TableExtract treats each header as a regular expression, so the
# literal "(UTC)" suffix (regex metacharacters) is dropped: the pattern
# "last reported" still matches the "last reported (UTC)" column.
$cols = 'Ship,last reported,position,Callsign' unless defined $cols;
$url  = 'http://www.sailwx.info/shiptrack/cruiseships.phtml' unless defined $url;

# Open the output CSV once, through a single lexical filehandle.
my $output = FileHandle->new('>C:\Program Files\cron\Cruise Ships\ship_data.csv')
    or die "Cannot open output file: $!";

# Fetch the page; autocheck makes Mechanize die on any HTTP error.
my $m = WWW::Mechanize->new(autocheck => 1);
$m->get($url);
my $input = $m->content;

# Build the extractor: match by column headers if given, else by depth/count.
my $te;
if (defined $cols) {
    my @headers = split(',', $cols);
    $te = HTML::TableExtract->new(headers => \@headers);
} else {
    $te = HTML::TableExtract->new(depth => $depth, count => $count);
}

# $input holds the HTML content itself, not a file name, so use parse()
# rather than parse_file().
$te->parse($input);

# Write each matched row as a comma-separated line; empty cells come back
# as undef, so map them to '' to avoid warnings.
foreach my $ts ($te->tables) {
    foreach my $row ($ts->rows) {
        $output->print(join(',', map { defined $_ ? $_ : '' } @$row), "\n");
    }
}
$output->close;
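# Example invocation (a sketch; the script name "extract-table.pl" is taken
# from the header comment above and may differ in your checkout):
#
#   perl extract-table.pl --url "http://www.sailwx.info/shiptrack/cruiseships.phtml" \
#       --headers "Ship,last reported,position,Callsign"
#
# If cell values can themselves contain commas, the naive join(',') above
# produces a malformed CSV. A minimal alternative for the row-writing loop,
# assuming the Text::CSV module (not used above) is installed:
#
#   use Text::CSV;
#   my $csv = Text::CSV->new({ binary => 1, eol => "\n" });
#   foreach my $ts ($te->tables) {
#       $csv->print($output, $_) for $ts->rows;   # quotes fields as needed
#   }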