#!/usr/bin/perl -w
# based on: extract-table.pl,v 24.1 2006/10/21 01:19:37 from Raman @ Koders.com
# Accepts a URI and table spec; returns a csv file

use strict;
use FileHandle;
use HTML::TableExtract;
use Getopt::Long;
use WWW::Mechanize;

my ($url, $file, $task, $depth, $count, $cols);
my %options = (task => \$task, url => \$url, file => \$file,
               depth => \$depth, count => \$count, headers => \$cols);
GetOptions(\%options, 'file=s', 'url=s', 'task=s', 'depth=i', 'count=i', 'headers=s');

# Get the data from the web. Typically this is
# http://www.sailwx.info/shiptrack/cruiseships.phtml -- either pass it in
# as --url when invoking, or fall back to the defaults below.
# HTML::TableExtract treats each header as a regular expression, so the
# literal "(UTC)" suffix (regex metacharacters) is dropped: the pattern
# "last reported" still matches the "last reported (UTC)" column.
$cols = 'Ship,last reported,position,Callsign' unless defined $cols;
$url  = 'http://www.sailwx.info/shiptrack/cruiseships.phtml' unless defined $url;

# Open the output CSV once, through a single lexical filehandle.
my $output = FileHandle->new('>C:\Program Files\cron\Cruise Ships\ship_data.csv')
    or die "Cannot open output file: $!";

# Fetch the page; autocheck makes Mechanize die on any HTTP error.
my $m = WWW::Mechanize->new(autocheck => 1);
$m->get($url);
my $input = $m->content;

# Build the extractor: match by column headers if given, else by depth/count.
my $te;
if (defined $cols) {
    my @headers = split(',', $cols);
    $te = HTML::TableExtract->new(headers => \@headers);
} else {
    $te = HTML::TableExtract->new(depth => $depth, count => $count);
}

# $input holds the HTML content itself, not a file name, so use parse()
# rather than parse_file().
$te->parse($input);

# Write each matched row as a comma-separated line; empty cells come back
# as undef, so map them to '' to avoid warnings.
foreach my $ts ($te->tables) {
    foreach my $row ($ts->rows) {
        $output->print(join(',', map { defined $_ ? $_ : '' } @$row), "\n");
    }
}
$output->close;
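# Example invocation (a sketch; the script name "extract-table.pl" is taken
# from the header comment above and may differ in your checkout):
#
#   perl extract-table.pl --url "http://www.sailwx.info/shiptrack/cruiseships.phtml" \
#       --headers "Ship,last reported,position,Callsign"
#
# If cell values can themselves contain commas, the naive join(',') above
# produces a malformed CSV. A minimal alternative for the row-writing loop,
# assuming the Text::CSV module (not used above) is installed:
#
#   use Text::CSV;
#   my $csv = Text::CSV->new({ binary => 1, eol => "\n" });
#   foreach my $ts ($te->tables) {
#       $csv->print($output, $_) for $ts->rows;   # quotes fields as needed
#   }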