mcoblentz has asked for the wisdom of the Perl Monks concerning the following question:
#!/usr/bin/perl -w # based on: extract-table.pl,v 24.1 2006/10/21 01:19:37 from Raman @ K +oders.com # Accepts a URI and table spec; returns a csv file use strict; use FileHandle; use LWP::UserAgent; use HTML::TableExtract; #use IO::File; use Getopt::Long; use WWW::Mechanize; use vars qw (%options); my ($url, $file, $task, $depth, $count, $cols); my %options = (task => \$task, url => \$url, file => \$file, depth => \$depth, count => \$count, headers => \$cols); GetOptions (\%options, 'file=s', 'url=s', 'task=s', 'depth=i', 'count=i', 'headers=s'); # get the data from the web. Typically this is http://www.sailwx.info +/shiptrack/cruiseships.phtml # either pass this in as --url <page_url> when invoking or just set it +. $cols = "Ship,'last reported (UTC)',position,Callsign"; $url = "http://www.sailwx.info/shiptrack/cruiseships.phtml"; my $input; my $output = new OUTFILE ('>C:\Program Files\cron\Cruise Ships\ship_da +ta.csv'); open (OUTFILE, '>C:\Program Files\cron\Cruise Ships\ship_data.csv'); my $m = WWW::Mechanize->new(); $m->get($url); $input = $m->content; print (OUTFILE $input); my $te; if ( defined ($cols)) { my @headers = split(',', $cols); $te = new HTML::TableExtract(headers=>\@headers); } else { $te = new HTML::TableExtract( depth => $depth, count=>$count); } $te->parse_file($input); my ($ts,$row); foreach $ts ($te->table_states) { foreach $row ($ts->rows) { $output->print ( join(',', @$row), "\n"); } } close (OUTFILE); if (defined ($url)) { unlink ($input); }
|
|---|
| Replies are listed 'Best First'. | |
|---|---|
|
Re: File open problem with "GLOB"
by ikegami (Patriarch) on Mar 15, 2008 at 22:34 UTC | |
by mcoblentz (Scribe) on Mar 16, 2008 at 06:36 UTC | |
|
Re: File open problem with "GLOB"
by Narveson (Chaplain) on Mar 15, 2008 at 22:27 UTC | |
by ikegami (Patriarch) on Mar 15, 2008 at 22:36 UTC | |
by chromatic (Archbishop) on Mar 16, 2008 at 01:05 UTC | |
by ikegami (Patriarch) on Mar 16, 2008 at 01:43 UTC | |
by chromatic (Archbishop) on Mar 16, 2008 at 06:57 UTC | |
| |
by Narveson (Chaplain) on Mar 16, 2008 at 01:59 UTC | |
by Corion (Patriarch) on Mar 16, 2008 at 09:17 UTC | |
by kyle (Abbot) on Mar 16, 2008 at 19:37 UTC | |
by Narveson (Chaplain) on Mar 16, 2008 at 22:30 UTC | |
|
Re: File open problem with "GLOB"
by Cody Pendant (Prior) on Mar 16, 2008 at 04:32 UTC | |
by mcoblentz (Scribe) on Mar 16, 2008 at 05:45 UTC | |
|
Re: File open problem with "GLOB"
by ikegami (Patriarch) on Mar 16, 2008 at 04:38 UTC | |
by mcoblentz (Scribe) on Mar 16, 2008 at 06:33 UTC | |
by ikegami (Patriarch) on Mar 16, 2008 at 06:56 UTC |