#!/usr/bin/perl
#
# Fetch the Perth swell forecast table from oceanoutlook.com.au, pull the
# text out of its <td> cells, and print the forecast to STDOUT as a small
# XML document:
#
#   <doc class="simple">
#     <title>Swell Forecast</title>
#     <month>...</month>
#     <prediction date="...">
#       <title>...</title>
#       <item name="...">...</item>
#       <swell_height> <unit>meters</unit>  three <item>s </swell_height>
#       <peak_period>  <unit>seconds</unit> three <item>s </peak_period>
#       <wind>         <unit>knots</unit>   three <item>s </wind>
#     </prediction>
#     ...
#   </doc>
#
# The cell positions used below are hard-coded against the page layout, so
# any change to the page's table structure will require updating them.

use strict;
use warnings;

use LWP::Simple qw(get);
use HTML::Tree;
use XML::Writer;

my $url = "http://www.oceanoutlook.com.au/table/perth.html";

# get() returns undef on any failure; stop here rather than parse nothing.
my $source = get($url);
die "Unable to fetch $url\n" unless defined $source;

my $tree = HTML::Tree->new();
$tree->parse($source);
$tree->eof();    # required after parse() so the tree is finalised

my @tds = $tree->look_down( '_tag', 'td' );

my @headers;    # column names, one per cell of a forecast row
my @data;       # every data cell, flattened in document order
my $month;      # text of the first header cell, reported as <month>

my $last_cell = scalar @tds;
my $cell_no   = 0;              # 1-based position of the current <td>
for my $td (@tds) {
    $cell_no++;

    next if $cell_no < 5;             # The first 4 cells are not needed
    next if $cell_no == 16;           # Bad HTML... skip it
    next if $cell_no == $last_cell;   # The last cell is not needed

    if ( $cell_no <= 15 ) {
        # Cells 5..15 are the column headers.
        my $header = $td->as_text;
        if ( $cell_no == 5 ) {
            # The first header cell carries the month; the column itself
            # holds the date of each prediction.
            $month  = $header;
            $header = "Date";
        }
        $header = "Swell Direction" if $header eq "Dir";
        push @headers, $header;
    }
    else {
        # Everything after cell 16 is forecast data.
        push @data, $td->as_text;
    }
}

# All text has been extracted; release the parse tree.
$tree->delete;

# Output to XML (XML::Writer prints to STDOUT by default).
my $writer = XML::Writer->new();
$writer->startTag( "doc", class => "simple" );
$writer->dataElement( 'title', "Swell Forecast" );
$writer->dataElement( 'month', defined $month ? $month : '' );

# $count is the column index (into @headers) of the cell being written;
# -1 means "between rows".
#
# NOTE(review): the loop header and the start of its first branch were
# garbled in the original source (the text between '<' and '->' was lost).
# They are reconstructed here from the surviving body: $count is advanced
# once per cell and a new <prediction> is opened when it reaches column 0.
# Confirm against a known-good copy of the script if one exists.
my $count = -1;
for ( my $i = 0 ; $i < @data ; $i++ ) {
    $count++;

    if ( $count == 0 ) {
        # Column 0 is the date: it opens the row and supplies its title,
        # then we move straight on to the next cell.
        $writer->startTag( "prediction", date => $data[$i] );
        $writer->dataElement( 'title', $data[$i] );
        $i++;
        $count++;
        last if $i >= @data;    # truncated row: nothing left to emit
    }

    # Columns 2-4, 5-7 and 8-10 are wrapped in their own group element.
    if ( $count == 2 ) {
        $writer->startTag("swell_height");
        $writer->dataElement( 'unit', 'meters' );
    }
    if ( $count == 5 ) {
        $writer->startTag("peak_period");
        $writer->dataElement( 'unit', 'seconds' );
    }
    if ( $count == 8 ) {
        $writer->startTag("wind");
        $writer->dataElement( 'unit', 'knots' );
    }

    $writer->dataElement( 'item', $data[$i], name => $headers[$count] );

    $writer->endTag("swell_height") if $count == 4;
    $writer->endTag("peak_period")  if $count == 7;
    $writer->endTag("wind")         if $count == 10;

    if ( $count == scalar(@headers) - 1 ) {
        # Last column of the row: close <prediction> and start over.
        $writer->endTag();
        $count = -1;
    }
}

# Close <doc>, plus anything a truncated final row may have left open, so
# that end() does not die on an unbalanced document.
$writer->endTag() while $writer->within_element('doc');
$writer->end();

exit;