#!/usr/bin/perl
use warnings;
use strict;

use LWP::Simple;
use HTML::TableExtract;
use Text::CSV;

# Download one HTML page, extract every table found in it and print each
# table row to STDOUT as a CSV record.
#
# Each table row is expected to supply the columns listed in @cols.  Two of
# those columns hold several newline-separated values and are split further:
#   name  -> name, street, postal, town
#   phone -> phone, fax
# The CSV record is emitted in the column order given by @fields.

my $url = 'http://192.68.214.70/km/asps/schulsuche.asp?q=a&a=20';

# LWP::Simple::get returns undef on failure; stop here rather than feeding
# undef into the parser below.
my $html = get($url);
die "Could not fetch $url\n" unless defined $html;

$html =~ tr/\r//d;        # strip carriage returns

# Expand non-breaking-space entities into plain spaces before parsing, so
# that the \s-based trimming below removes them.
# NOTE(review): in the collapsed one-line file this pattern read as a plain
# space replaced by a plain space (a no-op); `&nbsp;` is presumably what was
# intended -- confirm against the page markup.
$html =~ s/&nbsp;/ /g;

my $te = HTML::TableExtract->new();
$te->parse($html);

# Columns as they appear in a table row.
my @cols = qw( rownum number name phone type website );

# Columns of the CSV output, including the derived ones.
my @fields = qw( rownum number name street postal town phone fax type website );

my $csv = Text::CSV->new({ binary => 1 })
    or die 'Cannot create CSV writer: ' . Text::CSV->error_diag() . "\n";

for my $ts ($te->table_states) {
    for my $row ($ts->rows) {

        # Trim leading/trailing whitespace from the base fields.  Cells may
        # be undef (empty cell or short row), so skip those.
        for my $cell (@$row) {
            next unless defined $cell;
            $cell =~ s/^\s+//;
            $cell =~ s/\s+$//;
        }

        # Load the base fields into the hash using a hash slice.
        my %h;
        @h{@cols} = @$row;

        # Derive the multi-part fields, again using hash slices.  An
        # undefined base field yields no parts, leaving the derived fields
        # undef (written as empty CSV fields).
        @h{qw/name street postal town/}
            = split /\n+/, defined $h{name} ? $h{name} : '';
        @h{qw/phone fax/}
            = split /\n+/, defined $h{phone} ? $h{phone} : '';

        # Trim leading/trailing whitespace from every derived field; the
        # parts can carry indentation from the original cell text.
        for my $value (@h{qw/name street postal town phone fax/}) {
            next unless defined $value;
            $value =~ s/^\s+//;
            $value =~ s/\s+$//;
        }

        # combine() returns false when a field cannot be encoded; report the
        # offending input and carry on with the next row.
        unless ($csv->combine(@h{@fields})) {
            my $bad = $csv->error_input();
            warn 'Cannot build CSV record from: '
                . (defined $bad ? $bad : '(unknown input)') . "\n";
            next;
        }

        print $csv->string, "\n";
    }
}