#!/usr/bin/perl
#
# This file is a collection of apparently independent snippets, separated by
# '####' lines (several carry their own shebang and pragmas). Each section is
# meant to be read, or copied out and run, on its own.
#
# Snippet: decode an HTML teaser and a raw UTF-8 title, then print both.
use strict;
use warnings;

# Tell perl to use the $LANG environment encoding for STDOUT/IN/ERR.
# Check 'man 3 open' for details; this is quite important.
use open ':locale';

use HTML::Entities;       # used to decode HTML &...; entities
use Encode qw(decode);    # used to decode utf8/iso bytes into perl's
                          # internal character representation

my $data = {
    'href'   => 'http://www.accountancyage.com/accountancyage/news/2159769/kpmg-sets-retail-think-tank',
    'teaser' => '

AccountancyAge.com, Accountancy Age, Thursday 6 July 2006 at 00:00:00

Firm forms partnership with retail research group

KPMG has launched the ‘Retail Think Tank’ (RTT) aimed at establishing ‘the true health and status\' of the retail sector. The Big Four firm has joined forces with retail research group...

Read the full article

',
    'title'  => "KPMG sets up retail \x{e2}\x{80}\x{98}think tank\x{e2}\x{80}\x{99}",
};

my $html = $data->{teaser};
decode_entities($html);            # because it's html, we need to do this first
$html = decode( 'utf8', $html );   # now 'parse' the utf8

my $title = $data->{title};          # 'raw' utf8; the \x{e2} sequences indicate this
$title = decode( 'utf8', $title );   # so just parse it

print "** $title:\n";
print "$html\n";

####
#!/usr/bin/perl
#
# Snippet: read the table rows of IDQ60606.shtml, group the 13-column rows
# under the most recent single-cell row (used as the region name), dump the
# result, and print the average of the 'rain' column per region.
use warnings;
use strict;

use HTML::TreeBuilder;
use Data::Dumper;
use List::Util qw(sum);

my $tree = HTML::TreeBuilder->new_from_file('IDQ60606.shtml');

# Column names, in the order the <td> cells appear in a data row.
# NOTE(review): 'gistknt' looks like a typo for 'gustknt'; it is left
# unchanged because it is a key of the emitted data structure.
my @cellnames = qw(
    station time temperature dewpoint relhumidity deltat wind_dir
    speedkmh gustkmh speedknt gistknt pressure rain
);

my $region;    # text of the most recent single-cell row
my %data;      # region name => array ref of row hashes keyed by @cellnames

for my $tr ( $tree->look_down( '_tag' => 'tr' ) ) {
    my @cells = $tr->look_down( '_tag' => 'td' );
    print scalar @cells, "\n";    # number of cells found in this row

    if ( @cells == 1 ) {
        $region = $cells[0]->as_trimmed_text;
    }
    elsif ( @cells == @cellnames ) {

        # A data row before any region heading has nothing to be filed
        # under; skip it rather than using undef as a hash key.
        if ( !defined $region ) {
            warn "skipping data row found before any region heading\n";
            next;
        }

        my %row;
        @row{@cellnames} = map { $_->as_trimmed_text } @cells;
        push @{ $data{$region} }, \%row;
    }
}

# Release the parse tree now that everything needed has been extracted.
$tree->delete;

print "$_\n" for sort keys %data;
print Dumper \%data;

# Average rain per region, in sorted order so the output is deterministic.
# Every entry in %data was created by a push above, so no region has an
# empty row list and the division is safe.
for my $name ( sort keys %data ) {
    my @rows = @{ $data{$name} };

    # A rain value of '-' counts as zero.
    my $raintotal = sum( 0, map { $_->{rain} eq '-' ? 0 : $_->{rain} } @rows );
    my $rainaverage = $raintotal / @rows;

    print "$name: $rainaverage\n";
}

####
#!/usr/bin/perl
#
# Snippet: print the trimmed text of every <td class="rowlevel1"> cell in
# IDQ60606.shtml, one per line.
use warnings;
use strict;

use HTML::TreeBuilder;

my $page = HTML::TreeBuilder->new_from_file('IDQ60606.shtml');

my @cells = $page->look_down(
    '_tag'  => 'td',
    'class' => 'rowlevel1',
);

print $_->as_trimmed_text, "\n" for @cells;

$page->delete;

####
# Sample output (presumably of the snippet above; the line breaks were lost
# in the source and are reconstructed here -- verify against a real run):
#
#   PENINSULA
#   GULF COUNTRY
#   NORTHERN GOLDFIELDS and UPPER FLINDERS
#   NORTH TROPICAL COAST and TABLELANDS
#   HERBERT and LOWER BURDEKIN
#   CENTRAL COAST - WHITSUNDAYS
#   CAPRICORNIA
#   CENTRAL HIGHLANDS - COALFIELDS
#   CENTRAL WEST
#   NORTHWEST
#   CHANNEL COUNTRY
#   MARANOA and WARREGO
#   DARLING DOWNS and GRANITE BELT
#   WIDE BAY and BURNETT
#   SOUTHEAST COAST
#   CORAL SEA

####
# All directories in our parent's path.
# Maps the last path component of each directory matched by ../* to the
# path returned by glob. The match runs in list context, so it yields the
# captured name; the parentheses make perl parse the braces as a block.
my %dirs = map  { ( m{^.*/(.*)}, $_ ) }
           grep { -d }
           glob "../*";

####
# De-crapper (for use after Word HTML idiocy)
# Reads all input (files named on the command line, or STDIN), strips
# markup, and prints the result.
my $file = join '', <>;

# FIXME: the search patterns of the four substitutions that are commented
# out below are empty in the source, i.e. they were lost. With an empty
# pattern perl reuses the last successfully matched pattern (or matches the
# empty string at every position), which would corrupt the input, so these
# are disabled until the intended patterns are restored. Literal newlines in
# the original replacement text are written here as \n.
#
# $file =~ s//\n  • /gms;
# $file =~ s//\n\n    /gms;

# Remove opening and closing tags in the o: namespace.
$file =~ s{</?o:.*?>}{}gms;

# $file =~ s///gms;
# $file =~ s//\n\n    /gms;

# NOTE(review): as it appears in the source this removes every newline that
# is followed by four spaces; the pattern may itself be damaged -- verify.
$file =~ s/\n    //gms;

# Remove opening and closing <span ...> tags.
$file =~ s{</?span.*?>}{}gms;

print $file;

####
# Password generator
# Builds an 8 to 11 character password in $pw from the characters below.
# NOTE(review): rand() is not cryptographically secure; do not use this for
# passwords that need to resist a determined attacker.
my @chars  = ( '.', '!', '#', '@', '$', '/', 0 .. 9, 'A' .. 'Z', 'a' .. 'z' );
my $length = 8 + int rand 4;
my $pw     = join '', map { $chars[ rand @chars ] } 1 .. $length;

####
# Java namestyle to SQL namestyle regex
# FIXME: the original expression is truncated in the source; all that
# survives is:   s/(?
# The substitution below is a reconstruction from the stated intent only
# (camelCase -> snake_case on $_: insert '_' before an upper-case letter that
# follows a lower-case letter or digit, and lower-case that letter). Confirm
# against the original before relying on it.
s/(?<=[a-z0-9])([A-Z])/_\l$1/g;

####
# Environment dumper
# Prints every environment variable as name=>value, sorted by name.
for my $name ( sort keys %ENV ) {
    printf "%s=>%s\n", $name, $ENV{$name};
}