#!/usr/bin/perl
#
# Fetch one realestate.com.au listing page and print selected details
# (property ID, status, page title, address, price, type, room counts,
# headline, summary, description and agent/office contact lines) to STDOUT.
#
# Usage: perl this_script.pl [listing-url]
#   With no argument the built-in default listing URL is used.
#
# Parsing is positional: HTML::TokeParser is advanced through the page and
# text is read after specific landmark tags.  When a landmark is missing the
# script warns on STDERR and prints whatever it managed to collect, rather
# than looping forever at end of document.

use strict;
use warnings;

use WWW::Mechanize;
use HTML::TokeParser;

# Listing fetched when no URL is given on the command line.
my $DEFAULT_URL = "http://www.realestate.com.au/cgi-bin/rsearch?a=o&id=106023887";

# Maps a <dt> label in the property summary list to the %listing key that
# receives the text of the following <dd>.
my %FIELD_FOR = (
    "Category:"     => 'proptype',
    "Bedrooms:"     => 'bed',
    "Bathrooms:"    => 'bath',
    "Land:"         => 'land',
    "Carport:"      => 'carnumport',
    "Garage:"       => 'carnumgar',
    "Municipality:" => 'municipality',
);

my $url = @ARGV ? $ARGV[0] : $DEFAULT_URL;

# autocheck => 1 makes get() die on a failed request.
my $mech = WWW::Mechanize->new( autocheck => 1 );
$mech->get($url);

# Use the public accessor rather than poking at $mech->{content}.
my $html = $mech->content;
$html = '' unless defined $html;
my $stream = HTML::TokeParser->new( \$html )
    or die "Cannot create an HTML parser for $url\n";

# Every field starts as an empty string so that a missing section prints as
# an empty line instead of raising "uninitialized value" warnings.
my %listing = map { $_ => '' } qw(
    propid status address price header summary description agent
    officephone officefax
    proptype bed bath land carnumport carnumgar municipality
);

# Walk the <p> tags.  The officeFax paragraph is the last one of interest:
# its text is read first and only then is the loop left (the original left
# the loop before reading it, so the fax line was never captured).
P_TAG:
while ( my $tag = $stream->get_tag("p") ) {
    my $class = attr_of( $tag, 'class' );

    if ( $class eq "propertyID" ) {
        read_listing_header( $stream, \%listing );
    }
    elsif ( $class eq "officePhone" ) {
        $listing{officephone} = $stream->get_trimmed_text("/p");
    }
    elsif ( $class eq "officeFax" ) {
        $listing{officefax} = $stream->get_trimmed_text("/p");
        last P_TAG;
    }
}

# The property summary list follows in <div id="propertySummary">.
if ( seek_tag_with( $stream, "div", 'id', "propertySummary" ) ) {
    read_property_summary( $stream, \%listing );
}
else {
    warn "No <div id=\"propertySummary\"> found in $url\n";
}

my $title = $mech->title;
$title = '' unless defined $title;

print "$listing{propid} \n";
print "$listing{status} \n";
# The original printed the title with no line ending, gluing it to the
# "Address:" line; terminate it like every other line.
print $title, "\n";
print "Address: $listing{address} \n";
print "Price: $listing{price} \n";
print "Type: $listing{proptype} \n";
print "Bedrooms: $listing{bed} \n";
print "Bathrooms: $listing{bath} \n";
if ( length( $listing{carnumport} ) > 0 ) {
    print "Carport: $listing{carnumport} \n";
}
if ( length( $listing{carnumgar} ) > 0 ) {
    print "Garage: $listing{carnumgar} \n";
}
print "$listing{header} \n";
print "$listing{summary} \n";
print "$listing{description} \n";
print "Agent Details: \n";
print "$listing{agent} \n";
print "$listing{officephone} \n";
print "$listing{officefax} \n";
# Note: land and municipality are collected but, as before, not printed.

# Return attribute $name of a start tag returned by get_tag(), or '' when
# the tag is undef (end of document) or the attribute is absent.
sub attr_of {
    my ( $tag, $name ) = @_;
    return '' unless $tag && ref $tag->[1] eq 'HASH';
    my $value = $tag->[1]{$name};
    return defined $value ? $value : '';
}

# Advance $stream to the next <$name> start tag whose attribute $attr equals
# $value.  Returns the tag, or nothing once the document is exhausted.
sub seek_tag_with {
    my ( $stream, $name, $attr, $value ) = @_;
    while ( my $tag = $stream->get_tag($name) ) {
        return $tag if attr_of( $tag, $attr ) eq $value;
    }
    return;
}

# Look ahead for an optional <h3 class="highlighted"> (under contract/offer
# etc.) and return its text, or '' when it is absent.
#
# Only the first <h3> is considered, and scanning stops at
# <div class="description">.  When no highlighted <h3> is found, every token
# read here is pushed back so the caller's position is unchanged.  (The
# original tried to "replicate" the stream with a plain assignment, which
# only aliases the same parser and therefore consumed the real stream.)
# NOTE(review): assumes the status heading, when present, precedes the
# description block - confirm against a live page.
sub read_optional_status {
    my ($stream) = @_;
    my @consumed;

    while ( my $token = $stream->get_token ) {
        push @consumed, $token;
        next unless $token->[0] eq 'S';    # start tags only

        my $name  = $token->[1];
        my $class = $token->[2]{class};
        $class = '' unless defined $class;

        if ( $name eq "h3" ) {
            return $stream->get_trimmed_text("/h3")
                if $class eq "highlighted";
            last;
        }
        last if $name eq "div" && $class eq "description";
    }

    $stream->unget_token(@consumed);
    return '';
}

# Read the block that follows <p class="propertyID">: ID, address, price,
# headline, summary, optional status, description and agent name.  Results
# are stored in the hash referenced by $listing.
sub read_listing_header {
    my ( $stream, $listing ) = @_;

    $listing->{propid} = $stream->get_trimmed_text("/p");

    # The data of interest follows straight after the ID paragraph.
    $stream->get_tag("h1");
    $listing->{address} = $stream->get_trimmed_text("/h1");

    my $tag = $stream->get_tag("strong");
    if ( attr_of( $tag, 'class' ) eq "price" ) {
        $listing->{price} = $stream->get_trimmed_text("/strong");
    }

    $stream->get_tag("h2");
    $listing->{header} = $stream->get_trimmed_text("/h2");

    # Only accept the second <h2> when it really is the summary heading.
    $tag = $stream->get_tag("h2");
    if ( attr_of( $tag, 'class' ) eq "propertySummary" ) {
        $listing->{summary} = $stream->get_trimmed_text("/h2");
    }

    $listing->{status} = read_optional_status($stream);

    # Auction details were already disabled in the original (wrapped in a
    # POD block) and remain disabled:
    #   <span class="price authority"> -> price authority text
    #   <span class="price auction">   -> auction time text

    if ( seek_tag_with( $stream, "div", 'class', "description" ) ) {
        $listing->{description} = $stream->get_trimmed_text("/div");
    }
    else {
        warn "No <div class=\"description\"> found\n";
    }

    # The agent name is the first <p> inside the contact details block.
    if ( seek_tag_with( $stream, "div", 'id', "contactAgentDetails" ) ) {
        $stream->get_tag("p");
        $listing->{agent} = $stream->get_trimmed_text("/p");
    }
    else {
        warn "No <div id=\"contactAgentDetails\"> found\n";
    }

    return;
}

# Read <dt>label</dt><dd>value</dd> pairs until the "Close to:" label or the
# end of the document, storing the values for labels listed in %FIELD_FOR.
sub read_property_summary {
    my ( $stream, $listing ) = @_;

    while ( $stream->get_tag("dt") ) {
        my $label = $stream->get_trimmed_text("/dt");
        last if $label eq "Close to:";

        my $field = $FIELD_FOR{$label};
        next unless defined $field;

        $stream->get_tag("dd") or last;
        $listing->{$field} = $stream->get_trimmed_text("/dd");
    }

    return;
}