#!/usr/bin/perl
use WWW::Mechanize;
use HTML::TokeParser;
use Switch;
# Fetch the listing page.  autocheck is switched on so that a failed
# request dies here instead of letting the parser run on an error page.
my $mech = WWW::Mechanize->new( autocheck => 1 );

# Listing to scrape.
my $url = "http://www.realestate.com.au/cgi-bin/rsearch?a=o&id=106023887";
$mech->get($url);

# Hand the page body to the token parser.  The body is read through the
# public content() accessor rather than the private hash slot, and kept
# in its own file-level variable because the parser is given a reference
# to that scalar.
my $html = $mech->content;
my $stream = HTML::TokeParser->new(\$html)
    or die "Unable to create an HTML parser for $url\n";

# Position on the first p tag; the paragraph loop below starts from it.
# get_tag returns undef when the document holds no p tag at all.
my $tag = $stream->get_tag("p");
# ----------------------------------------------------------------------
# Helpers for walking the token stream.  Each one takes the parser as an
# argument so it does not depend on any file-level variable.
# ----------------------------------------------------------------------

# Return attribute $name of a start tag in the form get_tag() yields
# (tag name first, attribute hash second).  Returns the empty string
# when the tag or the attribute is missing, so callers can compare the
# result without touching undef.
sub tag_attr {
    my ($start_tag, $name) = @_;
    return "" unless $start_tag && ref($start_tag->[1]) eq "HASH";
    return "" unless defined $start_tag->[1]{$name};
    return $start_tag->[1]{$name};
}

# Advance $stream to the next div whose attribute $name equals $value.
# Returns that tag, or nothing when the document ends first (the old
# loops kept asking for more div tags forever in that situation).
sub skip_to_div {
    my ($stream, $name, $value) = @_;
    while (my $div = $stream->get_tag("div")) {
        return $div if tag_attr($div, $name) eq $value;
    }
    return;
}

# Look ahead for the optional h3 with class highlighted (under contract,
# under offer and so on) WITHOUT losing our place when it is absent.
# Tokens are read one by one and remembered.  The search gives up at the
# first h3 of any other class, at the description div, or at the end of
# the document, and then pushes every remembered token back so the
# caller continues from where it was.
# Returns the trimmed status text, or nothing when there is no status.
sub read_optional_status {
    my ($stream) = @_;
    my @consumed;
    while (my $token = $stream->get_token) {
        push @consumed, $token;
        next unless $token->[0] eq "S";    # only start tags matter here
        my (undef, $name, $attr) = @{$token};
        if ($name eq "h3") {
            return $stream->get_trimmed_text("/h3")
                if tag_attr([ $name, $attr ], "class") eq "highlighted";
            last;
        }
        last if $name eq "div"
            && tag_attr([ $name, $attr ], "class") eq "description";
    }
    $stream->unget_token(@consumed);
    return;
}

# ----------------------------------------------------------------------
# Walk the p tags until the office fax paragraph (or the end of the
# document), collecting the listing details on the way.  Results are
# stored in package variables that the report code further down reads.
# ----------------------------------------------------------------------
while ($tag && tag_attr($tag, "class") ne "officeFax") {
    my $class = tag_attr($tag, "class");

    if ($class eq "propertyID") {
        $propid = $stream->get_trimmed_text("/p");

        # The address, price and headings follow the property id.
        $tag     = $stream->get_tag("h1");
        $address = $stream->get_trimmed_text("/h1");

        $tag = $stream->get_tag("strong");
        if (tag_attr($tag, "class") eq "price") {
            $price = $stream->get_trimmed_text("/strong");
        }

        $tag    = $stream->get_tag("h2");
        $header = $stream->get_trimmed_text("/h2");

        # Only accept the second h2 when it is the summary heading.
        $tag = $stream->get_tag("h2");
        if (tag_attr($tag, "class") eq "propertySummary") {
            $summary = $stream->get_trimmed_text("/h2");
        }

        # The status is not present on every listing.  Copying the parser
        # variable only copies the object reference, not the position, so
        # a true look-ahead is used instead.
        my $found_status = read_optional_status($stream);
        $status = $found_status if defined $found_status;

        # Auction details are deliberately not parsed at the moment.  The
        # disabled code read two consecutive span tags:
        #   class "price authority"  into $priceauth
        #   class "price auction"    into $auction
        # If it is ever re-enabled it needs the same look-ahead treatment
        # as the status above, since those spans are optional too.

        # Loop down to the description.
        $tag = skip_to_div($stream, "class", "description");
        $description = $stream->get_trimmed_text("/div") if $tag;

        # The agent name is the first paragraph of the contact details.
        $tag   = skip_to_div($stream, "id", "contactAgentDetails");
        $tag   = $stream->get_tag("p");
        $agent = $stream->get_trimmed_text("/p");
    }
    elsif ($class eq "officePhone") {
        $officephone = $stream->get_trimmed_text("/p");
    }

    # go to next p tag
    $tag = $stream->get_tag("p");
}

# The loop stops ON the office fax paragraph, so its text has to be read
# here.  Previously the fax branch sat inside the loop body and could
# never run, which left $officefax empty.
if ($tag) {
    $officefax = $stream->get_trimmed_text("/p");
}
# Loop down to the property summary: advance through div tags until the
# one with id propertySummary.  The loop also stops when get_tag runs out
# of document, instead of asking for more div tags forever.
while ($tag && ($tag->[1]{id} || "") ne "propertySummary") {
    $tag = $stream->get_tag("div");
}

# Summary labels (the dt text) mapped to the package variable that
# receives the matching dd text.  Labels not listed here are skipped.
my %slot_for = (
    "Category:"     => \$proptype,
    "Bedrooms:"     => \$bed,
    "Bathrooms:"    => \$bath,
    "Land:"         => \$land,
    "Carport:"      => \$carnumport,
    "Garage:"       => \$carnumgar,
    "Municipality:" => \$municipality,
);

# Get property summary details.  Read dt labels one after another until
# the Close to: label, which marks the end of the fields we care about,
# or until there are no more dt tags in the document.
$tag   = $stream->get_tag("dt");
$mycat = $stream->get_trimmed_text("/dt");
while ($tag && $mycat ne "Close to:") {
    my $slot = $slot_for{$mycat};
    if ($slot) {
        $tag = $stream->get_tag("dd");
        ${$slot} = $stream->get_trimmed_text("/dd");
    }
    $tag   = $stream->get_tag("dt");
    $mycat = $stream->get_trimmed_text("/dt");
}
# Report the scraped listing on standard output, one field per line.
# Every line ends with a space and a newline.  Fields that were not found
# on the page print as empty.
print "$propid \n";
print "$status \n";

# The page title used to be printed with no line ending, which glued it
# to the Address line that follows.  Terminate it like every other field.
print $mech->title, " \n";

print "Address: $address \n";
print "Price: $price \n";
print "Type: $proptype \n";
print "Bedrooms: $bed \n";
print "Bathrooms: $bath \n";

# Parking lines are only shown when the page supplied a value.
for my $parking ( [ "Carport", $carnumport ], [ "Garage", $carnumgar ] ) {
    my ($label, $count) = @{$parking};
    next unless defined($count) && length($count);
    print "$label: $count \n";
}

print "$header \n";
print "$summary \n";
print "$description \n";
print "Agent Details: \n";
print "$agent \n";
print "$officephone \n";
print "$officefax \n";