#!/usr/bin/env perl
# Fetch a fixed list of Yelp business pages and, for every element whose
# class attribute is 'review-content', print the title attribute of the
# first <i> element found inside it.
use strict;
use warnings;
use utf8;

use Data::Dumper;
use LWP::Simple qw(get);
use Text::Unidecode qw(unidecode);
use HTML::TreeBuilder 5 -weak;    # Ensure weak references in use

# Pages to process, as [ label, URL ] pairs.
my $review_pages = [
    [ 'Jorges #1', 'http://www.yelp.com/biz/jorges-mexicatessen-encinitas' ],
    [ 'Jorges #2', 'http://www.yelp.com/biz/jorges-mexicatessen-encinitas-2' ],
];

for my $page (@$review_pages) {
    my ( $label, $url ) = @$page;

    # LWP::Simple::get returns undef on any failure; stop here with a
    # message that names the URL instead of failing later on an empty tree.
    my $html = get($url);
    die "could not fetch $url\n" unless defined $html;

    # Transliterate every run of non-ASCII characters to plain ASCII.
    $html =~ s/([^[:ascii:]]+)/unidecode($1)/ge;

    my $tree = HTML::TreeBuilder->new;    # empty tree
    $tree->parse($html);
    $tree->eof;    # tell the parser the document is complete

    print "Review for $label\n";

    # List assignment in scalar context yields the match count, so the
    # 'or die' fires exactly when nothing matched. $! is not set by
    # look_down, so it is deliberately left out of the message.
    my @items = $tree->look_down( 'class', 'review-content' )
        or die "no items found for $label ($url)\n";

    for my $item (@items) {

        # In scalar context look_down returns the first matching element.
        my $rating = $item->look_down( '_tag', 'i' )
            or die "no rating element found for $label ($url)\n";

        my $rating_title = $rating->attr('title');
        unless ( defined $rating_title ) {
            warn "rating element without a title attribute on $label\n";
            next;
        }

        print "$rating_title\n";
    }
}