#!/usr/bin/perl
use strict;
use warnings;
use HTML::TokeParser::Simple;

# Print the href of every <a> start tag that appears while the most recently
# seen <div> start tag (ignoring class="content" divs) had class="full".
#
# The HTML is read from this file's __DATA__ section.  Note that only <div>
# *start* tags drive the state; closing </div> tags are not tracked.

# Slurp the whole __DATA__ section in one read ($/ is undef inside the block).
my $html = do { local $/; <DATA> };

my $p = HTML::TokeParser::Simple->new(\$html) or die "can't parse: $!";

my $in_full = 0;    # true while the last relevant <div> was class="full"
my @href;           # collected link targets, in document order

TOKEN:
while (my $t = $p->get_token) {
    if ($t->is_start_tag('div')) {
        # A <div> without a class attribute is treated as an empty class so
        # the string comparisons below never see undef.
        my $class = $t->get_attr('class');
        $class = '' unless defined $class;

        # "content" divs are transparent: they neither enter nor leave a
        # "full" section.
        next TOKEN if $class eq 'content';

        # Any other div either opens a "full" section or ends the current one.
        $in_full = $class eq 'full' ? 1 : 0;
        next TOKEN;
    }

    next TOKEN unless $in_full;
    next TOKEN unless $t->is_start_tag('a');

    # Skip anchors that carry no href (e.g. <a name="...">) instead of
    # recording undef and printing an empty line later.
    my $href = $t->get_attr('href');
    push @href, $href if defined $href;
}

print "$_\n" for @href;

__DATA__