#!/usr/bin/perl -w
use strict;
use HTML::TokeParser;
# Comment that marks the anchor following it as "do not report".
# HTML::TokeParser returns comment tokens as ["C", $text], where $text is the
# raw source text including the <!-- and --> delimiters, so the marker is
# compared in full.
# NOTE(review): the marker text is an assumption -- the original literal was
# empty (markup apparently stripped from the listing); confirm.
my $ignore_marker = '<!--ignore-->';

# Sample document: three links, the second one preceded by the ignore marker.
# NOTE(review): the markup was reconstructed. The hrefs of links 1 and 3 are
# taken from the sample output at the end of this file; the href of link 2 is
# presumed to follow the same pattern -- confirm.
my $sample_HTML =
    '<a href="http://www.foobar1.com/">link 1</a> ' .
    $ignore_marker .
    '<a href="http://www.foobar2.com/">link 2</a> ' .
    '<a href="http://www.foobar3.com/">link 3</a> ';

my $p = HTML::TokeParser->new( \$sample_HTML );
my $token;
my $link_count = 1;

# Walk the token stream, reporting the href of every <a> start tag except
# those that sit between an ignore marker and the next </a>.
TOKEN:
while ( $token = $p->get_token() ) {
    my ( $type, $value ) = @{$token};

    if ( $type eq 'S' && $value eq 'a' ) {
        # Start-tag tokens carry their attributes as a hash ref in slot 2.
        my $href = $token->[2]->{'href'};

        # A named anchor (<a name="...">) has no href and is not a link.
        next TOKEN unless defined $href;

        print "Found link $link_count: $href\n";
        $link_count++;
    }
    elsif ( $type eq 'C' && $value eq $ignore_marker ) {
        # Discard tokens up to and including the next </a>. Both conditions
        # must hold at once: stopping on *any* end tag, or on any token whose
        # second field happens to be 'a', would end the skip too early.
        while ( defined( $token = $p->get_token() ) ) {
            last if $token->[0] eq 'E' && $token->[1] eq 'a';
        }

        # The document ended inside an ignored region: nothing left to scan.
        last TOKEN unless defined $token;
    }
}
__END__
Sample output:
%perl ignore_some.pl
Found link 1: http://www.foobar1.com/
Found link 2: http://www.foobar3.com/