in reply to Getting the Linking Text from a page
This should give you about 75% of what you need:
#!/usr/bin/perl
# Collect every <a href="..."> in an HTML document together with the text
# found between the opening and closing tag, then print "URL TEXT" pairs.
use strict;
use warnings;

use HTML::Parser;

# Only a start handler is installed up front. The text and end handlers are
# switched on by start() when a link opens and removed again by end().
my $parser = HTML::Parser->new(
    api_version => 3,
    start_h     => [ \&start, 'self,tagname,attr' ],
);

$parser->parse(<<'EOFOO');
<P><A HREF="www.url.com"><I>URL Name</I></A><FONT SIZE="+2">Blah Blah</FONT><A HREF="www.url.com/url/">Another Link</A></P>
EOFOO

# Signal end of document so any text still buffered by the parser is
# delivered to the handlers before the results are read.
$parser->eof;

# $parser->{urls} is only created once the first link has been closed, so
# fall back to an empty list when the document contained no links.
for my $link ( @{ $parser->{urls} || [] } ) {
    my ( $url, $text ) = @{$link};
    print "$url $text\n";
}

# start($self, $tag, $attr)
#
# Start-tag handler. $tag is the tag name and $attr a hash ref of the tag's
# attributes, as requested by the 'self,tagname,attr' argspec above.
# For an <a> tag carrying an href it remembers the URL on the parser object
# and installs temporary text/end handlers to gather the link text.
sub start {
    my ( $self, $tag, $attr ) = @_;

    return unless $tag eq 'a' && exists $attr->{href};

    $self->{_current_url} = $attr->{href};

    # Start from an empty string: a link without any text then yields ''
    # instead of undef, and text left over from an unclosed earlier <a>
    # does not leak into this link.
    $self->{_current_text} = '';

    # 'dtext' hands over the text with HTML entities already decoded.
    $self->handler(
        text => sub {
            my ( $p, $text ) = @_;
            $p->{_current_text} .= $text;
        },
        'self, dtext',
    );
    $self->handler( end => \&end, 'self, tagname' );

    return;
}

# end($self, $tag)
#
# End-tag handler, only active while inside a link. On </a> it appends
# [ url, text ] to $self->{urls}, clears the per-link state and removes
# the temporary text/end handlers again.
sub end {
    my ( $self, $tag ) = @_;

    return unless $tag eq 'a';

    # delete returns the removed value, so this records and clears the
    # per-link state in one step.
    push @{ $self->{urls} },
        [ delete $self->{_current_url}, delete $self->{_current_text} ];

    $self->handler( text => undef );
    $self->handler( end  => undef );

    return;
}

# Perl stops reading the program at __END__; what follows is plain text.
__END__
Hope that helps.
/J\
|
|---|