Check out YAPE::HTML -- it is pure Perl (i.e., regexes).
#!/usr/bin/perl
use YAPE::HTML;
use Data::Dumper;
use warnings;
use strict;
# Demo: pull every <title> element out of a small HTML document with
# YAPE::HTML and show each one twice -- as a Data::Dumper structure and
# as the string form of its first text child.

# Sample document. The leading and trailing newlines are part of the data.
my $content = q{
<html>
<title>
yes a title
</title>
<body>
yes a body
</body>
</html>
};

my $parser = YAPE::HTML->new($content);

# extract() hands back a code ref; each call yields the next matching
# element. The empty array ref presumably means "no attributes required"
# -- confirm against the YAPE::HTML documentation.
my $next_title = $parser->extract(title => []);

# The input record separator doubles as the line terminator for output.
my $newline = $/;

# Loop until the iterator returns a false value.
while (my $element = $next_title->()) {
    print Dumper($element);

    # text() returns an array ref of child nodes (see the dump after
    # __END__); only the first child is printed.
    my $first_text = $element->text()->[0];
    print $newline, '>>>>', $first_text->string(), '<<<<', $newline x 5;
}
__END__
$VAR1 = bless( {
'TYPE' => 'tag',
'ATTR' => {},
'TAG' => 'title',
'TEXT' => [
bless( {
'TYPE' => 'text',
'TEXT' => '
yes a title
'
}, 'YAPE::HTML::text' )
],
'IMPLIED' => '',
'CLOSED' => 1
}, 'YAPE::HTML::tag' );
>>>>
yes a title
<<<<
MJD says you
can't just make shit up and expect the computer to know what you mean, retardo!
I run a Win32 PPM
repository for Perl 5.6.x and 5.8.x. I take requests.
** The Third rule of perl club is a statement of fact: pod is sexy.
|
|