There have been plenty of working regexes posted in this
thread, so I thought there should be at least one reply that
uses a parser. I'll assume that when you say you want the
description, you really want the value of the content
attribute from the description meta tag.
use strict;
use warnings;
use HTML::TokeParser::Simple;
# Extract the content attribute of the <meta name="description"> tag from
# the HTML in the __DATA__ section, using a tokenizing parser rather than
# a regex.
my $description;
my $parser = HTML::TokeParser::Simple->new(*DATA);

TOKEN:
while (my $token = $parser->get_token) {
    next TOKEN unless $token->is_start_tag('meta');

    my $attr = $token->return_attr;

    # Checking only that a name attribute exists would match whichever
    # named meta tag comes first (keywords, generator, ...). Require the
    # name to actually be "description"; compare case-insensitively so
    # that name="Description" is accepted as well.
    next TOKEN
        unless defined $attr->{name} && lc $attr->{name} eq 'description';

    $description = $attr->{content};
    last TOKEN;
}

# Report a missing tag (or a tag without a content attribute) explicitly
# instead of interpolating undef into the output.
if (defined $description) {
    print "TokeParser got '$description'\n";
}
else {
    warn "No description meta tag with a content attribute was found\n";
}
__DATA__
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<meta name="description" content="Hello HTML Parsing!" />
<meta name="keywords" content="make me the top hit!" />
<meta name="generator" content="Perl, baby. Perl." />
</head>
<body>
Hello World
</body>
</html>
jeffa
L-LL-L--L-LL-L--L-LL-L--
-R--R-RR-R--R-RR-R--R-RR
B--B--B--B--B--B--B--B--
H---H---H---H---H---H---
(the triplet paradiddle with high-hat)
|