# Author's note (from the original post):
#   This isn't done, but maybe it'll help someone who is looking around for
#   this too :) thanks for all your help and suggestions..
#   -brad..

###
# Parse the URLs
#
# Fetches $URL, then feeds the page through HTML::Parser. Each piece of text
# is handed to text() together with the weight in effect at that moment, so
# text inside <title> counts double.
#
# NOTE(review): this snippet assumes LWP::UserAgent, HTTP::Request and
# HTML::Parser are loaded, and that $URL is set, in the part of the script
# not shown here -- confirm.

my $ua = LWP::UserAgent->new;
$ua->agent("AgentName/0.1 " . $ua->agent);

# A plain GET carries no body, so no Content-Type header is set on it.
my $req = HTTP::Request->new(GET => $URL);
my $res = $ua->request($req);

# Stop here on a failed fetch rather than parsing an unset $DATA.
die "Could not fetch $URL: " . $res->status_line . "\n"
    unless $res->is_success;
$DATA = $res->content;

$WEIGHT = 1;

HTML::Parser->new(
    api_version => 3,
    handlers    => [
        start => [ \&tag,     "self,tagname,attr" ],
        end   => [ \&tag_end, "self,tagname,attr" ],

        # The weight must be read when the text event fires, not when the
        # parser is built: a quoted literal in the argspec would freeze it
        # at its initial value, and <title> text would never be weighted.
        text  => [ sub { text($WEIGHT, @_) }, "dtext" ],
    ],
    marked_sections => 1,
)->parse($DATA)->eof;    # eof flushes any text still buffered by the parser

# .... (code elided in the original post)

# Parsing subroutines..

# Start-tag handler.
#   $self    - the parser object (unused)
#   $tagname - name of the tag that was opened
#   $attr    - hash reference of the tag's attributes
# Bumps the nesting count for the tag in %inside. For <meta> tags named
# "keywords" or "description" the content attribute is passed to text();
# for <title> the current weight is raised to 2.
sub tag {
    my ($self, $tagname, $attr) = @_;

    $inside{$tagname} += 1;

    if ($tagname eq "meta") {
        # Attribute values arrive as written in the page, so compare the
        # name case-insensitively ("Keywords" is common in the wild).
        my $name = defined $attr->{name} ? lc $attr->{name} : '';

        if (($name eq "keywords" || $name eq "description")
            && defined $attr->{content})
        {
            text($WEIGHT, $attr->{content});
        }
    }
    elsif ($tagname eq "title") {
        $WEIGHT = 2;
    }

    return;
}

# End-tag handler.
#   $self    - the parser object (unused)
#   $tagname - name of the tag that was closed
# Lowers the nesting count for the tag in %inside and restores the default
# weight of 1 once </title> is seen.
sub tag_end {
    my ($self, $tagname) = @_;

    # A stray closing tag must not push the nesting count below zero.
    $inside{$tagname} -= 1
        if defined $inside{$tagname} && $inside{$tagname} > 0;

    if ($tagname eq "title") {
        $WEIGHT = 1;
    }

    return;
}

# Text handler.
#   $weight         - weight that applies to this piece of text
#   $text_to_parse  - the text itself (entity-decoded when it comes from
#                     the parser's "dtext" argspec)
sub text {
    my ($weight, $text_to_parse) = @_;

    #do whatever we want to do..

    return;
}
In reply to RE: RE: Re: Getting Words out of HTML :)
by reyjrar
in thread Getting Words out of HTML :)
by reyjrar
| For: | Use: | ||
| & | &amp;amp; | ||
| < | &amp;lt; | ||
| > | &amp;gt; | ||
| [ | &amp;#91; | ||
| ] | &amp;#93; |