You could start with something like the code below. This is my first attempt at using HTML::Parser, so treat it as a starting point only.
# Fetch a PerlMonks node, extract its plain text with HTML::Parser and
# build a hash that maps each whitespace-separated word to the number of
# times it occurs in that text.
use strict;
use warnings;

use LWP::Simple;
use HTML::Parser;

# Get the question node. LWP::Simple::get returns undef on failure.
my $doc = get('http://perlmonks.org/?node_id=761525');
die "Couldn't get the document!" unless defined $doc;

# Parse it, skipping everything but the text.
my @lines;
my $parser = HTML::Parser->new(
    api_version => 3,

    # 'dtext' hands over the text with character entities decoded, so
    # "&amp;" is counted as "&" rather than as the literal entity.
    text_h => [ sub { push @lines, shift }, 'dtext' ],

    # An empty handler makes the parser ignore every other event.
    default_h => [ "" ],
);
$parser->parse($doc);

# Signal end of document so that any text still buffered inside the
# parser is flushed to the text handler.
$parser->eof;

# Create the keyword => counter hash.
my %count_of;
for my $line (@lines) {

    # split ' ' discards leading whitespace, so a segment that starts
    # with blanks does not contribute an empty-string keyword.
    ++$count_of{$_} for split ' ', $line;
}
In reply to Re: web content parser
by przemo
in thread web content parser
by stan131
| For: | Use: |
|------|------|
| & | `&amp;` |
| < | `&lt;` |
| > | `&gt;` |
| [ | `&#91;` |
| ] | `&#93;` |