use warnings;
use strict;
use LWP::Simple;
use HTML::Parser;

# Fetch one PerlMonks node, extract the text portions of its HTML, and count
# how many times each whitespace-separated token occurs.

# Get the question node. LWP::Simple::get returns undef on failure, hence
# the defined() check rather than a truth test.
my $doc = get('http://perlmonks.org/?node_id=761525');
die "Couldn't get the document!" unless defined $doc;

# Parse it skipping all but the text. The text handler receives the raw
# source text of each text event (argspec 'text', so entities are passed
# through undecoded); the empty-string default handler makes the parser
# ignore every other event type.
my @lines;
my $parser = HTML::Parser->new(
    text_h    => [ sub { push @lines, shift }, 'text' ],
    default_h => [ "" ],
);
$parser->parse($doc);

# Signal end of document so any text still buffered by the parser is
# flushed to the text handler instead of being silently dropped.
$parser->eof;

# Create keyword => counter hash.
# split ' ' (awk-style) discards leading whitespace, so a chunk that starts
# with blanks does not produce an empty-string "keyword"; whitespace-only
# chunks yield no tokens at all.
my %hsh;
for my $chunk (@lines) {
    ++$hsh{$_} for split ' ', $chunk;
}