#!/usr/bin/perl
#
# Word-frequency CGI script.
#
# Downloads the page at $url into $file, counts how often each word occurs in
# the saved file (the raw download, markup included), and prints every counted
# word with its count as an HTML page, most frequent word first.

use warnings;
use strict;
use CGI qw(:standard);
use LWP::Simple qw(!head);    # !head: CGI's :standard set exports its own head()
use HTML::TokeParser;         # loaded but not used by the code below
use diagnostics;

# Redirect diagnostics emitted from here on into a log file.
open(STDERR, '>>', "/home/sulfericacid/public_html/test/error.log")
    or die "Cannot open error log, weird...an error opening an error log: $!";

# url or file to scan
my $url  = "http://www.yahoo.com";
my $file = "test.txt";

# Words that are never counted. Besides common English words the list holds a
# few HTML tag/attribute names, because the raw markup is scanned as well.
my %is_ignored = map { $_ => 1 } qw(
    a and the this i me us our ok abc def my of in this that you if not is it
    td div align width
);

# count_words($fh)
#
# Reads the open filehandle $fh line by line, lower-cases each line and counts
# every word on it. A word is a run of letters, optionally followed by an
# apostrophe and more letters (so contractions such as "don't" stay whole).
# Ignored words are skipped as whole tokens, which keeps a contraction like
# "it's" intact instead of cutting the "it" out of it.
#
# Returns a reference to a hash mapping word => number of occurrences.
sub count_words {
    my ($fh) = @_;

    my %count_of;
    while (my $line = <$fh>) {
        $line = lc $line;

        # splitting words on anything that is not a letter, but allowing
        # contractions
        while ($line =~ /([[:alpha:]]+(?:'[[:alpha:]]+)?)/g) {
            my $word = $1;
            next if $is_ignored{$word};
            $count_of{$word}++;
        }
    }

    return \%count_of;
}

# print_report($count_of)
#
# Prints the CGI header and an HTML page listing each word of the hash
# reference $count_of with its count, sorted by descending count. Words with
# equal counts are listed alphabetically so the output order is stable.
sub print_report {
    my ($count_of) = @_;

    my @by_frequency =
        sort { $count_of->{$b} <=> $count_of->{$a} or $a cmp $b }
        keys %{$count_of};

    print header, start_html;

    # NOTE(review): the markup of the row output could not be recovered from
    # the file as received; a plain two-column table is assumed - confirm.
    # Words consist of letters and apostrophes only, so no escaping is needed.
    print "<table>\n";
    print "<tr><td>$_</td><td>$count_of->{$_}</td></tr>\n" for @by_frequency;
    print "</table>\n";
    print end_html;

    return;
}

# Fetch the page once, straight into $file, and stop if the download failed
# rather than counting words in a missing or stale file.
my $status = getstore($url, $file);
die "Cannot fetch $url: HTTP status $status" unless is_success($status);

open(my $in, '<', $file) or die "Error $!";
my $count_of = count_words($in);
close $in;

print_report($count_of);