in reply to Re: How can I use Clusterize
in thread How can I use Clusterize
This works fine, but as I said, I want now to cluster different articles, to find the similar ones.

use strict;
use warnings;

use WWW::Newsgrabber;
use LWP::Simple;
use HTML::ContentExtractor;
use LWP::UserAgent;

# Collect the extracted article text of every news link found on the
# configured start pages.  Result: %hash_biznes maps article URL => text.

# Alternation of URL fragments; any link whose URL matches it is skipped.
# The pattern text is used as a regex, so '.' matches any character and
# '$' anchors at the end of the URL.
my $kot_tekst = 'HASH|albania.htmlcomments|lajmet|free_web_stats|index.html$|ne.html$|arkivi.html$|IMG|html#c';
my $skip_url  = qr/$kot_tekst/;    # compiled once instead of on every URL

my $extractor = HTML::ContentExtractor->new();
my $agent     = LWP::UserAgent->new;

# Not used by the code below; kept because later code may rely on them.
my $dirname = "C:\\Users\\Administrator\\Desktop\\corpus";
my $j       = 1;

my $counter = 0;      # number of URLs that passed the skip filter
my %hash_biznes;      # article URL => extracted text

# One grabber per start page; each collects the links matching '\.html'.
my @obj = (
    WWW::Newsgrabber->new(
        url   => 'http://www.shekulli.com.al/biznes/',
        regex => '\.html',
    ),
    WWW::Newsgrabber->new(
        url   => 'http://www.gazeta-shqip.com/#/ekonomi',
        regex => '\.html',
    ),
);

SOURCE:
for my $source (@obj) {
    # getNews() is dereferenced as a hash keyed by URL; fall back to an
    # empty hash so a grabber that returns nothing does not abort the run.
    my $news = $source->getNews() || {};

    URL:
    for my $url ( keys %{$news} ) {
        next URL if $url =~ $skip_url;
        $counter++;

        my $res = $agent->get($url);
        if ( !$res->is_success ) {
            warn "Skipping $url: " . $res->status_line . "\n";
            next URL;
        }

        my $html = $res->decoded_content();
        if ( !defined $html ) {
            warn "Skipping $url: response body could not be decoded\n";
            next URL;
        }

        $extractor->extract( $url, $html );
        my $text = $extractor->as_text();
        $text = '' if !defined $text;

        # Cut the text at the " KOMENTE" marker.  The offset/length
        # arithmetic is the original's, unchanged.  When the marker is
        # absent the text is kept whole: pos() would be undef and the
        # original substr() then silently chopped the last 7 characters.
        if ( $text =~ m/ KOMENTE/g ) {
            $text = substr( $text, 1, pos($text) - 7 );
        }

        $hash_biznes{$url} = $text;
    }
}
|
|---|