in reply to Re: How can I use Clusterize
in thread How can I use Clusterize

Yes, I would appreciate your help. Actually, I'm trying to get news articles from different sources and cluster them according to their content (similar to Google News). This is my code for getting the contents of the news articles:
use strict;
use warnings;

use WWW::Newsgrabber;
use LWP::Simple;
use HTML::ContentExtractor;
use LWP::UserAgent;

# Collect the main text of news articles from several sources.
#
# For every source, WWW::Newsgrabber returns a hash of article URLs. Each URL
# that is not rejected by the skip pattern is downloaded, its main content is
# extracted with HTML::ContentExtractor, the trailing comments section is cut
# off, and the remaining text is stored in %hash_biznes keyed by URL.

# Alternation of URL fragments that identify non-article pages (archives,
# index pages, images, comment anchors, ...). The forum line-wrap marker
# (" +") that had crept into the middle of the pattern has been removed.
our $kot_tekst = 'HASH|albania.htmlcomments|lajmet|free_web_stats|index.html$|ne.html$|arkivi.html$|IMG|html#c';

# Compile once instead of re-interpolating the pattern for every URL.
my $skip_url_re = qr/$kot_tekst/;

my $extractor = HTML::ContentExtractor->new();
my $agent     = LWP::UserAgent->new;

# Not used in this part of the script; kept for code that may follow.
our $dirname = "C:\\Users\\Administrator\\Desktop\\corpus";
my $j = 1;

# One grabber per news source. The URLs are restored from their line-wrapped
# form ("biz +nes" -> "biznes", "# +/ekonomi" -> "#/ekonomi").
our @obj = (
    WWW::Newsgrabber->new(
        url   => 'http://www.shekulli.com.al/biznes/',
        regex => '\.html',
    ),
    WWW::Newsgrabber->new(
        url   => 'http://www.gazeta-shqip.com/#/ekonomi',
        regex => '\.html',
    ),
);

our $counter = 0;    # number of article URLs that passed the skip filter
our %hash_biznes;    # article URL => extracted article text

SOURCE:
for my $item (@obj) {
    my $news_ref = $item->getNews();

    # Nothing usable came back for this source; move on to the next one.
    next SOURCE unless $news_ref;

    ARTICLE:
    while ( my ( $url, $name ) = each %{$news_ref} ) {
        next ARTICLE if $url =~ $skip_url_re;
        $counter++;

        my $res = $agent->get($url);
        if ( !$res->is_success ) {
            # Do not feed an error page (or nothing at all) to the extractor.
            warn "Skipping $url: " . $res->status_line . "\n";
            next ARTICLE;
        }

        my $html = $res->decoded_content();
        next ARTICLE unless defined $html;

        $extractor->extract( $url, $html );
        my $text = $extractor->as_text();
        next ARTICLE unless defined $text;

        # Cut the text at the comments marker. The /g match in scalar context
        # sets pos() to the end of the match. Without a match pos() is undef,
        # so the text is only trimmed when the marker is actually present
        # (previously a missing marker chopped the last seven characters).
        # NOTE(review): the offsets are unchanged from the original code --
        # they drop the first character and keep text up to one character
        # into the marker word; confirm that this is the intended cut point.
        if ( $text =~ m/ KOMENTE/g ) {
            $text = substr( $text, 1, pos($text) - 7 );
        }

        $hash_biznes{$url} = $text;
    }
}
This works fine, but as I said, I now want to cluster the different articles to find the similar ones.