# Collect article links from two news-site section pages, download every
# link that is not filtered out by $kot_tekst, extract the main text of the
# page and store it in %hash_biznes keyed by the article URL.
use strict;
use warnings;

use WWW::Newsgrabber;
use LWP::Simple;
use HTML::ContentExtractor;
use LWP::UserAgent;
use Scalar::Util qw(reftype);

# Alternation of URL fragments; any link matching it is not downloaded.
# Kept as a package variable (it was one originally) so other code can
# still read it.
our $kot_tekst = 'HASH|albania.htmlcomments|lajmet|free_web_stats|index.html$|ne.html$|arkivi.html$|IMG|html#c';

# Compile the filter once instead of re-interpolating it for every URL.
my $skip_url_re = qr/$kot_tekst/;

my $extractor = HTML::ContentExtractor->new();
my $agent     = LWP::UserAgent->new;

# Not used in this section; retained in case other code relies on them.
our $dirname = "C:\\Users\\Administrator\\Desktop\\corpus";
my $j = 1;

# One grabber per section page; each one returns links matching '\.html'.
our @obj = (
    WWW::Newsgrabber->new(
        url   => 'http://www.shekulli.com.al/biznes/',
        regex => '\.html',
    ),
    WWW::Newsgrabber->new(
        url   => 'http://www.gazeta-shqip.com/#/ekonomi',
        regex => '\.html',
    ),
);

our $counter = 0;    # number of links that passed the filter
our %hash_biznes;    # article URL => extracted article text

# Cut the extracted text at the first " KOMENTE" marker.
#
# When the marker is present the slice is exactly the one the script has
# always taken: it starts at offset 1 and is (end-of-match - 7) characters
# long. NOTE(review): that window skips the first character and keeps the
# first two characters of the marker itself - confirm whether that is
# intended before changing it.
#
# When the marker is absent the text is returned unchanged. Previously
# pos() was undef in that case, so the length became -7 and the last seven
# characters were silently chopped off.
sub _text_before_comments {
    my ($text) = @_;

    return '' unless defined $text && length $text;

    if ( $text =~ / KOMENTE/ ) {
        # $+[0] is the offset just past the match, i.e. what pos() reported.
        return substr( $text, 1, $+[0] - 7 );
    }

    return $text;
}

SOURCE:
foreach my $grabber (@obj) {
    my $links = $grabber->getNews();

    # Do not dereference a failed or unexpected result.
    unless ( ref $links && reftype($links) eq 'HASH' ) {
        warn "getNews() did not return a hash reference; skipping source\n";
        next SOURCE;
    }

    URL:
    foreach my $url ( keys %{$links} ) {
        next URL if $url =~ $skip_url_re;

        $counter++;

        my $res = $agent->get($url);
        unless ( $res->is_success ) {
            # Do not store the text of an error page as article content.
            warn "GET $url failed: " . $res->status_line . "\n";
            next URL;
        }

        my $html = $res->decoded_content();
        unless ( defined $html ) {
            warn "Could not decode content of $url\n";
            next URL;
        }

        $extractor->extract( $url, $html );
        $hash_biznes{$url} = _text_before_comments( $extractor->as_text() );
    }
}