CUFP
karlgoethebier
<h4>The role</h4>
<h5>1.12</h5>
<p>Please note that this version contains some annoying <strike>errors</strike> mistakes. Use <a href="#1.17">1.17</a> instead. See the explanations from [marioroy] below in this thread.</p>
<c>
package MyRole;
# $Id: MyRole.pm,v 1.12 2017/06/17 14:00:17 karl Exp karl $
use Role::Tiny;
use threads;
use MCE::Loop;
use MCE::Shared;
use MCE::Mutex;
use WWW::Curl::Easy;
use Config::Tiny;
my $cfg = Config::Tiny->read(q(MyRole.cfg));
MCE::Loop::init {
max_workers => $cfg->{params}->{workers},
chunk_size => 1,
interval => $cfg->{params}->{interval},
};
my $fetch = sub {
my $curl = WWW::Curl::Easy->new;
my ( $header, $body );
$curl->setopt( CURLOPT_URL, shift );
$curl->setopt( CURLOPT_WRITEHEADER, \$header );
$curl->setopt( CURLOPT_WRITEDATA, \$body );
$curl->setopt( CURLOPT_FOLLOWLOCATION, $cfg->{params}->{followlocation} );
$curl->setopt( CURLOPT_TIMEOUT, $cfg->{params}->{timeout} );
$curl->perform;
{
header => $header,
body => $body,
info => $curl->getinfo(CURLINFO_HTTP_CODE),
error => $curl->errbuf,
};
};
sub uagent {
my $urls = $_[1];
my $shared = MCE::Shared->hash;
my $mutex = MCE::Mutex->new;
mce_loop {
MCE->yield;
$mutex->enter( $shared->set( $_ => $fetch->($_) ) );
}
$urls;
my $iter = $shared->iterator();
my $result;
while ( my ( $url, $data ) = $iter->() ) {
$result->{$url} = $data;
}
$result;
}
1;
__END__
</c>
<h5><a name="1.17">1.17</a></h5>
<c>
package MyRole;
# $Id: MyRole.pm,v 1.17 2017/06/18 08:45:19 karl Exp karl $
use Role::Tiny;
use threads;
use MCE::Loop;
use MCE::Shared;
use WWW::Curl::Easy;
use Config::Tiny;
my $cfg = Config::Tiny->read(q(MyRole.cfg));
MCE::Loop::init {
max_workers => $cfg->{params}->{workers},
chunk_size => 1,
interval => $cfg->{params}->{interval},
};
my $fetch = sub {
my $curl = WWW::Curl::Easy->new;
my ( $header, $body );
$curl->setopt( CURLOPT_URL, shift );
$curl->setopt( CURLOPT_WRITEHEADER, \$header );
$curl->setopt( CURLOPT_WRITEDATA, \$body );
$curl->setopt( CURLOPT_FOLLOWLOCATION, $cfg->{params}->{followlocation} );
$curl->setopt( CURLOPT_TIMEOUT, $cfg->{params}->{timeout} );
$curl->perform;
{
header => $header,
body => $body,
info => $curl->getinfo(CURLINFO_HTTP_CODE),
error => $curl->errbuf,
};
};
sub uagent {
my $urls = $_[1];
my $shared = MCE::Shared->hash;
mce_loop {
MCE->yield;
$shared->set( $_ => $fetch->($_) );
}
$urls;
$shared->export;
}
1;
__END__
</c>
<h4>The config file</h4>
<c>
# $Id: MyRole.cfg,v 1.4 2017/06/17 13:48:19 karl Exp karl $
[params]
timeout=10
followlocation=1
interval=0.005
workers=auto
</c>
<h4>The class</h4>
<c>
# $Id: MyClass.pm,v 1.5 2017/06/16 15:35:32 karl Exp karl $
package MyClass;
use Class::Tiny;
use Role::Tiny::With;
with qw(MyRole);
1;
__END__
</c>
<h4>The app</h4>
<c>
#!/usr/bin/env perl
# $Id: run.pl,v 1.14 2017/06/17 14:43:57 karl Exp karl $
use strict;
use warnings;
use MyClass;
use Data::Dump;
use HTML::Strip::Whitespace qw(html_strip_whitespace);
use feature qw(say);
my @urls = grep { $_ ne "" } <DATA>;
chomp @urls;
my $object = MyClass->new;
my $result = $object->uagent( \@urls );
# dd $result;
while ( my ( $url, $data ) = each %$result ) {
say qq($url);
say $data->{header};
# my $html;
# html_strip_whitespace(
# 'source' => \$data->{body},
# 'out' => \$html
# );
# say $html;
}
__DATA__
http://fantasy.xecu.net
http://perlmonks.org
http://stackoverflow.com
http://www.trumptowerny.com
http://www.maralagoclub.com
http://www.sundialservices.com
</c>
<p><b>Update: </b> Fixed mistakes. Thank you [marioroy].</p>
<p><b>Update2: </b> Deleted unused module.</p>
<p>Best regards, Karl</p>
<!-- Node text goes above. Div tags should contain sig only -->
<div class="pmsig"><div class="pmsig-1001958">
<p>«The Crux of the Biscuit is the Apostrophe»</p>
<p>Furthermore I consider that Donald Trump must be impeached as soon as possible</p>
</div></div>