##
#!/usr/bin/perl
use strict;
use warnings;
use Mojo::URL;
use Mojo::Util qw(dumper);
use Mojo::UserAgent;
use Data::Dump;
use Log::Log4perl;
use 5.016;
use Mojo::DOM;
# Log4perl configuration files; per the trailing notes below, 3.conf is the
# debug setup and 4.conf the info setup. Only one of them is initialised.
my $log_conf3 = "/home/hogan/Documents/hogan/logs/conf_files/3.conf";
my $log_conf4 = "/home/hogan/Documents/hogan/logs/conf_files/4.conf";

#Log::Log4perl::init($log_conf3); #debug
Log::Log4perl::init($log_conf4);    #info
my $logger = Log::Log4perl->get_logger();
$logger->info("$0");

# pretend to be a browser
my $uaname =
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/40.0.2214.93 Safari/537.36';
my $ua = Mojo::UserAgent->new;
$ua->max_redirects(5)->connect_timeout(20)->request_timeout(20);
$ua->transactor->name($uaname);

my $first_title = 'Virgin+River';
my $imdburl     = "http://www.imdb.com/search/title?title=$first_title";
say "imdburl is $imdburl";

# find search results
# result() dies on a connection error; with plain res() a failed request
# silently yields an empty DOM and the loop below logs nothing useful.
my $res = $ua->get($imdburl)->result;
die 'GET ' . $imdburl . ' failed: ' . $res->code . ' ' . $res->message . "\n"
  unless $res->is_success;
my $dom = $res->dom;

# Top-level child nodes of the document. This is what the array
# dereference overload (@$dom) returns, spelled out explicitly.
my @nodes = $dom->child_nodes->each;

# log every node together with its index
for my $i ( 0 .. $#nodes ) {
    $logger->info("i is $i ==============");
    $logger->info("$nodes[$i]");
}
sleep 2;    #good hygiene
__END__
####
2020/12/31 13:53:39 INFO i is 1 ==============
2020/12/31 13:53:39 INFO
2020/12/31 13:53:39 INFO i is 2 ==============
2020/12/31 13:53:39 INFO
####
##
$ mojo get https://www.imdb.com/ '*' attr id >1.txt
$ grep search 1.txt
navSearch-searchState
suggestion-search-container
nav-search-form
navbar-search-category-select
navbar-search-category-select-contents
suggestion-search
suggestion-search-button
imdbHeader-searchClose
imdbHeader-searchOpen
$
####
$ ./1.dom.pl
./1.dom.pl
123
Test
123
a
b
b
a
a:Test
b:123
Test
123
789
456
$ cat 1.dom.pl
#!/usr/bin/perl
use strict;
use warnings;
use Mojo::URL;
use Mojo::Util qw(dumper);
use Mojo::UserAgent;
use Data::Dump;
use Log::Log4perl;
use 5.016;
use Mojo::DOM;
# Log4perl configuration files; per the trailing notes below, 3.conf is the
# debug setup and 4.conf the info setup. Only one of them is initialised.
my $log_conf3 = "/home/hogan/Documents/hogan/logs/conf_files/3.conf";
my $log_conf4 = "/home/hogan/Documents/hogan/logs/conf_files/4.conf";

#Log::Log4perl::init($log_conf3); #debug
Log::Log4perl::init($log_conf4);    #info
my $logger = Log::Log4perl->get_logger();
$logger->info("$0");

## example from https://docs.mojolicious.org/Mojo/DOM
# The HTML tags had been stripped from the string literals in this listing;
# without them every selector below matches nothing and at('#b') returns
# undef. The markup is restored from the Mojo::DOM SYNOPSIS.

# Parse
my $dom = Mojo::DOM->new('<div><p id="a">Test</p><p id="b">123</p></div>');

# Find
say $dom->at('#b')->text;
say $dom->find('p')->map('text')->join("\n");
say $dom->find('[id]')->map( attr => 'id' )->join("\n");

# Iterate
$dom->find('p[id]')->reverse->each( sub { say $_->{id} } );

# Loop
for my $e ( $dom->find('p[id]')->each ) {
    say $e->{id}, ':', $e->text;
}

# Modify
$dom->find('div p')->last->append('<p id="c">456</p>');
$dom->at('#c')->prepend( $dom->new_tag( 'p', id => 'd', '789' ) );
$dom->find(':not(p)')->map('strip');

# Render
say "$dom";
__END__
$ ./4.dom.pl
./4.dom.pl
Test
bar
bar
foo
baz
=====
comment
doctype
pi
text
root
tag
text
$ cat 4.dom.pl
#!/usr/bin/perl
use strict;
use warnings;
use Mojo::URL;
use Mojo::Util qw(dumper);
use Mojo::UserAgent;
use Data::Dump;
use Log::Log4perl;
use 5.016;
use Mojo::DOM;
# Log4perl configuration files; per the trailing notes below, 3.conf is the
# debug setup and 4.conf the info setup. Only one of them is initialised.
my $log_conf3 = "/home/hogan/Documents/hogan/logs/conf_files/3.conf";
my $log_conf4 = "/home/hogan/Documents/hogan/logs/conf_files/4.conf";

#Log::Log4perl::init($log_conf3); #debug
Log::Log4perl::init($log_conf4);    #info
my $logger = Log::Log4perl->get_logger();
$logger->info("$0");

## examples from https://docs.mojolicious.org/Mojo/DOM
# The HTML tags had been stripped from the string literals in this listing
# (parse('') has no child nodes, so ->first->type dies on undef). The markup
# is restored from the corresponding Mojo::DOM documentation examples.

# previous(): sibling element before <h2>, i.e. "<h1>Test</h1>"
my $dom7 = Mojo::DOM->new();
my $str7 =
  $dom7->parse('<div><h1>Test</h1><h2>123</h2></div>')->at('h2')->previous;
$logger->info($str7);

# text(): "bar"
my $dom8 = Mojo::DOM->new();
my $str8 = $dom8->parse("<div>foo<p>bar</p>baz</div>")->at('p')->text;
say "$str8";
$logger->info($str8);

# text() only covers the element's own text nodes: "foo\nbaz\n"
my $dom9 = Mojo::DOM->new();
my $str9 = $dom9->parse("<div>foo\n<p>bar</p>baz\n</div>")->at('div')->text;
$logger->info($str9);
$logger->info('=====');

# type(): one parse per node type, reusing a single DOM object
# "comment"
my $dom1 = Mojo::DOM->new();
my $str1 = $dom1->parse('<!-- Test -->')->child_nodes->first->type;
$logger->info($str1);

# "doctype"
$str1 = $dom1->parse('<!DOCTYPE html>')->child_nodes->first->type;
$logger->info($str1);

# "pi"
$str1 = $dom1->parse('<?xml version="1.0"?>')->child_nodes->first->type;
$logger->info($str1);

# content of <title>; logged as "text" in the run recorded above
$str1 =
  $dom1->parse('<title>Test</title>')->at('title')->child_nodes->first->type;
$logger->info($str1);

# "root"
$str1 = $dom1->parse('<p>Test</p>')->type;
$logger->info($str1);

# "tag"
$str1 = $dom1->parse('<p>Test</p>')->at('p')->type;
$logger->info($str1);

# "text"
$str1 = $dom1->parse('<p>Test</p>')->at('p')->child_nodes->first->type;
$logger->info($str1);
__END__
$
####
$ ./2.dom.pl
./2.dom.pl
ads_tarnhelm ads_doWithAds ads_monitoring_setup ads_safeframe_setup ads_general_setup IMDbHomepageSiteReactViews imdbHeader nblogin imdbHeader-navDrawerOpen imdbHeader-navDrawerOpen--desktop imdbHeader-navDrawer nav-link-categories-mov nav-link-categories-tvshows nav-link-categories-video nav-link-categories-awards nav-link-categories-celebs nav-link-categories-comm home_img_holder home_img navSearch-searchState suggestion-search-container nav-search-form navbar-search-category-select navbar-search-category-select-contents suggestion-search suggestion-search-button imdbHeader-searchClose imdbHeader-searchOpen ipc-svg-gradient-tv-logo-t ipc-svg-gradient-tv-logo-v ipc-wrap-background-id inline20_wrapper placeholderPattern b a b a b a b a b a b a b a inline40_wrapper placeholderPattern from-your-watchlist fan-picks teconsent ftr__a ftr__c ftr__e ftr__g ftr__i ftr__k ftr__m ftr__o ftr__q ftr__s ftr__u ftr__w ftr__y ftr__A ftr__C ftr__E ftr__G ftr__b ftr__d ftr__f ftr__h ftr__j ftr__l ftr__n ftr__p ftr__r ftr__t ftr__v ftr__x ftr__z ftr__B ftr__D ftr__F ftr__H ipc-svg-gradient-tv-logo-t ipc-svg-gradient-tv-logo-v ipc-svg-gradient-tv-logo-t ipc-svg-gradient-tv-logo-v be
$ cat 2.dom.pl
#!/usr/bin/perl
use strict;
use warnings;
use Log::Log4perl;
use 5.016;
use Mojo::DOM;
use Mojo::UserAgent;
# Logging: two Log4perl configuration files are named (debug and info, per
# the notes on the init lines); only the info one is loaded.
my $log_conf3 = "/home/hogan/Documents/hogan/logs/conf_files/3.conf";
my $log_conf4 = "/home/hogan/Documents/hogan/logs/conf_files/4.conf";

#Log::Log4perl::init($log_conf3); #debug
Log::Log4perl::init($log_conf4);    #info
my $logger = Log::Log4perl->get_logger();
$logger->info($0);

# User agent that introduces itself to the server with a browser name.
my $uaname =
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/40.0.2214.93 Safari/537.36';
my $ua = Mojo::UserAgent->new;
$ua->max_redirects(5);
$ua->connect_timeout(20);
$ua->request_timeout(20);
$ua->transactor->name($uaname);

# The IMDb front page is the page that carries the search box.
my $imdburl = "http://www.imdb.com/";

# Fetch the page and take the DOM of the response
# (following https://docs.mojolicious.org/Mojo/DOM).
my $tx  = $ua->get($imdburl);
my $dom = $tx->res->dom;

# say "$dom"; works

# Collect the id attribute of every element that has one and log the
# whole list on a single line, separated by spaces.
my $with_id = $dom->find('[id]');
my @ids     = $with_id->map( attr => 'id' )->each;
$logger->info("@ids");
__END__
$
####
$ ./2.1.dom.pl
./2.1.dom.pl
navSearch-searchState suggestion-search-container nav-search-form navbar-search-category-select navbar-search-category-select-contents suggestion-search suggestion-search-button imdbHeader-searchClose imdbHeader-searchOpen
Can't locate object method "find" via package "Mojo::UserAgent" at ./2.1.dom.pl line 48.
$ cat 2.1.dom.pl
#!/usr/bin/perl
use strict;
use warnings;
use Log::Log4perl;
use 5.016;
use Mojo::DOM;
use Mojo::UserAgent;
use Mojo::URL;
use Mojo::Util qw(trim);
# Log4perl configuration files; per the trailing notes below, 3.conf is the
# debug setup and 4.conf the info setup. Only one of them is initialised.
my $log_conf3 = "/home/hogan/Documents/hogan/logs/conf_files/3.conf";
my $log_conf4 = "/home/hogan/Documents/hogan/logs/conf_files/4.conf";

#Log::Log4perl::init($log_conf3); #debug
Log::Log4perl::init($log_conf4);    #info
my $logger = Log::Log4perl->get_logger();
$logger->info("$0");

# represent $0 as browser to server
my $uaname =
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/40.0.2214.93 Safari/537.36';
my $ua = Mojo::UserAgent->new;
$ua->max_redirects(5)->connect_timeout(20)->request_timeout(20);
$ua->transactor->name($uaname);

## main page of imdb contains search box
my $imdburl = "http://www.imdb.com/";

## example from https://docs.mojolicious.org/Mojo/DOM
# result() dies on a connection error instead of handing back an empty DOM.
my $dom = $ua->get($imdburl)->result->dom;

# say "$dom"; works
#
# ids of all elements on the page, narrowed down to the search-related ones
my @ids = $dom->find('[id]')->map( attr => 'id' )->each;

#$logger->info("@ids");
my @matches = grep { /search/ } @ids;
$logger->info("@matches");

my $vid = 'Virgin River';

# find() is a Mojo::DOM method, not a Mojo::UserAgent one; calling it on $ua
# died with "Can't locate object method "find"". The lookup has to run on
# the DOM of the response to the search request.
# NOTE(review): whether POSTing this form field to the front page returns a
# result list is not shown here; confirm the search endpoint and field name.
my $search_dom =
  $ua->post( $imdburl => form => { 'suggestion-search' => $vid } )
  ->result->dom;

# assume first match; first() returns undef when nothing matched
my $first_hit = $search_dom->find('a[href^=/title]')->first
  or die "no /title link found in the search response for '$vid'\n";
my $filmurl = $first_hit->attr('href');

# extract film id (last path segment of the link)
my $filmid = Mojo::URL->new($filmurl)->path->parts->[-1];

# get details of film
$dom = $ua->get("https://www.imdb.com/title/$filmid/")->result->dom;

# print film details; at() returns undef when the selector matches nothing,
# so fail with a clear message instead of a method call on undef
my $title_node = $dom->at('div.title_wrapper > h1');
my $year_node  = $dom->at('#titleYear > a');
die "title/year markup not found on the page for '$filmid'\n"
  unless $title_node && $year_node;
say trim( $title_node->text ) . ' (' . trim( $year_node->text ) . ')';

# print actor/character names
for my $cast ( $dom->find('table.cast_list > tr:not(:first-child)')->each ) {
    my $actor     = $cast->at('td:nth-of-type(2) > a');
    my $character = $cast->at('td.character');

    # skip rows that do not carry both an actor link and a character cell
    next unless $actor && $character;
    say trim( $actor->text ) . ' as ' . trim( $character->all_text );
}
__END__
$