perl ./download.pl --url 'http://www.gocomics.com/9chickweedlane/2019/04/17' --tags div --tags class="comic container js-comic-"
####
# Search the parsed tree with fixed criteria: tag name "div" and a "class"
# attribute matching the regex below.
@results = $tree->look_down( _tag => "div", "class" => qr(comic container js-comic-) ); # HARDCODED, should be dynamic
####
# PROCESSING THE TAG LIST....
# Turn the user-supplied tag list into look_down() criteria.
# Expected shape: first entry is a tag name ("div"), the optional second
# entry is an attribute test written as key=value ("class=comic container").
# NOTE(review): assumes the list is stored as an array ref in
# $self->{tags_list_process_order} -- confirm against the constructor.
my( $first_tag, $second_tag ) = @{ $self->{tags_list_process_order} || [] };
my @criteria = ( _tag => $first_tag );
if( defined $second_tag && $second_tag =~ /=/ )
{ # Split apart the key-value pair
    # Limit of 2 keeps any further "=" characters inside the value.
    my( $attr_name, $attr_value ) = split /=/, $second_tag, 2;
    # Drop one pair of surrounding quotes if the shell did not already.
    $attr_value =~ s/\A(["'])(.*)\1\z/$2/s;
    # \Q..\E: match the value as literal text, not as a user-written regex.
    push @criteria, $attr_name => qr/\Q$attr_value\E/;
}
# The attribute criterion is only passed when a key=value pair was found.
@results = $tree->look_down( @criteria );
####
### MAIN PROGRAM
# Entry point: parse the command line, build a DownloadObject and run it.
#
# Options:
#   --url  STRING   page to download (required)
#   --tags STRING   may be repeated; values are collected in order and
#                   handed to DownloadObject->new as an array ref
#
# Dies if the options cannot be parsed or --url is missing.
# Returns whatever DownloadObject::download returns.
sub main
{
    use File::Spec;
    use Getopt::Long;
    my $url;
    # Must start out empty: "my @tags = undef" would seed the list with one
    # undef element, and GetOptions appends the real values after it.
    my @tags;
    GetOptions(
        "tags=s" => \@tags,
        "url=s"  => \$url,
    )
        # GetOptions has already reported the details on STDERR; $! is not
        # set by option parsing, so it is not part of the message.
        or die("Error in command line arguments.\n");
    defined $url
        or die("Usage: $0 --url URL [--tags TAG] [--tags ATTR=VALUE]\n");
    my $dlobj = DownloadObject->new( $url, \@tags );
    return $dlobj->download();
}
####
### DOWNLOAD OBJECT CLASS
#!/usr/bin/perl -w
use warnings;
use strict;
package DownloadObject;
# Simple Constructor
#
# Arguments (positional):
#   $url   URL to target
#   $tags  optional array ref: list of html-tags, in sequential order, to
#          process in order to extract the target content
#
# Returns the new blessed object with the fields {url} and
# {tags_list_process_order} (always an array ref, possibly empty).
sub new
{
    my( $proto, $url, $tags ) = @_;
    my $class = ref($proto) || $proto;    # allow Class->new and $obj->new

    # Store a private copy so later changes to the caller's array do not
    # leak into the object; undef entries are dropped, and a single
    # non-array value is treated as a one-element list.
    my @tag_list = ref($tags) eq 'ARRAY' ? @{ $tags }
                 : defined $tags         ? ( $tags )
                 :                         ();
    @tag_list = grep { defined $_ } @tag_list;

    my $self = {
        url                     => $url,
        tags_list_process_order => \@tag_list,
    };
    return bless( $self, $class );
}
# Private helper: translate $self->{tags_list_process_order} into the
# argument list for look_down().
#   - an entry without "=" is used as the tag name (_tag => entry)
#   - an entry "name=value" becomes an attribute test; the value is matched
#     as literal text anywhere inside the attribute
# look_down() requires ALL criteria to match, so normally only one bare tag
# name should be supplied.
# With no usable entries the previous hard-coded search is returned.
sub _look_down_criteria
{
    my $self = shift;
    my @tags = grep { defined($_) && length($_) }
               @{ $self->{tags_list_process_order} || [] };

    # Backward-compatible default: the search this method always used.
    return ( _tag => "div", "class" => qr(comic container js-comic-) ) if !@tags;

    my @criteria;
    foreach my $tag (@tags)
    {
        if( $tag =~ /=/ )
        {
            # Limit of 2 keeps any further "=" characters inside the value.
            my( $attr_name, $attr_value ) = split /=/, $tag, 2;
            next if !length $attr_name;
            # Drop one pair of surrounding quotes if the shell kept them.
            $attr_value =~ s/\A(["'])(.*)\1\z/$2/s;
            # \Q..\E: command-line text is matched literally, not as a regex.
            push @criteria, $attr_name => qr/\Q$attr_value\E/;
        }
        else
        {
            push @criteria, _tag => $tag;
        }
    }
    return @criteria;
}

# Download $self->{url}, parse it with HTML::TreeBuilder and print the
# 'data-image' attribute of every element selected by the object's tag list
# (see _look_down_criteria).
#
# Returns the list of data-image values that were printed; returns an empty
# list (after a warning) when no URL is set or the request fails.
sub download
{
    my $self = shift;
    use feature 'say';                # "say" is not enabled by default
    use HTML::TreeBuilder 5 -weak;    # -weak: tree is freed without ->delete
    require LWP::UserAgent;

    my $url = $self->{url};
    if( !defined $url )
    {
        warn "No URL to download.\n";
        return;
    }

    my $ua = LWP::UserAgent->new;
    $ua->agent('Mozilla/5.0');
    $ua->timeout(10);
    $ua->env_proxy;
    my $response = $ua->get( $url ); # Download the content
    if( !$response->is_success )
    {
        # Report the failure instead of silently doing nothing.
        warn "Download of '$url' failed: " . $response->status_line . "\n";
        return;
    }

    # Prefer the charset-decoded text; fall back to the raw bytes when
    # decoding is not possible (decoded_content returns undef then).
    my $html = $response->decoded_content;
    $html = $response->content() if !defined $html;
    my $tree = HTML::TreeBuilder->new_from_content( $html ); # Put the contents into HTML-Treebuilder

    my @results = $tree->look_down( $self->_look_down_criteria() );
    my @image_urls;
    foreach my $element (@results)
    {
        my $image_url = $element->attr('data-image');
        next if !defined $image_url;    # element has no data-image attribute
        say " Data-Image URL: " . $image_url;
        # Gonna do something with result......
        push @image_urls, $image_url;
    }
    return @image_urls;
}