# And for fetching the chunk of html you are interested in you may want
# something along the lines of...
package Filecontrol;

use strict;
use warnings;

# Load Exporter explicitly instead of relying on File::Find having pulled it
# in as a side effect; without it the @ISA entry below resolves to nothing.
use Exporter ();
use File::Find;

our @ISA       = qw(Exporter);
our @EXPORT_OK = qw(get_files get_files_graubold get_files_h4);

# get_files($directory)
#
# Recursively walks $directory and returns the list of paths
# ($File::Find::name) of every file accepted by is_html_type_file(), skipping
# files whose containing directory path ends in "/includes" or "\includes"
# (case-insensitive).
#
# Dies with "couldn't open directory: ..." when $directory cannot be opened.
sub get_files {
    my $directory = shift;

    # Fail early if the directory is not openable. A lexical handle that is
    # closed right away replaces the original bareword DIR handle, which was
    # opened but never read or closed.
    opendir( my $dir_handle, $directory )
        or die "couldn't open directory: $directory";
    closedir $dir_handle;

    my @myfiles;
    find(
        sub {
            # $_ is the current entry's basename; $File::Find::dir is the
            # directory that contains it.
            return if $File::Find::dir =~ m!(?:\\|/)includes$!i;
            return unless is_html_type_file($_);
            push @myfiles, $File::Find::name;
        },
        $directory
    );

    return @myfiles;
}

# is_html_type_file($file)
#
# Returns 1 when $file ends in .htm, .html, .shtm or .shtml (case-insensitive)
# and does not contain ".backup.before"; returns undef otherwise.
#
# Dies with "no file" when $file is undefined or the empty string. A name that
# is merely false in boolean context, such as "0", is a valid file name and is
# simply reported as not being an html file.
sub is_html_type_file {
    my $file = shift;
    die "no file" unless defined $file && length $file;

    my $is_html_type_file;
    if ( $file =~ /\.s?html?$/i && $file !~ /\.backup\.before/i ) {
        $is_html_type_file = 1;
    }
    return $is_html_type_file;
}

#.... and on and on.....
# In this case, it finds an image that should be embedded in an a tag. Then it
# gets the parent, confirms it's an a tag, and gets the target url.

# html_tree($self, $html)
#
# Parses the string $html with HTML::TreeBuilder->new_from_content and returns
# the resulting tree object.
#
# Dies with "no self" / "no html content" when the respective argument is
# undefined, and with "couldn't put html content into tree" when the parser
# returns a false value.
sub html_tree {
    defined( my $self = shift ) or die "no self";
    defined( my $html = shift ) or die "no html content";

    # Assign before returning: in "return EXPR or die ..." the low-precedence
    # "or" binds looser than return, so the die could never be reached.
    my $tree = HTML::TreeBuilder->new_from_content($html)
        or die "couldn't put html content into tree";
    return $tree;
}

#.......

#get the right bestellung link, which then redirects.
# Look the image up first and only then ask for its parent; calling ->parent()
# directly on the look_down() result dies when no such image exists.
if (
    my $button_image = $html_tree->look_down(
        "_tag" => "img",
        "src"  => "images/but_zur_detailseite.gif"
    )
    )
{
    my $bestellung_link = $button_image->parent();
    if (   $bestellung_link
        && $bestellung_link->tag eq "a"
        && ( my $target_url = $bestellung_link->attr("href") ) )
    {
        #do stuff...
    }
}
In reply to Re: Particular HTML contents to CSV or DB
by tphyahoo
in thread Particular HTML contents to CSV or DB
by nicpon
| For: | Use: |
| & | &amp;amp; |
| < | &amp;lt; |
| > | &amp;gt; |
| [ | &amp;#91; |
| ] | &amp;#93; |