# Poster's note: the site is developed in ASP, so the source contents are
# not exact HTML format. That is why fetching data from this site has been
# causing lots of problems.
#
# Scrapes last-minute charter offers from folkeferie.dk:
#   1. fetch each start page in @urls,
#   2. follow every link whose URL matches templates/textPage.aspx?id and
#      whose text matches "Afbudsrejser",
#   3. pull the offer rows out of each linked page with a regex and print
#      them as comma-separated lines.

use strict;
use warnings;

use WWW::Mechanize;

# Extract every offer row from one page of markup.
#
# Takes the page content as a string and returns a list of hash refs, one
# per matched row, with the keys: date, country, destination, trip_type,
# trip_length, price, url and crap_id (always the empty string, as in the
# original script).
sub parse_offers {
    my ($content) = @_;

    # The pattern is written with /x so it can be laid out one cell per
    # line. Under /x literal whitespace is ignored, so every place where the
    # markup contains a space between attributes is matched explicitly with
    # \s+ (the original combined /x with literal spaces and therefore could
    # never match).
    my $row_re = qr{
        tr \s+ class="bgrow1">
        <td>                                  (.*?) </td>   # $1 date
        <td \s+ class="countryValue">         (.*?) </td>   # $2 country
        <td \s+ class="destnameValue">        (.*?) </td>   # $3 destination
        <td \s+ class="hotelNameValue">       (.*?) </td>   # $4 trip type
        <td \s+ class="durationValue">        (.*?) </td>   # $5 trip length
        <td \s+ align="RIGHT" \s+ class="priceValue">
            <a \s+ target="_blank" \s+ href=" (.*?) ">      # $6 offer URL
                                              (.*?) </a>    # $7 price
        </td>
    }xis;

    my @offers;
    while ($content =~ /$row_re/g) {
        # Copy the captures straight away; $1..$7 are clobbered by the next
        # successful match.
        my ($date, $country, $destination, $trip_type, $trip_length,
            $offer_url, $price) = ($1, $2, $3, $4, $5, $6, $7);

        # A fresh hash per row, so rows never share or overwrite state.
        push @offers, {
            url         => $offer_url,
            crap_id     => '',
            date        => $date,
            country     => $country,
            destination => $destination,
            trip_type   => $trip_type,
            trip_length => $trip_length,
            price       => $price,
        };
    }

    return @offers;
}

my @urls = (
    'http://www.folkeferie.dk/da/ferier/Aktuelle-chartertilbud---afbudsrejser/',
);

# The original printed an $airport variable that was never declared or
# assigned. It is kept as an empty placeholder so the number of printed
# columns is unchanged.
my $airport = '';

# Column order of the printed line, identical to the original print.
my @columns = qw(date country destination trip_type trip_length price crap_id url);

# autocheck makes get() die on a failed request instead of continuing with
# stale or empty content.
my $mech = WWW::Mechanize->new( autocheck => 1 );

for my $start_url (@urls) {
    $mech->get($start_url);

    # In scalar context find_all_links returns an array reference.
    my $links = $mech->find_all_links(
        url_regex  => qr/templates\/textPage\.aspx\?id/i,
        text_regex => qr/Afbudsrejser/i,
    );

    for my $link ( @{$links} ) {
        # Use a separate variable: the original assigned to the outer loop
        # variable, which is an alias into @urls and overwrote its elements.
        my $offer_url = $link->url_abs();
        print "\n\n\n" . $offer_url . "\n\n";

        $mech->get($offer_url);
        my $content = $mech->content();

        # Raw page dump, kept from the original script.
        print $content;

        for my $offer ( parse_offers($content) ) {
            print join( ', ', @{$offer}{@columns}, $airport ), "\n\n";
        }
    }
}
In reply to Re: need help in scrapping asp site
by Anonymous Monk
in thread How to scraper ASP websites
by Anonymous Monk
| For: | Use: | ||
| & | &amp; | ||
| < | &lt; | ||
| > | &gt; | ||
| [ | &#91; | ||
| ] | &#93; | ||