Can you explain how the usage of regex leads to running out of memory (in my situation)?

#!/usr/bin/perl
use strict;
use warnings;

use IO::Handle;
use WWW::Mechanize;

# Walks a three-level rubric catalogue, follows every firm link found on each
# (paginated) third-level listing page, extracts the firm's contact details
# with regexes and appends one tab-separated record per firm entry to
# "2gis_<city_id>.txt".

my $city_id            = 9;
my $main_catalogue_url = "http://somesite.ru/catalog.aspx?&cityId=$city_id";
my $city               = "CityName";
my $output_file        = "2gis_$city_id.txt";

# Link and page patterns, compiled once. None of them interpolates a
# variable, so the /o modifier would have no effect and is not used.
my $RUBRIC_LINK_RE = qr/catalog\.aspx\?rubricId=\d+/i;
my $FIRM_LINK_RE   = qr/catalog\.aspx\?firmId=\d+/i;
my $NEXT_PAGE_RE   = qr{<img src="images/but_redo\.gif" border="0">};

# Per the WWW::Mechanize documentation, the agent keeps a history of fetched
# pages unless stack_depth is lowered, and a depth of 0 keeps no history.
# This script never calls back(), so history is pure memory overhead when
# thousands of pages are fetched in one run.
my $mech = WWW::Mechanize->new( stack_depth => 0 );

# Open the output once instead of once per record. Autoflush keeps the
# original property that every record reaches the file as soon as it is
# printed, even if the run is interrupted later.
open my $out, '>>', $output_file
    or die "Cannot open $output_file for appending: $!";
$out->autoflush(1);

# Normalise one output field: undefined or empty values become "", leading
# and trailing whitespace is removed, and inner whitespace runs collapse to a
# single space (which also keeps tabs/newlines out of the TSV record).
sub clean_field {
    my ($value) = @_;
    return '' unless defined $value && length $value;
    $value =~ s/^\s+//;
    $value =~ s/\s+$//;
    $value =~ s/\s+/ /g;
    return $value;
}

$mech->get($main_catalogue_url);

# First-level rubric links. Each link list is captured before any further
# get(), because find_all_links() only sees the most recently fetched page.
my @L1_list = $mech->find_all_links( url_regex => $RUBRIC_LINK_RE );

for my $L1_link (@L1_list) {
    my $L1_rubrik = $L1_link->text();    # first-level rubric name
    $mech->get( $L1_link->url() );

    # Second-level rubric links.
    my @L2_list = $mech->find_all_links( url_regex => $RUBRIC_LINK_RE );

    for my $L2_link (@L2_list) {
        my $L2_rubrik = $L2_link->text();    # second-level rubric name
        $mech->get( $L2_link->url() );

        # Third-level rubric links.
        my @L3_list = $mech->find_all_links( url_regex => $RUBRIC_LINK_RE );

        for my $L3_link (@L3_list) {
            print "=" x 20, "\n";
            my $L3_rubrik = $L3_link->text();    # third-level rubric name

            # Listing pages are requested with offset 0, 1, 2, ... for as
            # long as the fetched page shows the "redo" button image.
            my $offset        = 0;
            my $has_next_page = 1;

            while ($has_next_page) {
                my $listing
                    = $mech->get( $L3_link->url() . "&offset=" . $offset++ );
                my @firms
                    = $mech->find_all_links( url_regex => $FIRM_LINK_RE );

                for my $firm_link (@firms) {
                    my $res  = $mech->get( $firm_link->url() );
                    my $html = $res->content;

                    my ($name) = $html =~ m{<h1>([^<]+)</h1>}s;

                    # Every firm entry is three consecutive <p>...</p>
                    # paragraphs following an empty <p></p>.
                    while ( $html
                        =~ m{<p></p>(\s+<p>.+?</p>\s+<p>.+?</p>\s+<p>.+?</p>)}sg )
                    {
                        my $firm = $1;

                        my ($address)
                            = $firm =~ m{class="address">([^<]+)</a>}s;

                        # The "?" after map.aspx is escaped so it matches the
                        # literal query-string separator instead of making
                        # the preceding "x" optional.
                        my ($phone) = $firm
                            =~ m{<p>\s+<a href='map\.aspx\?[^>]+>[^<]+</a>\s+</p>\s+<p>(.+?)</p>}s;
                        if ($phone) {
                            $phone =~ s/\r\n/ /g;
                            $phone =~ s/<br>/; /g;
                        }

                        my ($www) = $firm
                            =~ m{<a href="http://[^"]+" target="_blank">([^<]+)</a>}s;
                        my ($email)
                            = $firm =~ m{<a href="mailto:[^"]+">([^<]+)</a>}s;

                        my @fields = map { clean_field($_) } (
                            $L1_rubrik, $L2_rubrik, $L3_rubrik,
                            $city,      $name,      $address,
                            $phone,     $www,       $email,
                        );

                        print {$out} join( "\t", @fields ), "\n"
                            or die "Cannot write to $output_file: $!";
                    }
                }

                # $listing still holds the listing page's response even
                # though the agent has since moved on to the firm pages.
                $has_next_page = $listing->content =~ $NEXT_PAGE_RE;
            }
        }
    }
}

close $out or die "Cannot close $output_file: $!";
In reply to Re^4: Out of Memory problem
by Gangabass
in thread Out of Memory problem
by Gangabass
| For: | Use: |
|------|------|
| & | `&amp;` |
| < | `&lt;` |
| > | `&gt;` |
| [ | `&#91;` |
| ] | `&#93;` |