OK, I tried it. Nothing changed :-(
Here is my new code:
#!/usr/bin/perl
#
# Crawl a three-level rubric catalogue and append one tab-separated record
# per firm entry to "2gis_<city_id>.txt".
#
# Record columns:
#   level-1 rubric, level-2 rubric, level-3 rubric, city, firm name,
#   address, phone, www, email
#
# Memory note: WWW::Mechanize keeps every fetched page in its history stack
# by default. This crawl never calls back(), so the history is switched off
# (stack_depth => 0) to stop the process growing with every request.

use strict;
use warnings;

use IO::Handle;
use WWW::Mechanize;

my $city_id            = 9;
my $main_catalogue_url = "http://somesite.ru/catalog.aspx?&cityId=$city_id";
my $city               = "CityName";
my $output_file        = "2gis_$city_id.txt";

# All patterns are constant, so they are compiled once here. The /o flag was
# dropped: it only affects patterns that interpolate variables, and none of
# these do.
my $RUBRIC_URL_RE = qr/catalog\.aspx\?rubricId=\d+/i;
my $FIRM_URL_RE   = qr/catalog\.aspx\?firmId=\d+/i;
my $NEXT_PAGE_RE  = qr{<img src="images/but_redo\.gif" border="0">};
my $FIRM_BLOCK_RE = qr{<p></p>(\s+<p>.+?</p>\s+<p>.+?</p>\s+<p>.+?</p>)}s;

# Normalise one output field: undef/empty becomes "", leading and trailing
# whitespace is stripped and inner whitespace runs collapse to one space.
# A literal "0" is kept (a plain truthiness test would have blanked it).
sub clean_field {
    my ($value) = @_;

    return "" unless defined $value && length $value;

    $value =~ s/^\s+//;
    $value =~ s/\s+$//;
    $value =~ s/\s+/ /g;

    return $value;
}

# Extract the firm entries from the HTML of one firm page.
# Returns a list of array refs: [ name, address, phone, www, email ].
# Fields that are not found are left undef; the caller cleans them.
sub extract_firm_records {
    my ($html) = @_;

    my ($name) = $html =~ m{<h1>([^<]+)</h1>}s;

    my @records;
    while ( $html =~ /$FIRM_BLOCK_RE/g ) {
        my $block = $1;

        my ($address) = $block =~ m{class="address">([^<]+)</a>}s;
        my ($phone)   = $block
            =~ m{<p>\s+<a href='map\.aspx\?[^>]+>[^<]+</a>\s+</p>\s+<p>(.+?)</p>}s;
        my ($www) = $block
            =~ m{<a href="http://[^"]+" target="_blank">([^<]+)</a>}s;
        my ($email) = $block =~ m{<a href="mailto:[^"]+">([^<]+)</a>}s;

        if ( defined $phone ) {
            $phone =~ s/\r\n/ /g;    # phone numbers may span several lines
            $phone =~ s/<br>/; /g;   # ... or be separated by <br> tags
        }

        push @records, [ $name, $address, $phone, $www, $email ];
    }

    return @records;
}

# No page history is kept: see the memory note in the header.
my $mech = WWW::Mechanize->new( stack_depth => 0 );

# Open the output once; autoflush keeps the file as current as the previous
# open/print/close-per-record approach did if the crawl dies half-way.
open my $out, '>>', $output_file or die "Cannot open $output_file: $!";
$out->autoflush(1);

$mech->get($main_catalogue_url);

# url_abs() is used throughout so each link is resolved against the page it
# was found on, not against whatever page $mech happens to be on now.
for my $level1_link ( $mech->find_all_links( url_regex => $RUBRIC_URL_RE ) ) {
    my $level1_rubric = clean_field( $level1_link->text() );
    $mech->get( $level1_link->url_abs() );

    for my $level2_link ( $mech->find_all_links( url_regex => $RUBRIC_URL_RE ) ) {
        my $level2_rubric = clean_field( $level2_link->text() );
        $mech->get( $level2_link->url_abs() );

        for my $level3_link ( $mech->find_all_links( url_regex => $RUBRIC_URL_RE ) ) {
            print "=" x 20, "\n";
            my $level3_rubric = clean_field( $level3_link->text() );

            # Walk the result pages of this rubric until the "next" button
            # image no longer appears on the listing page.
            my $offset        = 0;
            my $has_next_page = 1;
            while ($has_next_page) {
                my $listing = $mech->get(
                    $level3_link->url_abs() . "&offset=" . $offset++ );
                my @firm_links
                    = $mech->find_all_links( url_regex => $FIRM_URL_RE );

                for my $firm_link (@firm_links) {
                    my $firm_page = $mech->get( $firm_link->url_abs() );

                    for my $record ( extract_firm_records( $firm_page->content ) ) {
                        print {$out} join(
                            "\t",
                            $level1_rubric, $level2_rubric, $level3_rubric,
                            clean_field($city),
                            map { clean_field($_) } @{$record}
                            ),
                            "\n";
                    }
                }

                # $listing is the saved response of the listing page, so
                # this test is unaffected by the firm pages fetched above.
                $has_next_page = $listing->content =~ $NEXT_PAGE_RE;
            }
        }
    }
}

close $out or die "Cannot close $output_file: $!";
Can you explain how the use of regexes could lead to running out of memory (in my situation)?

In reply to Re^4: Out of Memory problem by Gangabass
in thread Out of Memory problem by Gangabass

Title:
Use:  <p> text here (a paragraph) </p>
and:  <code> code here </code>
to format your post, it's "PerlMonks-approved HTML":



  • Posts are HTML formatted. Put <p> </p> tags around your paragraphs. Put <code> </code> tags around your code and data!
  • Titles consisting of a single word are discouraged, and in most cases are disallowed outright.
  • Read Where should I post X? if you're not absolutely sure you're posting in the right place.
  • Please read these before you post! —
  • Posts may use any of the Perl Monks Approved HTML tags:
    a, abbr, b, big, blockquote, br, caption, center, col, colgroup, dd, del, details, div, dl, dt, em, font, h1, h2, h3, h4, h5, h6, hr, i, ins, li, ol, p, pre, readmore, small, span, spoiler, strike, strong, sub, summary, sup, table, tbody, td, tfoot, th, thead, tr, tt, u, ul, wbr
  • You may need to use entities for some characters, as follows. (Exception: Within code tags, you can put the characters literally.)
            For:     Use:
    & &amp;
    < &lt;
    > &gt;
    [ &#91;
    ] &#93;
  • Link using PerlMonks shortcuts! What shortcuts can I use for linking?
  • See Writeup Formatting Tips and other pages linked from there for more info.