comment on

OK, I try it. Nothing changes :-(
Here is my new code:

#!/usr/bin/perl

use strict;
use warnings;
use WWW::Mechanize;

my $city_id = 9;
my $main_catalogue_url = "http://somesite.ru/catalog.aspx?&cityId=$cit
+y_id";
my $city = "CityName";
my $mech = WWW::Mechanize->new();
#$mech->stack_depth(0); #i think this is a problem
$mech->get( $main_catalogue_url );
#getting first level links
my (@L1_list) = $mech->find_all_links( url_regex => qr/catalog\.aspx\?
+rubricId=\d+/io ); #i add /o switch. why? i'm don't know :-)
foreach my $L1_link (@L1_list) {
    my $L1_rubrik = $L1_link->text(); #first level rubrik name
    $mech->get( $L1_link->url() );
    #getting second level links
    my (@L2_list) = $mech->find_all_links( url_regex => qr/catalog\.as
+px\?rubricId=\d+/io ); #/o switch again. is this right?
    foreach my $L2_link (@L2_list) {
    my $L2_rubrik = $L2_link->text(); #second level rubrik name
    $mech->get( $L2_link->url() );
    #getting third level links
    my (@L3_list) = $mech->find_all_links( url_regex => qr/catalog\.as
+px\?rubricId=\d+/io ); #yes, /o switch
    foreach my $L3_link (@L3_list) {
        print "=" x 20, "\n";
        my $L3_rubrik = $L3_link->text(); #third level rubrik name
        my $offset = 0;
        #getting rubriks results
        ORG_LIST:
        my $firms = $mech->get( $L3_link->url() . "&offset=" . $offset
+++ );
        my (@firms) = $mech->find_all_links( url_regex => qr/catalog\.
+aspx\?firmId=\d+/io );
        foreach my $FIRM_link (@firms) {
        my $res = $mech->get( $FIRM_link->url() );
        my ($name) = $res->content =~ m{<h1>([^<]+)</h1>}so;
        my $result = $res->content;
        while ($result =~ m{<p></p>(\s+<p>.+?</p>\s+<p>.+?</p>\s+<p>.+
+?</p>)}sgo) {
            my $firm = $1;
            my ($address) = $firm =~ m{class="address">([^<]+)</a>}so;
            my ($phone) = $firm =~ m{<p>\s+<a href='map\.aspx?[^>]+>[^
+<]+</a>\s+</p>\s+<p>(.+?)</p>}so;
            if ($phone) {
            $phone =~ s/\r\n/ /go;
            $phone =~ s/<br>/; /go;
            }
            my ($www) = $firm =~ m{<a href="http://[^"]+" target="_bla
+nk">([^<]+)</a>}so;
            my ($email) = $firm =~ m{<a href="mailto:[^"]+">([^<]+)</a
+>}so;
            foreach ($L1_rubrik, $L2_rubrik, $L3_rubrik, $city, $name,
+ $address, $phone, $www, $email) {
            if ($_) {
                s/^\s+//go;
                s/\s+$//go;
                s/\s+/ /go;
            }
            else {
                $_ = "";
            }
            }
            open TEST, ">>", "2gis_$city_id.txt" or die $!;
            print TEST join("\t", ($L1_rubrik, $L2_rubrik, $L3_rubrik,
+ $city, $name, $address, $phone, $www, $email)), "\n";
            close TEST;
        }
        }
        if ($firms->content =~ m{<img src="images/but_redo\.gif" borde
+r="0">}o) {
        goto ORG_LIST;
        }
    }
    }
}
[download]

Can you explain how usage of regex lead to out of memory (in my situation)?

In reply to Re^4: Out of Memory problem by Gangabass
in thread Out of Memory problem by Gangabass

Posts are HTML formatted. Put <p> </p> tags around your paragraphs. Put <code> </code> tags around your code and data!

Titles consisting of a single word are discouraged, and in most cases are disallowed outright.

Read Where should I post X? if you're not absolutely sure you're posting in the right place.

Please read these before you post! —

Posts may use any of the Perl Monks Approved HTML tags:

a, abbr, b, big, blockquote, br, caption, center, col, colgroup, dd, del, details, div, dl, dt, em, font, h1, h2, h3, h4, h5, h6, hr, i, ins, li, ol, p, pre, readmore, small, span, spoiler, strike, strong, sub, summary, sup, table, tbody, td, tfoot, th, thead, tr, tt, u, ul, wbr

You may need to use entities for some characters, as follows. (Exception: Within code tags, you can put the characters literally.)

	For:		Use:
	&		`&`
	<		`<`
	>		`>`
	[		`[`
	]		`]`

Link using PerlMonks shortcuts! What shortcuts can I use for linking?

See Writeup Formatting Tips and other pages linked from there for more info.