# Sid of the last entry that has already been processed; get_sids() stops
# collecting as soon as it meets this sid.
my $lproc = 621557;
my @sids  = get_sids($url, $pid, $lproc);

####

# get_sids($url, $pid, $lproc [, $max_pages])
#
# Walks the paginated listing at $url and collects the value of the "sid"
# query parameter from every link, page by page, until a link whose sid
# equals $lproc is found or get_next_page() reports no further page.
#
# Parameters:
#   $url       - base URL of the listing; "page" and "pid" are set as its
#                query parameters for each request
#   $pid       - value sent as the "pid" query parameter
#   $lproc     - sid at which collection stops (that sid is not returned)
#   $max_pages - optional; most pages fetched before giving up. Defaults to
#                7, the most the previous hard-coded guard allowed.
#
# Returns the list of sids in the order they were encountered.
#
# Dies if a page cannot be fetched, cannot be parsed, contains no anchors,
# or if another page is still pending once $max_pages pages were fetched.
sub get_sids {
    my ($url, $pid, $lproc, $max_pages) = @_;
    $max_pages = 7 unless defined $max_pages;

    my $uri     = URI->new($url);
    my $page    = 1;
    my $fetched = 0;
    my @sids;

    PAGE:
    while ($page) {

        # Safety net against an endless pagination loop hammering the
        # server. Checked before the request, so it only fires when there
        # really is another page to fetch.
        die qq{giving up after $fetched pages without reaching sid $lproc\n}
            if $fetched >= $max_pages;

        # build the uri for this page
        $uri->query_form(page => $page, pid => $pid);
        my $uri_string = $uri->as_string;

        # get the content, check for success ($! is not set by the fetch,
        # so report the URL instead)
        my $content = get($uri_string);
        die qq{LWP get failed for $uri_string\n}
            unless defined $content && length $content;
        $fetched++;

        # build the tree
        my $tree = HTML::TreeBuilder->new_from_content($content)
            or die qq{new from content failed for $uri_string\n};

        # get a list of all anchor tags
        my @anchors = $tree->look_down(_tag => q{a});
        if (!@anchors) {
            $tree->delete;
            die qq{no anchors found in $uri_string\n};
        }

        # look at each anchor
        my $reached_last = 0;

        ANCHOR:
        for my $anchor (@anchors) {
            my $href = $anchor->attr(q{href})
                or next ANCHOR;

            # Some URI scheme classes may not implement query_form; skip
            # such links instead of dying on a missing method.
            my $link = URI->new($href);
            next ANCHOR unless $link->can(q{query_form});

            # test for a sid in the query part of the link
            my %query = $link->query_form;
            my $sid   = $query{sid}
                or next ANCHOR;

            # stop everything once the last processed sid shows up
            if ($sid == $lproc) {
                $reached_last = 1;
                last ANCHOR;
            }

            # otherwise save it
            push @sids, $sid;
        }

        # see if there is another page, unless we are already done
        # NOTE(review): assumes get_next_page() returns a plain value (it is
        # used as the "page" query parameter), not a node of $tree - confirm.
        $page = $reached_last ? 0 : get_next_page($tree);

        # Release the parsed tree explicitly so one tree per page does not
        # pile up on HTML::Tree versions that keep strong parent links.
        $tree->delete;
    }

    # send 'em back
    return @sids;
}