#!/usr/bin/perl -w
use strict;
use WWW::Mechanize;
use HTML::TokeParser;

# Logs in through the first form on the start page, then scans the page
# returned by the login for HTML comments and for <a> links whose href
# contains "fnno", fetching every such link.

my $login_un  = "xxxxxxxx";
my $login_pwd = "yyyyyyyy";

my $agent = WWW::Mechanize->new();
$agent->get("http://somedomain.com");

# Select the first form on the page as the target for field()/click().
$agent->form_number(1);
$agent->field("login_un",  $login_un);
$agent->field("login_pwd", $login_pwd);
$agent->click();

searchHTML();

#my $stream = HTML::TokeParser->new("source.html")|| die "Can't open: $!";

# searchHTML
#
# Tokenizes the page currently held by the file-level $agent and, for each
# token:
#   * reports comment tokens whose text equals the marker string;
#   * fetches every <a> start tag whose href matches /fnno/.
#
# Takes no arguments and returns nothing useful; its effects are the
# printed messages and the requests issued through $agent.
#
# Declared without a prototype: the call above precedes this definition, so
# a prototype could not be applied to it and would only trigger a
# "called too early to check prototype" warning under -w.
sub searchHTML {

    # Parse a private copy of the page.  $agent->get() below replaces the
    # agent's stored content, and the tokenizer must keep reading the page
    # it started on rather than whatever was fetched most recently.
    my $html   = $agent->content;
    my $stream = HTML::TokeParser->new(\$html);

    while (my $token = $stream->get_token) {

        # Comment token?
        if ($token->[0] eq "C") {
            my $comment = $token->[1];

            #print ("\n\nFound a comment $comment\n\n" );

            # NOTE(review): the marker text compared against appears to have
            # been lost from this script; a comment token's text presumably
            # still carries its <!-- --> delimiters, in which case this can
            # never match an empty string -- confirm the intended marker.
            if ($comment eq "") {
                print("FOUND $comment");
            }
        }

        ### search the A tags
        # Strip the type code off the front so that the remaining elements
        # line up with the start-tag fields unpacked below.
        my $ttype = shift @{$token};
        if ($ttype eq "S") {    # start tag?
            my ($tag, $attr, $attrseq, $rawtxt) = @{$token};
            if ($tag eq "a") {
                my $a_href = $attr->{'href'};

                # Anchors without an href (e.g. <a name="...">) are skipped
                # so the match does not warn on an undefined value.
                if (defined $a_href && $a_href =~ m/fnno/) {

                    # this filters the correct links
                    print("link found: $a_href \n\n");
                    $agent->get($a_href);

                    #searchHTML();
                }
            }
        }
        ### end searching the A tags
    }

    print("All finished\n");
    return;
}    # close searchHTML sub

############# comments ####################
# 1 search through the html until the marker comment
# 2 find the next A href link
# 3 click the link it is associated with (this will expand a menu option)
# 4 reload the page showing the expanded menu option (wait while this happens)
# 5 if there are more links left then repeat steps 1-4
# 6 if there are no more closed nodes then print the html to a file and
#   exit the function
############## end comments ##################