May be u missed on the idea. nothing was written. I have attached, the complete source. please check and let me know.
#! /usr/bin/perl5 -w #use strict; use WWW::Mechanize; use WWW::Mechanize::Link; use LWP::Simple; use Thread; my $mech = WWW::Mechanize->new(autocheck => 0 , onerror =>undef ,cooki +e_jar => {}); my $item_mech = WWW::Mechanize->new(autocheck => 0, onerror =>undef , +cookie_jar => {}); my $download_mech = WWW::Mechanize->new(autocheck => 0, onerror =>unde +f , cookie_jar => {}); my $condFull = ""; my $QueryArea = "([A] = ZXWUNDOC)"; my $FH = "(Base = UNDOC)"; my $EH = "(Database = UNDOC)"; my $RH = "(???? ?????? = UNDOC)"; my $AH = "(????? ???????? = UNDOC)"; my $SH = "(Base de datos = UNDOC)"; my $CH = "(??? = UNDOC)"; my $fullText = ""; my $query = ""; my $QueryFullText = ""; my $fields = ""; my $response; my $r; my $number_of_files = 0; my $max_trails = 0; my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst)=localtime(tim +e); $year = $year + 1900; my $date_tag = $mday."_".$mon."_".$year."_".$hour."_".$min."_unDocs"; mkdir($date_tag, 0777); my $entry = "President"; do { $response = $mech->get('http://documents.un.org/welcome.asp?la +nguage=E'); }while(!$response->is_success); $mech->click('simple1'); do { $response = $mech->get('http://documents.un.org/simple.asp'); }while(!$response->is_success); mkdir($date_tag."\\".$entry, 0777); my $t = Thread->new( \&parser_thread, $entry); my $retval = $t->join(); ###################################################################### # # # ###################################################################### sub parser_thread { print $_[0]."\n"; $fullText = "\"".$_[0]."\""; if($fullText ne "") { $fullText = "\"" . $fullText . "\""; } if ($fullText ne "") { $FH = ($FH eq "") ? "(texte recherché = " . $fullText . ")" : +FH . " ET" . " (texte recherché = " . $fullText . ")"; $EH = ($EH eq "") ? "(Full Text search text = " . $fullText . +")" : EH . " AND" . " (Full Text search text = " . $fullText . ")"; $RH = ($RH eq "") ? "(????? ?????? = " . $fullText . ")" : RH +. " ?" . " (????? ?????? = " . $fullText . ")"; $AH = ($AH eq "") ? "(?? ????? = " . $fullText . ")" : AH . " +? (?? ????? = " . $fullText . ")"; $SH = ($SH eq "") ? "(Texto buscado = " . $fullText . ")" : SH + . " AND" . " (Texto buscado = " . $fullText . ")"; $CH = ($CH eq "") ? "(???? = " . $fullText . ")" : CH . " ?" . + " (???? = " . $fullText . ")"; } $condFull = ($fullText eq "") ? "" : "([PDF]=" . $fullText . ")"; $QueryFullText = $condFull; $query = $QueryArea . " AND " . $QueryFullText; $fields = { 'FullTextSearch' => "convention", 'lastevent' => "simple.search", 'dbsearch' => "global.mother", 'pagename' => "simple", 'query2' => $query, 'frenchHeader' => $FH, 'englishHeader' => $EH, 'russianHeader' => $RH, 'arabicHeader' => $AH, 'spanishHeader' => $SH, 'chineseHeader' => $CH, 'Plang' => 'E' }; do { $r = $mech->submit_form(form_name => "DMKsform", fields => $fields); }while(!$r->is_success); $fields = { 'lastevent' => "results.display", 'pagename' => "results", 'target' => "results.asp", }; $max_trails = 0; do { $r = $mech->submit_form(form_name => "ResultsForm", fields => $fields); $max_trails++; }while((!$r->is_success) and ($max_trails < 10)); &decode_text(); print "Have come out\n"; } ###################################################################### # # # ###################################################################### sub decode_text { my $p = HTML::TokeParser->new(\$mech->content); my $item_dir = ""; do { $p = HTML::TokeParser->new(\$mech->content); while (my $tag_0 = $p->get_tag('td')) { if($tag_0->[1]{width} and $tag_0->[1]{width} eq "90") { $number_of_files++; my $tr_1 = Thread->new( \&item_page, $number_of_files) +; my $tr_ret = $tr_1->join(); #&item_page($number_of_files); } } $fields = { 'lastevent' => "results.next", 'pagename' => "results", 'target' => "results.asp", }; if($mech->content =~ /next/ ) { $max_trails = 0; do { $r = $mech->submit_form(form_name => "ResultsForm" +, fields => $fields); $max_trails++; print $max_trails."\n"; }while((!$r->is_success) and ($max_trails < 10)); $max_trails = 0; do { $response = $mech->get('http://documents.un.org/re +sults.asp'); $max_trails++; print $max_trails."\n"; }while((!$response->is_success) and ($max_trails < 10) +); $max_trails = 0; print $response->header_field_names()."\n"; } else { print "exit condition\n"; $r = -1; } }while($r != -1); #print DEBUG "\n\nnumber_of_files : $number_of_files\n\n"; } ###################################################################### # # # ###################################################################### sub item_page { my $check = shift; my $r_1; $item_mech = $mech->clone(); my $fields_1 = { 'lastevent' => "results.view", 'doc2view' => $check }; #print $item_mech->current_form()."\n"; #print $item_mech->form_name('ResultsForm')."\n"; while($item_mech->current_form() != $item_mech->form_name('Results +Form')) { sleep(5); $item_mech->reload(); print "form found\n"; } $max_trails = 0; do { $r_1 = $item_mech->submit_form(form_name => "ResultsForm", fields => $fields_1); $max_trails++; }while((!$r_1->is_success) and ($max_trails < 10 )); $max_trails = 0; do { $response = $item_mech->get('http://documents.un.org/parent_do +cument.asp'); $max_trails++; }while((!$response->is_success) and ($max_trails < 10 )); &decode_itempage(); } ###################################################################### # # # ###################################################################### sub decode_itempage { #print $item_mech->content(format => 'text'); my $tag = ""; my $item_link; my $title =""; my $symbol =""; my $pub_date =""; my $item_dir = ""; my $temp_dir = ""; my $lang_name =""; my $tag_1 = ""; my $eng = ""; my $fre = ""; my $rus = ""; my $spa = ""; my $arb = ""; my $chi = ""; my $leading_zero = sprintf( "%08d", $number_of_files); my $page = HTML::TokeParser->new(\$item_mech->content); while($page->get_tag('b')) { $tag = $page->get_trimmed_text('/font'); if($tag =~ m/Title/) { $item_link = $page->get_tag('font'); $title = $page->get_trimmed_text('/font'); $item_dir = substr($title,0,10); if(!$item_dir) { $item_dir = "No_Title"; } else { $item_dir =~ s/ /_/g; } $temp_dir = $date_tag."/".$entry."/".$leading_zero."_".$it +em_dir; mkdir($temp_dir, 0777); } if($tag =~ m/Symbol/) { $page->get_tag('font'); $symbol = $page->get_trimmed_text('/font'); } if($tag =~ m/Publication/) { $page->get_tag('font'); $pub_date = $page->get_trimmed_text('/font'); } } $page = HTML::TokeParser->new(\$item_mech->content); while($page->get_tag('b')) { $tag = $page->get_trimmed_text('/b'); if($tag =~ m/PDF/) { while($tag_1 = $page->get_tag('a')) { $lang_name = $page->get_trimmed_text('/a'); if($tag_1->[1]{href} =~ m/pdf/) { if($lang_name =~ m/English/) { $eng = "Eng Present"; my $dw1 = Thread->new( \&download_file,$tag_1- +>[1]{href},$temp_dir); $dw1->detach(); #&download_file($tag_1->[1]{href},$temp_dir); } # if($lang_name =~ m/French/) # { # #print $tag_1->[1]{href}."\n"; # $fre = "French Present"; # my $dw2 = Thread->new( \&download_file,$tag_ +1->[1]{href},$temp_dir); # $dw2->detach(); # } # if($lang_name =~ m/Russian/) # { # #print $tag_1->[1]{href}."\n"; # $rus = "Russian Present"; # my $dw3 = Thread->new( \&download_file,$tag_ +1->[1]{href},$temp_dir); # $dw3->detach(); # } # if($lang_name =~ m/Spanish/) # { # $spa = "Spanish Present"; # my $dw4 = Thread->new( \&download_file,$tag_ +1->[1]{href},$temp_dir); # $dw4->detach(); # } # if($lang_name =~ m/Arabic/) # { # $arb = "Arabic Present"; # my $dw5 = Thread->new( \&download_file,$tag_ +1->[1]{href},$temp_dir); # $dw5->detach(); # } # if($lang_name =~ m/Chinese/) # { # my $dw6 = Thread->new( \&download_file,$tag_ +1->[1]{href},$temp_dir); # $dw6->detach(); + # $chi = "Chinese Present"; # } } } } } if(open(TEXT,">>".$temp_dir."/".$leading_zero."_".$item_dir.".txt" +)) { print TEXT $leading_zero." ".$title."\n"; print TEXT $symbol."\n"; print TEXT $pub_date."\n"; print TEXT $eng."\n"; print TEXT $fre."\n"; print TEXT $rus."\n"; print TEXT $spa."\n"; print TEXT $arb."\n"; print TEXT $chi."\n"; close(TEXT); } } ###################################################################### # # # Subroutine : to download the final pdf files # Parameters : Link to the file and the path for the file to be saved # Note : this will be a detached thread. # # ###################################################################### sub download_file { my $link = shift; $item_mech->add_header( Accept => "text/html" ); $item_mech->add_header( Referer => $link ); # my $fields_1 = { # "lastevent" => "mother.display", # "pagename" => "mother", # "target" => "parent_document.asp", # "titlehidden" => "caca" # }; # $item_mech->submit_form(form_name => "ReultsForm", fields => $fi +elds_1); $item_mech->get($link,":content_file" => 'test.pdf'); #print $item_mech->content(); } # sub download_file # { # my $link = shift; # my $location = shift; # my @link_test = $item_mech->find_all_links(url_regex => qr/pdf/) +; # my @file_name = split(/\//, $link); # my @name = split(/\?/,$file_name[$#file_name]); # my $file = $location."/".$name[0]; # my @link_split = split(/\?/,$link); # chomp($link); # $item_mech->default_header(Accept => "text/*"); # $item_mech->get($link_test[0],":content_file" => $file); # } # getstore( $link , $file ); # $item_mech->cookie_jar(HTTP::Cookies->new); # $item_mech->add_header('UID' => 'phil', 'cn' => 'CN', 'id' => '7 +77'); # $item_mech->add_header('Connection' => 'keep_alive');

In reply to Re^6: Perl Mechanize - Header Help required. by smilingsagar
in thread Perl Mechanize - Header Help required. by smilingsagar

Title:
Use:  <p> text here (a paragraph) </p>
and:  <code> code here </code>
to format your post, it's "PerlMonks-approved HTML":



  • Posts are HTML formatted. Put <p> </p> tags around your paragraphs. Put <code> </code> tags around your code and data!
  • Titles consisting of a single word are discouraged, and in most cases are disallowed outright.
  • Read Where should I post X? if you're not absolutely sure you're posting in the right place.
  • Please read these before you post! —
  • Posts may use any of the Perl Monks Approved HTML tags:
    a, abbr, b, big, blockquote, br, caption, center, col, colgroup, dd, del, details, div, dl, dt, em, font, h1, h2, h3, h4, h5, h6, hr, i, ins, li, ol, p, pre, readmore, small, span, spoiler, strike, strong, sub, summary, sup, table, tbody, td, tfoot, th, thead, tr, tt, u, ul, wbr
  • You may need to use entities for some characters, as follows. (Exception: Within code tags, you can put the characters literally.)
            For:     Use:
    & &amp;
    < &lt;
    > &gt;
    [ &#91;
    ] &#93;
  • Link using PerlMonks shortcuts! What shortcuts can I use for linking?
  • See Writeup Formatting Tips and other pages linked from there for more info.