#!/usr/bin/perl -w
######################################################################
#
# Parse-href.pl 2001-05
#
# Grab the content of all "href" attributes of the HTML "<a>" tag,
# insert a redirection URL in the "href" and URL-encode the old URL
# so that it can be passed to the redirector as a parameter.
#
# Usage : perl parse.pl file=file_name.htm
# The rewritten document is written to "News-ok.htm" in the current
# directory.
#
# Greetings fly out to OeufMayo for his help.
#
# Nicolas Crovatti
# http://www.gencoding.com
#
######################################################################

use strict;
use URI::Escape;
use warnings;
use CGI ':standard';

# Number of "#" (JavaScript-driven) links that were left untouched.
# Starts at 0 so the final report never prints an undefined value.
my $JavascriptLinks = 0;

# Prefix of every rewritten link; the escaped original URL is appended.
my $dDate    = time();
my $RedirURL = "http://127.0.0.1/rapport/rnews.php?dt=" . $dDate . "&url=";

# Name of the HTML file to process, given as "file=..." on the command line.
my $File = param("file");

{
    package myParser;
    use base qw(HTML::Parser);

    # Re-encode an attribute value for output inside double quotes.
    # HTML::Parser hands attribute values over with entities decoded, so
    # they must be escaped again before being written back.
    sub _escape_attr {
        my ($value) = @_;
        $value = '' unless defined $value;
        $value =~ s/&/&amp;/g;
        $value =~ s/"/&quot;/g;
        $value =~ s/</&lt;/g;
        $value =~ s/>/&gt;/g;
        return $value;
    }

    # Start-tag handler.
    #   $tagname  - lower-cased tag name
    #   $attr     - hash ref of attribute name => decoded value
    #   $attrseq  - array ref of attribute names in source order
    #   $origtext - the tag exactly as it appeared in the input
    # Every tag is printed to the currently selected filehandle; only
    # <a> tags carrying a real href are rebuilt around $RedirURL.
    sub start {
        my ($self, $tagname, $attr, $attrseq, $origtext) = @_;

        my $href = $tagname eq 'a' ? $attr->{href} : undef;

        # Not an anchor, or an anchor without a target (e.g. <a name="x">):
        # there is nothing to redirect, so emit the tag untouched.
        if (!defined $href || $href eq '') {
            print $origtext;
            return;
        }

        # '#' links are placeholders for JavaScript handlers; they are not
        # parsed, only counted in $JavascriptLinks.
        if ($href eq '#') {
            $JavascriptLinks++;
            print $origtext;
            return;
        }

        # Escape everything except plain alphanumerics so the old URL
        # survives as a single query-string value of the redirector.
        my $redirected = $RedirURL . main::uri_escape($href, "^A-Za-z0-9");

        # Rebuild the whole tag, keeping the attributes in their original
        # order and swapping in the redirected href.
        print '<a';
        for my $at (@$attrseq) {
            my $value = $at eq 'href' ? $redirected : $attr->{$at};
            print ' ', $at, '="', _escape_attr($value), '"';
        }
        print '>';
        return;
    }

    # End-tag handler: closing tags are copied through verbatim.
    sub end {
        my ($self, undef, $origtext) = @_;
        print $origtext;
        return;
    }

    # Text handler: character data is copied through verbatim.
    sub text {
        my ($self, $origtext) = @_;
        print $origtext;
        return;
    }
}

# Build the usage banner shown when the input file cannot be used.
#   $reason - short explanation (typically $!) inserted in the banner
sub usage_message {
    my ($reason) = @_;
    return <<"EOT_";
-[Erreur prevue]---------------------
Usage : perl parse.pl file=file_name.htm
$reason
-------------------------------------
EOT_
}

# A missing "file" parameter is reported directly instead of surfacing as
# an undefined-value warning inside open().
die usage_message("Missing 'file' parameter")
    unless defined $File && length $File;

# Opening the HTML file in read mode only.
# Three-argument open keeps characters such as '>' or '|' in the file
# name from being interpreted as an open mode.
open(my $in_fh, '<', $File) or die usage_message($!);

open(my $out_fh, '>', 'News-ok.htm') or die <<"EOT_";
-[Erreur]----------------------------
Cannot open file: $!
-------------------------------------
EOT_

# $html is filled with all the content of the input file
my $html = do { local $/; <$in_fh> };
$html = '' unless defined $html;
close $in_fh;

# Initialising the Parser
my $p = myParser->new();

# Parsing $html: the handlers print to the selected filehandle, so the
# output file is selected for the duration of the parse.
my $previous_fh = select $out_fh;
$p->parse($html);
$p->eof;    # flush any text the parser is still buffering
select $previous_fh;

# Buffered write errors only surface when the handle is closed.
close $out_fh or die "Cannot close News-ok.htm: $!";

print "
-[Result ok]---------------
Everything goes Ok!
not modified links : $JavascriptLinks
-[eot]-----------------------------
";

exit;