#!/usr/local/bin/perl
use strict;
use warnings;
use XML::Simple;
use File::Find;
use HTML::TokeParser::Simple;
use Data::Dumper;
# Root of the HTML tree to scan. wanted() removes this prefix from each
# file's full path to obtain the key it looks up in %lookup.
my $dir = 'C:/perm/monks/files/html';

# XML document describing which link to change on which page.
my $xmlfile = 'C:/perm/monks/xmlfile/urlchange.xml';

# Page path => { from => ..., to => ... }, as built by get_lookup().
my %lookup = get_lookup($xmlfile);

# Walk the tree below $dir, calling wanted() for every entry found.
find({ wanted => \&wanted }, $dir);
# File::Find callback: for every plain file that has an entry in %lookup,
# rewrite the matching <a href="..."> and print the resulting HTML.
#
# Inputs (set by File::Find / file scope, not passed as arguments):
#   $_                 - name of the current entry
#   $File::Find::name  - path of the current entry, including the start dir
#   $dir               - start directory, stripped to form the lookup key
#   %lookup            - page path => { from => old href, to => new href }
#
# Returns nothing. Dies if the parser cannot be created for a file.
sub wanted {
    my $file = $_;
    return unless -f $file;

    # Lookup keys are paths relative to $dir. \Q..\E keeps any regex
    # metacharacters in $dir literal, and \A makes sure only the leading
    # prefix is removed rather than a match somewhere inside the path.
    (my $rel = $File::Find::name) =~ s/\A\Q$dir\E//;
    return unless exists $lookup{$rel};
    my $change = $lookup{$rel};

    my $p = HTML::TokeParser::Simple->new($file)
        or die "couldn't parse $file";

    # Start from an empty string so a file that yields no tokens still
    # prints cleanly instead of triggering an "uninitialized" warning.
    my $new_html = '';
    while (my $t = $p->get_token) {
        if ($t->is_start_tag('a')) {
            my $href = $t->get_attr('href');

            # Compare with 'eq'. The previous '=' was an assignment: it
            # overwrote the stored 'from' URL and was true for every
            # non-empty href, so every link in the page got rewritten.
            # Anchors without an href (e.g. <a name="...">) are skipped.
            if (   defined $href
                && defined $change->{from}
                && $href eq $change->{from})
            {
                $t->set_attr('href', $change->{to});
            }
        }
        $new_html .= $t->as_is;
    }

    # Debug output, always on for now: show what would be written.
    if (1) {
        print "$File::Find::name\n";
        print "$new_html\n";
        print '-' x 20, "\n";
    }

    # TODO: write $new_html back to the file.
}
# Build the page => link-change table from the XML file.
#
# Parameters:
#   $xmlfile - path of the XML document, passed to XML::Simple's XMLin.
#
# Returns:
#   A key/value list (assign it to a hash) keyed by each record's
#   OriginPage. Each value is a hash ref
#   { from => LinkToPage, to => New_location }.
#   If several records share an OriginPage, the last one seen wins.
#
# Dies if XMLin cannot read or parse the file.
sub get_lookup {
    my ($xmlfile) = @_;

    # Direct method call instead of the indirect-object form 'new Class()'.
    my $xml  = XML::Simple->new;
    my $data = $xml->XMLin($xmlfile);

    my %lookup;
    for my $sheet (keys %{$data}) {
        my $entry = $data->{$sheet};

        # With default options XML::Simple gives an array ref only when an
        # element repeats; a lone record arrives as a bare hash ref, and
        # attributes/text as plain strings. Normalise all of these to a
        # list so a one-record sheet no longer dies with "Not an ARRAY
        # reference".
        my @records = ref($entry) eq 'ARRAY' ? @{$entry}
                    : ref($entry) eq 'HASH'  ? ($entry)
                    :                          ();

        for my $record (@records) {
            # Skip anything that is not a record carrying a page to key on.
            next unless ref($record) eq 'HASH';
            next unless defined $record->{OriginPage};

            $lookup{ $record->{OriginPage} } = {
                from => $record->{LinkToPage},
                to   => $record->{New_location},
            };
        }
    }
    return %lookup;
}
####
Sheet1
$VAR1 = {
'/meeting/series/index.asp' => {
'to' => 'new url',
'from' => 'http://quicklinkurl1/index.asp'
},
'/meeting/lunchtime-meeting/index.asp' => {
'to' => 'another changed url',
'from' => 'http://quicklinkurl2/index.asp'
},
'/meeting/index.asp' => {
'to' => 'xxxxxxx',
'from' => 'http://quicklinkurl1/index.asp'
}
};
####
index.asp
link
####
C:/perm/monks/files/html/meeting/index.asp
index.asp
link
--------------------
C:/perm/monks/files/html/meeting/lunchtime-meeting/index.asp
index.asp
link
--------------------
C:/perm/monks/files/html/meeting/series/index.asp
index.asp
link
--------------------