#!/usr/local/bin/perl
use strict;
use warnings;

use XML::Simple;
use File::Find;
use HTML::TokeParser::Simple;
use Data::Dumper;

# Root of the HTML tree to scan, and the XML file describing the link changes.
my $dir     = "C:/perm/monks/files/html";
my $xmlfile = "C:/perm/monks/xmlfile/urlchange.xml";

# Maps a page path (relative to $dir) to { from => old href, to => new href }.
my %lookup = get_lookup($xmlfile);

find(\&wanted, $dir);

# File::Find callback.
#
# For every plain file under $dir whose path relative to $dir is a key of
# %lookup, parse the file as HTML and rebuild it token by token, replacing the
# href of each <a> start tag that equals the entry's 'from' value with the
# entry's 'to' value. The rebuilt HTML is currently only printed; writing it
# back is still a TODO.
#
# Dies if the parser object cannot be created for a file.
sub wanted {
    my $file = $_;
    return unless -f $file;

    # Strip the root directory prefix. \Q...\E keeps regex metacharacters in
    # the path literal, and \A ensures only a leading prefix is removed.
    my $rel = $File::Find::name;
    $rel =~ s/\A\Q$dir\E//;
    return unless exists $lookup{$rel};

    my $entry = $lookup{$rel};

    my $p = HTML::TokeParser::Simple->new($file)
        or die "couldn't parse $file";

    # Start from an empty string so a file with no tokens prints cleanly.
    my $new_html = '';
    while (my $t = $p->get_token) {
        if ($t->is_start_tag('a')) {
            my $href = $t->get_attr('href');

            # String comparison, not assignment: only links whose href matches
            # the recorded 'from' URL are rewritten. Anchors without an href
            # (undef) are left untouched.
            if (   defined $href
                && defined $entry->{from}
                && $href eq $entry->{from})
            {
                $t->set_attr('href', $entry->{to});
            }
        }
        $new_html .= $t->as_is;
    }

    # Debug output of the rewritten document.
    if (1) {
        print "$File::Find::name\n";
        print "$new_html\n";
        print '-' x 20, "\n";
    }

    # TODO: write $new_html back to the file.
    return;
}

# Build the page => { from, to } lookup table from the XML file.
#
# Parameters:
#   $xmlfile - path of the XML file to read with XML::Simple.
#
# Returns:
#   A hash (as a list) keyed by each record's OriginPage, whose values are
#   hashrefs with 'from' (the record's LinkToPage) and 'to' (the record's
#   New_location). If several records share an OriginPage, the last one wins.
sub get_lookup {
    my ($xmlfile) = @_;

    my $xml  = XML::Simple->new();
    my $data = $xml->XMLin($xmlfile);

    my %lookup;
    for my $sheet (keys %{$data}) {
        # XML::Simple folds a single child element into a hashref rather than
        # an arrayref, so normalise both shapes to a list of records.
        my $records = $data->{$sheet};
        my @records = ref $records eq 'ARRAY' ? @{$records} : ($records);

        for my $record (@records) {
            # NOTE(review): non-hash values (e.g. plain attributes on the
            # root element) and records without an OriginPage are skipped;
            # confirm against the real XML layout.
            next unless ref $record eq 'HASH';
            next unless defined $record->{OriginPage};

            $lookup{ $record->{OriginPage} } = {
                from => $record->{LinkToPage},
                to   => $record->{New_location},
            };
        }
    }

    return %lookup;
}

__END__

The sections below accompanied the script and are not executed.

####
Sheet1
$VAR1 = {
    '/meeting/series/index.asp' => {
        'to'   => 'new url',
        'from' => 'http://quicklinkurl1/index.asp'
    },
    '/meeting/lunchtime-meeting/index.asp' => {
        'to'   => 'another changed url',
        'from' => 'http://quicklinkurl2/index.asp'
    },
    '/meeting/index.asp' => {
        'to'   => 'xxxxxxx',
        'from' => 'http://quicklinkurl1/index.asp'
    }
};

####
index.asp link

####
C:/perm/monks/files/html/meeting/index.asp
index.asp link
--------------------
C:/perm/monks/files/html/meeting/lunchtime-meeting/index.asp
index.asp link
--------------------
C:/perm/monks/files/html/meeting/series/index.asp
index.asp link
--------------------