# Substitution applied to $_ (no explicit =~ target): the empty capture group
# matches at the very start of the string and is replaced by $1, which is the
# empty string it just captured, so the text in $_ is left unchanged.
# NOTE(review): this looks like a leftover template/placeholder -- neither the
# capture nor the /i flag has any visible effect here; confirm before relying on it.
s{()}{$1}i;
####
# $link_target and $url_just_fetched are known...
#
# Turn $link_target (an href found in the page at $url_just_fetched) into an
# absolute URL, without any "#fragment", and store it in $full_target.
my $full_target = _resolve_link_target( $link_target, $url_just_fetched );

# _resolve_link_target( $link, $base )
#
# Resolves $link against $base and returns the absolute URL as a string, with
# any "#fragment" removed.  Returns undef if either argument is undefined.
#
#   $link - link text as found in the page (absolute, "//host/...", "/path",
#           "?query", "#fragment" or a plain relative path)
#   $base - URL of the page the link was found in
#
# Whether a base URL without a trailing slash names a directory or a file is
# guessed: a path/query containing one of ? & = ; : or ".htm" is taken to be
# a file, anything else is taken to be a directory.  "." and ".." segments
# in the link are NOT collapsed; they are passed through for the server to
# interpret.
sub _resolve_link_target {
    my ( $link, $base ) = @_;
    return unless defined $link && defined $base;

    # A named anchor only selects a position inside a page, so drop it from
    # both sides before doing anything else.
    s/\#.*//s for $link, $base;

    # Nothing left (e.g. href="#top"): the link points at the page itself.
    return $base if $link eq '';

    # A link carrying its own scheme ("http:", "https:", "mailto:", ...) is
    # already complete.  Requiring the colon keeps relative names such as
    # "httpd.html" from being mistaken for absolute URLs.
    return $link if $link =~ m{\A[a-z][a-z0-9+.\-]*:}i;

    # Split the base into the site root ("scheme://host[:port]"), the scheme
    # on its own, and whatever follows the host (path plus optional query).
    my ( $site, $scheme, $rest ) =
        $base =~ m{\A ( ( [a-z][a-z0-9+.\-]*: )? (?: // )? [^/?]* ) (.*) \z}xis;
    $scheme = '' unless defined $scheme;

    # "//host/path" inherits only the scheme of the page it was found in.
    return $scheme . $link if $link =~ m{\A//};

    # initial "/" means "relative to doc-root".  $site has no trailing slash,
    # so the result never contains "//" after the host.
    return $site . $link if $link =~ m{\A/};

    # Separate path and query so that a "/" inside the query string is never
    # mistaken for a directory boundary.
    my ($path) = $rest =~ m{\A([^?]*)}s;
    $path = '/' if $path eq '';    # "http://host" means "http://host/"

    # "?query": same resource, different query string.
    return $site . $path . $link if $link =~ m{\A\?};

    # Otherwise the link is relative to the directory of the page fetched.
    my $dir;
    if ( $path =~ m{/\z} ) {
        $dir = $path;
    }
    # this is the tricky part: the test is applied to the path and query
    # only, because the ":" of the scheme would otherwise always match.
    elsif ( $rest =~ /[?&=;:]|\.htm/ ) {    # probably not a directory name...
        $dir = substr( $path, 0, rindex( $path, '/' ) + 1 );
    }
    else {                                  # assume it's a directory name
        $dir = $path . '/';
    }

    return $site . $dir . $link;
}