# $link_target and $url_just_fetched are known...
#
# Turn $link_target (an href found in the page at $url_just_fetched) into an
# absolute URL in $full_target, with any "#fragment" removed.
my $full_target = _resolve_link_target( $url_just_fetched, $link_target );

# _resolve_link_target( $base, $link )
#
# Resolve $link against the absolute URL $base and return the result with
# any "#fragment" removed.  Returns undef when $link is undefined.
#
#   $base - URL of the page the link was found in
#   $link - raw link target: absolute, "//host/...", "/path", "?query",
#           "#anchor", or a path relative to $base
#
# "." and ".." path segments are not collapsed; they are passed through
# unchanged for the server to interpret.
sub _resolve_link_target {
    my ( $base, $link ) = @_;
    return undef unless defined $link;

    # A fragment only names an anchor inside a page, so it never changes
    # which document has to be fetched.
    $link =~ s/\#.*//s;

    # Anything that starts with a scheme ("http:", "https:", "mailto:", ...)
    # is already complete.  Requiring the colon keeps relative names such as
    # "httpd.html" from being mistaken for absolute URLs.
    return $link if $link =~ m{\A[a-z][a-z0-9+.-]*:}i;

    $base = '' unless defined $base;
    $base =~ s/\#.*//s;

    # The link was empty or only "#anchor": it points at the page itself.
    return $base if $link eq '';

    # "//host/path" inherits nothing but the scheme of the base URL.
    if ( $link =~ m{\A//} ) {
        my ($scheme) = $base =~ m{\A([a-z][a-z0-9+.-]*:)}i;
        return ( defined $scheme ? $scheme : '' ) . $link;
    }

    # The base's query string is not part of its path; drop it so that a
    # "/" inside the query cannot confuse the path handling below.
    $base =~ s/\?.*//s;

    # "?query" replaces only the query of the base document.
    return $base . $link if $link =~ m{\A\?};

    # Split the base into "scheme://host[:port]" and the path after it.
    my ($doc_root) = $base =~ m{\A((?:[a-z][a-z0-9+.-]*:)?(?://)?[^/]*)}i;
    my $base_path = substr $base, length $doc_root;

    # Initial "/" means "relative to doc-root".  $doc_root carries no
    # trailing slash, so the join cannot produce "host//path".
    return $doc_root . $link if $link =~ m{\A/};

    # Otherwise the link is relative to the directory of the base document.
    # A base without a trailing slash is treated as a document, the way
    # browsers do (RFC 3986, section 5.2.3): everything after the last "/"
    # of the path is dropped.  A base with no path at all resolves from "/".
    $base_path =~ s{[^/]*\z}{};
    $base_path = '/' if $base_path eq '';

    return $doc_root . $base_path . $link;
}