use strict;
use warnings;
my $name = 'PodMaster';
my $url = 'http://perlmonks.org/?node=PodMaster';
my $html = q~
PodMaster
PodMaster
Hi there PodMaster blah blah blah PodMaster
~;
print $/, untag_MOD( $html, $name, $url ), $/;
#http://perlmonks.org/?node_id=161281 modified for our purposes
sub untag_MOD {
local $_ = $_[0] || $_;
# ALGORITHM:
# find < ,
# comment ,
# or comment ... ?> ,
# or one of the start tags which require correspond
# end tag plus all to end tag
# or if \s or ="
# then skip to next "
# else [^>]
# >
# 1 is the entire "tag", add +1 to all numbers in comments
s{
( # podmaster
< # open tag
(?: # open group (A)
(!--) | # comment (1) or
(\?) | # another comment (2) or
(?i: # open group (B) for /i
( TITLE | # one of start tags
SCRIPT | # for which
APPLET | # must be skipped
OBJECT | # all content
STYLE # to correspond
) # end tag (3)
) | # close group (B), or
([!/A-Za-z]) # one of these chars, remember in (4)
) # close group (A)
(?(5) # if previous case is (4)
(?: # open group (C)
(?! # and next is not : (D)
[\s=] # \s or "="
["`'] # with open quotes
) # close (D)
[^>] | # and not close tag or
[\s=] # \s or "=" with
`[^`]*` | # something in quotes ` or
[\s=] # \s or "=" with
'[^']*' | # something in quotes ' or
[\s=] # \s or "=" with
"[^"]*" # something in quotes "
)* # repeat (C) 0 or more times
| # else (if previous case is not (4))
.*? # minimum of any chars
) # end if previous char is (4)
(?(2) # if comment (1)
(?<=--) # wait for "--"
) # end if comment (1)
(?(3) # if another comment (2)
(?<=\?) # wait for "?"
) # end if another comment (2)
(?(4) # if one of tags-containers (3)
# wait for end
(?i:\4) # of this tag
(?:\s[^>]*)? # skip junk to ">"
) # end if (3)
> # tag closed
)
([^<]*) # 6, text
}
'
my $ret = $1;
if( $6 ){
my $text = $6;
$text =~ s~\b(\Q$_[1]\E)\b~$1~g; # add link
$ret .= $text;
}
$ret;
'gsxe;
return $_ ? $_ : "";
}
__END__