use strict; use warnings; my $name = 'PodMaster'; my $url = 'http://perlmonks.org/?node=PodMaster'; my $html = q~ PodMaster

PodMaster

Hi there PodMaster blah blah blah PodMaster ~; print $/, untag_MOD( $html, $name, $url ), $/; #http://perlmonks.org/?node_id=161281 modified for our purposes sub untag_MOD { local $_ = $_[0] || $_; # ALGORITHM: # find < , # comment , # or comment , # or one of the start tags which require correspond # end tag plus all to end tag # or if \s or =" # then skip to next " # else [^>] # > # 1 is the entire "tag", add +1 to all numbers in comments s{ ( # podmaster < # open tag (?: # open group (A) (!--) | # comment (1) or (\?) | # another comment (2) or (?i: # open group (B) for /i ( TITLE | # one of start tags SCRIPT | # for which APPLET | # must be skipped OBJECT | # all content STYLE # to correspond ) # end tag (3) ) | # close group (B), or ([!/A-Za-z]) # one of these chars, remember in (4) ) # close group (A) (?(5) # if previous case is (4) (?: # open group (C) (?! # and next is not : (D) [\s=] # \s or "=" ["`'] # with open quotes ) # close (D) [^>] | # and not close tag or [\s=] # \s or "=" with `[^`]*` | # something in quotes ` or [\s=] # \s or "=" with '[^']*' | # something in quotes ' or [\s=] # \s or "=" with "[^"]*" # something in quotes " )* # repeat (C) 0 or more times | # else (if previous case is not (4)) .*? # minimum of any chars ) # end if previous char is (4) (?(2) # if comment (1) (?<=--) # wait for "--" ) # end if comment (1) (?(3) # if another comment (2) (?<=\?) # wait for "?" ) # end if another comment (2) (?(4) # if one of tags-containers (3) ]*)? # skip junk to ">" ) # end if (3) > # tag closed ) ([^<]*) # 6, text } ' my $ret = $1; if( $6 ){ my $text = $6; $text =~ s~\b(\Q$_[1]\E)\b~$1~g; # add link $ret .= $text; } $ret; 'gsxe; return $_ ? $_ : ""; } __END__