# Strip a leading opening tag (and any whitespace before it) from the line.
# The pattern only matches when the line starts with a tag containing no "/"
# (so not a closing tag) and at least one more character follows that tag.
# Eg: applying the regex to "<p>Some text</p>" places "Some text</p>" into
# "$1", which is then copied into "$_"; the trailing "</p>" is removed by the
# substitution below.
# Note: "." does not match a newline here, so "$1" ends at the first newline.
m|^\s*<[^/>]+>(.+)| and $_=$1;
# Zap every remaining tag, wherever it appears in the line.
# Eg: it zaps "<br>" from "text1 <br> text2", leaving "text1  text2".
# Actually, it is rather crude: it removes anything of the form "<...>" and
# does not care about tag termination, or matching.
s|<[^>]+>||g;
####
# Append whatever text is left to the running buffer.
$collected_text .= $_;