# Strip HTML comments and tags from a string.
#
# Usage: with $_ holding the HTML text, the two statements after the sub
# replace $_ with the stripped text and print it.

# strip_html_tags($html)
#   $html - string of HTML markup; it may span several lines.
#   Returns a copy of $html with comments and tags removed.
#   An undefined argument is returned unchanged.
sub strip_html_tags {
    my ($html) = @_;
    return $html unless defined $html;

    # Pull comments.  The /s modifier lets `.' match newlines, so a comment
    # that spans several lines is removed as one unit instead of being
    # picked apart by the tag-stripping passes below.
    # Note that
    #   `<!-- foo="--> bar <--" -->' will NOT strip ` bar ',
    # because the comment is taken to end at the first `-->'.
    # This is deliberate.
    $html =~ s/<!--.*?-->//gs;

    # For tags like <blah blah="blah" blah='blah' ... >, strip from after
    # the start of the tag up to the end of the first quoted string,
    # repeatedly, ending in either `<>' or `<no quotes here>'.
    # Either quote character is handled, and the other quote character may
    # appear inside the quoted string.  Quoted strings are removed first so
    # that a `>' inside an attribute value cannot end the tag early.
    # The (?!--) lookahead skips any `<' that is immediately followed
    # by `--'.
    while (   $html =~ s/<(?!--)[^'">]*"[^"]*"/</g
           or $html =~ s/<(?!--)[^'">]*'[^']*'/</g )
    {
        # Empty on purpose: the substitutions in the condition do the work,
        # and the loop ends once neither of them matches any more.
    }

    # Strip HTML tags without quotes in them, which should be the only
    # kind that is left at this point.
    $html =~ s/<(?!--)[^">]*>//g;

    return $html;
}

$_ = strip_html_tags($_);
print $_;
In reply to Strip HTML tags by rlk
| For: | Use: |
| & | &amp; |
| < | &lt; |
| > | &gt; |
| [ | &#91; |
| ] | &#93; |