in reply to Re: HTML::Parse
in thread Getting HTML::Parse to work (was: HTML::Parse)
# strip_html(TEXT)
#
# Removes HTML markup from TEXT and decodes the character entity references
# it knows about.  TEXT is modified IN PLACE through the @_ alias, so the
# argument must be a writable scalar; the stripped text is also returned.
#
# Three passes are made over the text:
#   1. Comments ("-- ... --") inside "<! ... >" declarations are removed,
#      keeping any non-comment text of the declaration for pass 2.
#   2. Every tag is deleted.  Quoted attribute values are matched as units,
#      so a ">" inside quotes does not end the tag early.
#   3. Entity references found in %entity are replaced by their character;
#      anything not in the table is left exactly as written.
#
# %entity is a package variable filled once at compile time with lt, gt,
# amp, quot, the named ISO-8859-1 entities (code points 160-255) and the
# decimal references "#0" .. "#255".
sub strip_html {
    require 5.002;

    our %entity;

    BEGIN {
        %entity = ( lt => '<', gt => '>', amp => '&', quot => '"' );

        # Named ISO-8859-1 entities, listed in code-point order starting
        # at 160 (nbsp) and ending at 255 (yuml).
        my @latin1_names = qw(
            nbsp   iexcl  cent   pound  curren yen    brvbar sect
            uml    copy   ordf   laquo  not    shy    reg    macr
            deg    plusmn sup2   sup3   acute  micro  para   middot
            cedil  sup1   ordm   raquo  frac14 frac12 frac34 iquest
            Agrave Aacute Acirc  Atilde Auml   Aring  AElig  Ccedil
            Egrave Eacute Ecirc  Euml   Igrave Iacute Icirc  Iuml
            ETH    Ntilde Ograve Oacute Ocirc  Otilde Ouml   times
            Oslash Ugrave Uacute Ucirc  Uuml   Yacute THORN  szlig
            agrave aacute acirc  atilde auml   aring  aelig  ccedil
            egrave eacute ecirc  euml   igrave iacute icirc  iuml
            eth    ntilde ograve oacute ocirc  otilde ouml   divide
            oslash ugrave uacute ucirc  uuml   yacute thorn  yuml
        );
        for my $offset ( 0 .. $#latin1_names ) {
            $entity{ $latin1_names[$offset] } = chr( 160 + $offset );
        }

        # Decimal character references: "&#65;" is looked up as "#65".
        for my $code ( 0 .. 255 ) {
            $entity{ '#' . $code } = chr $code;
        }
    }

    # Pass 1: drop "-- comment --" runs from "<! ... >" declarations.  When
    # the declaration holds anything besides comments it is re-emitted
    # without them so that pass 2 sees an ordinary tag.
    $_[0] =~ s{ <! (.*?) ( -- .*? -- \s* )+ (.*?) > }{
        ( $1 || $3 ) ? "<!$1 $3>" : ''
    }gesx;

    # Pass 2: delete tags.  A tag body is any mix of unquoted text and
    # complete single- or double-quoted strings.
    $_[0] =~ s{ < (?: [^>'"] * | ".*?" | '.*?' ) + > }{}gsx;

    # Pass 3: decode entities.  \x23 stands for the hash sign, which would
    # otherwise start a comment under /x.  The captures are copied before
    # the inner substitution overwrites them.  Leading zeros are removed
    # from numeric references so that "&#065;" finds the "#65" entry, and
    # the lookup uses exists() because a decoded "0" is a false value.
    $_[0] =~ s{ ( & ( \x23\d+ | \w+ ) ;? ) }{
        my ( $raw, $name ) = ( $1, $2 );
        $name =~ s/\A \x23 0+ (?=\d)/\x23/x;
        exists $entity{$name} ? $entity{$name} : $raw;
    }gex;

    return $_[0];
}
|
|---|