in reply to Re^2: HTML::TokeParser, get_text scrambling rsquo and lsquo
in thread HTML::TokeParser, get_text scrambling rsquo and lsquo
#!/usr/bin/perl
# Demonstrates that HTML::TokeParser's get_text() returns text with
# character entities already decoded, and shows how to re-encode the
# characters at or above U+00FF (e.g. the curly quotes) for output.
use strict;
use warnings;
use HTML::Entities;
use HTML::TokeParser;

# The decoded title contains wide characters (U+2018, U+2019), so STDOUT
# needs an encoding layer to print them without "Wide character" warnings.
binmode(STDOUT, ':encoding(UTF-8)');

# Slurp the whole DATA section. $/ is localised inside the do-block only,
# so the rest of the script keeps normal line-at-a-time reads.
my $lines = do { local $/; <DATA> };
$lines = '' unless defined $lines;

my $tok_par = HTML::TokeParser->new( \$lines );

# First token of the document; element 0 of the returned array ref is the
# token type. Guard against an empty document, where no token comes back.
my $tok_inf = $tok_par->get_token;
my $tok_typ = defined $tok_inf ? $tok_inf->[0] : '<NO TOKEN FOUND>';
print "Type: $tok_typ \n";

# Text up to the next tag, entities decoded. Fall back only when there is
# genuinely no text (a plain || would also discard a title of "0").
my $title = $tok_par->get_text();
$title = "<NO TITLE FOUND>" unless defined $title && length $title;
print "Title: $title \n";

# Re-encode only characters in U+00FF..U+FFFF. e-grave (U+00E8) and
# e-acute (U+00E9) fall below that range and are printed as-is, while the
# curly quotes (U+2018 / U+2019) fall inside it and are entity-encoded.
my $encoded_title = encode_entities( $title, '\x{ff}-\x{ffff}' );
print "Enc_Title: $encoded_title\n";

# NOTE(review): the sample line below is written with ASCII entity
# references, which is what get_text() is being asked to decode here.
__DATA__
<title>egrave: &egrave; : eacute: &eacute; : rsquo: &rsquo; : lsquo: &lsquo;</title>
|
|---|