#!/usr/bin/perl
#
# Small demonstration of HTML::TokeParser and HTML::Entities:
#   1. slurp the sample document stored after __END__,
#   2. print the type of the first token the parser returns,
#   3. print the text that follows that token,
#   4. print that text again with part of the character range
#      re-encoded as HTML entities.
use strict;
use warnings;

use HTML::Entities;
use HTML::TokeParser;

binmode(STDOUT, ":utf8");

# The sample after __END__ contains non-ASCII characters.  Decode it on
# input so the parser works on characters rather than raw bytes; without
# this the bytes would be encoded a second time by the :utf8 layer on
# STDOUT.  (Assumes this file is saved as UTF-8 -- TODO confirm.)
binmode(DATA, ":encoding(UTF-8)");

# Slurp the whole DATA section.  $/ is localised inside the do-block so
# slurp mode does not leak into the rest of the program.
my $lines = do { local $/; <DATA> };
$lines = "" unless defined $lines;

my $tok_par = HTML::TokeParser->new( \$lines )
    or die "Cannot create HTML::TokeParser: $!\n";

# get_token returns undef once the document is exhausted, so check the
# result before treating it as an array reference.
my $tok_inf = $tok_par->get_token;
die "No tokens found in the DATA section\n" unless defined $tok_inf;

# The first element of a token array is its type code.
my $tok_typ = $tok_inf->[0];
print "Type: $tok_typ \n" ;

# Text up to the next tag; falls back to an empty string when there is none.
my $title = $tok_par->get_text() || "";
print "Title: $title \n" ;

# The second argument is a character-class style set of characters to
# encode: here U+00FF through U+FFFF.
# NOTE(review): e-grave (U+00E8) and e-acute (U+00E9) lie below U+00FF and
# are therefore left unescaped -- confirm this lower bound is intended.
my $encoded_title = encode_entities( $title, '\x{ff}-\x{ffff}' );
print "Enc_Title: $encoded_title\n";

# NOTE(review): the sample below contains no tags, so the first token is
# the whole text and the title printed above is empty.  The sample was
# presumably wrapped in an element such as <title>...</title> and may
# have used entity references originally -- verify against the intended
# input.
__END__
egrave: è : eacute: é : rsquo: ’ : lsquo: ‘