# Snippet 1: decode a UTF-8 byte string, in place, into Perl characters.
# NOTE(review): $string is not declared anywhere in the visible source --
# presumably a placeholder for the reader's own variable; confirm before
# running this statement as-is.
utf8::decode($string);

####

# Snippet 2: fetch a page and print it with every C0 control character other
# than tab, LF and CR, and every character at or above U+0080, replaced by a
# visible "[U+XXXX]" marker so such characters can be spotted in the output.
use 5.010001;
use strict;
use warnings;
use LWP::UserAgent;

my $url = 'http://publib.boulder.ibm.com/infocenter/brjrules/v7r0m3/basic/tocView.jsp?toc=/com.ibm.websphere.ilog.jrules.doc/toc.xml';

# Stop on a failed request instead of escaping and printing the error body
# as though it were the requested page.
my $response = LWP::UserAgent->new->get($url);
die 'GET ' . $url . ' failed: ' . $response->status_line
    unless $response->is_success;

# Raw bytes as received; they are decoded explicitly below.
my $content = $response->content;

# utf8::decode returns false and leaves the string untouched when the bytes
# are not well-formed UTF-8; in that case every high byte would be reported
# below as if it were a code point, so say so.
utf8::decode($content)
    or warn "Response body is not well-formed UTF-8; markers below show raw byte values\n";

# /x lets the pattern be spaced out, /e evaluates the replacement as code,
# /g applies it to every match.
$content =~ s{ ([\x00-\x08\x0B\x0C\x0E-\x1F\x80-\x{1FFFFF}]) }
             { sprintf('[U+%04X]', ord($1)) }gex;

print $content;