####################################################################
# Guess the character set of the response body.  The checks run from
# most to least reliable and the first one that yields an answer wins:
#   1. byte-order mark at the start of the content
#   2. charset parameter of the Content-Type header
#   3. META tag in the content (see the NOTE in that section)
#   4. fall back to "iso-8859-1"
####################################################################

####################################################################
# Look at the first bytes of the content
#   FE FF    -> utf-16be
#   FF FE    -> utf-16le
#   EF BB BF -> utf-8
# Fetch the content once instead of once per probe.
my $raw_content = $resp->content;
return "utf-16be" if $raw_content =~ m/\A\xFE\xFF/;
return "utf-16le" if $raw_content =~ m/\A\xFF\xFE/;
return "utf-8"    if $raw_content =~ m/\A\xEF\xBB\xBF/;

####################################################################
# Use the header
#   content-type: text/html; charset=XXX
# NOTE(review): this reads the response object's private {_headers}
# hash directly.  If $resp is an HTTP::Response, the public accessor
# $resp->header('Content-Type') is presumably the safer choice --
# confirm against the caller before switching.
my $ct = $resp->{_headers}->{'content-type'} || "";

# The charset parameter may follow the ';' with or without whitespace
# ("text/html;charset=utf-8" is valid), so accept start-of-string, ';'
# or whitespace in front of the keyword instead of requiring whitespace.
my ($cs) = lc($ct) =~ m/(?:\A|[;\s])charset\s*=\s*["']*([^\s'";]*)/;

# Rewrite "utf=" as "utf-" (e.g. "utf=8" becomes "utf-8"); presumably
# a workaround for mistyped headers seen in the wild.
$cs =~ s/utf=/utf-/g if defined($cs);
return $cs if defined($cs) && $cs ne "";

####################################################################
# Use the META tag
# NOTE(review): this section is damaged in the available source.  The
# pattern literal lost everything between "m/" and a later
# "content =~", so the statements that extracted the charset from the
# META tag, and whatever was returned when the content contains
# non-ASCII bytes, cannot be recovered from here.  The remnant is kept
# verbatim below; restore the section from the upstream file.
#   ($ct) = $resp->content =~ m/content =~ m/[^\x00-\x7F]/;

####################################################################
# Nothing conclusive was found: use the default.
return "iso-8859-1";