# Sanity-check the encoding that the XML text in $strXML advertises.
#
# If the document declares UTF-8 (or declares no encoding at all, which XML
# treats as UTF-8 by default) but its bytes are not well-formed UTF-8, rewrite
# the XML declaration so that it says ISO-8859-1 instead. Documents that
# explicitly declare some other encoding are left untouched.
#
# Reads and may modify $strXML; writes progress messages to LOGFILE.
# NOTE(review): assumes $strXML holds raw octets (not an already-decoded
# character string) and that LOGFILE was opened by the caller -- confirm.
# NOTE(review): ISO-8859-1 is a guess at the real encoding, inherited from
# the original code; it is not detected.
if ($strXML !~ m{^\s*<\?xml [^\?]+?encoding\s*=\s*['"]([^'"]*)['"][^\?]*\?>} # no <?xml ... encoding="..." ?> at the start
    or uc($1) eq 'UTF-8'                                                      # or the declared encoding is UTF-8
) {
    print LOGFILE "It claims to be UTF-8.\n\n";

    # So it should be UTF-8 -- verify it strictly.  A plain decode_utf8()
    # call silently replaces malformed bytes with U+FFFD and returns a true
    # value, so it cannot be used as a validity test.  FB_CROAK makes the
    # decoder die on the first malformed sequence instead; it may also
    # consume its input, hence the scratch copy.
    require Encode;
    my $probe         = $strXML;
    my $is_valid_utf8 = eval {
        Encode::decode('UTF-8', $probe, Encode::FB_CROAK());
        1;
    };

    if (!$is_valid_utf8) {
        print LOGFILE "Hey fix the encoding!!! This aint UTF-8!!!\n\n";

        # Case 1: a declaration with an encoding attribute -- swap its value.
        $strXML =~ s{^\s*(<\?xml [^\?]+?encoding\s*=\s*['"])[^'"]*(['"][^\?]*\?>)}{$1ISO-8859-1$2}
            # Case 2: a declaration without an encoding attribute -- insert one
            # right after version="...", where the XML grammar requires it.
            or $strXML =~ s{^\s*(<\?xml\s+version\s*=\s*(?:'[^']*'|"[^"]*"))(?=[^\?]*\?>)}{$1 encoding="ISO-8859-1"}
            # Case 3: no declaration at all -- prepend a complete one.
            or $strXML = qq{<?xml version="1.0" encoding="ISO-8859-1"?>\n} . $strXML;
    }
}