# A fancy version of "byte-swapping", combined with "wc".
# (Not suitable unless you know the input is UTF-16LE.)
#
# Reads input.file as UTF-16LE, writes the same text to output.file as
# UTF-16BE, and prints the line, word and character totals in three
# right-aligned, 7-wide columns.
use strict;
use warnings;

# Transcoding is done by the :encoding(...) PerlIO layer; a bare
# ":UTF-16LE" is not a layer name and would not decode anything.
open( my $in, '<:encoding(UTF-16LE)', 'input.file' )
    or die "Cannot open input.file for reading: $!";
open( my $out, '>:encoding(UTF-16BE)', 'output.file' )
    or die "Cannot open output.file for writing: $!";

# Start from zero so an empty input still reports clean numeric totals.
my ( $lines, $words, $chars ) = ( 0, 0, 0 );

while ( my $line = <$in> ) {
    $lines++;

    # split ' ' is the whitespace-splitting special case: leading
    # whitespace is skipped, so blank lines contribute zero words.
    my @fields = split ' ', $line;
    $words += @fields;

    # $line holds decoded characters here (not UTF-16 bytes), so this
    # counts _characters_ -- NOT BYTES. The line terminator is included.
    $chars += length $line;

    # The output layer re-encodes the characters as UTF-16BE.
    print {$out} $line
        or die "Cannot write to output.file: $!";
}

close $in
    or die "Error while reading input.file: $!";

# Buffered write failures only surface at close, so check it.
close $out
    or die "Cannot close output.file: $!";

printf( "%7d %7d %7d\n", $lines, $words, $chars );