#! perl -sw
# Demonstration script: fetch a UTF-8 sample page and compare several ways
# of measuring the returned string -- length(), the number of values from
# unpack 'C*' and from unpack 'U*' -- once in a block with `use utf8` and
# once in a block with `use bytes`, then compare against the size the OS
# reports after the content is written to disk in binary mode.
use strict;
use warnings;
use LWP::Simple;

my $content = get( 'http://www.columbia.edu/kermit/utf8.html' );

# get() hands back undef when the fetch fails; without this guard every
# measurement below would be taken on undef and report misleading zeros.
defined $content
    or die "Failed to fetch http://www.columbia.edu/kermit/utf8.html\n";

{
    # NOTE(review): per the utf8 pragma documentation, `use utf8` declares
    # the encoding of the *source text*; it is not expected to change how
    # $content is measured. The sample output after __END__ shows the same
    # numbers here as in the `use bytes` block.
    use utf8;

    my $c_len   = length $content;
    my @c_bytes = unpack 'C*', $content;
    my @c_chars = unpack 'U*', $content;

    print "Charwise - length:$c_len; 'C*':", scalar @c_bytes,
        "; 'U*':", scalar @c_chars, $/;
}

{
    # Same three measurements, taken under the `bytes` pragma.
    use bytes;

    my $b_len   = length $content;
    my @b_bytes = unpack 'C*', $content;
    my @b_chars = unpack 'U*', $content;

    print "Bytewise - length:$b_len; 'C*':", scalar @b_bytes,
        "; 'U*':", scalar @b_chars, $/;
}

{
    # Write the content out untranslated and ask the OS how big the file is.
    # A lexical handle replaces the bareword global JUNK.
    open my $junk_fh, '>', 'junk' or die $!;
    binmode( $junk_fh );

    # Buffered write errors can surface at print or only at close; check
    # both so the size reported below is never taken from a truncated file.
    print {$junk_fh} $content or die "Cannot write to junk: $!";
    close $junk_fh            or die "Cannot close junk: $!";

    print 'Actual (from os): ', -s 'junk', $/;
}

__END__
C:\test>239788
Charwise - length:31946; 'C*':31946; 'U*':28621
Bytewise - length:31946; 'C*':31946; 'U*':28621
Actual (from os): 31946