The Japanese strings come back with a Unicode hex representation of each character, as found here.
What does that mean?
Are the bytes wrong, or are you getting HTML entities, or what?
Please make sure your code compiles, and structure it as an effective bug report (a.k.a. a test case), for example:
#!/usr/bin/perl --

# Self-contained test case for the WWW::Mechanize/WWW::Scripter encoding
# question: fetch a local HTML file, then convert the content to EUC-JP twice.
use strict;
use warnings;

use WWW::Scripter;
use Encode qw' from_to ';
use Jcode;
use URI::file;
use File::Temp;
use Test::More tests => 3;

# Temporary .html file plus the file:// URI the user agent will fetch.
my $fh       = File::Temp->new( SUFFIX => '.html' );
my $filename = $fh->filename;
my $uri      = URI::file->new_abs( $filename )->as_string;

# Interpolating heredoc: the \x.. escapes become "blah" and three 0xFF chars.
print $fh <<"__HTML__";
<html><head>
<title> title \x62\x6c\x61\x68 </title>
</head><body>
\x62\x6c\x61\x68\x20\x62\x6c\x61\x68\x20\x62\x6c\x61\x68
\xFF\xFF\xFF
</body></html>
__HTML__

ok(close $fh, "write tempfile ");

my $w = WWW::Scripter->new ( qw/ autocheck 1 /);
#~ my $w = WWW::Mechanize->new ( qw/ autocheck 1 /);
$w->get( $uri );

# Conversion 1: Encode::from_to rewrites $html1 in place.
my $html1 = $w->content();
from_to( $html1, 'utf8', 'euc-jp' );

# Conversion 2: Jcode; expected values below are placeholders to replace.
my $html2 = Jcode->new( $w->content() )->euc();

is( $html1, "something", "something blah");
is( $html2, "something else", "something else blah");

__END__

$ prove pm.911748.pl
pm.911748.pl .. 1/3
#   Failed test 'something blah'
#   at pm.911748.pl line 43.
#          got: '<html><head>
# <title> title blah </title>
# </head><body>
# blah blah blah
# ÿÿÿ
# </body>
# </html>'
#     expected: 'something'

#   Failed test 'something else blah'
#   at pm.911748.pl line 44.
#          got: '<html><head>
# <title> title blah </title>
# </head><body>
# blah blah blah
# ÿÿÿ
# </body>
# </html>'
#     expected: 'something else'
# Looks like you failed 2 tests of 3.
pm.911748.pl .. Dubious, test returned 2 (wstat 512, 0x200)
Failed 2/3 subtests

Test Summary Report
-------------------
pm.911748.pl (Wstat: 512 Tests: 3 Failed: 2)
  Failed tests:  2-3
  Non-zero exit status: 2
Files=1, Tests=3,  1 wallclock secs ( 0.06 usr +  0.00 sys =  0.06 CPU)
Result: FAIL
In reply to Re: WWW::Mechanize & encoding
by Anonymous Monk
in thread WWW::Mechanize & encoding
by GaijinPunch
| For: | Use: |
| & | &amp; |
| < | &lt; |
| > | &gt; |
| [ | &#91; |
| ] | &#93; |