Takamoto has asked for the wisdom of the Perl Monks concerning the following question:
I am parsing the JSON file returned by a service, and I can not get rid of the error "malformed UTF-8 character in JSON string". Any advice is welcomed. I see there are strange escapes in the JSON, but this is how data are returned....
use LWP::UserAgent; use JSON; use Data::Dumper; my $url = "http://127.0.0.1:8000/complete/"; my $data = {text => "my word"}; my $ua = LWP::UserAgent->new; # Creating a POST request my $req = HTTP::Request->new(POST => $url); $req->header('Content-Type' => 'application/json'); $req->content(encode_json($data)); # Sending request my $response = $ua->request($req); print Dumper $response; if ($response->is_success) { print "JSON data was successfully returned!\n"; my $content = decode_json($response->content); # Extract the JSON string containing the terms my $json_string = $content->{'response'}{'choices'}[0]{'message'}{ +'content'}; $json_string = (split "\n\n", $json_string)[0]; # Load the JSON string into a hash my $json_data = decode_json($json_string); # Extract the terms from the hash my $terms = $json_data->{'terms'}; # Print the terms foreach my $term (@$terms) { print "$term\n"; } } else { print "Error: " . $response->status_line . " - " . $response->cont +ent . "\n"; }
This is what I get
$VAR1 = bless( { '_protocol' => 'HTTP/1.1', '_content' => '{"status":"SUCCESS","data":"promt","re +sponse":{"id":"xxx","object":"chat.completion","created":1690752087," +model":"gpt-3.5-turbo-0613","choices":[{"index":0,"message":{"role":" +assistant","content":"{\\n \\"related_words\\": [\\n \\"Bundeskan +zlerin\\",\\n \\"Politik\\",\\n \\"Deutschland\\",\\n \\"CDU +\\",\\n \\"Kanzleramt\\",\\n \\"Bundesregierung\\",\\n \\"Po +litikerin\\",\\n \\"Bundestag\\",\\n \\"Regierung\\",\\n \\" +Partei\\",\\n \\"Wahl\\",\\n \\"Bundeskanzler\\",\\n \\"Euro +päische Union\\",\\n \\"Bundesrepublik\\",\\n \\"Europa\\",\\n + \\"Führungsperson\\",\\n \\"Staatschefin\\",\\n \\"Frauenpol +itik\\",\\n \\"G8-Gipfel\\",\\n \\"Macht\\"\\n ]\\n}"},"finish +_reason":"stop"}],"usage":{"prompt_tokens":48,"completion_tokens":145 +,"total_tokens":193}}}', '_msg' => 'OK', '_request' => bless( { '_content' => '{"text":"prompt +"}', '_headers' => bless( { 'user-a +gent' => 'libwww-perl/6.62', 'conten +t-type' => 'application/json' }, 'HTTP: +:Headers' ), '_uri_canonical' => bless( do{ +\(my $o = 'http://127.0.0.1:8000/complete/')}, 'URI::http' ), '_method' => 'POST', '_uri' => $VAR1->{'_request'}{ +'_uri_canonical'} }, 'HTTP::Request' ), '_rc' => '200', '_headers' => bless( { 'client-peer' => '127.0.0.1:80 +00', 'client-response-num' => 1, '::std_case' => { 'client-date +' => 'Client-Date', 'client-peer +' => 'Client-Peer', 'client-resp +onse-num' => 'Client-Response-Num' }, 'content-length' => '894', 'connection' => 'close', 'date' => 'Sun, 30 Jul 2023 21 +:21:25 GMT', 'client-date' => 'Sun, 30 Jul +2023 21:21:31 GMT', 'content-type' => 'application +/json', 'server' => 'uvicorn' }, 'HTTP::Headers' ) }, 'HTTP::Response' ); JSON data was successfully returned! malformed UTF-8 character in JSON string, at character offset 242 (bef +ore "\x{fffd}che Union",\n...") at /Users/post.pl line 29.
PS: my code is ported from Python. In the Python code there is no complaining about malformed UTF-8
|
---|
Replies are listed 'Best First'. | |
---|---|
Re: malformed UTF-8 character in JSON string
by Haarg (Priest) on Jul 31, 2023 at 01:24 UTC | |
by cavac (Prior) on Jul 31, 2023 at 06:03 UTC | |
Re: malformed UTF-8 character in JSON string
by haj (Vicar) on Jul 30, 2023 at 22:34 UTC |