use Encode qw( encode decode ); use URI::Escape qw( uri_escape ); # From DB my $title = decode('UTF-8', "OverlordQ/R\x{C4}\x{AB}ga-Herson-Astrahan"); # Escape each URL component. my @uri_components = map { uri_escape(encode('UTF-8', $_)) } split qr{/}, $title; # Prints OverlordQ/R%C4%ABga-Herson-Astrahan print(join('/', @uri_components), "\n"); #### DB<18> x $str 0 'http://en.wikipedia.org/w/api.php?prop=revisions&format=xml&titles=User:OverlordQ/Rīga-Herson-Astrahan&action=query&rvlimit=20' #### DB<24> x $str 0 'http://en.wikipedia.org/w/api.php?prop=revisions&format=xml&titles=User:OverlordQ/R%C3%84%C2%ABga-Herson-Astrahan&action=query&rvlimit=20' #### #!/usr/bin/perl use strict; use warnings; use lib '/home/overlordq/lib'; use LWP::UserAgent; use Data::Dumper; use DBI; use wikidb; $|++; my $ua = LWP::UserAgent->new(); my $dbh = DBI->connect("DBI:mysql:database=enwiki_p;host=sql-s1",$user,$password); my $query = "SELECT page_title FROM page WHERE page_title LIKE 'OverlordQ%' AND page_id = '22325873'"; my $sth = $dbh->prepare($query); $sth->execute(); my $title; while(my $ref = $sth->fetchrow_hashref() ) { $title = $ref->{'page_title'}; } print "Title: $title\n"; if( isUTF($title) ) { print "\tis UTF8\n"; } else { print "\tis not UTF8\n"; } my $res = $ua->post('http://en.wikipedia.org/w/api.php?prop=revisions&format=xml&titles=User:' . $title . '&action=query&rvlimit=20'); my $uriUsed = $res->request->uri->as_string; print "URI: $uriUsed\n"; if( isUTF($title) ) { print "\tis already UTF8\n"; } else { utf8::upgrade($title); if( isUTF($title) ) { print "$title\n\tis now UTF8\n"; } } $res = $ua->post('http://en.wikipedia.org/w/api.php?prop=revisions&format=xml&titles=User:' . $title . '&action=query&rvlimit=20'); $uriUsed = $res->request->uri->as_string; print "URI: $uriUsed\n"; print "Title: $title\n"; sub isUTF { my $string = shift; return utf8::is_utf8($string); }