use Encode qw( encode decode );
use URI::Escape qw( uri_escape );
# From DB
my $title = decode('UTF-8', "OverlordQ/R\x{C4}\x{AB}ga-Herson-Astrahan");
# Escape each URL component.
my @uri_components = map { uri_escape(encode('UTF-8', $_)) }
split qr{/},
$title;
# Prints OverlordQ/R%C4%ABga-Herson-Astrahan
print(join('/', @uri_components), "\n");
####
DB<18> x $str
0 'http://en.wikipedia.org/w/api.php?prop=revisions&format=xml&titles=User:OverlordQ/Rīga-Herson-Astrahan&action=query&rvlimit=20'
####
DB<24> x $str
0 'http://en.wikipedia.org/w/api.php?prop=revisions&format=xml&titles=User:OverlordQ/R%C3%84%C2%ABga-Herson-Astrahan&action=query&rvlimit=20'
####
#!/usr/bin/perl
use strict;
use warnings;
use lib '/home/overlordq/lib';
use LWP::UserAgent;
use Data::Dumper;
use DBI;
use wikidb;
$|++;
my $ua = LWP::UserAgent->new();
my $dbh = DBI->connect("DBI:mysql:database=enwiki_p;host=sql-s1",$user,$password);
my $query = "SELECT page_title FROM page WHERE page_title LIKE 'OverlordQ%' AND page_id = '22325873'";
my $sth = $dbh->prepare($query);
$sth->execute();
my $title;
while(my $ref = $sth->fetchrow_hashref() ) {
$title = $ref->{'page_title'};
}
print "Title: $title\n";
if( isUTF($title) ) {
print "\tis UTF8\n";
} else {
print "\tis not UTF8\n";
}
my $res = $ua->post('http://en.wikipedia.org/w/api.php?prop=revisions&format=xml&titles=User:' . $title . '&action=query&rvlimit=20');
my $uriUsed = $res->request->uri->as_string;
print "URI: $uriUsed\n";
if( isUTF($title) ) {
print "\tis already UTF8\n";
} else {
utf8::upgrade($title);
if( isUTF($title) ) {
print "$title\n\tis now UTF8\n";
}
}
$res = $ua->post('http://en.wikipedia.org/w/api.php?prop=revisions&format=xml&titles=User:' . $title . '&action=query&rvlimit=20');
$uriUsed = $res->request->uri->as_string;
print "URI: $uriUsed\n";
print "Title: $title\n";
sub isUTF {
my $string = shift;
return utf8::is_utf8($string);
}