#!/usr/bin/perl
#
# Diagnostic script: for each user-page title returned by a replica-database
# query, fetch the page's recent revisions from the MediaWiki API twice --
# once with the title as decoded, once after utf8::upgrade() -- and print the
# URI that LWP actually requested each time, together with the state of
# Perl's internal UTF8 flag on the title.
use strict;
use warnings;
use lib '/home/overlordq/lib';

use LWP::UserAgent;
use Data::Dumper;
use DBI;
use Encode;
use wikidb;

$|++;                        # unbuffered STDOUT so progress shows immediately
binmode STDOUT, ":utf8";     # titles are printed as character strings

# Pieces of the API URL; the title is concatenated between them unescaped,
# because observing how LWP/URI encodes it is the point of the script.
my $prefix  = 'http://en.wikipedia.org/w/api.php?prop=revisions&format=json&titles=User:';
my $postfix = '&action=query&rvlimit=20';

my $ua = LWP::UserAgent->new();

# NOTE(review): $user and $password are not declared in this file --
# presumably they are exported by the local wikidb module; confirm.
# RaiseError makes connect/prepare/execute die with the DBI error message
# instead of failing later with "Can't call method ... on undefined value".
my $dbh = DBI->connect(
    "DBI:mysql:database=enwiki_p;host=sql-s1",
    $user, $password,
    { RaiseError => 1, PrintError => 0 },
);

# FIXME: the beginning of this SQL statement (SELECT list, FROM/JOIN clauses
# and the left-hand side of the first comparison) was lost when this file was
# extracted -- everything between a '<' and a '>' is missing.  Only the
# surviving tail is reproduced below; restore the full statement before
# running.  The loop below expects a result column named 'title'.
my $query = <<"SQL";
= 0 AND p2.page_namespace IS NULL AND p1.page_title LIKE 'OverlordQ%'
SQL

my $sth = $dbh->prepare($query);
$sth->execute();

while ( my $ref = $sth->fetchrow_hashref() ) {
    # The title is decoded from UTF-8 octets into a character string.
    my $title = decode( 'utf8', $ref->{'title'} );
    print "$title\n";

    if ( isUTF($title) ) {
        print "\tis UTF8\n";
    }
    else {
        print "\tis not UTF8\n";
    }

    # First request: title exactly as decoded.
    fetch_and_report( $ua, $prefix . $title . $postfix );

    if ( isUTF($title) ) {
        print "\tis already UTF8\n";
    }
    else {
        # Force the internal UTF-8 representation, then confirm it took.
        utf8::upgrade($title);
        if ( isUTF($title) ) {
            print "$title\n\tis now UTF8\n";
        }
    }

    # Second request: same title, now guaranteed to carry the UTF8 flag.
    fetch_and_report( $ua, $prefix . $title . $postfix );
}

$dbh->disconnect();

# fetch_and_report($ua, $url)
#   Issues a GET for $url with the given LWP::UserAgent and prints the URI of
#   the request attached to the response.  A failed request is reported on
#   STDERR; STDOUT output is the same either way.
sub fetch_and_report {
    my ( $agent, $target ) = @_;

    my $res = $agent->get($target);
    warn "\trequest failed: ", $res->status_line, "\n"
        unless $res->is_success;

    my $url = $res->request->uri->as_string;
    print "URI: $url\n";
    return;
}

# isUTF($string)
#   Returns true when Perl's internal UTF8 flag is set on $string
#   (thin wrapper around utf8::is_utf8).
sub isUTF {
    my $string = shift;
    return utf8::is_utf8($string);
}