# Print the first column (the "word" column) of every row in worksheet
# 'Sheet 1' of an Excel SpreadsheetML file, one value per output line.

use strict;
use warnings;
use XML::LibXML;

# ':std' applies the UTF-16 layer to STDIN/STDOUT/STDERR as well as to
# handles opened in this lexical scope.
# NOTE(review): this means the printed output is UTF-16 encoded -- confirm
# that is what downstream consumers expect.
use open ':std', ':encoding(UTF-16)';

# 1-based position of the column that holds the word.
use constant XML_WORD_COLUMN => 1;

my $filename = 'Concordance.xml';

open my $fh, '<', $filename
    or die "Can't open $filename: $!";

# Drop the PerlIO layers on this handle: the XML parser is handed the raw
# bytes and is left to work out the document encoding itself.
binmode $fh, ':raw';

my $doc = XML::LibXML->load_xml(IO => $fh);
close $fh;

# Register the namespace prefixes used by the XPath expressions below.
my $xpc = XML::LibXML::XPathContext->new($doc);
$xpc->registerNs( o    => "urn:schemas-microsoft-com:office:office" );
$xpc->registerNs( x    => "urn:schemas-microsoft-com:office:excel" );
$xpc->registerNs( ss   => "urn:schemas-microsoft-com:office:spreadsheet" );
$xpc->registerNs( html => "http://www.w3.org/TR/REC-html40" );
$xpc->registerNs( def  => "urn:schemas-microsoft-com:office:spreadsheet" );

# Scalar context yields a node list object; an empty list means the
# worksheet or its table is missing.  No system call failed here, so $!
# is deliberately not part of the message.
my $rows = $xpc->findnodes(
    q{//ss:Worksheet[@ss:Name='Sheet 1']/ss:Table/ss:Row}
);
die "Can't find table in Worksheet 'Sheet 1'\n"
    unless $rows->size;

ROW:
foreach my $row ($rows->get_nodelist) {
    my $col_index = 1;

    foreach my $cell ($row->nonBlankChildNodes) {
        next if $col_index++ != XML_WORD_COLUMN;

        # Evaluate through $xpc so the 'ss' prefix is resolved from the
        # registrations above rather than from whatever prefixes the
        # document itself happens to declare.
        my $data = $xpc->findnodes('./ss:Data', $cell);
        print $data->to_literal, "\n";

        # Only one column is wanted per row; skip the remaining cells.
        next ROW;
    }
}

__END__
####
Word Count
Aaron 330
Aaron’s 25
Abaddon 7
1 1 2