simsrw73 has asked for the wisdom of the Perl Monks concerning the following question:
# Print the text of one column (XML_WORD_COLUMN) from every row of worksheet
# 'Sheet 1' in a SpreadsheetML workbook.
use strict;
use warnings;
use XML::LibXML;

# Default I/O layers for STDIN/STDOUT/STDERR and for opens without explicit
# layers.
# NOTE(review): this makes the printed output UTF-16, while the sample
# document declares encoding="utf-8" -- confirm UTF-16 output is intended.
use open ':std', ':encoding(UTF-16)';

# 1-based position of the <Cell> whose <Data> text is printed for each row.
use constant XML_WORD_COLUMN => 1;

my $filename = 'Concordance.xml';

# Explicit ':raw' in the mode keeps the default UTF-16 layer off this handle,
# so the parser receives the file's bytes untouched.
open my $fh, '<:raw', $filename
    or die "Can't open $filename: $!";
my $doc = XML::LibXML->load_xml( IO => $fh );
close $fh;

# XPath has no notion of a default namespace, so every element name in an
# expression must carry a prefix registered here. The prefixes are local to
# this context and need not match the ones used inside the document.
my $xpc = XML::LibXML::XPathContext->new($doc);
$xpc->registerNs( o    => 'urn:schemas-microsoft-com:office:office' );
$xpc->registerNs( x    => 'urn:schemas-microsoft-com:office:excel' );
$xpc->registerNs( ss   => 'urn:schemas-microsoft-com:office:spreadsheet' );
$xpc->registerNs( html => 'http://www.w3.org/TR/REC-html40' );
$xpc->registerNs( def  => 'urn:schemas-microsoft-com:office:spreadsheet' );

my $rows
    = $xpc->findnodes(q{//ss:Worksheet[@ss:Name='Sheet 1']/ss:Table/ss:Row});

# An XPath miss does not set $!, so it is left out of the message.
die "Can't find table in Worksheet 'Sheet 1'" unless $rows->size;

# Select the wanted cell by position among the ss:Cell children only, so
# comments or other nodes inside a row cannot shift the column count.
my $cell_xpath = './ss:Cell[' . XML_WORD_COLUMN . ']';

ROW:
for my $row ( $rows->get_nodelist ) {
    my ($cell) = $xpc->findnodes( $cell_xpath, $row );
    next ROW unless defined $cell;

    # Evaluate through $xpc (with the cell as context node) so the registered
    # 'ss' prefix is used rather than whatever prefixes the document declares.
    print $xpc->findvalue( './ss:Data', $cell ), "\n";
}

__END__
<?xml version="1.0" encoding="utf-8"?> <?mso-application progid="Excel.Sheet"?> <Workbook xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:x="urn:schemas-microsoft-com:office:excel" xmlns:ss="urn:schemas-microsoft-com:office:spreadsheet" xmlns:html="http://www.w3.org/TR/REC-html40" xmlns="urn:schemas-microsoft-com:office:spreadsheet"> <Worksheet ss:Name="Sheet 1"> <Table> <Row> <Cell> <Data ss:Type="String">Word</Data> </Cell> <Cell> <Data ss:Type="String">Count</Data> </Cell> </Row> <Row> <Cell> <Data ss:Type="String">Aaron</Data> </Cell> <Cell> <Data ss:Type="String">330</Data> </Cell> </Row> <Row> <Cell> <Data ss:Type="String">Aaron’s</Data> </Cell> <Cell> <Data ss:Type="String">25</Data> </Cell> </Row> <Row> <Cell> <Data ss:Type="String">Abaddon</Data> </Cell> <Cell> <Data ss:Type="String">7</Data> </Cell> </Row> <!-- Blah Blah Blah --> </Table> <x:WorksheetOptions> <x:FreezePanes /> <x:FrozenNoSplit /> <x:SplitHorizontal>1</x:SplitHorizontal> <x:TopRowBottomPane>1</x:TopRowBottomPane> <x:ActivePane>2</x:ActivePane> </x:WorksheetOptions> </Worksheet> </Workbook>
|
|---|
| Replies are listed 'Best First'. | |
|---|---|
|
Re: XML Namespaces
by haukex (Archbishop) on Oct 11, 2017 at 07:51 UTC | |
by simsrw73 (Novice) on Oct 11, 2017 at 21:07 UTC | |
|
Re: XML Namespaces
by choroba (Cardinal) on Oct 11, 2017 at 08:06 UTC | |
|
Re: XML Namespaces
by beech (Parson) on Oct 11, 2017 at 02:01 UTC | |
by simsrw73 (Novice) on Oct 11, 2017 at 03:16 UTC |