Normally, PerlMonks is not a coding service, but this one happened to be interesting to me. It appears that the Excel file is encoded in one of the Mac formats; I'm guessing MacRoman. I think this does what you want:
use warnings;
use strict;
use Spreadsheet::ParseExcel ();
use Spreadsheet::Read 'ReadData';
use Encode 'decode';
use XML::LibXML;
# Convert the first worksheet of an Excel workbook into an XML file:
# one <requirement> element per row after the first, with one child
# element per mapped column.

my $INFILE   = 'TestPGR.xls';
my $ENCODING = 'MacRoman';    # guessed encoding of the cell bytes in $INFILE
my $OUTFILE  = 'TestPGR.xml';

# Column number => name of the XML element that receives that column's cell.
# Columns that are not listed here are skipped.
my %FIELDS = (
    1 => 'docid',
    2 => 'title',
    3 => 'version',
    4 => 'revision',
    5 => 'node_order',
    6 => 'description',
    7 => 'status',
    8 => 'type',
    9 => 'expected_coverage',
);

# Fail here, with the file name, if the workbook could not be read at all;
# otherwise the problem would surface below as a misleading "no sheet 1".
my $book = ReadData($INFILE, rc=>1, cells=>0)
    or die "Can't read spreadsheet '$INFILE'";
my $sheet = $book->[1] or die "Book doesn't have a sheet 1";

my $doc  = XML::LibXML::Document->createDocument('1.0', 'UTF-8');
my $reqs = $doc->createElement('requirements');
$doc->setDocumentElement($reqs);

# Start one row past minrow: the first row is not converted
# (presumably it holds the column headers -- confirm against the workbook).
for my $r ( $sheet->{minrow}+1 .. $sheet->{maxrow} ) {
    my $req = $doc->createElement('requirement');
    for my $c ( $sheet->{mincol} .. $sheet->{maxcol} ) {
        next unless exists $FIELDS{$c};
        my $node = $doc->createElement($FIELDS{$c});
        my $raw  = $sheet->{cell}[$c][$r];
        # An empty cell is undef: still emit the element, just without text,
        # instead of feeding undef to decode() and appendText().
        if ( defined $raw ) {
            # FB_CROAK: die on bytes that are invalid in $ENCODING.
            # LEAVE_SRC: keep decode() from consuming the cell value in place.
            my $val = decode($ENCODING, $raw,
                Encode::FB_CROAK | Encode::LEAVE_SRC);
            $node->appendText($val);
        }
        $req->appendChild($node);
    }
    $reqs->appendChild($req);
}

# Second argument 1 = pretty-print. Do not let a failed write go unnoticed.
$doc->toFile($OUTFILE, 1)
    or die "Can't write XML to '$OUTFILE'";
Output (a UTF-8 encoded file):
<?xml version="1.0" encoding="UTF-8"?>
<requirements>
<requirement>
<docid>PP10-RG-010</docid>
<title>MASTER DATA</title>
<version>1</version>
<revision>1</revision>
<node_order>1</node_order>
<description>Le format et le contenu des 2 documents sont décrits dans la SFD XXX (JIRA 624).</description>
<status>V</status>
<type>3</type>
<expected_coverage>1</expected_coverage>
</requirement>
<requirement>
<docid>PP10-RG-020</docid>
<title>MASTER DATA</title>
<version>1</version>
<revision>1</revision>
<node_order>2</node_order>
<description>éiùûôêçà</description>
<status>V</status>
<type>3</type>
<expected_coverage>1</expected_coverage>
</requirement>
<requirement>
<docid>PP10-RG-030</docid>
<title>MASTER DATA</title>
<version>1</version>
<revision>1</revision>
<node_order>2</node_order>
<description>éiùûôêçà&lt;&gt;
aqwzsx</description>
<status>V</status>
<type>3</type>
<expected_coverage>1</expected_coverage>
</requirement>
</requirements>
|