Hi Dave,
Here is my code:
#!/usr/local/bin/perl
# Declarations
#===========================================================
use File::Basename;
use Spreadsheet::ParseExcel;
use Text::Iconv;
use Spreadsheet::XLSX;
use IO::Handle;
use encoding 'UTF8';
# Program proper
# Check the input file provided is a xls or xlsx
#===========================================================
my $input_file_name = shift || usage();
($file,$dir,$ext) = fileparse($input_file_name, qr/\.[^.]*/);
$output_file_name = "$dir/"."$file".".csv";
open FILE, ">$output_file_name";
select FILE;
#===========================================================
parse_excel($input_file_name);
exit 0;
# Subroutines
#===========================================================
sub parse_excel {
if ($ext eq ".xls") {
my %arg = ( record_sep => "\n",
# field_sep => "\t",
field_sep => ",",
xls => undef );
if ( @_ == 1 ) {
$arg{xls} = shift;
} elsif ( not @_ % 2 ) {
%arg = ( %arg, @_ );
}
-e $arg{xls} or
die "Must provide valid XLS file! $arg{xls}, $!\n";
# create a ParseExcel object
my $excel_obj = Spreadsheet::ParseExcel->new();
my $workbook = $excel_obj->Parse($arg{xls});
# make sure we're in business
die "Workbook did not return worksheets!\n"
unless ref $workbook->{Worksheet} eq 'ARRAY';
# we need to get each worksheet from the workbook in turn
# for my $worksheet ( @{$workbook->{Worksheet}} )
my $worksheet = $workbook->Worksheet(0);
{
# {Cells}[row][col]
# empty worksheets have undef for MaxCol and MaxRow
my $last_col = $worksheet->{MaxCol} || 0;
my $last_row = $worksheet->{MaxRow} || 0;
for my $row ( 0 .. $last_row ) {
for my $col ( 0 .. $last_col) {
my $cell = $worksheet->{Cells}[$row][$col];
# print ref $cell ?
# $cell->Value : '' ;
$str = ref $cell ? $cell->Value : '';
$str =~ s/\s+$//g;
$str =~ s/\"//g;
$str =~ s/\r\n//g;
$str =~ s/amp\;//g;
$str =~ s/ \d\d\:\d\d\:\d\d//g;
if ($str =~ /(\d*\d)-(\d*\d)-(\d\d)/) {
($m, $d, $y) = split (/-/, $str);
#if ($y eq '00') {$y = 0};
if ($y > 50) {
$y += 1900;
} elsif ($y < 50) {
$y += 2000;
}
$str = sprintf '%d/%d/%d', $m, $d, $y;
}
if ($row == 0) {
print $str;
} else {
print "\"$str\"";
}
print $arg{field_sep} unless $col == $last_col;
}
print $arg{record_sep} ; # record ends
}
print "\n" ; # worksheet ends
}
} elsif ($ext eq ".xlsx") {
my $converter = Text::Iconv -> new ("utf-8", "windows-1252");
my %arg = ( record_sep => "\n",
# field_sep => "\t",
field_sep => ",",
xls => undef );
if ( @_ == 1 ) {
$arg{xls} = shift;
} elsif ( not @_ % 2 ) {
%arg = ( %arg, @_ );
}
-e $arg{xls} or
die "Must provide valid XLS file! $arg{xls}, $!\n";
my $excel = Spreadsheet::XLSX -> new ($input_file_name, $converter);
# my $excel = Spreadsheet::XLSX -> new ($input_file_name, "");
# foreach my $sheet (@{$excel -> {Worksheet}})
my $sheet = $excel -> Worksheet(0);
{
# printf("Sheet: %s\n", $sheet->{Name});
$sheet -> {MaxRow} ||= $sheet -> {MinRow};
foreach my $row ($sheet -> {MinRow} .. $sheet -> {MaxRow}) {
$sheet -> {MaxCol} ||= $sheet -> {MinCol};
foreach my $col ($sheet -> {MinCol} .. $sheet -> {Max
+Col}) {
my $cell = $sheet->{Cells}[$row][$col];
#print ref $cell ?
# $cell->Value : '';
$str = ref $cell ? $cell->Value : '';
$str =~ s/\s+$//g;
$str =~ s/\"//g;
$str =~ s/\r\n//g;
$str =~ s/amp\;//g;
$str =~ s/ \d\d\:\d\d\:\d\d//g;
if ($str =~ /(\d*\d)-(\d*\d)-(\d\d)/) {
($m, $d, $y) = split (/-/, $str);
#if ($y eq '00') {$y = 0};
if ($y > 50) {
$y += 1900;
} elsif ($y < 50) {
$y += 2000;
}
$str = sprintf '%d/%d/%d', $m, $d, $y;
}
if ($row == 0) {
print $str;
} else {
print "\"$str\"";
}
print $arg{field_sep} unless $col == $sheet -> {MaxCol};
}
print $arg{record_sep}; # record ends
}
print "\n"; # worksheet ends
}
} elsif ($ext ne ".bubba") {
print "This extension is not supported at this time";
print "\n";
}
}
CLOSE FILE;
#===========================================================
sub usage {
my ( $tool ) = $0 =~ m,([^\/]+$),;
print <<HERE;
------------------------------------------------------------
USAGE:
$tool EXCEL_FILE.xls || EXCEL_FILE.xlsx
Takes an Excel file either with a xls or xlsx extension,
parses it into comma delimited fields and rows, and sends
the results to a filename with .csv extension.
------------------------------------------------------------
HERE
exit 0;
}
Appreciate your time and help.
Thanks,
Raj
|