#!/usr/bin/perl use strict; use Encode; use Cwd qw/abs_path/; use File::Basename; use Spreadsheet::ParseExcel; my $Usage = "$0 file.xls > file.txt\n"; ( @ARGV == 1 and -f $ARGV[0] ) or die $Usage; my $filepath = shift; my ( $name, $path, $suff ) = fileparse( $filepath, qw/.xls/ ); if ( $path !~ m{^/} ) { $path ||= "."; $path = abs_path( $path ); } my $xl = Spreadsheet::ParseExcel->new; my $wb = $xl->Parse( $filepath ) or die "$filepath: $!\n"; my %abrckt = ( '<' => '<', '>' => '>' ); binmode STDOUT, ":utf8"; print "\n"; print "\n"; for my $sheet ( @{$wb->{Worksheet}} ) { printf( "\n", $sheet->{Name} ); $sheet->{MaxRow} ||= $sheet->{MinRow}; for my $row ( $sheet->{MinRow} .. $sheet->{MaxRow} ) { $sheet->{MaxCol} ||= $sheet->{MinCol}; my $col_ltr = 'A'; for my $col ( $sheet->{MinCol} .. $sheet->{MaxCol} ) { my $cell = $sheet->{Cells}[$row][$col]; if ( ! $cell ) { $col_ltr++; next; } my $val = $cell->{Val}; if ( !defined( $val ) or $val eq '' ) { $col_ltr++; next; } $val = decode( "UTF-16BE", $val ) if ( $cell->{Code} eq 'ucs2' ); if ( $val =~ /^\s*$/ ) { $col_ltr++; next; } $val =~ s/\&/\&/g; $val =~ s/([<>])/$abrckt{$1}/g; my $attrstr = get_cell_attribs( $cell->{Format} ); if ( not $cell->{Rich} ) { $attrstr .= get_font_attribs( $cell->{Format}{Font} ); printf( "%s\n", $row+1, $col_ltr++, $attrstr, $val ); } else { my $bgnchr = 0; my $curr_attr = get_font_attribs( $cell->{Format}{Font} ); my @chunks = ( ); for my $rich ( @{$cell->{Rich}} ) { my ( $rpos, $rfont ) = @$rich; my $chnkval = substr( $val, $bgnchr, $rpos-$bgnchr ); if ( $chnkval !~ /^\s*$/ ) { push @chunks, { val => $chnkval, fnt => $curr_attr }; } $curr_attr = get_font_attribs( $rfont ); $bgnchr = $rpos; last if ( $rpos >= length( $val ) or substr( $val, $rpos ) =~ /^\s*$/ ); } push @chunks, { val => substr( $val, $bgnchr ), fnt => $curr_attr }; if ( @chunks > 1 ) { my $j = 1; # check for and merge adjacent blocks with same attributes while ( $j < @chunks ) { my $i = $j-1; if ( $chunks[$i]{fnt} ne $chunks[$j]{fnt} ) { $j++; } else { $chunks[$j]{val} = $chunks[$i]{val} . $chunks[$j]{val}; @chunks = splice( @chunks, $i, $j ); } } } if ( @chunks == 1 ) { $attrstr .= $curr_attr; printf( "%s\n", $row+1, $col_ltr++, $attrstr, $val ); } else { printf( "%s\n", $row+1, $col_ltr++, $attrstr, $val ); for my $chnk ( @chunks ) { printf( " %s\n", $chnk->{fnt}, $chnk->{val} ); } print "\n"; } } } } print "\n"; } print ""; sub get_font_attribs { my ( $font ) = @_; my %fontfmt = (); $fontfmt{style} = 'b' if ( $font->{Attr} & 1 ); $fontfmt{style} .= 'i' if ( $font->{Attr} & 2 ); $fontfmt{style} .= 'u' if ( $font->{Attr} & 4 ); my $i = Spreadsheet::ParseExcel->ColorIdxToRGB( $font->{Color} ); $fontfmt{fgclr} = $i unless ( $i =~ /^0+$/ ); attr_hash2str( \%fontfmt ); } sub get_cell_attribs { my ( $form ) = @_; my %cellfmt = (); if ( $form->{Fill}[0] != 0 and $form->{Fill}[1] != 64 ) { $cellfmt{bgclr} = Spreadsheet::ParseExcel->ColorIdxToRGB( $form->{Fill}[1] ); } attr_hash2str( \%cellfmt ); } sub attr_hash2str { my $hash = shift; my $attrstr = ''; if ( keys %$hash ) { $attrstr = " ". join( " ", map { sprintf( "%s=\"%s\"", $_, $$hash{$_} ) } sort keys %$hash ); } return $attrstr; }