comment on

Hello monks,

I'm trying to parse an excel for searching some issue codes. In the excel sheet, I look for 'Work Notes' column and try to search for Issue code. If there is more than One issue code, i need to duplicate the row for each Issue code. Could you please help me.. here is what i've tried so far...

Data looks like this

Sl No Employee Task Code Work Notes
1     8989       M8899    "Issue Code : ioi-909 jskfjskf 
Issue Code: 9090-8989"
2    98983       M90909       "Issue Code : ioi-909
Issue Code: 898989898"
[download]

use strict; 

use Spreadsheet::ParseExcel; 
use Spreadsheet::ParseExcel::SaveParser; 
use Data::Dumper; 

my $parser = new Spreadsheet::ParseExcel::SaveParser; 
my $template = $parser->Parse('Queue.xls'); 

my $sheet = 0; 
my $row = 0; 
my $col = 0; 
# Get the format from the cell 

#my $format = $template->{Worksheet}->$sheet->{Cells}$row$col ->{Forma
+tNo}; 

my $worknotes_col = '3'; 
my @codes; 

# Write data to some cells 
#
#$template->AddCell(0, $row, $col, 1, $format); 

#$template->AddCell(0, $row+1, $col, "Hello", $format); 

for my $worksheet ( $template->worksheets() ) { 
    my ( $row_min, $row_max ) = $worksheet->row_range(); 
    my ( $col_min, $col_max ) = $worksheet->col_range(); 
    my @columns = (3); 
    
    for my $row ( $row_min .. $row_max ) { 
        @codes = (); 
        for my $col (@columns) { 
            my $cell = $worksheet->get_cell( $row, $col ); 
            next unless $cell; 
            # do the calculations here 
            #print "Row, Col = ($row, $col)\n"; 
            #print "Value = ", $cell->value(), "\n"; 
            
            if ( $cell->value() ) { 
                my @worknotes_data = split/\n/, $cell->value(); 
                
                #skip blank lines 
                @worknotes_data = grep (/\S/, @worknotes_data); 
                trim(@worknotes_data); 
                
                my @issue_codes = getIssueCodes(\@worknotes_data); 
                trim(@issue_codes); 
                
                if ( scalar @issue_codes > 1 ) { 
                    print "INFO: Found multiple Issue codes \n";
                    my $tmprow = $row;
                    
                    foreach my $issue ( @issue_codes ) { 
                        foreach my $a_col ( $col_min .. $col_max ) { 
                            my $cell = $worksheet->get_cell( $tmprow, 
+$a_col ); 
                            print $cell->value(), "\n";
                            $template->AddCell(0, $tmprow, $a_col, $ce
+ll->value()); 
                        } 
                        # now write the actual issue code 
                        $template->AddCell(0, $tmprow, $col_max + 1, $
+issue); 
                        $tmprow = $tmprow + 1; 
                    }
                } 
                # only 1 issue code is found, so add a new column to e
+xisting row 
                elsif ( scalar @issue_codes == 1 ) { 
                    print "INFO: Only One issue code is found, @issue_
+codes\n"; 
                    $template->AddCell(0, $row, $col_max + 1, "@issue_
+codes"); } 
                else { 
                    print "INFO: Skipping the row as no issue codes ar
+e found\n"; 
                } 
            } 
        } 
    } 
} 

# Add a new worksheet 
#$template->AddWorksheet('New Data'); 

# The SaveParser SaveAs() method returns a reference to a 
# Spreadsheet::WriteExcel object. If you wish you can then 
# use this to access any of the methods that aren't 
# available from the SaveParser object. If you don't need 
# to do this just use SaveAs(). 

my $workbook; 
{ 
    # SaveAs generates a lot of harmless warnings about unset 
    # Worksheet properties. You can ignore them if you wish. 
    local $^W = 0; 
    
    # Rewrite the file or save as a new file 
    $workbook = $template->SaveAs('new.xls'); 
} 

sub getIssueCodes { 
    my $worknotes = shift; 
    foreach my $line ( @{$worknotes} ) { 
        if ( $line =~ /Issue\s*Code/gi ) { 
            my ($text, $code) = split/\:/, $line; 
            if ( $code !~ /(NA|N\/A)/gi ) { 
                push (@codes, $code); 
            } 
        } 
    } 
    
    return @codes; 
} 

sub trim { 
    @_ = $_ if not @_ and defined wantarray; 
    @_ = @_ if defined wantarray; 
    
    for (@_ ? @_ : $_) { 
        s/^\s+//, s/\s+$// 
    } 
    
    return wantarray ? @_ : $_[0] if defined wantarray; 
}
[download]

In reply to Perl parse excel and duplicate rows by doubledecker

Posts are HTML formatted. Put <p> </p> tags around your paragraphs. Put <code> </code> tags around your code and data!

Titles consisting of a single word are discouraged, and in most cases are disallowed outright.

Read Where should I post X? if you're not absolutely sure you're posting in the right place.

Please read these before you post! —

Posts may use any of the Perl Monks Approved HTML tags:

a, abbr, b, big, blockquote, br, caption, center, col, colgroup, dd, del, details, div, dl, dt, em, font, h1, h2, h3, h4, h5, h6, hr, i, ins, li, ol, p, pre, readmore, small, span, spoiler, strike, strong, sub, summary, sup, table, tbody, td, tfoot, th, thead, tr, tt, u, ul, wbr

You may need to use entities for some characters, as follows. (Exception: Within code tags, you can put the characters literally.)

	For:		Use:
	&		`&`
	<		`<`
	>		`>`
	[		`[`
	]		`]`

Link using PerlMonks shortcuts! What shortcuts can I use for linking?

See Writeup Formatting Tips and other pages linked from there for more info.