in reply to How to use threads to write worksheets of excel
July 22, 2015. The example was updated to work with MCE in trunk.
Update: This example runs slower than serial code utilizing 1 core. It was an interesting experiment and learned a lot from it. The serial time takes 14.5 seconds.
Update: Finalized caching/sharing bits. The threads is only needed here to not have workers call DESTROY inside Workbook.pm. I will add a new option ( posix_exit => 1 ) to MCE.
The following is a first attempt at writing to multiple worksheets simultaneously. It works for text and numbers and possibly dates. This is all the time I have but wanted to share anyway. I'm not sure if this will falter for millions of rows.
The following takes 16.541 seconds on my laptop to write about 1 million cells. The time for Numbers on my Mac is about 18 seconds to open the file.
MCE::Shared will be described at a later date after MCE 1.7 is released.
use strict; use warnings; # --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- package StrTable; # String table object for sharing between workers sub new { my ($class, $self) = (shift, { table => {}, unique => 0 }); bless $self, $class; } sub table { return $_[0]->{'table'}; } sub unique { return $_[0]->{'unique'}; } sub value { if (exists $_[0]->{'table'}->{ $_[1] }) { $_[0]->{'table'}->{ $_[1] }; } else { $_[0]->{'table'}->{ $_[1] } = $_[0]->{'unique'}++; } } # --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- package main; # This requires MCE 1.699 in trunk as MCE 1.700 is not yet released. # The Sereal module may boost freezing/thawing if available. use Excel::Writer::XLSX; use MCE::Loop 1.699 Sereal => 1; use MCE::Shared; my $str_table = mce_share( new StrTable ); # override Excel::Writer::XLSX::Worksheet::_get_shared_string_index { no warnings 'redefine'; sub Excel::Writer::XLSX::Worksheet::_get_shared_string_index { my $self = shift; my $str = shift; if ( not exists ${ $self->{_str_cache} }->{$str} ) { ${ $self->{_str_cache} }->{$str} = $str_table->value($str); } else { ${ $self->{_str_cache} }->{$str}; } } } # --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- my ($nodeList, $workbook, %worksheets, $format); $nodeList = [ [ 'AMS' , 'a' ], [ 'APJ' , 'ap' ], [ 'EMEA', 'e' ], ]; $workbook = Excel::Writer::XLSX->new("Node_List.xlsx"); $workbook->set_properties( title => 'Node List', author => 'L_WC demo', comments => 'Node List', ); $format = $workbook->add_format(bg_color => 44); $format->set_align('center'); foreach (@{ $nodeList }) { $worksheets{ $_->[0] } = $workbook->add_worksheet( $_->[0] ); } MCE::Loop::init( chunk_size => 1, max_workers => scalar(@{ $nodeList }), posix_exit => 1, gather => sub { my $ws = $workbook->{_worksheets}; # replace worksheet; link _str_total/_str_unique to workbook $ws->[ $_[0] ] = $_[1]; $ws->[ $_[0] ]->{_str_total} = \$workbook->{_str_total}; $ws->[ $_[0] ]->{_str_unique} = \$workbook->{_str_unique}; }, ); # run parallel via MCE mce_loop { doSomething($_->[0], $_->[1]) } $nodeList; # replace _str_table; update _str_unique/_str_total $workbook->{_str_table} = $str_table->table(); $workbook->{_str_unique} = $str_table->unique(); $workbook->{_str_total} = $str_table->unique(); $workbook->close(); print "Node List is Done.\n"; # --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- sub doSomething { my ($region, $sql) = @_; my $worksheet = $worksheets{$region}; if ($region eq 'AMS') { my $n = $region . '_'; for (0..65534) { $worksheet->write($_, 0, $n . $_); $worksheet->write($_, 1, $_ + 4 ); $worksheet->write($_, 2, $_ + 3 ); $worksheet->write($_, 3, $_ + 2 ); $worksheet->write($_, 4, $_ + 1 ); } # delete cache; send worksheet delete $workbook->{_worksheets}->[0]->{_str_cache}; MCE->gather(0, $workbook->{_worksheets}->[0]); print "AMS -----DONE.\n"; } elsif ($region eq 'APJ') { my $n = $region . '_'; for (0..65534) { $worksheet->write($_, 0, $n . $_); $worksheet->write($_, 1, $_ + 1 ); $worksheet->write($_, 2, $_ + 3 ); $worksheet->write($_, 3, $_ + 4 ); $worksheet->write($_, 4, $_ + 2 ); } # delete cache; send worksheet delete $workbook->{_worksheets}->[1]->{_str_cache}; MCE->gather(1, $workbook->{_worksheets}->[1]); print "APJ -----DONE.\n"; } elsif ($region eq 'EMEA') { my $n = $region . '_'; for (0..65534) { $worksheet->write($_, 0, $n . $_); $worksheet->write($_, 1, $_ + 1 ); $worksheet->write($_, 2, $_ + 2 ); $worksheet->write($_, 3, $_ + 3 ); $worksheet->write($_, 4, $_ + 4 ); } # delete cache; send worksheet delete $workbook->{_worksheets}->[2]->{_str_cache}; MCE->gather(2, $workbook->{_worksheets}->[2]); print "EMEA -----DONE.\n"; } return; }
|
|---|