Writing multiple Excel::Writer::XLSX worksheets in parallel (3rd and final attempts)

July 22, 2015. The example was updated to work with MCE in trunk.

My 1st and 2nd attempts got me warmed up, and I thought an even faster approach was possible. The following demo is my 3rd attempt: it writes 1 million cells combined in less than 6 seconds from start to finish, and 10 million cells in 57 seconds. Running serially takes 15 and 141 seconds for 1 and 10 million cells respectively. Note that processors can hold turbo boost for some time, so the serial code likely ran at a higher clock speed (GHz).

   for ( 1 ..   111_111 ) { ... }      # 3 * 3,  1 million
   for ( 1 .. 1_111_111 ) { ... }      # 3 * 3, 10 million
[download]

Writing text data will slow this down a little due to obtaining the next unique id from the shared strTable object. The internal str_table is shared between worksheets in Excel::Writer::XLSX. Thus, synchronization is necessary as well.

Note: This requires MCE from trunk r957 or later, which includes MCE::Shared, because MCE 1.700 is not yet released. The logic consumes only the memory it needs; running multiple workers never duplicates data.

#!/usr/bin/env perl

use strict;
use warnings;

# --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---
package StrTable;

# Minimal string table: maps each distinct string to an integer id. Ids are
# handed out sequentially, starting at 0, in the order strings are first seen.

# Constructor. Starts with an empty string => id map and a zero id counter.
sub new {
   my $class = shift;
   my %state = ( table => {}, unique => 0 );
   return bless \%state, $class;
}

# Accessors: the string => id hash reference, and the number of ids issued.
sub table  { my $self = shift; return $self->{table};  }
sub unique { my $self = shift; return $self->{unique}; }

# Return the id for $str, assigning the next free id the first time the
# string is seen.
sub value {
   my ($self, $str) = @_;
   my $ids = $self->{table};

   $ids->{$str} = $self->{unique}++ unless exists $ids->{$str};

   return $ids->{$str};
}

# --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---
package main;

use Archive::Zip ();
use File::Copy qw(move);
use File::Find ();
use File::Temp (); $File::Temp::KEEP_ALL = 1;

use Excel::Writer::XLSX;
use MCE::Signal qw($tmp_dir);
use MCE::Loop 1.699;
use MCE::Shared;

# One entry per region: [ worksheet name, second field read as $sql by the
# worker loop ]. One worker is started per entry.
my $nodeList = [ [ 'AMS' , 'a' ], [ 'APJ' , 'ap' ], [ 'EMEA', 'e' ] ];

# String table shared between workers so that every worksheet agrees on the
# id of each string. Uses a direct class-method call rather than the
# indirect object syntax (`new StrTable`), which is ambiguous to parse.
my $strTable = mce_share( StrTable->new );

# Cell formats; assigned by init_wb() for the workbook it creates.
my ($center, $format);

{  # Override _get_shared_string_index to synchronize str_table updates
   no warnings 'redefine';

   # Replacement string-index lookup for worksheets.
   #
   # Parameters:
   #   $self - the worksheet object
   #   $str  - the string being written
   # Returns the id for $str. The id is obtained from the shared $strTable
   # the first time this worksheet sees the string and is then remembered
   # in the worksheet's own `_str_cache`, so the shared table is consulted
   # only once per distinct string per worksheet.
   sub Excel::Writer::XLSX::Worksheet::_get_shared_string_index {
      my ($self, $str) = @_;

      if ( not exists ${ $self->{_str_cache} }->{$str} ) {
         ${ $self->{_str_cache} }->{$str} = $strTable->value($str);
      }

      return ${ $self->{_str_cache} }->{$str};
   }
}

# Create the private workbook a worker writes into.
#
# Parameters:
#   $wn   - zero-based index of the worksheet this worker fills
#   $file - optional output path; defaults to "$tmp_dir/<wn+1>/tmp.xlsx"
# Returns the new workbook object.
# Dies if the working directory or the workbook cannot be created.
# Side effects: creates "$tmp_dir/<wn+1>" and assigns the file-level
# $center and $format variables with formats belonging to this workbook.
sub init_wb {

   my ($wn, $file) = @_;

   # Increment $wn by 1 since worksheet xml files begin at 1
   $wn++;

   my $work_dir = "$tmp_dir/$wn";
   if (not -d $work_dir) {
      mkdir $work_dir or die "Error creating directory $work_dir: $!\n";
   }

   # new() returns undef when the output file cannot be created
   my $wb = Excel::Writer::XLSX->new($file || "$work_dir/tmp.xlsx");
   die "Error creating workbook: $!\n" unless defined $wb;

   $wb->set_tempdir($work_dir);

   # Set workbook properties
   $wb->set_properties(
      title    => 'Node List',
      author   => 'L_WC demo',
      comments => 'Node List',
   );

   # Define/add formats to the workbook
   $center = $wb->add_format(align => 'center');
   $format = $wb->add_format(align => 'center', bg_color => 44);

   # Add every worksheet (not just this worker's) so that each private
   # workbook has the same sheet layout; specify formats for columns/rows
   for my $node (@{ $nodeList }) {
      my $ws = $wb->add_worksheet($node->[0]);
      $ws->set_column(0, 4, 15, $center);
   }

   return $wb;
}

# Finish a worker's workbook: wait for all workers, copy the shared string
# table and its counters into the workbook object, then close the workbook.
sub close_wb {

   my ($workbook) = @_;

   # Wait for others to complete, important: the shared table must be
   # final before it is copied below.
   MCE->sync();

   # Replace str_table, then update str_total and str_unique
   my $shared_strings = $strTable->table();
   @{ $workbook }{ qw(_str_table _str_total _str_unique) } = (
      $shared_strings,
      0 + $strTable->unique(),
      0 + $strTable->unique(),
   );

   # Close workbook
   $workbook->close();
}

# Combine the workers' worksheet files into a single xlsx file.
#
# Parameters:
#   $wb_file - path of the xlsx file to write
# Returns 1 on success.
# Dies if a worksheet file is missing or ambiguous, if a worksheet cannot be
# moved, or if the output file cannot be opened, written or closed.
#
# For each N in 1 .. number of regions, the file sheetN.xml found under
# "$tmp_dir/N" is moved next to sheet1.xml, and the directory tree that
# holds sheet1.xml is then zipped into $wb_file.
sub merge_wb_data {

   my ($wb_file) = @_;
   my $zip = Archive::Zip->new();
   my (@pths, @xlsx_files);
   local ($@, $!, $^E, $?);

   # Other files, e.g. table data likely need the same and not done
   # for this demonstration. Just worksheet files are merged.
   # I received help by reading _store_workbook inside
   # Excel::Writer::XLSX::Workbook.pm.

   my $untaint_pattern = qr|^(.+)$|;

   # Find the directory holding sheetN.xml inside worker directory N.
   # The match is anchored at the end of the path so that files such as
   # sheetN.xml.rels are not picked up by mistake.
   for my $sheet_num (1 .. @{ $nodeList }) {
      my $wanted = sub {
         push @pths, $1
            if $File::Find::name =~ m{(.*)/sheet$sheet_num\.xml\z};
      };
      File::Find::find({
         wanted          => $wanted,
         untaint         => 1,
         untaint_pattern => $untaint_pattern,
      }, "$tmp_dir/$sheet_num");
   }

   # $pths[N-1] must be the directory of sheetN.xml. Anything other than
   # exactly one hit per region would misalign that mapping, and an empty
   # list would make the re-zip step below start from "/../../".
   if (@pths != @{ $nodeList }) {
      die "Expected " . scalar(@{ $nodeList }) . " worksheet files under "
        . "$tmp_dir, found " . scalar(@pths) . "\n";
   }

   # Move worksheet files 2,3,... to where worksheet 1 data resides
   for my $idx (0 .. $#pths) {
      # Best-effort removal of the worker's own zipped workbook
      unlink "$pths[$idx]/../../../tmp.xlsx";
      next if $idx == 0;

      my $sheet = 'sheet' . ($idx + 1) . '.xml';
      unlink "$pths[0]/$sheet";      # drop worker 1's empty placeholder
      move("$pths[$idx]/$sheet", "$pths[0]/$sheet")
         or die "Error moving $pths[$idx]/$sheet: $!\n";
   }

   # Re-zip xlsx files
   my $temp_dir = "$pths[0]/../../";

   File::Find::find({
      wanted          => sub { push @xlsx_files, $File::Find::name if -f $_ },
      untaint         => 1,
      untaint_pattern => $untaint_pattern,
   }, $temp_dir);

   for my $file_name (@xlsx_files) {
      # Archive member names are relative to the package root
      (my $short_name = $file_name) =~ s{^\Q$temp_dir\E/?}{};
      $zip->addFile($file_name, $short_name);
   }

   open my $fh, '>', $wb_file or die "Error opening xlsx file: $!\n";
   binmode $fh;
   $zip->writeToFileHandle($fh) == Archive::Zip::AZ_OK()
      or die "Error writing xlsx file: $wb_file\n";
   close $fh or die "Error closing xlsx file: $!\n";

   return 1;
}

# --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---

# Per-region fill plan: which worksheet the region owns, the header text
# written to row 0, the base value of its first data column, and the
# completion message printed by the worker.
my %fill_plan_for = (
   AMS  => { sheet => 0, header => 'foo', base => 1000,
             done  => "AMS  ---- DONE.\n" },
   APJ  => { sheet => 1, header => 'bar', base => 4000,
             done  => "APJ  ---- DONE.\n" },
   EMEA => { sheet => 2, header => 'baz', base => 7000,
             done  => "EMEA ---- DONE.\n" },
);

# One worker per region, one region per chunk.
MCE::Loop::init(
   max_workers => scalar @{ $nodeList },
   chunk_size  => 1,
   posix_exit  => 1,
   use_threads => 0,
);

mce_loop {
   # $sql is not used in this demonstration
   my ($region, $sql) = @{ $_ }[0, 1];
   my $wb;

   # Acquire data from the DB. Each worker must obtain a handle.
   # The DB logic is similar to running serially. Just the where
   # clause is likely unique for each region.


   # Fill worksheet rows/cells
   if (my $plan = $fill_plan_for{$region}) {
      $wb = init_wb($plan->{sheet});
      my $ws = $wb->sheets($plan->{sheet});

      $ws->write(0, 2, $plan->{header}, $format);

      # Three values per iteration, written to columns 0, 2 and 4; each
      # successive column is offset one row further down and its values
      # start 1000 higher.
      for my $n ( 1 .. 111_111 ) {
         for my $step ( 0 .. 2 ) {
            $ws->write(
               $step + $n, 2 * $step, $plan->{base} + 1000 * $step + $n
            );
         }
      }

      print $plan->{done};
   }

   close_wb($wb) if $wb;

} $nodeList;

# Shutdown MCE
MCE::Loop::finish();

# Merge data into one workbook
merge_wb_data('Node_List.xlsx');

print "Node List is Done.\n";
[download]

Kind regards, Mario

Comment on Writing multiple Excel::Writer::XLSX worksheets in parallel (3rd and final attempts) Select or Download Code

Replies are listed 'Best First'.
Re: Writing multiple Excel::Writer::XLSX worksheets in parallel (3rd and final attempts) by marioroy (Prior) on Jul 05, 2015 at 04:00 UTC
The following is the serial version for comparison. #!/usr/bin/env perl use strict; use warnings; # --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- package main; use Excel::Writer::XLSX; my ($nodeList, $wb, $center, $format); $nodeList = [ [ 'AMS' , 'a' ], [ 'APJ' , 'ap' ], [ 'EMEA', 'e' ] ]; $wb = Excel::Writer::XLSX->new("Node_List.xlsx"); # Set workbook properties $wb->set_properties( title => 'Node List', author => 'L_WC demo', comments => 'Node List', ); # Define/add formats to the workbook $center = $wb->add_format(align => 'center'); $format = $wb->add_format(align => 'center', bg_color => 44); # Add worksheets, specify formats for columns/rows for (0 .. @{ $nodeList } - 1) { $wb->add_worksheet($nodeList->[$_][0]); $wb->sheets($_)->set_column(0, 4, 15, $center); } # Run foreach (@{ $nodeList }) { doSomething($_->[0], $_->[1]); } $wb->close(); print "Node List is Done.\n"; # --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- sub doSomething { my ($region, $sql) = @_; my $ws; # Acquire data from the DB # Fill worksheet rows/cells if ($region eq 'AMS') { $ws = $wb->sheets(0); $ws->write(0, 2, 'foo', $format); for ( 1 .. 111_111 ) { $ws->write(0 + $_, 0, 1000 + $_); $ws->write(1 + $_, 2, 2000 + $_); $ws->write(2 + $_, 4, 3000 + $_); } print "AMS ---- DONE.\n"; } elsif ($region eq 'APJ') { $ws = $wb->sheets(1); $ws->write(0, 2, 'bar', $format); for ( 1 .. 111_111 ) { $ws->write(0 + $_, 0, 4000 + $_); $ws->write(1 + $_, 2, 5000 + $_); $ws->write(2 + $_, 4, 6000 + $_); } print "APJ ---- DONE.\n"; } elsif ($region eq 'EMEA') { $ws = $wb->sheets(2); $ws->write(0, 2, 'baz', $format); for ( 1 .. 111_111 ) { $ws->write(0 + $_, 0, 7000 + $_); $ws->write(1 + $_, 2, 8000 + $_); $ws->write(2 + $_, 4, 9000 + $_); } print "EMEA ---- DONE.\n"; } return; } [download] Kind regards, Mario	[reply] [d/l]
Re: Writing multiple Excel::Writer::XLSX worksheets in parallel (3rd and final attempts) by marioroy (Prior) on Jul 05, 2015 at 17:15 UTC
Final attempt :-) Writing many millions cells will likely consume more memory than available on the box. Thus, tried one more time with set_optimization. The internal str_table is not used from what I can see when enabling optimization. I removed MCE::Shared and the override from the demonstration. This now works with MCE 1.608. All one must do is write rows in sequential row order. Excel::Writer::XLSX writes to temporary files behind the scene. One can pass the -use_dev_shm option to MCE::Signal to have the temp directories reside under /dev/shm on Linux which resides in memory. It requires 814 MB for 10 million cells. So, please remove the option if /dev/shm is not big enough. Performance wise, it takes 5.5 seconds and 53.0 seconds for 1 and 10 million cells respectively all while consuming very little memory. `for ( 1 .. 111_111 ) { ... } # 3 * 3, 1 million for ( 1 .. 1_111_111 ) { ... } # 3 * 3, 10 million` [download] Note: Numbers on the Mac isn't showing cells containing text when optimization is enabled. LibreOffice is working though. #!/usr/bin/env perl use strict; use warnings; use Archive::Zip (); use File::Copy qw(move); use File::Find (); use File::Temp (); $File::Temp::KEEP_ALL = 1; use Excel::Writer::XLSX; use MCE::Signal qw($tmp_dir -use_dev_shm); use MCE::Loop; my $nodeList = [ [ 'AMS' , 'a' ], [ 'APJ' , 'ap' ], [ 'EMEA', 'e' ] ]; my ($center, $format); sub init_wb { my $wn = shift; # Increment $wn by 1 since worksheet xml files begin at 1 $wn++; mkdir "$tmp_dir/$wn"; my $wb = Excel::Writer::XLSX->new("$tmp_dir/$wn/tmp.xlsx"); $wb->set_tempdir("$tmp_dir/$wn"); $wb->set_optimization(); # Set workbook properties $wb->set_properties( title => 'Node List', author => 'L_WC demo', comments => 'Node List', ); # Define/add formats to the workbook $center = $wb->add_format(align => 'center'); $format = $wb->add_format(align => 'center', bg_color => 44); # Add worksheets, specify formats for columns/rows for (0 .. 
@{ $nodeList } - 1) { $wb->add_worksheet($nodeList->[$_][0]); $wb->sheets($_)->set_column(0, 4, 15, $center); } return $wb; } sub merge_wb_data { my $wb_file = shift; my ($zip, @pths, @xlsx_files) = (Archive::Zip->new()); local ($@, $!, $^E, $?); # Other files, e.g. table data likely need the same and not done # for this demonstration. Just worksheet files are merged. # I received help by reading _store_workbook inside # Excel::Writer::XLSX::Workbook.pm. # Find worksheet files 2,3,... for my $_num (1 .. @{ $nodeList }) { my $wanted = sub { push @pths, $1 if $File::Find::name =~ /(.*)\/sheet$_num\.xml +/; }; File::Find::find({ wanted => $wanted, untaint => 1, untaint_pattern => qr\|^(.+)$ +\| }, "$tmp_dir/$_num"); } # Move worksheet files 2,3,... to where worksheet 1 data resides for (0 .. @pths - 1) { unlink $pths[$_]."/../../../tmp.xlsx"; if ($_ > 0) { my $_num = $_ + 1; unlink $pths[0]."/sheet$_num.xml"; move $pths[$_]."/sheet$_num.xml", $pths[0]."/sheet$_num.xml"; } } # Re-zip xlsx files my $wanted = sub { push @xlsx_files, $File::Find::name if -f }; my $temp_dir = $pths[0]."/../../"; my $short_name; File::Find::find({ wanted => $wanted, untaint => 1, untaint_pattern => qr\|^(.+)$\| }, $temp_dir); for my $file_name (@xlsx_files) { $short_name = $file_name; $short_name =~ s{^\Q$temp_dir\E/?}{}; $zip->addFile($file_name, $short_name); } open my $fh, '>', $wb_file or die "Error opening xlsx file: $!\n"; binmode $fh; $zip->writeToFileHandle($fh); close $fh; } # --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- MCE::Loop::init( max_workers => scalar(@{ $nodeList }), chunk_size => 1, use_threads => 0, ); mce_loop { my ($region, $sql) = ($_->[0], $_->[1]); my ($wb, $ws); # Acquire data from the DB. Each worker must obtain a handle. # The DB logic is similar to running serially. Just the where # clause is likely unique for each region. 
# Fill worksheet rows/cells if ($region eq 'AMS') { $wb = init_wb(0); $ws = $wb->sheets(0); $ws->write(0, 2, 'foo', $format); for ( 1 .. 111_111 ) { $ws->write($_, 0, 1000 + $_); $ws->write($_, 2, 2000 + $_); $ws->write($_, 4, 3000 + $_); } print "AMS ---- DONE.\n"; } elsif ($region eq 'APJ') { $wb = init_wb(1); $ws = $wb->sheets(1); $ws->write(0, 2, 'bar', $format); for ( 1 .. 111_111 ) { $ws->write($_, 0, 4000 + $_); $ws->write($_, 2, 5000 + $_); $ws->write($_, 4, 6000 + $_); } print "APJ ---- DONE.\n"; } elsif ($region eq 'EMEA') { $wb = init_wb(2); $ws = $wb->sheets(2); $ws->write(0, 2, 'baz', $format); for ( 1 .. 111_111 ) { $ws->write($_, 0, 7000 + $_); $ws->write($_, 2, 8000 + $_); $ws->write($_, 4, 9000 + $_); } print "EMEA ---- DONE.\n"; } $wb->close() if $wb; } $nodeList; # Shutdown MCE MCE::Loop::finish(); # Merge data into one workbook merge_wb_data('Node_List.xlsx'); print "Node List is Done.\n"; [download] Kind regards, Mario	[reply] [d/l] [select]
Re^2: Writing multiple Excel::Writer::XLSX worksheets in parallel (3rd and final attempts) by marioroy (Prior) on Jul 05, 2015 at 19:50 UTC
Results are in for 60 million cells. Each worker writes 1 million rows x 20 columns wide. Total 60 million cells. `# Change line 44 inside init_wb $wb->sheets($_)->set_column(0, 4, 15, $center); # To this $wb->sheets($_)->set_column(0, 19, 15, $center);` [download] # Replace the if/elsif block inside mce_loop with the following # Fill worksheet rows/cells if ($region eq 'AMS') { $wb = init_wb(0); $ws = $wb->sheets(0); $ws->write(0, 2, 'foo', $format); for my $row (1..1e6) { for my $cell (0..19) { $ws->write($row, $cell, $row + $cell + 10e6); }} print "AMS ---- DONE.\n"; } elsif ($region eq 'APJ') { $wb = init_wb(1); $ws = $wb->sheets(1); $ws->write(0, 2, 'bar', $format); for my $row (1..1e6) { for my $cell (0..19) { $ws->write($row, $cell, $row + $cell + 20e6); }} print "APJ ---- DONE.\n"; } elsif ($region eq 'EMEA') { $wb = init_wb(2); $ws = $wb->sheets(2); $ws->write(0, 2, 'baz', $format); for my $row (1..1e6) { for my $cell (0..19) { $ws->write($row, $cell, $row + $cell + 30e6); }} print "EMEA ---- DONE.\n"; } [download] It takes 4 and 1/2 minutes to write 60 million cells. Opening in LibreOffice takes 4 minutes and 22 seconds before the data appears. LibreOffice consumes 1 GB of memory at that point. `$ time perl write_60mil.pl AMS ---- DONE. EMEA ---- DONE. APJ ---- DONE. Node List is Done. real 4m35.239s user 12m 3.641s sys 0m 4.242s` [download] I watched the system while running. The /dev/shm requires minimum 5 GB (consumed 4.8 GB). The workers themselves each consume 21 MB of memory. The final xlsx file is 200 MB. In other words, this will run on a Linux system or VM having 6 GB of memory. I resized /dev/shm as follow before running. `# update /etc/fstab tmpfs /dev/shm tmpfs defaults,size=5g 0 0 # afterwards, remount /dev/shm mount -o remount /dev/shm` [download] This has been rather interesting and enjoyed trying. The write_60mil.pl script breaks 200k cells per second (218k). Kind regards, Mario	[reply] [d/l] [select]
Re^2: Writing multiple Excel::Writer::XLSX worksheets in parallel (3rd and final attempts) by marioroy (Prior) on Jul 05, 2015 at 22:48 UTC
Double zipping xlsx files is not necessary. The following override is the minimum needed for the demonstration above. The _prepare_sst_string_data may be omitted when calling $wb->set_optimization(). `{ no warnings 'redefine'; sub Excel::Writer::XLSX::Workbook::_store_workbook { my $self = shift; my $tempdir = File::Temp->newdir( DIR => $self->{_tempdir} ); my $packager = Excel::Writer::XLSX::Package::Packager->new(); # Convert the SST strings data structure. $self->_prepare_sst_string_data(); # Package the workbook. $packager->_add_workbook( $self ); $packager->_set_package_dir( $tempdir ); $packager->_create_package(); # Free up the Packager object. $packager = undef; } }` [download] The time for 60 million cells reduced by 16.5 seconds. Thus, reaching 232k cells per second (2.5x for multi-core). `$ time perl write_60mil_parallel.pl AMS ---- DONE. EMEA ---- DONE. APJ ---- DONE. Node List is Done. real 4m18.715s user 11m15.291s sys 0m 3.766s` [download] For comparison, the time is shy of 11 minutes for 1 core at 92k cells per second. `$ time perl write_60mil_serial.pl AMS ---- DONE. APJ ---- DONE. EMEA ---- DONE. Node List is Done. real 10m54.015s user 10m51.045s sys 0m 3.224s` [download] Kind regards, Mario	[reply] [d/l] [select]
Re: Writing multiple Excel::Writer::XLSX worksheets in parallel (demo: 6 million cells) by marioroy (Prior) on Jul 10, 2015 at 00:43 UTC
I received a request for the completed version. This code writes 6 million cells. There is an inlined tip for allocating a RAM disk on Windows (lines 12 ~ 15). The RAM disk requires 500 MB. #!/usr/bin/env perl use strict; use warnings; use Archive::Zip (); use Time::HiRes qw(time); use File::Copy qw(move); use File::Find (); use File::Temp (); $File::Temp::KEEP_ALL = 1; # One may allocate a 500 MB RAM disk (formated as FAT32) on Windows. # https://www.softperfect.com/products/ramdisk/ BEGIN { $ENV{TEMP} = 'X:' if -d 'X:' } # assuming X: is the RAM disk use Excel::Writer::XLSX; use MCE::Signal qw($tmp_dir -use_dev_shm); use MCE::Loop; { no warnings 'redefine'; sub Excel::Writer::XLSX::Workbook::_store_workbook { my $self = shift; my $tempdir = File::Temp->newdir( DIR => $self->{_tempdir} ); my $packager = Excel::Writer::XLSX::Package::Packager->new(); # Convert the SST strings data structure. $self->_prepare_sst_string_data(); # Package the workbook. $packager->_add_workbook( $self ); $packager->_set_package_dir( $tempdir ); $packager->_create_package(); # Free up the Packager object. $packager = undef; } } my $nodeList = [ [ 'AMS' , 'a' ], [ 'APJ' , 'ap' ], [ 'EMEA', 'e' ] ]; my ($center, $format); sub init_wb { my $wn = shift; # Increment $wn by 1 since worksheet xml files begin at 1 $wn++; mkdir "$tmp_dir/$wn"; my $wb = Excel::Writer::XLSX->new("$tmp_dir/$wn/tmp.xlsx"); $wb->set_tempdir("$tmp_dir/$wn"); $wb->set_optimization(); # Set workbook properties $wb->set_properties( title => 'Node List', author => 'L_WC demo', comments => 'Node List', ); # Define/add formats to the workbook $center = $wb->add_format(align => 'center'); $format = $wb->add_format(align => 'center', bg_color => 44); # Add worksheets, specify formats for columns/rows for (0 .. 
@{ $nodeList } - 1) { $wb->add_worksheet($nodeList->[$_][0]); $wb->sheets($_)->set_column(0, 19, 15, $center); } return $wb; } sub merge_wb_data { my $wb_file = shift; my ($zip, @pths, @xlsx_files) = (Archive::Zip->new()); local ($@, $!, $^E, $?); # Other files, e.g. table data likely need the same and not done # for this demonstration. Just worksheet files are merged. # I received help by reading _store_workbook inside # Excel::Writer::XLSX::Workbook.pm. # Find worksheet files 2,3,... for my $_num (1 .. @{ $nodeList }) { my $wanted = sub { push @pths, $1 if $File::Find::name =~ /(.*)\/sheet$_num\.xml +/; }; File::Find::find({ wanted => $wanted, untaint => 1, untaint_pattern => qr\|^(.+)$ +\| }, "$tmp_dir/$_num"); } # Move worksheet files 2,3,... to where worksheet 1 data resides for (0 .. @pths - 1) { unlink $pths[$_]."/../../../tmp.xlsx"; if ($_ > 0) { my $_num = $_ + 1; unlink $pths[0]."/sheet$_num.xml"; move $pths[$_]."/sheet$_num.xml", $pths[0]."/sheet$_num.xml"; } } # Re-zip xlsx files my $wanted = sub { push @xlsx_files, $File::Find::name if -f }; my $temp_dir = $pths[0]."/../../"; my $short_name; File::Find::find({ wanted => $wanted, untaint => 1, untaint_pattern => qr\|^(.+)$\| }, $temp_dir); for my $file_name (@xlsx_files) { $short_name = $file_name; $short_name =~ s{^\Q$temp_dir\E/?}{}; $zip->addFile($file_name, $short_name); } open my $fh, '>', $wb_file or die "Error opening xlsx file: $!\n"; binmode $fh; $zip->writeToFileHandle($fh); close $fh; } # --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- my $start = time(); MCE::Loop::init( max_workers => scalar(@{ $nodeList }), chunk_size => 1, use_threads => 0, ); mce_loop { my ($region, $sql) = ($_->[0], $_->[1]); my ($wb, $ws); # Acquire data from the DB. Each worker must obtain a handle. # The DB logic is similar to running serially. Just the where # clause is likely unique for each region. 
# Fill worksheet rows/cells if ($region eq 'AMS') { $wb = init_wb(0); $ws = $wb->sheets(0); $ws->write(0, 2, 'foo', $format); for my $row (1..1e5) { for my $cell (0..19) { $ws->write($row, $cell, $row + $cell + 10e6); }} print "AMS ---- DONE.\n"; } elsif ($region eq 'APJ') { $wb = init_wb(1); $ws = $wb->sheets(1); $ws->write(0, 2, 'bar', $format); for my $row (1..1e5) { for my $cell (0..19) { $ws->write($row, $cell, $row + $cell + 20e6); }} print "APJ ---- DONE.\n"; } elsif ($region eq 'EMEA') { $wb = init_wb(2); $ws = $wb->sheets(2); $ws->write(0, 2, 'baz', $format); for my $row (1..1e5) { for my $cell (0..19) { $ws->write($row, $cell, $row + $cell + 30e6); }} print "EMEA ---- DONE.\n"; } $wb->close() if $wb; } $nodeList; # Shutdown MCE MCE::Loop::finish(); # Merge data into one workbook merge_wb_data('Node_List.xlsx'); print "Node List is Done.\n"; printf "Duration: %0.3f\n", time() - $start; [download] Kind regards, Mario	[reply] [d/l]
Re: Writing multiple Excel::Writer::XLSX worksheets in parallel (3rd and final attempts) by Anonymous Monk on Jul 06, 2015 at 13:42 UTC
Shudder... an Excel spreadsheet with ten million cells. But then again, why am I not surprised? Why use a database, the right tool for the job, when the wrong tool for such a job will do? Oh well, it was not your doing . . .	[reply]
Re^2: Writing multiple Excel::Writer::XLSX worksheets in parallel (3rd and final attempts) by marioroy (Prior) on Jul 06, 2015 at 14:56 UTC
The question was asked here on wanting to import data from an existing DB into an Excel file (multiple worksheets one workbook) in parallel. It was an interesting experiment nonetheless.	[reply]