#!/usr/bin/perl use lib 'SPE/blib/lib'; use strict; use warnings; #=========================================================== # DECLARATIONS #=========================================================== use Data::Dumper; use Spreadsheet::ParseExcel; use Spreadsheet::WriteExcel; use Text::ParseWords; #=========================================================== # VARIABLES #=========================================================== my $dir = 'M:\Business Process Reengineering\Taxonomy\Reviewed Content\Group D'; my $column_max = 26; #=========================================================== # SUBROUTINES #=========================================================== sub trim { $_[0] =~ s/\s+$//; $_[0] =~ s/^\s+//; } sub parse_excel { my %by_agency; my $excel_file = shift; my $acronyms = shift; -e $excel_file or die "Must provide valid XLS file! $excel_file, $!\n"; # create a ParseExcel object my $excel_obj = Spreadsheet::ParseExcel->new(); warn "Parsing $excel_file (may take a long time)"; my $workbook = $excel_obj->Parse($excel_file); # we need to get each worksheet from the workbook in turn my @worksheet = @{$workbook->{Worksheet}}; my $worksheet_i; my $header; for my $worksheet ( @{$workbook->{Worksheet}} ) { # {Cells}[row][col] my $last_col = $worksheet->{MaxCol}; my $last_row = $worksheet->{MaxRow}; warn "Last row: $last_row"; warn "Last col: $last_col"; $header = [ map { $_->Value } @{$worksheet->{Cells}[0]} ] ; for my $row ( 1 .. $last_row ) { my $row = $worksheet->{Cells}[$row]; ref($row->[2]) and (my $V = $row->[2]->Value) or next; # print "row: " . $V; # print "\n"; trim $V; my $key = $acronyms -> { $V } ; if ($key) { push @ { $by_agency{$key} } , [ map { $_->Value } @$row ] ; } else { print qq(no key found for "$V"\n); } } last; # only process one worksheet } ($header, \%by_agency); } sub read_acronyms { my $f = "$dir\\..\\A-B-C-D Acronyms.csv"; open F, $f or die "couldnt open $f: $!"; ; my %ret; while () { my @data = quotewords("\t", 0, $_); for (@data) { trim ($_); } $ret{$data[1]} = $data[0]; } \%ret; } sub new_agency { my ($header, $acronym) = @_; my $w = "Organization_$acronym.xls"; # Create a new workbook called simple.xls and add a worksheet my $workbook = Spreadsheet::WriteExcel->new($w); my $worksheet = $workbook->addworksheet(); # The general syntax is write($row, $column, $token). Note that row and # column are zero indexed my $i; # Write some text $worksheet->write(0, $i++, $_) for @$header; $worksheet; } sub write_excel { my ($header, $agency, $acronyms) = @_; warn 'write_excel'; for my $acronym (keys %$agency) { my @agency_rows = @ { $agency->{$acronym} } ; my $w = new_agency ($header, $acronym) ; my ($row, $col) = (1, 0); for my $agency_row (@agency_rows) { warn "agency_row: " . Dumper($agency_row); warn "witdth of row: " . scalar @$agency_row; for (@$agency_row) { warn "$w->write($row, $col, $_)"; $w->write($row, $col, $_); warn "$w->write($row, $col, $_) WRITTEN"; ++$col; } ++$row; } return; # why write all the excel files when one is failing... } } #=========================================================== sub usage { my ( $tool ) = $0 =~ m,([^\/]+$),; print <## c:\Documents and Settings\tbrannon\My Documents\Applications\BulkloadCheck\Bulkloader\internals>perl convert-org.pl perl convert-org.pl Parsing M:\Business Process Reengineering\Taxonomy\Reviewed Content\Group D/Organization_GroupD-no-CR-SMALL.xls (may take a long time) at convert-org.pl line 44, line 353. Last row: 38 at convert-org.pl line 62. Last col: 19 at convert-org.pl line 63. write_excel at convert-org.pl line 144. agency_row: $VAR1 = [ 'Headquarters', 'Other', 'Citizens Committee for New York City', 'Citizens Committee for New York City', '', '', '', '(212) 989-0909', '(212) 989-0909', 'N', '', '', 'http://ccnyc.neighborhoodlink.com/ccnyc/', '305 7th Avenue', '', '', 'New York', 'NY', '10001', 'Citizens committee for NYC is a non profit organization which encourages and supports volunteer action that improves the quality of life in City neighborhoods.' ]; witdth of row: 20 at convert-org.pl line 157. Spreadsheet::WriteExcel::Worksheet=HASH(0x22402e8)->write(1, 0, Headquarters) at convert-org.pl line 161. Spreadsheet::WriteExcel::Worksheet=HASH(0x22402e8)->write(1, 0, Headquarters) WRITTEN at convert-org.pl line 163. Spreadsheet::WriteExcel::Worksheet=HASH(0x22402e8)->write(1, 1, Other) at convert-org.pl line 161. Spreadsheet::WriteExcel::Worksheet=HASH(0x22402e8)->write(1, 1, Other) WRITTEN at convert-org.pl line 163. Spreadsheet::WriteExcel::Worksheet=HASH(0x22402e8)->write(1, 2, Citizens Committee for New York City) at convert-org.pl line 161. Spreadsheet::WriteExcel::Worksheet=HASH(0x22402e8)->write(1, 2, Citizens Committee for New York City) WRITTEN at convert-org.pl line 163. Spreadsheet::WriteExcel::Worksheet=HASH(0x22402e8)->write(1, 3, Citizens Committee for New York City) at convert-org.pl line 161. Spreadsheet::WriteExcel::Worksheet=HASH(0x22402e8)->write(1, 3, Citizens Committee for New York City) WRITTEN at convert-org.pl line 163. Spreadsheet::WriteExcel::Worksheet=HASH(0x22402e8)->write(1, 4, ) at convert-org.pl line 161. Spreadsheet::WriteExcel::Worksheet=HASH(0x22402e8)->write(1, 4, ) WRITTEN at convert-org.pl line 163. Spreadsheet::WriteExcel::Worksheet=HASH(0x22402e8)->write(1, 5, ) at convert-org.pl line 161. Spreadsheet::WriteExcel::Worksheet=HASH(0x22402e8)->write(1, 5, ) WRITTEN at convert-org.pl line 163. Spreadsheet::WriteExcel::Worksheet=HASH(0x22402e8)->write(1, 6, ) at convert-org.pl line 161. Spreadsheet::WriteExcel::Worksheet=HASH(0x22402e8)->write(1, 6, ) WRITTEN at convert-org.pl line 163. Spreadsheet::WriteExcel::Worksheet=HASH(0x22402e8)->write(1, 7, (212) 989-0909) at convert-org.pl line 161. Spreadsheet::WriteExcel::Worksheet=HASH(0x22402e8)->write(1, 7, (212) 989-0909) WRITTEN at convert-org.pl line 163. Spreadsheet::WriteExcel::Worksheet=HASH(0x22402e8)->write(1, 8, (212) 989-0909) at convert-org.pl line 161. Spreadsheet::WriteExcel::Worksheet=HASH(0x22402e8)->write(1, 8, (212) 989-0909) WRITTEN at convert-org.pl line 163. Spreadsheet::WriteExcel::Worksheet=HASH(0x22402e8)->write(1, 9, N) at convert-org.pl line 161. Spreadsheet::WriteExcel::Worksheet=HASH(0x22402e8)->write(1, 9, N) WRITTEN at convert-org.pl line 163. Spreadsheet::WriteExcel::Worksheet=HASH(0x22402e8)->write(1, 10, ) at convert-org.pl line 161. Spreadsheet::WriteExcel::Worksheet=HASH(0x22402e8)->write(1, 10, ) WRITTEN at convert-org.pl line 163. Spreadsheet::WriteExcel::Worksheet=HASH(0x22402e8)->write(1, 11, ) at convert-org.pl line 161. Spreadsheet::WriteExcel::Worksheet=HASH(0x22402e8)->write(1, 11, ) WRITTEN at convert-org.pl line 163. Spreadsheet::WriteExcel::Worksheet=HASH(0x22402e8)->write(1, 12, http://ccnyc.neighborhoodlink.com/ccnyc/) at convert-org.pl line 161. Spreadsheet::WriteExcel::Worksheet=HASH(0x22402e8)->write(1, 12, http://ccnyc.neighborhoodlink.com/ccnyc/) WRITTEN at convert-org.pl line 163. Spreadsheet::WriteExcel::Worksheet=HASH(0x22402e8)->write(1, 13, 305 7th Avenue) at convert-org.pl line 161. Spreadsheet::WriteExcel::Worksheet=HASH(0x22402e8)->write(1, 13, 305 7th Avenue) WRITTEN at convert-org.pl line 163. Spreadsheet::WriteExcel::Worksheet=HASH(0x22402e8)->write(1, 14, ) at convert-org.pl line 161. Spreadsheet::WriteExcel::Worksheet=HASH(0x22402e8)->write(1, 14, ) WRITTEN at convert-org.pl line 163. Spreadsheet::WriteExcel::Worksheet=HASH(0x22402e8)->write(1, 15, ) at convert-org.pl line 161. Spreadsheet::WriteExcel::Worksheet=HASH(0x22402e8)->write(1, 15, ) WRITTEN at convert-org.pl line 163. Spreadsheet::WriteExcel::Worksheet=HASH(0x22402e8)->write(1, 16, New York) at convert-org.pl line 161. Spreadsheet::WriteExcel::Worksheet=HASH(0x22402e8)->write(1, 16, New York) WRITTEN at convert-org.pl line 163. Spreadsheet::WriteExcel::Worksheet=HASH(0x22402e8)->write(1, 17, NY) at convert-org.pl line 161. Spreadsheet::WriteExcel::Worksheet=HASH(0x22402e8)->write(1, 17, NY) WRITTEN at convert-org.pl line 163. Spreadsheet::WriteExcel::Worksheet=HASH(0x22402e8)->write(1, 18, 10001) at convert-org.pl line 161. Spreadsheet::WriteExcel::Worksheet=HASH(0x22402e8)->write(1, 18, 10001) WRITTEN at convert-org.pl line 163. Spreadsheet::WriteExcel::Worksheet=HASH(0x22402e8)->write(1, 19, Citizens committee for NYC is a non profit organization which encourages and supports volunteer action that improves the quality of life in City neighborhoods.) at convert-org.pl line 161. Spreadsheet::WriteExcel::Worksheet=HASH(0x22402e8)->write(1, 19, Citizens committee for NYC is a non profit organization which encourages and supports volunteer action that improves the quality of life in City neighborhoods.) WRITTEN at convert-org.pl line 163. c:\Documents and Settings\tbrannon\My Documents\Applications\BulkloadCheck\Bulkloader\internals>