in reply to A copyeditor needs help to get started with a Perl project
Working with Word documents directly from Perl *can* be done. Working with them from VB is possibly easier. You will basically be converting VB examples to Win32::OLE in Perl if you wish to run with Perl. If you can process the manuscripts as text files then Perl is definitely the weapon of choice. Here are a few Win32::OLE examples that work with Word files. I just ripped them out of the toolbox so you will need to fiddle with them. They worked happily in their native environment ;-)
# ----------------------------------------------------------------------
# Hard-coded Word constants
# ----------------------------------------------------------------------
# Win32::OLE::Const causes all sorts of warnings and has no import method,
# so, bad as it is, we are hard coding them here. The code below tests them:
# uncomment these lines to make sure M$ has not randomly changed them
# again ;-) if the Win32::OLE code falls over for no particularly good reason.
# use Win32::OLE::Const 'Microsoft Word';
# print "Word Constants:\ndoc:" ,wdFormatDocument, "\ntext:", wdFormatText,
#     "\nhtml:", wdFormatHTML, "\nreplace all:", wdReplaceAll; exit;
my $wdFormatDocument = 0;
my $wdFormatText     = 2;
my $wdFormatHTML     = 8;
my $wdReplaceAll     = 2;

# ----------------------------------------------------------------------
# Example 1: save a Word document as plain text
# ----------------------------------------------------------------------
# save_doc_as_text( $infile, $outfile )
#   $infile  - path of the document handed to Documents->Open
#   $outfile - path handed to SaveAs as FileName
# Starts a new, hidden Word instance, opens $infile and saves it to
# $outfile using the $wdFormatText file format, then closes the document.
# NOTE(review): error() is not defined in this snippet; it came from the
# original toolbox. Presumably it does not return -- confirm, or replace it
# with die, otherwise the method calls after a failed check run on undef.
sub save_doc_as_text {
    my ( $infile, $outfile ) = @_;
    require Win32::OLE;

    # The second argument to new() is the destructor Win32::OLE runs when
    # the object goes away; here it asks Word to quit.
    my $word = Win32::OLE->new( 'Word.Application', sub { $_[0]->Quit; } );
    error( "Can't create new instance or Word Reason:$Win32::OLE::LastError" )
        unless $word;
    $word->{visible} = 0;

    my $doc = $word->{Documents}->Open($infile);
    error( "Can't open $infile, Reason:$Win32::OLE::LastError" )
        unless $doc;

    # Other formats declared above: $wdFormatDocument, $wdFormatHTML
    $doc->SaveAs( { FileName => $outfile, FileFormat => $wdFormatText } );
    $doc->Close;

    undef $doc;
    undef $word;
}

# ----------------------------------------------------------------------
# Example 2: compare two Word documents
# ----------------------------------------------------------------------
CompareWord( "C:\\Hello.doc", "C:\\Hello.edit.doc", "C:\\compare.doc" );

# CompareWord( $doc1, $doc2, $outfile )
#   $doc1    - document opened in Word
#   $doc2    - document passed to Compare on the opened $doc1
#   $outfile - path passed to WordBasic->FileSaveAs
# Attaches to a running Word instance if there is one, otherwise starts a
# new one. Sets $Win32::OLE::Warn to 3 (and leaves it set). Returns 0.
sub CompareWord {
    my ( $doc1, $doc2, $outfile ) = @_;
    require Win32::OLE;
    $Win32::OLE::Warn = 3;    # die on errors...

    my $Word = Win32::OLE->GetActiveObject('Word.Application')
        || Win32::OLE->new( 'Word.Application', 'Quit' );
    my $WordFile = $Word->Documents->Open($doc1);
    $Word->{Visible} = 0;

    $WordFile->Compare($doc2);
    $Word->WordBasic->FileSaveAs($outfile);
    $WordFile->Close();

    undef $WordFile;
    undef $Word;
    return 0;
}

# ----------------------------------------------------------------------
# Example 3: find and replace tokens in a Word document
# ----------------------------------------------------------------------
require Win32::OLE;

# Word instance to hand to word_find_and_replace(). Declared here with my
# because it was previously assigned without any declaration.
my $word = Win32::OLE->new( 'Word.Application', sub { $_[0]->Quit; } );

# word_find_and_replace( $word, $rel_file_path, $tokens_ref )
#   $word          - Word.Application OLE object
#   $rel_file_path - path of the source document, relative to $DOC_DIR
#   $tokens_ref    - hash ref of token => replacement text; each token is
#                    searched for in the document as '<?token?>'
# Works on a temporary copy under "$DOC_DIR/system", replaces every token
# (Replace => $wdReplaceAll), saves, reads the result back, deletes the
# temporary file and returns what read_file() gave for the modified copy.
# NOTE(review): $DOC_DIR, get_tempfile(), read_file() and create_file() are
# not defined in this snippet; they came from the original toolbox.
sub word_find_and_replace {
    my ( $word, $rel_file_path, $tokens_ref ) = @_;

    # first make a temporary file to do the search and replace on
    my ( $fh, $temp_name ) = get_tempfile( "$DOC_DIR/system", 'doc' );
    close $fh;
    my $content_ref = read_file( "$DOC_DIR/$rel_file_path" );
    create_file( "$DOC_DIR/system/$temp_name", $content_ref, 'overwrite ok' );

    $word->{visible} = 0;
    my $doc         = $word->{Documents}->Open("$DOC_DIR/system/$temp_name");
    my $search_obj  = $doc->Content->Find;
    my $replace_obj = $search_obj->Replacement;

    for my $token ( keys %$tokens_ref ) {
        my $find    = '<?' . $token . '?>';
        my $replace = $tokens_ref->{$token};

        # now i know this looks weird but M$ word (at least 2000) wants \r
        # as the para marker not \r\n or even \n if you send \n you get
        # little binary squares..... oh well that's M$ for you.
        $replace =~ s/\r\n|\n/\r/g;    # this makes it work properly. GOK

        $search_obj->{Text}  = $find;
        $replace_obj->{Text} = $replace;
        $search_obj->Execute( { Replace => $wdReplaceAll } );
    }

    $doc->Save;
    $doc->Close;

    # now get the data out of the modified temp file
    $content_ref = read_file( "$DOC_DIR/system/$temp_name" );

    # remove our unwanted temp files and objects
    unlink "$DOC_DIR/system/$temp_name";
    undef $search_obj;
    undef $replace_obj;
    undef $doc;

    return $content_ref;
}

# ----------------------------------------------------------------------
# Example 4: build, format, print and save a document
# ----------------------------------------------------------------------
# this was a mish mash example of stuff you can do
# the reason for which is long since forgotten
# (use statements take effect at compile time, wherever they appear)
use Win32::OLE;
use Win32::OLE::Const 'Microsoft Word';

# start Word program die if unable to
$word = Win32::OLE->new( 'Word.Application', sub { $_[0]->Quit; } )
    or die 'Cannot start Word';

# let's watch
$word->{'Visible'} = 1;

# Create new document
my $d = $word->Documents->Add;

# define selection
my $s = $word->Selection;

my @lines = (
    "This is test line 1",
    "This is test line 2",
    "This is test line 3",
);

# $c is the color
# $start is the start of Range
# $end is the end of Range
# $r is the Range object
my ( $c, $start, $end, $r ) = ( 2, 0, 0, );

for my $line (@lines) {
    # + 1 accounts for the "\n" typed after each line below
    $end += length($line) + 1;
    $s->TypeText($line);

    # define the Range
    $r = $d->Range( $start, $end );

    # Set font properties
    $r->Font->{Size}       = 18;
    $r->Font->{ColorIndex} = $c++;
    $r->Font->{Name}       = 'Courier New';

    $s->TypeText("\n");
    $start = $end;
}

# TIMTOWTDI but this will overwrite the above
#my $r = $doc->{Content};
#$r->{Text} = 'Hello World!';
#$r->InsertParagraphAfter();
#$r->InsertParagraphAfter();
#$r->InsertAfter('Bye!');

# here is how to print a document
$word->ActiveDocument->PrintOut({
    Background => 0,
    Append     => 0,
    Range      => wdPrintAllDocument,
    Item       => wdPrintDocumentContent,
    Copies     => 1,
    PageType   => wdPrintAllPages,
});

# save the file without a prompt
$word->WordBasic->FileSaveAs("c:\\test.doc");

# have a quick look at the objects and properties
# don't try to Dumper the whole thing unless you have time and memory+++
print "Range: $_ => $r->{$_}\n" for sort keys %$r;
print "Doc: $_ => $d->{$_}\n" for sort keys %$d;
print "Font: $_ $r->{Font}->{$_}\n" for sort keys %{$r->{Font}};

sleep 2;

# house keeping, clean up our instances
$d->Close();
$word->Quit();
undef $word;
cheers
tachyon
|
|---|