# I ran into this problem today and found some helpful code snippets here.
# However, I use Word 97 and there is a bug in the program which prevented
# files from being saved. I found the workaround in the ActiveState
# documentation. I thought people might find the code snippet helpful:

###############################
# Convert incoming word file to plain text
# USAGE: a full path file name needs to be provided
#     eg c:\\dir\\file.doc

use strict;
use warnings;

# These were originally written inside the sub body; 'use' is executed at
# compile time regardless of where it appears, so they belong up here.
use Win32::OLE qw(in with);
use Win32::OLE::Const 'Microsoft Word';

use constant TRUE  => 1;
use constant FALSE => 0;

# WordToText( $infile, $outfile )
#
# Opens the Word document $infile in Word (an already running instance if
# there is one, otherwise a new one that is quit when the object is released)
# and saves it as plain text to $outfile.
#
#   $infile  - full path of the Word document to read
#   $outfile - full path of the text file to write
#
# Returns TRUE on success. Returns FALSE (after printing a message) when an
# argument is missing, when no Word object could be obtained, or when
# Documents->Open returned no document object. OLE errors are fatal: the
# document is closed on a best-effort basis and the error is re-thrown.
sub WordToText {
    my ( $infile, $outfile ) = @_;

    for my $name ( $infile, $outfile ) {
        if ( !defined $name || !length $name ) {
            print "WordToText needs an input and an output file name\n";
            return FALSE;
        }
    }

    # Die on OLE errors, but only for the duration of this call so the
    # caller's own Win32::OLE warning level is left untouched.
    local $Win32::OLE::Warn = 3;

    my $Word = Win32::OLE->GetActiveObject('Word.Application')
        || Win32::OLE->new( 'Word.Application', 'Quit' );
    if ( !$Word ) {
        print "WordToText did not create Word object\n";
        return FALSE;
    }

    # We may be attached to a Word instance the user is working in;
    # remember its visibility so it can be put back afterwards.
    my $was_visible = $Word->{Visible};

    my $WordFile;
    my $opened   = FALSE;
    my $finished = eval {
        $WordFile = $Word->Documents->Open($infile);
        if ($WordFile) {
            $opened = TRUE;
            $Word->{Visible} = FALSE;

            # Word 97 workaround: save through WordBasic rather than the
            # document object.
            $Word->WordBasic->FileSaveAs( $outfile, 2 );    # '2' is text
            $WordFile->Close();
        }
        1;
    };
    my $error = $@;

    {
        # Cleanup is best effort: silence OLE errors so that a failure here
        # cannot mask the original error.
        local $Win32::OLE::Warn = 0;
        if ( !$finished && $WordFile ) {
            eval { $WordFile->Close(); 1 };
        }
        eval { $Word->{Visible} = $was_visible; 1 };
    }

    undef $WordFile;
    undef $Word;

    # Preserve the original contract: OLE errors are fatal to the caller.
    die( $error || "WordToText failed\n" ) if !$finished;

    if ( !$opened ) {
        print "WordToText did not create WordFile object\n";
        return FALSE;
    }

    return TRUE;
}

Replies are listed 'Best First'.
Re: Convert MSWord to Text
by particle (Vicar) on Oct 11, 2002 at 13:42 UTC

    nice little utility sub, but i think you'll find this code's a bit cleaner.

    note there's no need to check for OLE errors once Warn is set to die on errors. also, i think these comments are a bit friendlier.

    use Win32::OLE qw(in with);

    ## avoid warnings while loading Win32::OLE::Const module
    { no warnings; use Win32::OLE::Const 'Microsoft Word' }

    ## WordToText - Convert a microsoft word file to a plain text file
    ##
    ## Example: my $status = WordToText( $infile, $outfile );
    ##
    ## $infile  - absolute path to word document input
    ## $outfile - absolute path to text file output
    ##
    ## returns 1 on success, dies on errors
    sub WordToText {
        my( $infile, $outfile ) = @_;
        die( (caller(0))[3] . ": invalid args" ) unless defined $outfile;

        ## make word die on errors
        $Win32::OLE::Warn = 3;

        ## create a word object
        my $Word = Win32::OLE->GetActiveObject( 'Word.Application' )
            || Win32::OLE->new( 'Word.Application', 'Quit' );

        ## open the word document
        my $WordFile = $Word->Documents->Open( $infile );

        ## make word invisible
        $Word->{Visible} = 0;

        ## perform 'save as' in text mode (2)
        $Word->WordBasic->FileSaveAs( $outfile, 2 );

        ## close the file
        $WordFile->Close();

        return 1;
    }

    ~Particle *accelerates*

Re: Convert MSWord to Text
by nothingmuch (Priest) on Oct 17, 2002 at 07:34 UTC
    I'm curious - is there any way to read the plain text directly, and sequentially? Like read x bytes, or readline, or whatever, instead of saving to a new file and reading that?

    -nuffin
    zz zZ Z Z #!perl
        Can this be done in smaller segments though? without loading everything at once?

        -nuffin
        zz zZ Z Z #!perl