#!perl -w
#
# Name:
#	ole-word-2-text-2.pl.
#
# Purpose:
#	Read an MS Word doc and extract each paragraph. Do this in 2 different ways.
#
# Usage:
#	perl ole-word-2-text-2.pl [document]
#	If no document is given, the built-in demo path is used. A relative
#	path is resolved against the current working directory.
#
# Test environment:
#	MS Word 2000 under WinNT.
#
# Author:
#	Ron Savage
#
# Home:
#	http://savage.net.au/Perl-tutorials.html#tut-17
#
# Version
#	1.00	16-Apr-2000
#
# Licence:
#	Copyright (c) 2000 Ron Savage.
#	All Programs in this package are free software; you can redistribute
#	them and/or modify them under the same terms as Perl itself.
#	Perl's Artistic License is available at:
#	See licence.txt.

use strict;
use warnings;

use Win32::OLE qw(in with);

# --------------------------------------------------

# Return the text of one Word paragraph object, minus its final character.
# Word terminates the Range text of every paragraph with a paragraph marker
# (^M), which would otherwise be printed just before the closing '>'.
sub paragraph_text
{
	my($paragraph) = @_;

	return substr($paragraph->Range->Text, 0, -1);
}

# --------------------------------------------------

# The document defaults to the demo file; an optional first command line
# argument overrides it.
my($input_file) = @ARGV
	? $ARGV[0]
	: 'C:\My Documents\msword\olewordtotext1\tut-17\ole-word-demo-4.doc';

# Word needs an absolute path: anything not starting with a slash
# (optionally preceded by a drive letter) is taken as relative to the cwd.
$input_file = Win32::GetCwd() . "/$input_file" if $input_file !~ /^(\w:)?[\/\\]/;

die "File $input_file does not exist" unless -f $input_file;

# The 2nd argument names the method Win32::OLE calls on the Word object
# when it is destroyed, so Word is shut down even if we die below.
# Note: Scalar assignment is deliberate. 'my(...) = ... or die' would test
# the element count of the list assignment, which is always true.
my $word = Win32::OLE->new('Word.Application', 'Quit')
	or die "Couldn't run Word";

# Without this check a failed Open would only surface later as
# "Can't call method "Paragraphs" on an undefined value".
my $doc = $word->Documents->Open($input_file)
	or die "Couldn't open $input_file: " . Win32::OLE->LastError();

# Method 1: Enumerate the Paragraphs collection.
print "Extract using first method: \n";

my($index) = 0;

for my $paragraph (in $doc->Paragraphs)
{
	$index++;

	my($text) = paragraph_text($paragraph);

	print "Paragraph: $index. Text: <$text>\n\n";
}

print '-' x 50, "\n";

# Method 2: Fetch each paragraph by its 1-based index.
print "Extract using second method: \n";

# Ask Word for the count once, rather than on every pass through the loop.
my($paragraph_count) = $doc->Paragraphs->Count;

for my $number (1 .. $paragraph_count)
{
	my($text) = paragraph_text($doc->Paragraphs($number));

	print "Paragraph: $number. Text: <$text>\n\n";
}

print '=' x 50, "\n";

# Mark the document as saved before closing it.
$doc->{Saved} = 1;
$doc->Close;

# Success.
print "Success \n";
exit(0);