If, like me, you have long been searching for a way to convert Word
documents (.doc, .rtf, ...) into plain text documents, you will probably
be interested in this piece of code.
Admittedly it creates a dependency on Windows, but it gives good results.
To use it, you just need Apache / Perl on Win32 with Word 97, 98 or 2000.
#!perl -w
# If, like me, you have long been searching for a way to convert Word
# documents (.doc, .rtf, ...) into plain text documents, you will probably
# be interested in this piece of code.
# Admittedly it creates a dependency on Windows, but it gives good results.
#
# To use it, you just need Apache / Perl on Win32 with Word 97, 98 or 2000.
#
use CGI ;
use Win32::OLE;
use Win32::OLE::Const 'Microsoft Word';
use strict;

# CGI entry point: convert an uploaded Word-readable document to text.
#
# Request parameters:
#   document - file upload field holding the document to convert.
#   html     - when set to 'on', every newline of the result is followed
#              by a <br> tag.
#
# Flow: the upload is copied into c:\doctotexte, Word is driven through
# OLE automation to re-save it as text with line breaks, and the resulting
# text is printed to the client.  Both working files are removed afterwards,
# whether or not the conversion succeeded.
#
# The response is always sent with the default CGI header (text/html), so
# the extracted text is HTML-escaped before it is printed.

my $query = CGI->new;
print $query->header;

my $filepath = $query->param('document');
my $html     = $query->param('html');
my ( $filename, $inputFileName );

if ( defined $filepath && $filepath ne '' ) {

    # Browsers may send a full client-side path: keep the last component only.
    if ( $filepath =~ /([^\/\\]+)$/ ) {
        $filename = $1;
    }
    else {
        $filename = $filepath;
    }
    $filename =~ s/\s+//g;

    # The name comes from the client and ends up in a path on disk, so keep
    # only harmless characters and refuse names made of dots only ('.', '..').
    $filename =~ s/[^A-Za-z0-9_.\-]//g;
    $filename =~ s/^\.+//;
    if ( $filename eq '' ) {
        print 'Invalid file name in document field';
        exit(0);
    }

    # param() only yields the file NAME; the uploaded data has to be read
    # from the handle returned by upload().
    my $upload = $query->upload('document');
    if ( !$upload ) {
        print 'No Attachment in document field';
        exit(0);
    }

    $inputFileName = "c:\\doctotexte\\$filename";
    my $wfd;
    if ( !open( $wfd, '>', $inputFileName ) ) {
        print "Error last recording file on HD";
        exit(0);
    }

    # Word documents are binary: disable CRLF translation on both handles
    # once, before any data moves.
    binmode $wfd;
    binmode $upload;

    my $buff;
    while ( read( $upload, $buff, 2096 ) ) {
        print {$wfd} $buff;
    }

    # Buffered write errors (disk full, ...) only surface at close time.
    if ( !close($wfd) ) {
        unlink($inputFileName);
        print "Error last recording file on HD";
        exit(0);
    }

    my $outputFileName = "c:\\doctotexte\\$filename.doc";
    die("Can't find $inputFileName\n") if ( !-e $inputFileName );
    unlink($outputFileName);

    # Run the conversion inside eval so the working files are always removed,
    # even when Word cannot be started or cannot open the document.
    my $texte = eval {
        my $word = Win32::OLE->new( 'Word.Application', 'Quit' )
            or die "Can't start Word: " . Win32::OLE->LastError() . "\n";

        $word->{DisplayAlerts} = 0;    # Stop msg box: Do you wish to save...?
        $word->{Visible}       = 0;    # No Word window on the server.

        my $doc = $word->Documents->Open($inputFileName)
            or die "Word can't open $inputFileName: "
            . Win32::OLE->LastError() . "\n";

        $doc->SaveAs( $outputFileName, wdFormatTextLineBreaks() );
        $doc->Close();
        $word->Quit();

        open( my $fic, '<', $outputFileName )
            or die "Can't read $outputFileName: $!\n";
        my $content = join( '', <$fic> );
        close($fic);

        $content;
    };
    my $error = $@;

    unlink($inputFileName);
    unlink($outputFileName);

    if ( !defined $texte ) {

        # Tell the client something went wrong without leaking server paths;
        # the detailed reason goes to the server error log through die.
        print 'Error converting document';
        die( $error || "Unknown conversion error\n" );
    }

    # The body is served as text/html, so characters such as < and & coming
    # from the document must not be interpreted as markup.
    $texte = $query->escapeHTML($texte);

    if ( defined $html && $html eq 'on' ) {
        $texte =~ s/\n/<br>\n/g;
    }
    print $texte;
}
else {
    print 'No Attachment in document field';
}
# --------------------------------------------------------------------