use strict;
use warnings;

use Win32::OLE;
use Win32::OLE::Enum;
use Win32::OLE::Const 'Microsoft Office 15.0 Object Library';
use Win32::OLE::Const 'Microsoft Word';

# Extracts the review date and the review moderator's name from every
# *.docx file found in the current directory.
#
# For each document the script:
#   1. opens it in a hidden Word instance (Word must be installed),
#   2. dumps the text of every paragraph into a temporary text file,
#   3. scans that text file line by line for "Date:" and
#      "Review Moderator:" and appends what it finds to the report file,
#   4. deletes the temporary text file.
#
# Report format (one record per matching line, tab separated):
#   <docx name> TAB <date> TAB <moderator> NEWLINE
# The "<docx name> TAB <date> TAB" part is written for every line that
# contains "Date:", the "<moderator> NEWLINE" part for every line that
# contains "Review Moderator:".

# Directory the documents are opened from (Word needs an absolute path).
# NOTE(review): the file list below comes from the *current* directory, so
# the script presumably has to be run from inside this directory - confirm.
my $doc_dir     = "D:\\MRJ_BCU\\Perl\\From thejaswini\\doc_read";
my $report_file = 'Author_name_extract.txt';
my $temp_file   = 'File_under_Review.txt';

# Create and open the report file.
open(my $report_fh, '>', $report_file)
    or die "Cant open Output file $report_file: $!\n";

# Start Word; the 'Quit' destructor makes sure Word is shut down even if
# the script dies half way through.
my $word = Win32::OLE->new('Word.Application', 'Quit')
    or die 'Unable to start Word: ', Win32::OLE->LastError, "\n";
$word->{Visible} = 0;

my @docx_names = glob '*.docx';

foreach my $docx_name (@docx_names) {

    # Names starting with "~$" are skipped and only reported.
    if ($docx_name =~ /^~\$/) {
        print "Corrupted File: $docx_name\n";
        next;
    }

    print "$docx_name\n";
    my $filename = "$doc_dir\\$docx_name";

    my $document = $word->Documents->Open($filename)
        or die "Unable to open document: $filename: ",
               Win32::OLE->LastError, "\n";

    open(my $dump_fh, '>', $temp_file)
        or die "Cant open Output file $temp_file: $!\n";

    print "Extracting Text from $filename...\n";

    # Write the text of every paragraph to the temporary file, one
    # paragraph per output line.
    my $paragraphs = Win32::OLE::Enum->new($document->Paragraphs());
    while (defined(my $paragraph = $paragraphs->Next())) {
        my $text = $paragraph->{Range}->{Text};
        $text = '' unless defined $text;
        print {$dump_fh} "$text\n";
    }

    # Buffered write errors only show up at close time, so check it.
    close($dump_fh) or die "Cant close $temp_file: $!\n";

    # The document is only read, so close it without saving
    # (0 == wdDoNotSaveChanges); this leaves the input files untouched
    # and cannot trigger a hidden "save changes?" prompt.
    $document->Close(0);

    # Open the converted text file to read the patterns.
    # NOTE(review): in the reviewed source the text of this open/while
    # header was lost (only `open(INFILE,") {` survived). It has been
    # reconstructed from the temp file name, the loop body and the
    # matching close below - confirm against the original script.
    open(my $text_fh, '<', $temp_file)
        or die "Cant open $temp_file for reading: $!\n";

    while (my $line = <$text_fh>) {

        # Ignore lines that contain nothing but whitespace.
        next if $line !~ /\S/;

        if ($line =~ /Date:/) {
            # An empty field is written when no d/m/y-like value follows.
            my $date =
                ($line =~ /\s*\S*\s*Date:\s*(\d*\/\d*\/\d*)\s*/) ? $1 : '';
            print {$report_fh} "$docx_name\t";
            print {$report_fh} "$date\t";
        }

        if ($line =~ /Review Moderator:/) {
            # The name is only picked up when "Date:" follows it on the
            # same line; otherwise an empty name is written.
            my $moderator =
                ($line =~ /\s*\S*\s*Review Moderator:\s*(\w+\s?.?\w*)\s*Date:/)
                ? $1
                : '';
            $moderator =~ s/\s{2,}//g;    # drop runs of 2+ whitespace chars
            $moderator =~ s/_*//g;        # drop underscores
            print {$report_fh} "$moderator\n";
        }
    }
    close($text_fh);

    # Delete the temporary converted text file.
    unlink($temp_file) or warn "Cant delete $temp_file: $!\n";
}

# Quit the Word application.
$word->Quit();

# Close the report file.
close($report_fh) or die "Cant close $report_file: $!\n";

# Print the completion time.
my $finished_at = localtime;
print "$finished_at\n";