I'm an absolute newbie to Perl and I'm trying to print the contents of a table (in a docx) into a tab-delimited or xlsx file, either is fine.
There are over 100 docx's that have tables in them with identical delimiters ("Version History", "Table of Contents").
I can print the entire contents of the file to a .txt, but I can't yet get the data between the delimiters.
Any suggestions?
Thank you!
use strict; use warnings; use Win32::OLE qw(in); use Win32::OLE::Const 'Microsoft Word'; use Win32::OLE::Variant; $|=1; sub Parse{ my $document_name = 'C:\TestPolicy.rtf'; my $word = Win32::OLE->GetActiveObject('Word.Application') || Win32::OLE->new('Word.Application','Quit') or die Win32::OLE->LastError(); my $document = $word->Documents->Open($document_name) or die Win32::OLE->LastError(); my $paragraphs = $document->Paragraphs (); my $n_paragraphs = $paragraphs->Count (); my $outputfile = 'C:\testfile.txt'; open(INPUT, $document_name) or die "Failed to open $document_name\n"; while (<INPUT>){ if ($_ =~ /HISTORY/../TABLE/){ open(OUTPUT, '>'.$outputfile) or die "Can't create $output +file.\n"; print OUTPUT "$_\n"; close OUTPUT; } } close INPUT; } Parse()
In reply to Getting lines in a file between two patterns by Daikini
| For: | Use: | ||
| & | & | ||
| < | < | ||
| > | > | ||
| [ | [ | ||
| ] | ] |