package readPowerPoint;

use strict;
use warnings;

use Win32::OLE qw(in with);
use Win32::OLE::Const 'Microsoft PowerPoint';
use Win32::OLE::Const 'Microsoft Office';
use Data::Dumper;

require Exporter;

our @ISA    = qw(Exporter);
our @EXPORT = qw(readActivePPT);

# readActivePPT()
#
# Connects to the PowerPoint application that is already running and reads
# the presentation that is currently active in it.
#
# Returns a list: ($name, $properties, %content)
#   $name       - the Name of the active presentation (i.e. the subject)
#   $properties - the presentation's BuiltInDocumentProperties OLE object
#   %content    - a hash of hashes: the first level holds one entry per slide
#                 ("slide1", "slide2", ...), and each slide is itself a hash
#                 holding every text area found on it ("shape1", "shape2",
#                 ...). Each value is the lower-cased text of that shape, one
#                 word after the other, each followed by a single space.
#                 To access do this: $content{slide1}{shape1}
#
# When no running PowerPoint instance or no active presentation is found,
# (undef, undef) is returned, so that a caller unpacking into
# ($name, $properties, %content) gets two undefs and an empty hash.
#
# Side effect: prints a progress line to STDOUT for every slide processed.
sub readActivePPT
{
    # Connect to the PowerPoint application (it has to be running already).
    my $powerpoint = Win32::OLE->GetActiveObject('Powerpoint.Application');
    return (undef, undef) unless $powerpoint;

    my $activePPT = $powerpoint->ActivePresentation;
    return (undef, undef) unless defined $activePPT;

    my $PPTName       = $activePPT->Name;    # retrieve PPT name i.e. subject
    my $PPTProperties = $activePPT->{BuiltInDocumentProperties};
    my %PPTContent    = ();

    # Every Count is a COM round trip, so ask only once per collection.
    my $slideCount = $activePPT->{Slides}->Count || 0;

    SLIDE:
    for my $slideNumber (1 .. $slideCount) {
        my $total = $slideNumber / $slideCount;
        print "\n---------------- Please wait (" . (100 * $total)
            . "% done)----------------";

        my $slide = $activePPT->Slides($slideNumber);
        next SLIDE unless defined $slide;

        my $shapeCount = $slide->{Shapes}->Count || 0;

        SHAPE:
        for my $shapeNumber (1 .. $shapeCount) {
            my $shape = $slide->Shapes($shapeNumber);
            next SHAPE unless defined $shape;

            # HasTextFrame is tested against both true values of the
            # Office tri-state (msoTrue and msoCTrue).
            my $hasTextFrame = $shape->HasTextFrame();
            next SHAPE unless defined $hasTextFrame;
            next SHAPE
                unless ($hasTextFrame == msoCTrue
                     or $hasTextFrame == msoTrue);

            my $wordGroup = $shape->TextFrame->{TextRange};
            next SHAPE unless defined $wordGroup;

            # Collect every single word, lower-cased and followed by a space.
            my $text = "";
            foreach my $word (in($wordGroup->Words)) {
                $text .= lc($word->{Text}) . " ";
            }

            $PPTContent{ "slide" . $slideNumber }{ "shape" . $shapeNumber }
                = $text;
        }
    }

    return ($PPTName, $PPTProperties, %PPTContent);
}

1;