use strict;
use warnings;
use utf8;

use Archive::Zip qw( :ERROR_CODES );
use XML::Twig;
use Data::Dumper;

# Extract the text runs of every slide in a PowerPoint (.pptx) package and
# print them to STDOUT, one "SLIDE <n>" section per slide part.
#
# A .pptx file is a zip archive; each slide is stored as
# ppt/slides/slide<N>.xml.  Every slide part is parsed with XML::Twig and the
# handlers defined at the bottom of this file append text fragments to
# @textPPT, which is joined and printed once all slides have been processed.

my $PathDocument = "myDocument.pptx";

# Output fragments, in order; filled by the twig handlers below.
our @textPPT;

# The text handed back by the XML parser is character data, so give STDOUT an
# explicit encoding layer instead of letting perl emit "Wide character"
# warnings (or Latin-1 bytes) for non-ASCII slide text.
binmode STDOUT, ':encoding(UTF-8)';

my $zip = Archive::Zip->new();
$zip->read($PathDocument) == AZ_OK
    or die "Unable to open Office file\n";

# Collect the slide parts that are really present in the archive, keyed by
# the number embedded in their file name.  The pattern is single-quoted so
# that "\." and "\d" reach the regex engine intact, and anchored so that only
# ppt/slides/slide<N>.xml itself is selected.
my %member_for;
for my $member ( $zip->membersMatching('^ppt/slides/slide\d+\.xml$') ) {
    my ($number) = $member->fileName() =~ m{ slide (\d+) \.xml \z }x;
    next if !defined $number;
    $member_for{$number} = $member;
}

# Walk the slides in numeric file-name order.  Slide numbers need not be
# contiguous, so iterate over what was found rather than over 1 .. count.
# NOTE(review): file-name order is presumably close to, but not guaranteed to
# be, the on-screen slide order -- confirm if exact ordering matters.
for my $number ( sort { $a <=> $b } keys %member_for ) {
    push @textPPT, "\n\nSLIDE $number\n\n";

    my $content = $zip->contents( $member_for{$number} );
    if ( !defined $content || $content eq q{} ) {
        warn "Slide $number has no content, skipping\n";
        next;
    }

    my $twig = XML::Twig->new(
        #keep_encoding => 1,
        twig_handlers => {
            'a:t'          => \&text_processing,
            'a:endParaRPr' => \&line_processing,
            # NOTE(review): the "w:" prefix looks like a word-processing
            # element name; confirm whether it ever occurs in slide parts.
            'w:tab'        => \&tab_processing,
        },
    );
    $twig->parse($content);
}

my $text = join( "", @textPPT );

# BASIC FORMATTING: collapse runs of spaces into a single space.
$text =~ s/ +/ /g;

print $text;

# Twig handler for 'a:t' elements: append the element's text to @textPPT.
#   $twig    - the XML::Twig object (unused)
#   $ppttext - the matched element
# Returns a true value, as the original implicit push result did.
sub text_processing {
    my ( $twig, $ppttext ) = @_;
    push @textPPT, $ppttext->text();
    return 1;
}

# Twig handler for 'a:endParaRPr' elements: append a newline to @textPPT.
#   $twig    - the XML::Twig object (unused)
#   $ppttext - the matched element (unused)
# Returns a true value, as the original implicit push result did.
sub line_processing {
    my ( $twig, $ppttext ) = @_;
    push @textPPT, "\n";
    return 1;
}

# Twig handler for 'w:tab' elements: append a tab character to @textPPT.
#   $twig    - the XML::Twig object (unused)
#   $ppttext - the matched element (unused)
# Returns a true value, as the original implicit push result did.
sub tab_processing {
    my ( $twig, $ppttext ) = @_;
    push @textPPT, "\t";
    return 1;
}