# Convert the PDF to XML with pdftohtml and stream the result through
# XML::Twig, dispatching selected elements to RouteTo() and InvoiceSort().
#
# Reads:  $PDF_FILE (path of the PDF to convert; declared elsewhere).
# Leaves: $XML (closed pipe handle) and $t (the twig) in scope, as before.
# Dies:   if the pipe cannot be opened, if the XML cannot be parsed, or if
#         pdftohtml exits with a non-zero status.

# Quote the file name so that paths containing spaces reach pdftohtml as a
# single argument; a name that is already wrapped in quotes is left alone.
my $pdftohtml_cmd = do {
    my $pdf_arg = $PDF_FILE =~ /\A".*"\z/s ? $PDF_FILE : qq{"$PDF_FILE"};
    "e:\\path\\to\\pdftohtml.exe -xml -zoom 1.4 -stdout $pdf_arg";
};

# A successful open only means the process could be started; the exit
# status of pdftohtml itself is checked when the pipe is closed below.
open(my $XML, "-|", $pdftohtml_cmd)
    or die "pdftohtml failed:\n$!\n$^E";

# NOTE(review): pdftohtml -xml normally emits <pdf2xml><page><text .../>;
# confirm that the '/pdf2html/...' paths below match the real output.
my $t = XML::Twig->new(
    twig_handlers => {
        # Element positioned in the 180-190 / 100-111 (top / left) window.
        '/pdf2html/pagetext[(@top >= 180 and @top <= 190) and (@left >= 100 and @left <= 111)]'
            => \&RouteTo,

        # Element positioned in the 215-225 / 260-270 (top / left) window.
        '/pdf2html/pagetext[(@top >= 215 and @top <= 225) and (@left >= 260 and @left <= 270)]'
            => \&InvoiceSort,

        # Free memory after every page.
        '/pdf2html/page' => sub { $_[0]->purge; 1; },
    },
    comments   => 'drop',      # remove any comments
    empty_tags => 'normal',    # empty tags = <tag/>
);

$t->parse($XML);

# For a piped handle, close() waits for the child and returns false when
# the child exited with a non-zero status ($! is 0 and $? holds the status
# in that case), so this is where a failing pdftohtml run is detected.
unless (close $XML) {
    die $!
        ? "error closing pdftohtml pipe: $!\n"
        : "pdftohtml exited with status " . ($? >> 8) . "\n";
}