$ diff -u TextBlock.pm.orig TextBlock.pm
# NOTE(review): this patch was recovered from a copy that had been flattened
# onto a single line. Line breaks are restored; indentation is a best guess,
# so apply with `patch -l` (whitespace-insensitive matching) or by hand.
#   * Hunk 1: the "<b> or <i>" in the context comment replaces markup that
#     had been stripped to "( or or whatever)" -- presumed wording, confirm
#     against TextBlock.pm.orig.
#   * Hunk 1: the tag-name pattern is /<(\w+)[^>]*>/ rather than the
#     /<(\w*)[^\/].*?>/ found in the flattened copy. The old form required one
#     extra non-"/" character after the name, so for "<b>one <i>two</i></b>"
#     it consumed "<b>one <i>" as a single match and never registered "i";
#     for a lone "<b>" it captured the empty string. The new form captures the
#     bare tag name ("href" for <href="...">) and still skips closing tags.
#   * Hunk 4: the header counts three more context lines than survived; they
#     are restored as blank lines where a 3-line diff context implies them.
--- TextBlock.pm.orig 2009-07-13 18:45:27.000000000 +0200
+++ TextBlock.pm 2009-09-03 15:52:45.128961624 +0200
@@ -151,7 +151,7 @@
    # Build %content_texts. A hash of all PDF::API2::Content::Text objects,
    # one for each tag (<b> or <i> or whatever) in $text.
    my %content_texts;
-   foreach my $tag (($text =~ /<([^\/].*?)>/g), "default") {
+   foreach my $tag (($text =~ /<(\w+)[^>]*>/g), "default") {
       next if ($content_texts{$tag});
       my $content_text = $page->text; # PDF::API2::Content::Text obj
       my $font;
@@ -307,6 +307,7 @@
             if ($tag =~ /^href/) {
                ($href) = ($tag =~ /href="(.*?)"/);
                # warn "href is now $href";
+               $current_content_text = $content_texts{href} if ref $content_texts{href};
             } elsif ($tag !~ /\//) {
                $current_content_text = $content_texts{$tag};
             }
@@ -350,7 +351,8 @@
             if ($word =~ /\//) {
                if ($word =~ /\/href/) {
                   undef $href;
-               } else {
+               }
+               unless ($href) {
                   $current_content_text = $content_texts{default};
                }
             }
@@ -374,7 +376,10 @@
 
    # Don't yet know why we'd want to return @paragraphs...
    # unshift( @paragraphs, join( ' ', @paragraph ) ) if scalar(@paragraph);
-   return ( $endw, $ypos ); # , join( "\n", @paragraphs ) )
+   #return ( $endw, $ypos ); # , join( "\n", @paragraphs ) )
+   unshift( @paragraphs, join( ' ', @paragraph ) ) if scalar(@paragraph);
+   my $overflow = join("\n",@paragraphs);
+   return ( $endw, $ypos, $overflow); #$overflow text returned to script
 }
 
 