in reply to Re: Re: HTML::Parser - no contents. no error.
in thread HTML::Parser - no contents. no error.

I don't know if HTML::Parser passes its own test suite. I don't have rights and the adminstrator is hard to reach. I'm working on that. I did noticed that they must have recompiled Perl back in August when I did a -V.

As for code, I've been hesistant because there is quite a bit. I did cut it down into it relevant parts. The report_tags call is commented out. Here it is:

#!/usr/bin/perl -w use Text::Styler; # use Devel::TraceMethods qw ( HTML::Parser Text::Styler::Parser ); my $styler = Text::Styler->new( { left_margin=>5, right_margin=>2, col +umns=>70 } ); my $html = <<OUTPUT; <p>Little <q>help</q> and love key the i to i'll tune get me help i i. +</p> <p>Walk friends sad, you. <br />My by up the my little, a a, and my i, + with <a href="http://www.foo.com/">you high</a> you're. By from when + walk would what my help from my, by, when. Your up, be own with woul +d is me up your you're friends does to of by and my how ears. You if +little with out i own, worry you, of are, little, not get, i sing sad + key. Stand help get with get be what are walk my out a of song, on b +y would i a a. Away think think, because me not . Tune you with me a +sing the how i'll song sang sang on your, out, i. Little, help help a +nd love key the i to i'll tune get me help i i, of. The <b>sad, you does by</b> up the my little, a.</p> <pre>xxxx foo foo</pre> OUTPUT print $styler->text_style($html)."\n"; ---- package Text::Styler; use strict; use Text::Wrap; use vars qw( $VERSION ); $VERSION = 0.1; sub new { my $class = shift; my $self = bless { }, $class; if ( my $a = shift ) { # getting set wrong when hash ref is handed + in. $self->left_margin( $a->{left_margin} || 0 ); $self->right_margin( $a->{right_margin} || 0 ); $self->columns( $a->{columns} || 60 ); } return $self; } sub text_style { my($self,$text) = @_; use Text::Styler::Parser; my $styler = new Text::Styler::Parser; $styler->parse($text); $self->text_wrap( $styler->contents ); } ---- package Text::Styler::Parser; use strict; use HTML::Parser; our @ISA = qw( HTML::Parser ); use vars qw( $VERSION ); $VERSION = 0.1; my %inline_tags = ( 'a' => '[', 'b' => '*', 'strong' => '*', 'i' => '\\', 'em' => '\\', 'q' => '"' ); my %block_tags = ( 'p' => "\n", 'pre' => '' ); my %empty_tags = ( 'br' => '1' ); my @report_tags = (keys %inline_tags, keys %block_tags, keys %empty_ta +gs); my $tag_symbols = { %inline_tags, %block_tags }; # not too concerned a +bout saving memory right now my $tag_handlers= { 'a' => \&hdlr_hyperlink, 'p' => \&hdlr_paragrap +h, 'br' => \&hdlr_br, 'pre' => \&hdlr_pre }; sub new { my $proto = shift; my $class = ref( $proto ) || $proto; my $parser = HTML::Parser->new( api_version => 3 ); $parser->handler(start_document => \&start_document_handler, 'self +' ); $parser->handler(start => \&start_handler, 'self, tagname, attr' ) +; $parser->handler(end => \&end_handler, 'self, tagname' ); $parser->handler(text => \&text_handler, 'self, dtext' ); #$parser->report_tags( qw( a b strong i em q p pre br ) ); #$parser->report_tags( qw( @report_tags ) ); $parser->unbroken_text(1); return bless $parser, $class; } sub contents { $_[0]->{_output}; } sub start_document_handler { $_[0]->{_stack}=undef; $_[0]->{_output}=u +ndef; } sub start_handler { my $self = shift; my $tag = shift; my $attr = shift; push( @{ $self->{_stack} }, [ $tag, $tag_symbols->{$tag} ] ); unless( defined( $tag_handlers->{$tag} ) && $tag_handlers->{$tag}->($self,'1',$tag_symbols->{$tag},$attr) +) { $self->{_output} .= length( $tag_symbols->{$tag} ) ? ' '.$tag_symbols->{$tag} : $tag_symbols->{$tag}; # default start handler } } sub end_handler { my $self = shift; my $tag = shift; unless( defined( $tag_handlers->{$tag} ) && $tag_handlers->{$tag}->($self,'-1',$tag_symbols->{$tag}) ) { if ( defined( $block_tags{$tag} ) ) { $self->{_output} .= "\n\n"; } elsif ( defined( $inline_tags{$tag} ) ) { $self->{_output} .= length( $tag_symbols->{$tag} ) ? $tag_symbols->{$tag}.' ' : $tag_symbols->{$tag}; # default end handler } # empty tags can only have start tag routines } pop( @{ $self->{_stack} } ); } sub text_handler { my $self = shift; my $text = shift; if ( $self->{_stack}->[0] ) { # filters out text outside a tag. $self->{_current_text}=$text; # a kludge. my $tag = $self->current_element; unless( defined($tag) && defined( $tag_handlers->{$tag} ) && defined( $tag_symbols->{$tag}) && $tag_handlers->{$tag}->($self,'0',$tag_symbols->{$tag} +) ) { $self->{_current_text}=~s/\r//sg; $self->{_current_text}=~s/^[\s\t]*//gm; $self->{_current_text}=~s/[\s\t]*$//gm; $self->{_current_text}=~s/\n/ /gs; } $self->{_output} .= $self->{_current_text}; $self->{_current_text} = undef; } } #--- tag handlers sub hdlr_hyperlink { my ($self, $mode, $symbol, $attr) = @_; if ($mode == 1) { $self->{_output}.=' '; $self->{_current_link} .= $attr->{'href'}; # add alt storage here. return 1; } elsif ($mode == -1) { $self->{_output} .= ' ['.$self->{_current_link}.'] '; $self->{_current_link}=undef; # blank alt storage here. return 1; } return 0; } sub hdlr_paragraph { my ($self, $mode, $symbol, $attr) = @_; if ( $mode == 1) { # pass through. return 1; } return 0; } sub hdlr_br { my ($self, $mode, $symbol, $attr) = @_; if ($mode == 1) { $self->{_output}.="\n"; } return 1; # This handles it or we ignore it. } sub hdlr_pre { my ($self, $mode, $tag, $attr) = @_; if ( $mode==0 ) { # pass through. return 1; } return 0; }

Replies are listed 'Best First'.
Re: Re: Re: Re: HTML::Parser - no contents. no error.
by PodMaster (Abbot) on Oct 30, 2003 at 02:18 UTC

      Thanks for that link. I've used PREFIX with less then stellar results. I grabbed a copy of HTML::Parser v3.25 and installed it locally. All tests in that package were successful. The script still displays the same behavior.

      As for my code not compiling, I'm sorry about that. It seems this matter has fried my brain. I failed to fully cut and paste it all into the forum. Let's try it again.

      test.pl
      #!/usr/bin/perl -w use Text::Styler; # use Devel::TraceMethods qw ( HTML::Parser Text::Styler::Parser ); my $styler = Text::Styler->new( { left_margin=>5, right_margin=>2, col +umns=>70 } ); my $html = <<OUTPUT; <p>Little <q>help</q> and love key the i to i'll tune get me help i i. +</p> <p>Walk friends sad, you. <br />My by up the my little, a a, and my i, +with <a href="http://www.foo.com/">you high</a> you're. By from whenw +alk would what my help from my, by, when. Your up, be own with would +is me up your you're friends does to of by and my how ears. You iflit +tle with out i own, worry you, of are, little, not get, i sing sadkey +. Stand help get with get be what are walk my out a of song, on by wo +uld i a a. Away think think, because me not . Tune you with me asing +the how i'll song sang sang on your, out, i. Little, help help and lo +ve key the i to i'll tune get me help i i, of. The <b>sad, you does by</b> up the my little, a.</p> <pre>xxxx foo foo</pre> OUTPUT print $styler->text_style($html)."\n";
      Text::Styler
      package Text::Styler; use strict; use Text::Wrap; use vars qw( $VERSION ); $VERSION = 0.1; sub new { my $class = shift; my $self = bless { }, $class; if ( my $a = shift ) { # getting set wrong when hash ref is handed + +in. $self->left_margin( $a->{left_margin} || 0 ); $self->right_margin( $a->{right_margin} || 0 ); $self->columns( $a->{columns} || 60 ); } return $self; } sub text_style { my($self,$text) = @_; use Text::Styler::Parser; my $styler = new Text::Styler::Parser; $styler->parse($text); $self->text_wrap( $styler->contents ); } sub text_wrap { my($self,$text) = @_; local( $Text::Wrap::columns=$self->width ); my $lm = ' ' x $self->left_margin; # local( $Text::Wrap::unexpand=0 ); my $out = wrap($lm,$lm,$text); # stupid hack because $Text::Wrap::unexpand=0; is ignored when set +. my $s = ' ' x 8; $out=~s/\t/$s/g; $out =~ s/\n\s*?\n/\n\n/gs; $out; } sub columns { $_[0]->{__columns} = $_[1] if $_[1]; $_[0]->{__columns}; + } sub left_margin { $_[0]->{__left_margin} = $_[1] if $_[1]; $_[0]->{__l +eft_margin}; } sub right_margin { $_[0]->{__right_margin} = $_[1] if $_[1]; $_[0]->{_ +_right_margin}; } sub width { $_[0]->{__columns} - $_[0]->{__left_margin} - $_[0]->{__ri +ght_margin}; }
      Text::Styler::Parser
      package Text::Styler::Parser; use strict; use HTML::Parser; our @ISA = qw( HTML::Parser ); use vars qw( $VERSION ); $VERSION = 0.1; my %inline_tags = ( 'a' => '[', 'b' => '*', 'strong' => '*', 'i' => '\\', 'em' => '\\', 'q' => '"' ); my %block_tags = ( 'p' => "\n", 'pre' => '' ); my %empty_tags = ( 'br' => '1' ); my @report_tags = (keys %inline_tags, keys %block_tags, keys %empty_ta +gs); my $tag_symbols = { %inline_tags, %block_tags }; # not too concerned a +bout saving memory right now my $tag_handlers= { 'a' => \&hdlr_hyperlink, 'p' => \&hdlr_paragrap +h, 'br' => \&hdlr_br, 'pre' => \&hdlr_pre }; sub new { my $proto = shift; my $class = ref( $proto ) || $proto; my $parser = HTML::Parser->new( api_version => 3 ); $parser->handler(start_document => \&start_document_handler, 'self +' ); $parser->handler(start => \&start_handler, 'self, tagname, attr' ) +; $parser->handler(end => \&end_handler, 'self, tagname' ); $parser->handler(text => \&text_handler, 'self, dtext' ); #$parser->report_tags( qw( a b strong i em q p pre br ) ); #$parser->report_tags( qw( @report_tags ) ); $parser->unbroken_text(1); return bless $parser, $class; } sub contents { $_[0]->{_output}; } sub start_document_handler { $_[0]->{_stack}=undef; $_[0]->{_output}=u +ndef; } sub start_handler { my $self = shift; my $tag = shift; my $attr = shift; push( @{ $self->{_stack} }, [ $tag, $tag_symbols->{$tag} ] ); unless( defined( $tag_handlers->{$tag} ) && $tag_handlers->{$tag}->($self,'1',$tag_symbols->{$tag},$attr)) + { $self->{_output} .= length( $tag_symbols->{$tag} ) ? ' '.$tag_symbols->{$tag} : $tag_symbols->{$tag}; # default start handler } } sub end_handler { my $self = shift; my $tag = shift; unless( defined( $tag_handlers->{$tag} ) && $tag_handlers->{$tag}->($self,'-1',$tag_symbols->{$tag}) ) { if ( defined( $block_tags{$tag} ) ) { $self->{_output} .= "\n\n"; } elsif ( defined( $inline_tags{$tag} ) ) { $self->{_output} .= length( $tag_symbols->{$tag} ) ? $tag_symbols->{$tag}.' ' : $tag_symbols->{$tag}; # default end handler } # empty tags can only have start tag routines } pop( @{ $self->{_stack} } ); } sub text_handler { my $self = shift; my $text = shift; if ( $self->{_stack}->[0] ) { # filters out text outside a tag. $self->{_current_text}=$text; # a kludge. my $tag = $self->current_element; unless( defined($tag) && defined( $tag_handlers->{$tag} ) && defined( $tag_symbols->{$tag}) && $tag_handlers->{$tag}->($self,'0',$tag_symbols->{$tag} +) ) { $self->{_current_text}=~s/\r//sg; $self->{_current_text}=~s/^[\s\t]*//gm; $self->{_current_text}=~s/[\s\t]*$//gm; $self->{_current_text}=~s/\n/ /gs; } $self->{_output} .= $self->{_current_text}; $self->{_current_text} = undef; } } #--- tag handlers sub hdlr_hyperlink { my ($self, $mode, $symbol, $attr) = @_; if ($mode == 1) { $self->{_output}.=' '; $self->{_current_link} .= $attr->{'href'}; # add alt storage here. return 1; } elsif ($mode == -1) { $self->{_output} .= ' ['.$self->{_current_link}.'] '; $self->{_current_link}=undef; # blank alt storage here. return 1; } return 0; } sub hdlr_paragraph { my ($self, $mode, $symbol, $attr) = @_; if ( $mode == 1) { # pass through. return 1; } return 0; } sub hdlr_br { my ($self, $mode, $symbol, $attr) = @_; if ($mode == 1) { $self->{_output}.="\n"; } return 1; # This handles it or we ignore it. } sub hdlr_pre { my ($self, $mode, $tag, $attr) = @_; if ( $mode==0 ) { # pass through. return 1; } return 0; } sub current_element { return shift->{_stack}->[-1]->[0] } #if defined. 1;
        HTML::Parser is not to blame. You have logic errors.

        First #$parser->report_tags( qw( @report_tags ) ); should be #$parser->report_tags( @report_tags );.

        Second, your text_handler is flawed. You test     if ( $self->{_stack}->[0] ) { # filters out text outside a tag. which will FAIL every time since '<@report_tags>' and/or '</@report_tags>' are never encountered. The same would happen if you only had text as input. I suggest you rework your text_handler, and in the future always start with a small example which demonstrates the problem (it's easier to diagnose the actual problem that way).

        MJD says "you can't just make shit up and expect the computer to know what you mean, retardo!"
        I run a Win32 PPM repository for perl 5.6.x and 5.8.x -- I take requests (README).
        ** The third rule of perl club is a statement of fact: pod is sexy.