I don't know if HTML::Parser passes its own test suite. I don't have rights and the adminstrator is hard to reach. I'm working on that. I did noticed that they must have recompiled Perl back in August when I did a -V.
As for code, I've been hesistant because there is quite a bit. I did cut it down into it relevant parts. The report_tags call is commented out. Here it is:
#!/usr/bin/perl -w use Text::Styler; # use Devel::TraceMethods qw ( HTML::Parser Text::Styler::Parser ); my $styler = Text::Styler->new( { left_margin=>5, right_margin=>2, col +umns=>70 } ); my $html = <<OUTPUT; <p>Little <q>help</q> and love key the i to i'll tune get me help i i. +</p> <p>Walk friends sad, you. <br />My by up the my little, a a, and my i, + with <a href="http://www.foo.com/">you high</a> you're. By from when + walk would what my help from my, by, when. Your up, be own with woul +d is me up your you're friends does to of by and my how ears. You if +little with out i own, worry you, of are, little, not get, i sing sad + key. Stand help get with get be what are walk my out a of song, on b +y would i a a. Away think think, because me not . Tune you with me a +sing the how i'll song sang sang on your, out, i. Little, help help a +nd love key the i to i'll tune get me help i i, of. The <b>sad, you does by</b> up the my little, a.</p> <pre>xxxx foo foo</pre> OUTPUT print $styler->text_style($html)."\n"; ---- package Text::Styler; use strict; use Text::Wrap; use vars qw( $VERSION ); $VERSION = 0.1; sub new { my $class = shift; my $self = bless { }, $class; if ( my $a = shift ) { # getting set wrong when hash ref is handed + in. $self->left_margin( $a->{left_margin} || 0 ); $self->right_margin( $a->{right_margin} || 0 ); $self->columns( $a->{columns} || 60 ); } return $self; } sub text_style { my($self,$text) = @_; use Text::Styler::Parser; my $styler = new Text::Styler::Parser; $styler->parse($text); $self->text_wrap( $styler->contents ); } ---- package Text::Styler::Parser; use strict; use HTML::Parser; our @ISA = qw( HTML::Parser ); use vars qw( $VERSION ); $VERSION = 0.1; my %inline_tags = ( 'a' => '[', 'b' => '*', 'strong' => '*', 'i' => '\\', 'em' => '\\', 'q' => '"' ); my %block_tags = ( 'p' => "\n", 'pre' => '' ); my %empty_tags = ( 'br' => '1' ); my @report_tags = (keys %inline_tags, keys %block_tags, keys %empty_ta +gs); my $tag_symbols = { %inline_tags, %block_tags }; # not too concerned a +bout saving memory right now my $tag_handlers= { 'a' => \&hdlr_hyperlink, 'p' => \&hdlr_paragrap +h, 'br' => \&hdlr_br, 'pre' => \&hdlr_pre }; sub new { my $proto = shift; my $class = ref( $proto ) || $proto; my $parser = HTML::Parser->new( api_version => 3 ); $parser->handler(start_document => \&start_document_handler, 'self +' ); $parser->handler(start => \&start_handler, 'self, tagname, attr' ) +; $parser->handler(end => \&end_handler, 'self, tagname' ); $parser->handler(text => \&text_handler, 'self, dtext' ); #$parser->report_tags( qw( a b strong i em q p pre br ) ); #$parser->report_tags( qw( @report_tags ) ); $parser->unbroken_text(1); return bless $parser, $class; } sub contents { $_[0]->{_output}; } sub start_document_handler { $_[0]->{_stack}=undef; $_[0]->{_output}=u +ndef; } sub start_handler { my $self = shift; my $tag = shift; my $attr = shift; push( @{ $self->{_stack} }, [ $tag, $tag_symbols->{$tag} ] ); unless( defined( $tag_handlers->{$tag} ) && $tag_handlers->{$tag}->($self,'1',$tag_symbols->{$tag},$attr) +) { $self->{_output} .= length( $tag_symbols->{$tag} ) ? ' '.$tag_symbols->{$tag} : $tag_symbols->{$tag}; # default start handler } } sub end_handler { my $self = shift; my $tag = shift; unless( defined( $tag_handlers->{$tag} ) && $tag_handlers->{$tag}->($self,'-1',$tag_symbols->{$tag}) ) { if ( defined( $block_tags{$tag} ) ) { $self->{_output} .= "\n\n"; } elsif ( defined( $inline_tags{$tag} ) ) { $self->{_output} .= length( $tag_symbols->{$tag} ) ? $tag_symbols->{$tag}.' ' : $tag_symbols->{$tag}; # default end handler } # empty tags can only have start tag routines } pop( @{ $self->{_stack} } ); } sub text_handler { my $self = shift; my $text = shift; if ( $self->{_stack}->[0] ) { # filters out text outside a tag. $self->{_current_text}=$text; # a kludge. my $tag = $self->current_element; unless( defined($tag) && defined( $tag_handlers->{$tag} ) && defined( $tag_symbols->{$tag}) && $tag_handlers->{$tag}->($self,'0',$tag_symbols->{$tag} +) ) { $self->{_current_text}=~s/\r//sg; $self->{_current_text}=~s/^[\s\t]*//gm; $self->{_current_text}=~s/[\s\t]*$//gm; $self->{_current_text}=~s/\n/ /gs; } $self->{_output} .= $self->{_current_text}; $self->{_current_text} = undef; } } #--- tag handlers sub hdlr_hyperlink { my ($self, $mode, $symbol, $attr) = @_; if ($mode == 1) { $self->{_output}.=' '; $self->{_current_link} .= $attr->{'href'}; # add alt storage here. return 1; } elsif ($mode == -1) { $self->{_output} .= ' ['.$self->{_current_link}.'] '; $self->{_current_link}=undef; # blank alt storage here. return 1; } return 0; } sub hdlr_paragraph { my ($self, $mode, $symbol, $attr) = @_; if ( $mode == 1) { # pass through. return 1; } return 0; } sub hdlr_br { my ($self, $mode, $symbol, $attr) = @_; if ($mode == 1) { $self->{_output}.="\n"; } return 1; # This handles it or we ignore it. } sub hdlr_pre { my ($self, $mode, $tag, $attr) = @_; if ( $mode==0 ) { # pass through. return 1; } return 0; }
In reply to Re: Re: Re: HTML::Parser - no contents. no error.
by Anonymous Monk
in thread HTML::Parser - no contents. no error.
by Anonymous Monk
| For: | Use: | ||
| & | & | ||
| < | < | ||
| > | > | ||
| [ | [ | ||
| ] | ] |