I don't know if HTML::Parser passes its own test suite. I don't have rights and the adminstrator is hard to reach. I'm working on that. I did noticed that they must have recompiled Perl back in August when I did a -V.
As for code, I've been hesistant because there is quite a bit. I did cut it down into it relevant parts. The report_tags call is commented out. Here it is:
#!/usr/bin/perl -w
use Text::Styler;
# use Devel::TraceMethods qw ( HTML::Parser Text::Styler::Parser );
my $styler = Text::Styler->new( { left_margin=>5, right_margin=>2, col
+umns=>70 } );
my $html = <<OUTPUT;
<p>Little <q>help</q> and love key the i to i'll tune get me help i i.
+</p>
<p>Walk friends sad, you. <br />My by up the my little, a a, and my i,
+ with <a href="http://www.foo.com/">you high</a> you're. By from when
+ walk would what my help from my, by, when. Your up, be own with woul
+d is me up your you're friends does to of by and my how ears. You if
+little with out i own, worry you, of are, little, not get, i sing sad
+ key. Stand help get with get be what are walk my out a of song, on b
+y would i a a. Away think think, because me not . Tune you with me a
+sing the how i'll song sang sang on your, out, i. Little, help help a
+nd love key the i to i'll tune get me help i i, of.
The <b>sad, you does by</b> up the my little, a.</p>
<pre>xxxx
foo
foo</pre>
OUTPUT
print $styler->text_style($html)."\n";
----
package Text::Styler;
use strict;
use Text::Wrap;
use vars qw( $VERSION );
$VERSION = 0.1;
sub new {
my $class = shift;
my $self = bless { }, $class;
if ( my $a = shift ) { # getting set wrong when hash ref is handed
+ in.
$self->left_margin( $a->{left_margin} || 0 );
$self->right_margin( $a->{right_margin} || 0 );
$self->columns( $a->{columns} || 60 );
}
return $self;
}
sub text_style {
my($self,$text) = @_;
use Text::Styler::Parser;
my $styler = new Text::Styler::Parser;
$styler->parse($text);
$self->text_wrap( $styler->contents );
}
----
package Text::Styler::Parser;
use strict;
use HTML::Parser;
our @ISA = qw( HTML::Parser );
use vars qw( $VERSION );
$VERSION = 0.1;
my %inline_tags = ( 'a' => '[', 'b' => '*', 'strong' => '*',
'i' => '\\', 'em' => '\\', 'q' => '"' );
my %block_tags = ( 'p' => "\n", 'pre' => '' );
my %empty_tags = ( 'br' => '1' );
my @report_tags = (keys %inline_tags, keys %block_tags, keys %empty_ta
+gs);
my $tag_symbols = { %inline_tags, %block_tags }; # not too concerned a
+bout saving memory right now
my $tag_handlers= { 'a' => \&hdlr_hyperlink, 'p' => \&hdlr_paragrap
+h,
'br' => \&hdlr_br, 'pre' => \&hdlr_pre };
sub new {
my $proto = shift;
my $class = ref( $proto ) || $proto;
my $parser = HTML::Parser->new( api_version => 3 );
$parser->handler(start_document => \&start_document_handler, 'self
+' );
$parser->handler(start => \&start_handler, 'self, tagname, attr' )
+;
$parser->handler(end => \&end_handler, 'self, tagname' );
$parser->handler(text => \&text_handler, 'self, dtext' );
#$parser->report_tags( qw( a b strong i em q p pre br ) );
#$parser->report_tags( qw( @report_tags ) );
$parser->unbroken_text(1);
return bless $parser, $class;
}
sub contents { $_[0]->{_output}; }
sub start_document_handler { $_[0]->{_stack}=undef; $_[0]->{_output}=u
+ndef; }
sub start_handler {
my $self = shift;
my $tag = shift;
my $attr = shift;
push( @{ $self->{_stack} }, [ $tag, $tag_symbols->{$tag} ] );
unless( defined( $tag_handlers->{$tag} ) &&
$tag_handlers->{$tag}->($self,'1',$tag_symbols->{$tag},$attr)
+) {
$self->{_output} .= length( $tag_symbols->{$tag} ) ?
' '.$tag_symbols->{$tag} :
$tag_symbols->{$tag}; # default start handler
}
}
sub end_handler {
my $self = shift;
my $tag = shift;
unless( defined( $tag_handlers->{$tag} ) &&
$tag_handlers->{$tag}->($self,'-1',$tag_symbols->{$tag}) ) {
if ( defined( $block_tags{$tag} ) ) {
$self->{_output} .= "\n\n";
} elsif ( defined( $inline_tags{$tag} ) ) {
$self->{_output} .= length( $tag_symbols->{$tag} ) ?
$tag_symbols->{$tag}.' ' :
$tag_symbols->{$tag}; # default end handler
} # empty tags can only have start tag routines
}
pop( @{ $self->{_stack} } );
}
sub text_handler {
my $self = shift;
my $text = shift;
if ( $self->{_stack}->[0] ) { # filters out text outside a tag.
$self->{_current_text}=$text; # a kludge.
my $tag = $self->current_element;
unless( defined($tag) && defined( $tag_handlers->{$tag} ) &&
defined( $tag_symbols->{$tag}) &&
$tag_handlers->{$tag}->($self,'0',$tag_symbols->{$tag}
+) ) {
$self->{_current_text}=~s/\r//sg;
$self->{_current_text}=~s/^[\s\t]*//gm;
$self->{_current_text}=~s/[\s\t]*$//gm;
$self->{_current_text}=~s/\n/ /gs;
}
$self->{_output} .= $self->{_current_text};
$self->{_current_text} = undef;
}
}
#--- tag handlers
sub hdlr_hyperlink {
my ($self, $mode, $symbol, $attr) = @_;
if ($mode == 1) {
$self->{_output}.=' ';
$self->{_current_link} .= $attr->{'href'};
# add alt storage here.
return 1;
} elsif ($mode == -1) {
$self->{_output} .= ' ['.$self->{_current_link}.'] ';
$self->{_current_link}=undef;
# blank alt storage here.
return 1;
}
return 0;
}
sub hdlr_paragraph {
my ($self, $mode, $symbol, $attr) = @_;
if ( $mode == 1) { # pass through.
return 1;
}
return 0;
}
sub hdlr_br {
my ($self, $mode, $symbol, $attr) = @_;
if ($mode == 1) { $self->{_output}.="\n"; }
return 1; # This handles it or we ignore it.
}
sub hdlr_pre {
my ($self, $mode, $tag, $attr) = @_;
if ( $mode==0 ) { # pass through.
return 1;
}
return 0;
}
|