#!/usr/bin/perl
use strict;
use warnings;
use HTML::TokeParser::Simple;

# Version 1: print a short abstract of the HTML read from the DATA handle.
#
# Markup tokens are copied through verbatim; only text tokens count against
# the character budget.  The budget is the first command-line argument
# (default 40).  A text token that would overrun the budget is cut at the
# budget and then allowed to finish its current word, so the abstract never
# ends mid-word.  Tags opened before the cut are NOT closed by this version.
#
# NOTE(review): no __DATA__ section is visible in this part of the file;
# presumably it follows the script -- confirm.

my $p = HTML::TokeParser::Simple->new(*DATA);

my $length_left = shift(@ARGV) || 40;
# The budget is interpolated into a regex quantifier below, so insist on a
# plain positive integer rather than letting arbitrary text into the pattern.
die "Usage: $0 [max_text_chars]\n"
    unless $length_left =~ /\A[1-9][0-9]*\z/;

my $abstract = "";

while (my $t = $p->get_token) {
    my $chunk = $t->as_is;

    if ($t->is_text) {
        # Collapse runs of whitespace so they cost one character each.
        $chunk =~ s/\s+/ /g;
        # Keep at most $length_left characters, then let the word finish.
        $chunk =~ s/^(.{1,$length_left}\S*).*/$1/s;
        $length_left -= length $chunk;
    }

    $abstract .= $chunk;
    last unless $length_left > 0;
}

print $abstract, "\n";

####
# (Snippet separator.)  Version 2 below is a standalone variant of the script
# above that additionally closes any tags still open at the cut-off point.

#!/usr/bin/perl
use strict;
use warnings;
use HTML::TokeParser::Simple;

# Version 2: same abstract as version 1, but remembers which elements are
# still open when the text budget runs out and appends their closing tags
# (innermost first) so the abstract stays balanced.

my $p = HTML::TokeParser::Simple->new(*DATA);

my $length_left = shift(@ARGV) || 40;
# The budget is interpolated into a regex quantifier below, so insist on a
# plain positive integer rather than letting arbitrary text into the pattern.
die "Usage: $0 [max_text_chars]\n"
    unless $length_left =~ /\A[1-9][0-9]*\z/;

my $abstract = "";

# Names of elements that have been opened but not yet closed, outermost first.
my @open_tags;

# Void elements never get an end tag, so they must not be tracked.
my %is_void = map { $_ => 1 } qw(
    area base br col embed hr img input link meta param source track wbr
);

while (my $t = $p->get_token) {
    my $chunk = $t->as_is;

    if ($t->is_text) {
        # Collapse runs of whitespace so they cost one character each.
        $chunk =~ s/\s+/ /g;
        # Keep at most $length_left characters, then let the word finish.
        $chunk =~ s/^(.{1,$length_left}\S*).*/$1/s;
        $length_left -= length $chunk;
    }
    elsif ($t->is_start_tag) {
        my $name = $t->get_tag;
        # Skip void and XML-style self-closing tags: nothing to close later.
        push @open_tags, $name
            unless $is_void{$name} or $chunk =~ m{/\s*>\z};
    }
    elsif ($t->is_end_tag) {
        # Strip a leading slash defensively in case the accessor keeps it.
        (my $name = $t->get_tag) =~ s{\A/}{};

        # Unwind to the nearest matching open tag; closing an element also
        # closes anything left open inside it.  An end tag with no matching
        # open tag (or an empty stack) is simply ignored.
        for my $i (reverse 0 .. $#open_tags) {
            next unless $open_tags[$i] eq $name;
            splice @open_tags, $i;
            last;
        }
    }

    $abstract .= $chunk;
    last unless $length_left > 0;
}

# Close whatever is still open, innermost element first.
$abstract .= join '', map { "</$_>" } reverse @open_tags;

print $abstract, "\n";