# Turn markup into whitespace and split what is left of $text into words.
#
# Replace each HTML tag with a space rather than nothing, so that words
# separated only by markup ("one<br>two", "</p><p>") do not fuse together.
$text =~ s/<[^>]*>/ /g;
# Character entities that stand for an apostrophe become a literal "'" so
# that contractions such as "don&#39;t" or "don&rsquo;t" stay in one piece.
$text =~ s/&(?:apos|rsquo|#0*39|#[xX]0*27|#0*8217|#[xX]0*2019);/'/g;
# Every other well-formed entity (&nbsp; &#160; &#xA0;) becomes a space.
# Requiring a real entity name or number keeps a stray "&" in ordinary text
# (e.g. "AT&T rocks; ok") from swallowing everything up to the next ";".
$text =~ s/&(?:#\d+|#[xX][0-9A-Fa-f]+|\w+);/ /g;
# Shove the resulting words into an array. A word is a run of word
# characters, optionally continued across inner apostrophes, so one-letter
# words ("a", "I") are kept and "rock'n'roll" stays a single token.
my @words = $text =~ /(\w+(?:'+\w+)*)/g;
####
#!/usr/bin/perl
# Reads an HTML document and records the character ranges of the text that
# sits between a closing '>' and the next opening '<', so that each run of
# text can later be processed without touching the surrounding markup.
#
# NOTE(review): the input read operator and the header of the scanning loop
# were missing from the original listing (anything between '<' and '>' had
# been stripped from it). They are reconstructed here from the surrounding
# code; confirm that reading via <> rather than <STDIN> is what was intended.
use strict;
use warnings;

# Accumulate the whole document in one string; <> reads the files named on
# the command line, or STDIN when there are none.
my $html = '';
while (my $line = <>) {
    $html .= $line;
}

# $begin is the offset of the first character after the most recent '>',
# $end the offset of the '<' that closes the current run of text. Zero means
# "not set" for both, so text that precedes the first '>' is never recorded.
my $begin = 0;
my $end = 0;
my @excerpts = ();
for my $i (0 .. length($html) - 1) {
    my $char = substr($html, $i, 1);

    # A tag just closed: the text run starts on the next character.
    if ($char eq '>') {
        $begin = $i + 1;
    }
    # A tag is opening while a text run is in progress: the run ends here.
    if ($begin && $char eq '<') {
        $end = $i;
    }
    # Both ends are known: store the half-open range [begin, end) and reset.
    if ($begin && $end) {
        push @excerpts, { begin => $begin, end => $end };
        $begin = 0;
        $end = 0;
    }
}
# last snippet: text after the final '>' runs to the end of the document
if ($begin && !$end) {
    push @excerpts, { begin => $begin, end => length($html) };
}

foreach my $excerpt (@excerpts) {
    my $begin = $excerpt->{begin} || 0;
    my $end = $excerpt->{end} || 0;
    my $length = $end - $begin;
    my $word_string = substr($html, $begin, $length);
    # TODO: still working on search and replaces for $word_string
}