# cleanup_start_words($text, $keywords)
#
# Returns $text exactly as it was passed in.
#
# Parameters:
#   $text     - the string to clean up.
#   $keywords - accepted so existing two-argument callers keep working;
#               it is not consulted.
#
# Returns:
#   $text, unmodified (an undef argument comes back as undef).
#
# NOTE(review): this routine historically carried a trimming pass that was
# meant to drop a leading sentence fragment: everything up to the last
# '.', '!', '?', ',', ')' or ':' found near the start of the string, plus
# any whitespace that followed. That pass sat behind an unconditional
# `return $text;`, so it never executed and callers have only ever seen
# the pass-through behaviour implemented here. The disabled pass also had
# known problems (it scanned 40 characters although its comment said 10,
# and one of its patterns could never match a single character), so it
# should be redesigned rather than simply switched back on -- confirm the
# intended rule with the callers first.
sub cleanup_start_words {
    my ($text) = @_;

    # No per-character splitting of the arguments here: the results were
    # never used, and splitting an undef argument only produced
    # "uninitialized value" warnings.
    return $text;
}