# Copy tokens from the parser $p into $doc2 until 200 characters of text
# content have been collected.
#
# - Runs of whitespace inside every token are collapsed to a single space.
# - Text tokens count toward the running total in $total.
# - Non-text tokens (markup) are appended padded with a space on each side
#   and do NOT count toward the 200-character budget.
# - The text token that would exceed the budget is truncated and the loop
#   stops.
#
# $p, $doc2 and $total are expected to be declared and initialised by the
# surrounding code.
while ( my $token = $p->get_token ) {
    my $tkntext = $token->as_is;
    $tkntext =~ s/\s+/ /g;    # normalize all whitespace

    if ( $token->is_text ) {
        if ( length($tkntext) + $total <= 200 ) {
            $doc2  .= $tkntext;
            $total += length($tkntext);
        }
        else {
            # Remaining character budget for this final piece of text.
            my $maxlen = 200 - $total;

            # Prefer cutting at the last space at or before the budget so a
            # word is not split in half.
            my $cut = rindex( $tkntext, ' ', $maxlen );

            # rindex returns -1 when there is no such space; a negative
            # length would make substr keep almost the whole token and blow
            # past the limit, so fall back to a hard cut at the budget.
            $cut = $maxlen if $cut < 0;

            $doc2 .= substr( $tkntext, 0, $cut );
            last;    # this finishes the while loop
        }
    }
    else {
        $doc2 .= " $tkntext ";
    }
}