# Copy the parser's tokens into $doc2. Non-text tokens are passed through
# untouched and never counted; text tokens are counted in $total against a
# 200-character budget. The loop stops at the first text token that does
# not fit in full, so tokens after that point are not copied.
# NOTE(review): $p looks like an HTML::TokeParser::Simple-style parser
# (get_token / is_text / as_is) -- confirm against where $p is created.
while ( my $token = $p->get_token ) {
    my $limit = 200;

    # Anything that is not text goes to the output verbatim.
    unless ( $token->is_text ) {
        $doc2 .= $token->as_is;
        next;
    }

    # Collapse every run of whitespace to one space before measuring.
    my $chunk = $token->as_is;
    $chunk =~ s/\s+/ /g;
    my $chunk_len = length $chunk;

    # The whole chunk still fits: take it and move on to the next token.
    if ( $chunk_len + $total <= $limit ) {
        $doc2  .= $chunk;
        $total += $chunk_len;
        next;
    }

    # The chunk would overflow: append whole words for as long as each
    # word fits. Every appended word is followed by a space, and that
    # space is counted in $total as well.
    for my $word ( split / /, $chunk ) {
        last unless $total + length($word) <= $limit;
        $doc2  .= "$word ";
        $total += length($word) + 1;
    }

    # Drop the final character when the output ends in whitespace
    # (typically the space added after the last word), then finish.
    chop $doc2 if $doc2 =~ /\s$/;
    last;
}