# Two alternative ways to pull the text out of $doc. Both variants declare
# @tokens, so use one or the other within a given scope.

# --- Variant 1: keep the text as a list of raw tokens ---------------------

# Tokenise: every token is either a complete <...> tag or a single
# character (/s lets "." match newlines as well).
my @tokens = $doc =~ /<[^>]*>|./sg;

# Extract text: keep plain characters and numeric <NNN> tokens, and drop
# every other tag. Both alternatives are anchored so a token is classified
# by how it starts.
my @text_tokens = grep { /^[^<]/ || /^<[0-9]+>/ } @tokens;

# Do stuff with @text_tokens
# ...

# Print resulting text
print join('', @text_tokens);

####

# --- Variant 2: build the text as a single decoded string -----------------

# Tokenise: every token is either a complete <...> tag or a single
# character (/s lets "." match newlines as well).
my @tokens = $doc =~ /<[^>]*>|./sg;

# Extract text
my $text = '';
for (@tokens) {
    if (/^<([0-9]+)>/) {
        # Numeric token: append the character with that code point.
        $text .= chr($1);
    }
    elsif (/^[^<]/) {
        # Plain character: append the token itself. This pattern has no
        # capture group, so $1 would be undefined here.
        $text .= $_;
    }
    # Any other token (a non-numeric tag) is deliberately skipped.
}

# Do stuff with $text
# ...

# Print resulting text
print $text;