# Split $stream into markup tags and text runs, printing each token in the
# order it appears. $stream is expected to be set by the surrounding code.
#
# The match is done in scalar context with the g modifier, so every pass
# through the loop resumes where the previous match ended, and the matched
# text is read from the first capture group. Assigning the result of the
# match itself would only yield a success flag, not the token.
#
# The pattern uses brace delimiters because it contains literal slashes,
# and it is spread over several lines because an x-mode comment runs to the
# end of its line.
my $token;
while (
    $stream =~ m{
        (
            < [^/] (?: [^>]* [^/>] )? >   # opening tag: no leading or trailing slash
          | < / [^>]* >                   # closing tag
                                          # NOTE(review): this alternative was garbled
                                          # in the original; reconstructed - confirm
          | < [^>]* / >                   # self-closing tag: slash just before the end
          | [^<]+                         # run of text between tags
                                          # NOTE(review): the original had a placeholder
                                          # here (unicode goop); substitute the real
                                          # character-data pattern if one exists
        )
    }gx
    )
{
    $token = $1;
    print $token;
}