use strict;
use warnings;
use Parse::RecDescent;

# Enable Parse::RecDescent's diagnostics (errors, warnings and hints) while
# the grammar is compiled and used.
$::RD_ERRORS = 1;
$::RD_WARN   = 1;
$::RD_HINT   = 1;

# Tokenizer grammar.
#
# tokenize  matches zero or more tokens up to end of input and yields the
#           array reference built by the token(s?) repetition.
# token     tries each token class in order; any single character that fits
#           no class is emitted as [ skip => $char ].
# ident, binops, lbinops, number, integer
#           each yield [ @item ], i.e. [ $rule_name, $matched_text ], because
#           $item[0] holds the name of the current rule.
#
# Inside actions Parse::RecDescent provides @item (singular). Under the
# grammar's own "use strict", a reference to @items is an undeclared variable
# and prevents the generated parser from compiling, so every action uses
# @item / $item[1].
#
# integer uses [0-9]+ so that a multi-digit literal such as 123 is a single
# token rather than one token per digit.
#
# NOTE(review): binops is tried before number and integer, so a leading "-"
# is always consumed as an operator and the "-?" prefixes below can never
# match at the start of a token -- confirm whether that ordering is intended.
my $grammar = <<'__EOI__';
{ use strict; use warnings; }

tokenize : token(s?) /\Z/
    { $item[1] }

token    : ident
         | binops
         | lbinops
         | number
         | integer
         | /./
    { [ skip => $item[1] ] }

ident    : /[a-zA-Z][a-zA-Z0-9_]*/
    { [ @item ] }

binops   : /\*\*|[+\-\/*%]/
    { [ @item ] }

lbinops  : />=?|<=?|!|&&|\|\||==/
    { [ @item ] }

number   : /-?[0-9]*\.[0-9]+/
    { [ @item ] }

integer  : /-?[0-9]+/
    { [ @item ] }
__EOI__

# Code elided in this listing; it is expected to define $parser (presumably
# built from $grammar) and $file (the text to tokenize) -- TODO confirm.
...

# NOTE(review): tokenize's action returns $item[1], the array reference
# produced by token(s?), so @tokens receives exactly one element (that
# reference, or undef when the parse fails). If a flat list of tokens is
# wanted, dereference the result -- verify against the code that consumes
# @tokens.
my @tokens = $parser->tokenize($file);