#!/usr/bin/env perl
use 5.010;
use warnings;
use strict;

# Split each line of the sample text below __DATA__ into tokens of the form
# {leading punctuation|word|trailing punctuation} and print them, one per
# line, prefixed with a running token number.  Each input line is echoed
# before its tokens, and spaces inside a token are shown as "_".
#
# Cases an earlier version of the pattern got wrong, and the result wanted
# (and now produced) for each:
#
#   input fragment      earlier result           wanted result
#   "It's               {"|It|'} {|s|_}          {"|It's|_}
#   It's                {|It|'} {|s|_}           {|It's|_}
#   o'clock.            {|o|'} {|clock|.}        {|o'clock|.}
#   o'clock--time       {|o|'} {|clock|--}       {|o'clock|--}
#   45's?               {|45|'} {|s|?}           {|45's|?}
#   [Editor's           {[|Editor|'} {|s|_}      {[|Editor's|_}
#   said--"What's       {|said|--"} {|What|'}    {|said|--} {"|What's|_}
#                         {|s|_}
#
# Cases that were already right and must stay that way:
#
#   "'Uncouth'          {"'|Uncouth|'_}
#   'uncouth'.          {'|uncouth|'.}
#   will-o'-the-wisp--  {|will-o'-the-wisp|--}
#   Bob's-yer-uncle     {|Bob's-yer-uncle|_}
#   A ... and B         {|A|_} {|and|_} {|B|}    (detached ellipsis omitted)

# NOTE: comments inside the pattern must not contain sigils or the pattern
# delimiter -- Perl interpolates variables in a pattern before it knows that
# the text is a comment.  Named captures keep the comments sigil-free.
my $TOKEN_RE = qr/
    (?<lead> [[:punct:]]* )         # leading punctuation marks
    (?<word>                        # a "word" consisting of
        (?: [[:word:]']+ - )*       #   optional segments that may embed
                                    #   {'}s, each ending in a single {-}
        [[:word:]]+                 #   a run of pure word characters
        (?: ' [[:word:]]+ )*        #   optional {'}-joined tails, so that
                                    #   "It's" and "o'clock" stay whole
    )
    (?<trail>                       # trailing punctuation marks
        (?:
            (?! ["'(\[{]+ [[:word:]] )  # but leave opening quotes or
                                        # brackets that hug the next word
                                        # for that word to claim
            [[:punct:]]
        )*
        \ ?                         # plus at most one following space
    )
/x;

# Return the tokens found in $line, in order, each as an array reference
# [ leading punctuation, word, trailing punctuation ].  Text that contains
# no word characters (such as a detached "...") yields no token.
sub tokens_of {
    my ($line) = @_;

    my @tokens;
    while ( $line =~ /$TOKEN_RE/g ) {
        push @tokens, [ $+{lead}, $+{word}, $+{trail} ];
    }
    return @tokens;
}

my $token_count = 0;    # running token number across all input lines

while ( my $line = <DATA> ) {
    chomp $line;
    say $line;

    for my $token ( tokens_of($line) ) {

        # make whitespace visible
        my @shown = map { ( my $copy = $_ ) =~ tr/ /_/; $copy } @{$token};

        printf " %3s {%s|%s|%s}\n", ++$token_count, @shown;
    }
}

__DATA__
"'Uncouth' about sums it up."
The word they will use is 'uncouth'.
"It's the old story."
It's a will-o'-the-wisp--a two-fer--and Bob's-yer-uncle at four o'clock.
It's two o'clock--time for a nap.
Remember 45's?
What about (this)?
[Editor's note: blah blah] and so on...
A ... and B
I said--"What's the expression?"