#!/usr/bin/perl -wl

### Extract "words" from a text stream.
###
### A word is a run of characters from the "word" class.  Characters in
### the "ignore_first" class are skipped in front of a word (as many as
### possible while still leaving at least one character for the word),
### and characters in the "ignore_last" class are never the final
### character of a word, although they may lead one or sit inside it.
###
### This code assumes that there are no characters in the "ignore_last"
### class that AREN'T in the "word" class -- it might seem silly that
### there would be, but still, that's how this is coded.

use strict;
use warnings;

# extract_words($text, $ig_first, $ig_last, $word)
#
# Returns the words found in $text, in order of appearance (the number
# of words when called in scalar context).  $text itself is not modified.
#
#   $text     - string to scan
#   $ig_first - characters to skip in front of a word ('' for none)
#   $ig_last  - characters that may not end a word ('' for none)
#   $word     - every character that may appear in a word
#
# Returns an empty list when $word contains no character outside
# $ig_last, because every word has to end in such a character.
sub extract_words {
    my ($text, $ig_first, $ig_last, $word) = @_;

    # "Core" characters are the word characters allowed to end a word:
    # $word with every $ig_last character taken out.
    my $core_chars = $word;
    $core_chars =~ s/[\Q$ig_last\E]+//g if length $ig_last;

    # Without core characters nothing can match, and an empty [] class
    # would not even compile.
    return if !length $core_chars;

    my $core = qr/[\Q$core_chars\E]/;
    my $pre  = length $ig_first ? qr/[\Q$ig_first\E]*/ : '';

    my $body;
    if (length $ig_last) {
        my $trail = qr/[\Q$ig_last\E]/;

        # Each chunk is optional ignore_last chars followed by core chars,
        # so the match always stops on a core char.  Written as a single
        # repeated group (not "core+ | ...") because alternation takes the
        # first branch that succeeds and would cut "foo'bar" after "foo".
        $body = qr{ (?: $trail* $core+ )+ }x;
    }
    else {
        $body = qr{ $core+ }x;
    }

    # In list context m//g returns the contents of the single capture
    # group for every match.  No variables are mentioned inside the
    # pattern's own comments: they would be interpolated.
    my @found = $text =~ m{ $pre ($body) }gx;
    return @found;
}

my $text_stream = q{foo#&#bar};
my $ig_first    = '#';
my $ig_last     = '';
my $word        = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz#';

my @words = extract_words($text_stream, $ig_first, $ig_last, $word);

print $text_stream;
print "words: @words";