use strict;
use warnings;

# Split a keyword list on its separators and wrap each keyword in tags.
#
# A separator is a comma, a semicolon, or one of the character entities
# &mdash;, &ndash; or &hyphen;, together with any whitespace on either
# side of it.
#
# Takes the raw keyword string. Returns the list of pieces in their
# original order: separators are passed through untouched (including
# their surrounding whitespace), and every non-empty keyword is wrapped
# as <keyword>...</keyword>. Returns an empty list for undef or ''.
#
# NOTE(review): the tag name was lost from the original text (only the
# comment "add keyword tags" survived); <keyword> is a reconstruction --
# confirm against the consumer of this output.
sub tag_keywords {
    my ($text) = @_;
    return if !defined $text;

    my $sep = qr{ \s* (?: , | ; | \&(?:[mn]dash|hyphen); ) \s* }x;

    # The capturing group makes split() return the separators as well,
    # interleaved with the fields. Leading or adjacent separators yield
    # empty fields, which are dropped so they are not tagged as keywords.
    return
        map  { /\A$sep\z/ ? $_ : "<keyword>$_</keyword>" }
        grep { length }
        split /($sep)/, $text;
}

# Sample input uses the entity spellings, which is what the separator
# pattern recognises.
my $input = 'kw1,kw2; kw3 &mdash; kw4&hyphen;kw5';

my @out = tag_keywords($input);
print join "\n", @out, "";