use warnings;
use strict;
use utf8;

# Demo: split an HTML-ish string into alternating "break" and "phrase" pieces
# and write the result to Debug.log.
#
# A break ($BRK) is one of:
#   * a <p>, <ul>, <ol> or <li> tag (opening or closing) or a <br> / <br/>,
#     together with any whitespace around it;
#   * optional whitespace at the very start of the string;
#   * optional whitespace at the very end of the string.
#
# The ORDER of the alternatives matters because the pattern is used as $BRK+.
# Two orderings were compared:
#
#   anchors first (misbehaves):
#     qr(^\s*|\s*<(?:/?(?:p|ul|ol|li)|br\s*/?)>\s*|\s*$)i
#   tag first (does the right thing):
#     qr(\s*<(?:/?(?:p|ul|ol|li)|br\s*/?)>\s*|^\s*|\s*$)i
#
# Why the difference: with the anchors-first form, $BRK+ will match the start
# of the string but not a following tag such as <p>.  At offset 0 the first
# alternative, ^\s*, succeeds with an EMPTY match, and Perl breaks out of a
# quantifier loop as soon as one iteration matches a zero-length substring
# (see perlre, "Repeated Patterns Matching a Zero-length Substring").  The
# tag is therefore left to be swallowed by the phrase capture: '<p>test</p>'
# yields the pieces '' and '<p>test'.  Trying the tag alternative first makes
# the first iteration consume the tag, so the zero-width anchors only win
# where no tag is present.
my $BRK = qr(\s*<(?:/?(?:p|ul|ol|li)|br\s*/?)>\s*|^\s*|\s*$)i;

# extract_pieces($text)
#
# Returns a flat list (break, phrase, break, phrase, ...) for every phrase
# found in $text, followed by whatever text remains after the last phrase
# when that remainder is non-empty.  A break may be the empty string, e.g.
# 'test' gives ('', 'test').  Returns an empty list when $text is undefined
# or nothing matched (e.g. empty or all-whitespace input).
sub extract_pieces {
    my ($text) = @_;
    return unless defined $text;

    # /g in list context returns both captures of every match; /s lets a
    # phrase span newlines; /p makes ${^POSTMATCH} available afterwards.
    my @pieces = $text =~ /($BRK+)(\S.*?(?=$BRK))/gsp;

    # Without a successful match ${^POSTMATCH} would still refer to some
    # earlier match, so do not look at it.
    return unless @pieces;

    # length(), not truth: a remainder of "0" must not be dropped.
    push @pieces, ${^POSTMATCH} if length ${^POSTMATCH};

    return @pieces;
}

# A larger sample, kept disabled.  Every line is commented out; leaving
# only the first line commented turns the rest into a syntax error.
#my $str='
#test
#test2 with bold italic
#';

#Simple demo of the problem
my $str = 'test';
my @pieces = extract_pieces($str);

open my $logfh, ">:encoding(utf8)", q(Debug.log)
    or die "Cannot open LOG for writing: $!";
print {$logfh} "These were the phrases (and breaks) extracted:\n",
    join("\n", @pieces), "\n";
# Buffered write errors only surface at close time, so check it.
close $logfh or die "Cannot close LOG: $!";