use strict;
use warnings;
use utf8;

# Regex alternation-order demos.
#
# Each demo splits a string into alternating "break" and "phrase" pieces with
#     /($BRK+)(PHRASE(?=$BRK))/g
# and shows that the ORDER of the alternatives inside $BRK changes the result
# when some of them (^ and $) can match the empty string.
#
# Why the order matters: alternatives are tried left to right, and Perl stops
# repeating a quantified group as soon as one iteration matches zero
# characters.  With `^` listed first, the first iteration of `$BRK+` at
# offset 0 succeeds on the empty string, the `+` loop is broken, and the
# break capture stays empty -- the real delimiter is then swallowed by the
# phrase.  Listing the consuming alternatives first lets them win before the
# zero-width anchors are tried.
#
# The `####` lines are the separators between the original snippets.  Each
# runnable snippet lives in its own bare block so that its lexicals do not
# mask those of another snippet.

# extract_pieces($str, $brk, $phrase)
#
# Repeatedly matches /($brk+)($phrase(?=$brk))/ against $str and returns the
# flat list of captures (break, phrase, break, phrase, ...).  Whatever is left
# after the last successful match is appended as a final element.
#
#   $str    - string to split
#   $brk    - qr// object describing one break (delimiter)
#   $phrase - qr// object describing a phrase body; it carries its own flags.
#             Optional, defaults to qr/.+?/s.
#
# Returns an empty list when nothing matches.
sub extract_pieces {
    my ($str, $brk, $phrase) = @_;
    $phrase //= qr/.+?/s;

    my @pieces = $str =~ /($brk+)($phrase(?=$brk))/gp;

    # ${^POSTMATCH} is only meaningful after a successful match; after a
    # failure it would still hold the value from some earlier match.
    # Test length, not truth, so that a remainder of "0" is not dropped.
    if (@pieces && length ${^POSTMATCH}) {
        push @pieces, ${^POSTMATCH};
    }

    return @pieces;
}

# Snippet 1: minimal reproduction with single-character breaks.
{
    my $str = 'XtestXYtest2';

    # Consuming alternatives first: the leading X is captured as a break.
    my $BRK    = qr(X|Y|^|$)i;
    my @pieces = extract_pieces($str, $BRK, qr/.+?/s);
    print "INTENDED: These were the phrases (and breaks) extracted:\n",
        join("\n", @pieces), "\n-------\n";

    # Zero-width ^ first: the first break is empty and the first phrase
    # becomes "Xtest".
    $BRK    = qr(^|X|Y|$)i;
    @pieces = extract_pieces($str, $BRK, qr/.+?/s);
    print "WRONG: These were the phrases (and breaks) extracted:\n",
        join("\n", @pieces), "\n";
}

#### my $BRK = qr(^\s*|\s*<(?:/?(?:p|ul|ol|li)|br\s*/?)>\s*|\s*$)i;
#### my $BRK = qr(\s*<(?:/?(?:p|ul|ol|li)|br\s*/?)>\s*|^\s*|\s*$)i;
####

# Snippet 3: the same effect with HTML-ish breaks, logged to Debug.log.
{
    open my $logfh, ">:encoding(utf8)", q(Debug.log)
        or die "Cannot open LOG for writing: $!";

    # If using the RE below, $BRK+ will match the start of the string, but
    # not (for example) a tag following it.
    # NOTE(review): the example token after "(for example)" is missing from
    # the original comment -- presumably an HTML tag such as <p>; confirm.
    my $BRK = qr(^\s*|\s*<(?:/?(?:p|ul|ol|li)|br\s*/?)>\s*|\s*$)i;

    # This one seems to do the right thing, but why the difference?
    # (Because the tag alternative is tried before the zero-width ^\s*, so
    # the `+` loop is not cut short by an empty first iteration.)
    #my $BRK = qr(\s*<(?:/?(?:p|ul|ol|li)|br\s*/?)>\s*|^\s*|\s*$)i;

    #my $str = "\n\n\ntest\n\n";

    # Simple demo of the problem
    # NOTE(review): this literal is two newlines followed by "test"; it looks
    # like HTML markup was stripped from it.  As written, ^\s* consumes the
    # newlines, so both orderings of $BRK give the same pieces -- verify
    # against the intended input (e.g. a string starting with a tag).
    my $str = '

test';

    my @pieces = extract_pieces($str, $BRK, qr/\S.*?/s);

    print {$logfh} "These were the phrases (and breaks) extracted:\n",
        join("\n", @pieces), "\n";

    # Buffered write errors only surface at close time.
    close $logfh or die "Cannot close LOG: $!";
}