in reply to Re^2: Multiline string and one line comments
in thread Multiline string and one line comments

Here's a regex-based approach. (I agree, however, that a parsing approach may be more appropriate.) It doesn't handle single-quoted strings, but should be easily extensible to cover them. I'm not sure it gives you exactly what you want, but I think it comes close. The critical (IMHO) regexes, $comment_only and $string_only, rely on the backtracking control verbs (*SKIP) and (*FAIL) and therefore require Perl version 5.10+.

# Test script: extract one-line '#' comments and double-quoted (possibly
# multiline) strings from a chunk of Perl-like text using regexes only.
#
# Three patterns are exercised against the same sample text:
#   $comment_or_string - every comment and every string, in order of appearance
#   $comment_only      - comments only; a '#' inside a string is not a comment
#   $string_only       - strings only; a '"' inside a comment is not a string
#
# Single-quoted strings are not handled.

use 5.010;    # $comment_only / $string_only use (*SKIP) and (*FAIL)
use warnings;
use strict;

use Test::More
    # tests => ?? + 1    # Test::NoWarnings adds 1 test
    'no_plan'
    ;
use Test::NoWarnings;

# Sample input.  The heredoc is single-quoted, so backslashes and sigils
# below are literal text, not escapes or variables.
# NOTE(review): the line break inside the multiline string is assumed to
# fall after "each"; wherever it goes, TEST1 and S2 must agree.
use constant TEST1 => <<'EOT';
# this is a comment, should be matched.
# "I am not a string" . 'because I am inside a comment'
my $string = " #I am not a \comment, because I am \" quoted";
my $another_string = "I am a multiline string with # on each
\t line #, have fun!";
EOT
# print qq{[[${ \TEST1 }]] \n\n};    # FOR DEBUG

# Expected comments (C*) and strings (S*), exactly as they appear in TEST1.
use constant C1 => '# this is a comment, should be matched.';
use constant C2 => q{# "I am not a string" . 'because I am inside a comment'};
use constant S1 => q{" #I am not a \comment, because I am \" quoted"};
use constant S2 => q{"I am a multiline string with # on each
\t line #, have fun!"};

# these regexes compatible with 5.8 (and prior? 5.0?)

# A comment runs from '#' to the end of its line ($ under /m).
my $comment = qr{ [#] [^\n]* $ }xms;

# A double-quoted string: runs of ordinary characters separated by
# backslash-escaped characters.  Under /s the '.' also matches a newline,
# and [^"\\] matches newlines, so strings may span lines.
my $string = qr{ " [^"\\]* (?: \\. [^"\\]*)* " }xms;

# Whichever construct starts first wins, which keeps a '#' inside a string
# (or a '"' inside a comment) from being misread.
my $comment_or_string = qr{ $comment | $string }xms;

# these regexes require 5.10+

# Match the unwanted construct in full, then discard it: (*FAIL) forces the
# alternative to fail, and (*SKIP) makes the next match attempt start after
# the discarded text instead of inside it.
my $comment_only = qr{ $comment | $string  (*SKIP) (*FAIL) }xms;
my $string_only  = qr{ $string  | $comment (*SKIP) (*FAIL) }xms;

# Each vector is [ description, text, regex, expected matches... ].
# A plain (non-reference) scalar in the list is emitted as a note.
VECTOR:
for my $ar_vector (
    [ 'comments and strings, in order of appearance',
      TEST1, $comment_or_string, C1, C2, S1, S2, ],
    [ 'comments only', TEST1, $comment_only, C1, C2, ],
    [ 'strings only',  TEST1, $string_only,  S1, S2, ],
    )
{
    if (not ref $ar_vector) {    # must be a note...
        note $ar_vector;
        next VECTOR;
    }

    my ($description, $text, $rx, @expected) = @{$ar_vector};

    # List-context /g match returns every match of the pattern in $text.
    is_deeply [ $text =~ m{ $rx }xmsg ], \@expected, $description;
}    # end for VECTOR