Hi all - I'm trying to identify comments in code (so that ultimately I can strip them out). I've tried using Regexp::Common::comment, but it appears to detect comment markers inside string literals, where they should really be ignored. Am I using it wrong, or is it a limitation of the module?
use strict;
use Regexp::Common qw /comment/;
# The script enabled strict but not warnings; turn them on for everything
# that follows so mistakes such as undefined values are reported.
use warnings;

# Test cases, two per language: one snippet whose string literal contains
# the text the description calls a comment, and one control snippet with an
# ordinary string.
#   language    - key used to pick the pattern out of $RE{comment}
#   description - label printed before the result
#   code        - the snippet that is scanned
# NOTE: the q{...} bodies and their closing braces are deliberately kept
# flush-left; indenting them would add whitespace to the snippets themselves.
my @tests = (
    {
        language    => 'PL/SQL',
        description => 'PL/SQL Comment in String',
        code        => q{
declare
j varchar2(2);
begin
j := '--';
end;
},
    },
    {
        language    => 'PL/SQL',
        description => 'PL/SQL no comment',
        code        => q{
declare
j varchar2(2);
begin
j := 'xx';
end;
},
    },
    {
        language    => 'SQL',
        description => 'SQL Comment in String',
        code        => q{
select '--'
from dual;
},
    },
    {
        language    => 'SQL',
        description => 'SQL no comment',
        code        => q{
select 'xx'
from dual;
},
    },
    {
        language    => 'Perl',
        description => 'Perl Comment in String',
        code        => q{
my $j = '#';
},
    },
    {
        language    => 'Perl',
        description => 'Perl no comment',
        code        => q{
my $j = 'xx';
},
    },
);
# For every test case, print its description, then report whether the
# comment pattern registered for its language matches the snippet.
for my $case (@tests) {
    print $case->{description} . "\n";

    # Pattern lookup and match stay in one expression, evaluated after the
    # description has been printed.
    print $RE{comment}{ $case->{language} }->matches( $case->{code} )
        ? "\tcontains comment\n"
        : "\tno comment\n";
}
which outputs:
PL/SQL Comment in String
contains comment
PL/SQL no comment
no comment
SQL Comment in String
contains comment
SQL no comment
no comment
Perl Comment in String
contains comment
Perl no comment
no comment