I'm afraid this can hardly be done with a single regex, but the following code copes with nested comments such as /* /* */ */, with comment markers inside quoted strings such as " ... */ ...", etc.

#!/usr/bin/perl
#
# Blanks out "--" line comments and (possibly nested) "/* ... */" block
# comments in each test record from the DATA section, prints the blanked
# text, then splices the recorded comments back in and prints the result.

use strict;
use warnings;
use 5.10.0;

use Data::Dumper;
use Params::Validate qw(:types);

# DATA records are separated by one blank line.
local $/ = "\n\n";

while ( my $in = <DATA> ) {
    chomp $in;
    my ( $out, $comment ) = RemoveComments($in);
    say $out;

    # Now putting the comments back, highest start offset first.
    for my $pos ( sort { $b <=> $a } keys %$comment ) {
        substr( $out, $pos, $comment->{$pos}{length} ) = $comment->{$pos}{body};
    }
    print $out . "\n\n";
}
exit;

# RemoveComments($text)
#
# Scans $text from left to right and replaces every character of a comment,
# except newlines, with a space. The result therefore has the same length
# and line layout as the input. Recognised comments are:
#   * "--" up to the end of the line, when not inside a block comment;
#   * "/* ... */" block comments, which may nest.
# Text inside '...' or "..." (a doubled quote stands for a literal quote) is
# matched as one token, so comment markers inside quoted strings are ignored.
#
# Returns a two-element list:
#   1. the blanked text;
#   2. a hash reference keyed by the start offset of each outermost comment,
#      whose values are { length => <characters>, body => <original text> }.
#
# Dies when a "*/" closes more block comments than are open, or when no rule
# matches at the current position (for example an unterminated quoted
# string). A "/*" that is never closed is not reported; everything after it
# is treated as comment text.
sub RemoveComments {
    @_ = Params::Validate::validate_pos( @_, { type => SCALAR } );
    my ($in) = @_;

    my %comment;           # start offset => { length => ..., body => ... }
    my $comment_begins;    # start offset of the comment being collected
    my $stackptr = 0;      # current nesting depth of /* */ comments

    # Records $string as comment text and blanks it when we are inside a
    # block comment or when $forced is true; otherwise returns it unchanged.
    my $blank = sub {
        @_ = Params::Validate::validate_pos(
            @_,
            { type => SCALAR },
            { type => SCALAR, default => 0 },
        );
        my ( $string, $forced ) = @_;
        if ( $forced || $stackptr > 0 ) {
            $comment{$comment_begins}{length} += length $string;
            $comment{$comment_begins}{body} .= $string;
            $string =~ s{.}{ }mg;    # "." skips newlines, so line breaks survive
        }
        return $string;
    };

    my $out = '';

    # Loop until the true end of the string. "\z" is used instead of "$"
    # because "$" also matches just before a final newline, which made the
    # loop stop early and drop that newline from the result.
    while ( $in !~ m{\G\z}cg ) {
        my $pos = pos($in) // 0;    # offset at which the next token starts

        if ( $in =~ m{\G((?:/\*)+)}cg ) {    # one or more "/*"
            $comment_begins = $pos if $stackptr == 0;
            $stackptr += length($1) / 2;
            $out .= $blank->($1);
        }
        elsif ( $in =~ m{\G((?:\*/)+)}cg ) {    # one or more "*/"
            # Blank before decrementing so the closing marker itself still
            # counts as part of the comment.
            $out .= $blank->($1);
            $stackptr -= length($1) / 2;
            die "Too many closing '*/'! \$stackptr($stackptr) has gone negative!\n"
                if $stackptr < 0;
        }
        elsif ( $stackptr == 0 && $in =~ m{\G(--+.*$)}cgm ) {    # "--" comment not in a /* */ comment
            $comment_begins = $pos;
            $out .= $blank->( $1, 1 );
        }
        elsif ( $stackptr > 0 && $in =~ m{\G(--+)}cgs ) {    # "--" inside a /* */ comment
            $out .= $blank->($1);
        }
        elsif ( $in =~ m{\G('(?:[^']|'')*'|"(?:[^"]|"")*")}cgs ) {    # ' or " quoted string
            $out .= $blank->($1);
        }
        elsif ( $in =~ m{\G([^'"]+?(?=\*/|/\*|--|'|"|$))}cgs ) {    # up to /*, */, --, ', " or the end
            $out .= $blank->($1);
        }
        else {    # Everything should be caught in one of the cases before!
            warn "WTF!";
            my $residue = substr( $in, $pos );
            die Data::Dumper->Dump( [ \$pos, \$residue ], [qw(*pos *residue)] );
        }
    }

    return ( $out, \%comment );
}

# NOTE(review): the blank lines that separate DATA records (and the line
# breaks inside the last record) were lost when this script was pasted as a
# single line; the layout below is a best-effort reconstruction -- confirm
# against the original post if the exact records matter.
__DATA__
0/*3--6*/90123456789
01234/*789012*/56789

0/*3456*/90123456789
01--4567890123456789
01234/*789012*/56789

-- /*567890123456789
01234567890123456789
-- */567890123456789

012/*567890123456789
01234/*7890123456789
01234567890123456789
01234*/7890123456789
01*/4567890123456789

'123456/**/12345678'
0'234567--01234567'9
01234567890123456789

'123456/**/12345678'
01234567890123456789

-- /*
/* -- */ code
code -- */
/* bah */
/* /* */ */ yada /* x */

In reply to Re: Regex to strip comments by clueless newbie
in thread Regex to strip comments by zuma53

Title:
Use:  <p> text here (a paragraph) </p>
and:  <code> code here </code>
to format your post, it's "PerlMonks-approved HTML":



  • Posts are HTML formatted. Put <p> </p> tags around your paragraphs. Put <code> </code> tags around your code and data!
  • Titles consisting of a single word are discouraged, and in most cases are disallowed outright.
  • Read Where should I post X? if you're not absolutely sure you're posting in the right place.
  • Please read these before you post! —
  • Posts may use any of the Perl Monks Approved HTML tags:
    a, abbr, b, big, blockquote, br, caption, center, col, colgroup, dd, del, details, div, dl, dt, em, font, h1, h2, h3, h4, h5, h6, hr, i, ins, li, ol, p, pre, readmore, small, span, spoiler, strike, strong, sub, summary, sup, table, tbody, td, tfoot, th, thead, tr, tt, u, ul, wbr
  • You may need to use entities for some characters, as follows. (Exception: Within code tags, you can put the characters literally.)
            For:     Use:
    & &amp;
    < &lt;
    > &gt;
    [ &#91;
    ] &#93;
  • Link using PerlMonks shortcuts! What shortcuts can I use for linking?
  • See Writeup Formatting Tips and other pages linked from there for more info.