comment on

I'm afraid that it's hardly single regex but the following code works on nested /* /* */ */ and " ... */ ...", etc.

#!/usr/bin/perl

use Data::Dumper;
use Params::Validate qw(:types);
use strict;
use warnings;

use 5.10.0;

   local $/="\n\n";
   for my $in (<DATA>) {
      chomp($in);
      my ($out,$comment)=RemoveComments($in);
      say $out;
      # Now putting the comments back
      for my $pos (sort { $b <=> $a } keys %$comment) {
         substr($out,$pos,$comment->{$pos}{length})=$comment->{$pos}{b
+ody};
      };
      print $out."\n\n";
   };
   exit;

   sub RemoveComments { # Now handles nested /* */ and --
      @_=Params::Validate::validate_pos(@_,{ type=>SCALAR });
      my ($in)=@_;

      my (%comment,$comment_begins);
      my $stackptr=0;
      local *foo=sub {
         @_=Params::Validate::validate_pos(@_,{ type=>SCALAR }, { type
+=>SCALAR,default=>0 });
         my ($string,$forced)=@_;
         if ($forced || $stackptr > 0) {
            $comment{$comment_begins}{length}+=length($string);
            $comment{$comment_begins}{body}.=$string;
            $string=~ s{.}{ }mg;
         };
         return $string;
      }; # foo:;
      my $out='';
      my $pos=0;
      while ($in !~ m{\G$}cg) {
         if ($in =~ m{\G((?:/\*)+)}cg) { # /*
             $comment_begins=$pos
                if ($stackptr == 0);
             $stackptr+=length($1)/2;
             $out.=foo($1);
             $pos=pos($in);
         } elsif ($in =~ m{\G((?:\*/)+)}cg) { # */
             $out.=foo($1);
             $stackptr-=length($1)/2;
             $pos=pos($in);
             die "Too many closing '*/'! \$stackptr($stackptr) has gon
+e negative!\n"
                 if ($stackptr < 0);
         } elsif ($stackptr == 0 && $in =~ m{\G(--+.*$)}cgm) { # -- co
+mment not in a /* */ comment
             $comment_begins=$pos;
             $out.=foo($1,1);
             $pos=pos($in);
         } elsif ($stackptr > 0 && $in =~ m{\G(--+)}cgs) { # might be 
+a -- comment but it's in a /* */ comment
             $out.=foo($1);
             $pos=pos($in);
         } elsif ($in =~ m{\G('(?:[^']|'')*'|"(?:[^"]|"")*")}cgs) { #'
+# ' or " quoted string
             $out.=foo($1);
             $pos=pos($in);
         } elsif ($in =~ m{\G([^'"]+?(?=\*/|/\*|--|'|"|$))}cgs) { # up
+ to /*,*/,--,',",\z
             $out.=foo($1);
             $pos=pos($in);
         } else { # Everything should be caught in one of the cases be
+fore!
             warn "WTF!";
             my $pos=pos($in);
             my $residue=substr($in,$pos);
             die Data::Dumper->Dump([\$pos,\$residue],[qw(*pos *residu
+e)]);
         };
      };

      return $out,\%comment;
   }; # RemoveComments:

__DATA__
0/*3--6*/90123456789

01234/*789012*/56789
0/*3456*/90123456789
01--4567890123456789
01234/*789012*/56789

-- /*567890123456789
01234567890123456789
-- */567890123456789

012/*567890123456789
01234/*7890123456789
01234567890123456789
01234*/7890123456789
01*/4567890123456789

'123456/**/12345678'

0'234567--01234567'9

01234567890123456789
'123456/**/12345678'
01234567890123456789

-- /*
/* -- */ code
code
-- */

/* bah */
/*
   /*
   */
*/
yada
/*
x
*/
[download]

In reply to Re: Regex to strip comments by clueless newbie
in thread Regex to strip comments by zuma53

Posts are HTML formatted. Put <p> </p> tags around your paragraphs. Put <code> </code> tags around your code and data!

Titles consisting of a single word are discouraged, and in most cases are disallowed outright.

Read Where should I post X? if you're not absolutely sure you're posting in the right place.

Please read these before you post! —

Posts may use any of the Perl Monks Approved HTML tags:

a, abbr, b, big, blockquote, br, caption, center, col, colgroup, dd, del, details, div, dl, dt, em, font, h1, h2, h3, h4, h5, h6, hr, i, ins, li, ol, p, pre, readmore, small, span, spoiler, strike, strong, sub, summary, sup, table, tbody, td, tfoot, th, thead, tr, tt, u, ul, wbr

You may need to use entities for some characters, as follows. (Exception: Within code tags, you can put the characters literally.)

	For:		Use:
	&		`&`
	<		`<`
	>		`>`
	[		`[`
	]		`]`

Link using PerlMonks shortcuts! What shortcuts can I use for linking?

See Writeup Formatting Tips and other pages linked from there for more info.