SFLEX has asked for the wisdom of the Perl Monks concerning the following question:

Hi Monks, ^^

I've been making a code That works like the <code> #.. </code> on this site with a few visual additions like a Perl editer reads.

CSS code
.codepost { background-color: #ffffff; width: 500px; height: auto; white-space: nowrap; overflow: scroll; padding-left: 2px; padding-bottom: 5px; }
My HTML Escape code
sub html_escape { my $text = shift; return '' unless $text; #$text =~ s{\;}{#59;}gso; $text =~ s{&}{&amp;}gso; #$text =~ s{#59;}{&#59;}gso; $text =~ s{"}{&quot;}gso; $text =~ s{ }{ \&nbsp;}gso; $text =~ s{\*}{&#42;}gso; $text =~ s{<}{&lt;}gso; $text =~ s{>}{&gt;}gso; $text =~ s{'}{&#39;}gso; $text =~ s{\)}{&#41;}gso; $text =~ s{\(}{&#40;}gso; $text =~ s{\\}{&#92;}gso; # need this! $text =~ s{\t}{ \&nbsp; \&nbsp; \&nbsp;}gso; $text =~ s{\|}{\&#124;}gso; # going to keep this $text =~ s{\n}{<br>}gso; $text =~ s{\cM}{}gso; return $text; }
The main code
$message =~ s~<br>~=br=~isg; while ($message =~ s{\[code\]([\S\s].+?[\S\s])\[/code\]} { my $tmp = $1; $tmp =~ s!<!&#60;!g; $tmp =~ s!>!&#62;!g; # &#39; $tmp =~ s!:!&#58;!g; $tmp =~ s!\[!&#91;!g; $tmp =~ s!\\!&#92;!g; $tmp =~ s!\]!&#93;!g; $tmp =~ s!\)!&#41;!g; $tmp =~ s!\(!&#40;!g; $tmp =~ s!\|!&#124;!g; $tmp =~ s!([^\&])(\#.*?(=br=))!$1<font color=" +blue"><i>$2</i></font>!g; $tmp =~ s!(&#39;.*?(&#39;|=br=))!<font color=r +ed>$1</font>!g; $tmp =~ s!(&quot;.*?(&quot;|=br=))!<font color +=red>$1</font>!g; $tmp =~ s!(return)!<b>$1</b>!g; $tmp =~ s!(require)!<b>$1</b>!g; $tmp =~ s!(while)!<b>$1</b>!g; $tmp =~ s!(foreach)!<b>$1</b>!g; $tmp =~ s!(for)!<b>$1</b>!g; $tmp =~ s!(my)!<b>$1</b>!g; $tmp =~ s!(sub)!<b>$1</b>!g; $tmp =~ s!([^\w])(if)([^\w]*)!$1<b>$2</b>$3!g; $tmp =~ s!(unless)!<b>$1</b>!g; $tmp =~ s!(elsif)!<b>$1</b>!g; $tmp =~ s!(else)!<b>$1</b>!g; $tmp =~ s!(use)!<b>$1</b>!g; $tmp =~ s!(package)!<b>$1</b>!g; $tmp =~ s!&lt;!&#60;!g; $tmp =~ s!&gt;!&#62;!g; $tmp =~ s!&quot;!&#34;!g; $tmp =~ s!"!&#34;!g; #" $tmp =~ s!\s{1};!&#59;!g; #$tmp =~ s!&gt;br&gt;!\n!g; $tmp = "<br><div class=\"codepost\"><font colo +r=\"blue\"><i># $msg{code}</i></font> <br>" . $tmp . '<br><font color="blue"><i># Code En +d</i></font></div>'; }exisog) {} $message =~ s~=br=~<br>~isg;

with the above code it should display a code like this
sub html_escape
{
       my $text = shift;

       return '' unless $text;
       #$text =~ s{\;}{#59;}gso;
       $text =~ s{&}{&amp;}gso;
       #$text =~ s{#59;}{&#59;}gso;
       $text =~ s{"}{&quot;}gso;
       $text =~ s{  }{ \&nbsp;}gso;
       $text =~ s{\*}{&#42;}gso;
       $text =~ s{<}{&lt;}gso;
       $text =~ s{>}{&gt;}gso;
       $text =~ s{'}{&#39;}gso; # bug 1 (want it to be red for the hole line)
       $text =~ s{\)}{&#41;}gso;
       $text =~ s{\(}{&#40;}gso;
       $text =~ s{\\}{&#92;}gso; # need this!
       $text =~ s{\t}{ \&nbsp; \&nbsp; \&nbsp;}gso;
       $text =~ s{\|}{\&#124;}gso; # going to keep this
       $text =~ s{\n}{<br>}gso;
       $text =~ s{\cM}{}gso;

       return $text;
}

And for the other bug.

package filters;
# ---------------------
#  Untaint # bug2 should be blue
# ---------------------
sub untaint {
my $value   = shift || '';
my $pattern = shift || '\w\-\.\/';
return '' unless $value;
$value =~ m!^([$pattern]+)$!i
? return $1
: return;
}

For the first bug i guess i can deal with it becase i didnt want to do a fancy code to fix it
For the 2nt bug, well its not working the way i wanted it to.

Help!!!

Replies are listed 'Best First'.
Re: Regex - it works, but not the way i want it to =(
by lima1 (Curate) on Sep 17, 2007 at 13:19 UTC
Re: Regex - it works, but not the way i want it to =(
by Anonymous Monk on Sep 17, 2007 at 14:27 UTC
    As others have pointed out before, parsing Perl with a few simple regexes is nearly impossible.

    Another hilighter that you could use is PPI::HTML.

Re: Regex - it works, but not the way i want it to =(
by SFLEX (Chaplain) on Sep 17, 2007 at 16:35 UTC
    Thank you for the links ^^
    Im going to check out each one because those modules would probably do a better job then me or could show me what im missing.

    I fixed the 2nt bug and dont really feel the first bug is really a big issue to fix.(since i was modeling it after a highlighter i use)
    I added a few more highlights but the code overall is probably the most simplest of all highlighters =P

    Main code
    $message =~ s~<br>~=br=~isg; while ($message =~ s{\[code\]([\S\s].+?[\S\s])\[/code\]} { my $tmp = $1; $tmp =~ s!<!&#60;!g; $tmp =~ s!>!&#62;!g; $tmp =~ s!:!&#58;!g; $tmp =~ s!\[!&#91;!g; $tmp =~ s!\\!&#92;!g; $tmp =~ s!\]!&#93;!g; $tmp =~ s!\)!&#41;!g; $tmp =~ s!\(!&#40;!g; $tmp =~ s!\|!&#124;!g; $tmp =~ s!&lt;!&#60;!g; $tmp =~ s!&gt;!&#62;!g; $tmp =~ s!([^\&\$])(\#.*?(=br=))!$1<font color +="blue"><i>$2</i></font>!g; $tmp =~ s!(>)(\#.*?(=br=))!$1<font color="blue +"><i>$2</i></font>!g; # A fix for bug 2 $tmp =~ s!(&#39;.*?(&#39;|=br=))!<font color=r +ed>$1</font>!g; $tmp =~ s!(&quot;.*?(&quot;|=br=))!<font color +=red>$1</font>!g; $tmp =~ s!(return)([^\w])!<b>$1</b>$2!g; $tmp =~ s!(require)([^\w])!<b>$1</b>$2!g; $tmp =~ s!([^\#\d\w])(\d+)!$1<font color=green +>$2</font>!g; $tmp =~ s!(while)!<b>$1</b>!g; $tmp =~ s!(foreach)!<b>$1</b>!g; $tmp =~ s!([^\w])(for)([^\w])!$1<b>$2</b>$3!g; $tmp =~ s!([^\w])(my)([^\w])!$1<b>$2</b>$3!g; $tmp =~ s!([^\w])(sub)([^\w])!$1<b>$2</b>$3!g; $tmp =~ s!([^\w])(if)([^\w])!$1<b>$2</b>$3!g; $tmp =~ s!([^\w])(eq)([^\w])!$1<b>$2</b>$3!g; $tmp =~ s!([^\w])(ne)([^\w])!$1<b>$2</b>$3!g; $tmp =~ s!([^\w])(lt)([^\w])!$1<b>$2</b>$3!g; $tmp =~ s!([^\w])(gt)([^\w])!$1<b>$2</b>$3!g; $tmp =~ s!([^\w])(or)([^\w])!$1<b>$2</b>$3!g; $tmp =~ s!(next)!<b>$1</b>!g; $tmp =~ s!(last)!<b>$1</b>!g; $tmp =~ s!(unless)!<b>$1</b>!g; $tmp =~ s!(elsif)!<b>$1</b>!g; $tmp =~ s!(else)!<b>$1</b>!g; $tmp =~ s!([^\w])(use)([^\w])!$1<b>$2</b>$3!g; $tmp =~ s!(package)!<b>$1</b>!g; $tmp =~ s!&quot;!&#34;!g; $tmp =~ s!"!&#34;!g; #" $tmp =~ s!\s{1};!&#59;!g; #$tmp =~ s!&gt;br&gt;!\n!g; $tmp = "<br><font color=\"blue\"><i># $msg{cod +e}</i></font> <br><div class=\"codepost\"><code>" . $tmp . '</code><br><font color="blue"><i># +Code End</i></font></div>'; }exisog) {} $message =~ s~=br=~<br>~isg;


    And here is an output of what it shows
    #!/usr/bin/perl
    # This code is not ment to work or syntax ok
    # It is only ment to view the code highlighter
    # for, if, befor, returning, return
    print "Content-type: text/html\n\n";
    print "<html><h1>Hello!</h1></html>\n";
    push ( @INC, './lib' ) if $cfg{rr} gt 2;
    push ( @INC, './lib2' ) if $cfg{st} lt 1 or die('im dead');
    $14563634545345
    package filters;
    # ---------------------
    #  Untaint
    # ---------------------
    sub untaint {
    my $value   = shift || '';
    my $pattern = shift || '\w\-\.\/';
    return '' unless $value;
    $value =~ m!^([$pattern]+)$!i
    ? return $1
    : return;
    }
             elsif ($row[2]) { # a comment here
              push ( @INC, './lib/modules' );
              next unless ($row[3] ne -r "$row[3].pm") or die(' XP ');
              require "$row[3].pm";
              if ($row[4] eq $sub_action{$row[4]}) {
               delete $INC[$#INC];
               last;
              }
              $load = $row[3] . '::' . $row[4];
              delete $INC{"$row[3].pm"};
    sub html_escape
    {
          my $text = shift;
          return '' unless $text;
          #$text =~ s{\;}{#59;}gso; # SQL Safer
          $text =~ s{&}{&amp;}gso;
          $text =~ s{"}{&quot;}gso;
          $text =~ s{<}{&lt;}gso;
          $text =~ s{>}{&gt;}gso;
          $text =~ s{'}{&#39;}gso; # SQL Safer
          $text =~ s{\\}{&#92;}gso; # need this!
          $text =~ s{\t}{ \&nbsp; \&nbsp; \&nbsp;}gso;
          $text =~ s{\|}{\&#124;}gso; # going to keep this
          return $text;
    }