#!/usr/bin/perl -w

use strict;
use warnings;

my $SCRIPT = ReadFile(__FILE__);
my $SHRUNK = CompressPerl($SCRIPT);  # Compress script
CreateFile('output.pl', $SHRUNK);

my $ORIGINAL_SIZE = -s __FILE__;
my $SHRUNK_SIZE = length($SHRUNK);
my $DIFF = $ORIGINAL_SIZE - $SHRUNK_SIZE;

print $SHRUNK, "\n";
print '^' x 78;
print "\n\nFILE NAME   : ", __FILE__;
print "\nFILE SIZE   : $ORIGINAL_SIZE bytes";
print "\nSHRUNK SIZE : $SHRUNK_SIZE bytes";
if ($SHRUNK_SIZE < $ORIGINAL_SIZE)
{ print "\n\nYou could eliminate $DIFF bytes by compressing this script.\n"; }
exit;

##################################################
# Takes a perl script string as input and returns
# condensed perl code.
# Usage: STRING = CompactPerl(STRING)
#
sub CompressPerl
{
  defined $_[0] or return '';
  my @LINES = split(/\n/, shift);

  my $c;
  my $p;
  my $MODE;
  my $START;
  my $NOSPACE;
  my $NAME;
  my $DATA = 0;

  my @VARS;
  my @OUTPUT;

  my $FIRST_LINE = lc($LINES[0]);
  if (substr($FIRST_LINE, 0, 15) eq '#!/usr/bin/perl')
  {
    $OUTPUT[0] = shift(@LINES) . "\n";	# Skip shebang
  }

  foreach my $CODE (@LINES)	# Process line by line
  {
    $c = 0;
    $MODE = 0;
    $START = 0;
    $NOSPACE = '';

    ################################### PROCESS EOF MARKER
    if (ExtractFirstWord($CODE) eq '__END__') { last; }

    ################################### PROCESS DATA SECTION
    if ($DATA) { push(@OUTPUT, "\n" . RTRIM($CODE)); next; }

    ###############################################
    # Process lines character by character
    for (my $i = 0; $i <= length($CODE); $i++)
    {
      $p = $c;			# Store previous character

      # $NOSPACE is a look-back buffer that allows us to
      # check what came before the current character.
      # As the name suggests, $NOSPACE contains
      # no whitespace and no line breaks.

      if ($p > 32) { $NOSPACE .= chr($p); }

      $c = vec($CODE, $i, 8);	# Get current character

      ################################### PROCESS STRING QUOTES
      if ($c == 34 || $c == 39)		# 34 = "   39 = '
      {					# Catch beginning of quote
        if ($MODE == 0 && $p != 36)	# Ignore $'
        {
          $MODE = $c;
          push(@OUTPUT, Compress(substr($CODE, $START, $i - $START)));
          $START = $i;
          next;
        }				# Catch end of quote
        if ($MODE == $c && $p != 92)	# Ignore \'
        {
          push(@OUTPUT, substr($CODE, $START, $i - $START + 1));
          $START = $i + 1;
          $MODE = 0;
          next;
        }
      }
      if ($MODE == 34 || $MODE == 39)	# Ignore everything between quotes
      { next; }

      ################################### PROCESS COMMENTS
      if ($c == 35)			# 35 = #
      {
        if ($MODE == 0 && $p != 36)	# Ignore $#
        {
          push(@OUTPUT, Compress(substr($CODE, $START, $i - $START)));
          $MODE = -1;
          last;				# Ignore rest of the line
        }
      }

      ################################### PROCESS REGEX MATCH
      if ($p == 61 && $c == 126)	# Look for =~
      {
        if ($MODE == 0)	  		# We skip lines with regex
        {
          $MODE = 126;
          push(@OUTPUT, Compress(substr($CODE, $START, $i - $START)));
          push(@OUTPUT, substr($CODE, $i) . "\n");
          $CODE = '';
          $START = 0;
          last;
        }
      }

      ################################### COLLECT SUB AND VARIABLE NAMES
      if (isName($c) && $MODE == 0)
      {
        if ($p == 36 || $p == 64)
        {
          push(@OUTPUT, Compress(substr($CODE, $START, $i - $START)));
          $START = $i;
          $MODE = 85;			# Expect a variable name to follow
          next;
        }
        elsif (EndsWith($NOSPACE, 'sub'))
        {
          if ($p == 32 || $p == 9)	# Previous char was space
          {
            push(@OUTPUT, Compress(substr($CODE, $START, $i - $START)));
            $START = $i;
            $MODE = 85;			# Expect sub name to follow
            next;
          }
        }
      }
      if ($MODE == 85)			# Capture sub or variable name
      {
        if (!isName($c))
        {
          $NAME = Compress(substr($CODE, $START, $i - $START));
          push(@OUTPUT, $NAME);
          push(@VARS, $NAME);
          $START = $i;
          $MODE = 0;
          next;
        }
      }

      ################################### DETECT __DATA__ SECTION
      if ($c == 0)			# End of line?
      {
        if ($MODE == 0 && index(substr($CODE, $START), '__DATA__') >= 0)
        {
          push(@OUTPUT, Compress(substr($CODE, $START, $i - $START)) . "\n");
          push(@OUTPUT, substr($CODE, $i));
          $CODE = '';
          $DATA = 1;
          next;
        }
      }
    }

    # Store rest of the line.
    if ($MODE >= 0 && $START < length($CODE))
    {
      push(@OUTPUT, Compress(substr($CODE, $START)));
    }
  }
  $DATA = MergeOutput(@OUTPUT);

  # Remove short names
  foreach (@VARS)
  { length($_) > 6 or $_ = ''; }

  # Remove duplicate names from list of subs and variables.
  @VARS = RemoveDuplicates(sort @VARS);
#  print join("\n", @VARS);

  # Try to replace sub names...
  my $i = 0;
  my $NEW_NAME;
  for ($a = 0; $a < 26; $a++)
  {
    for ($b = 0; $b < 26; $b++)
    {
      $NEW_NAME = chr($a + 97) . chr($b + 65);
      $i < @VARS or return $DATA;
      $DATA = Replace($DATA, $VARS[$i++], $NEW_NAME);
    }
  }
}
##################################################
#                                      v2019.11.23
# Returns true if string A ends with string B,
# otherwise returns false. This is case sensitive!
# Usage: INTEGER = EndsWith(STRING_A, STRING_B)
#
sub EndsWith
{
  defined $_[0] or return 0;
  defined $_[1] or return 1;
  my $LA = length($_[0]);
  my $LB = length($_[1]);
  $LB or return 1;
  $LA >= $LB or return 0;
  $_[1] eq substr($_[0], $LA - $LB);
}
##################################################
#                                       v2019.12.8
# Same as the LTRIM$() function in QBASIC.
# Removes whitespace from the left side of
# string and returns a new string.
# Usage STRING = LTRIM(STRING)
#
sub LTRIM
{
  my $S = defined $_[0] ? $_[0] : '';
  $S =~ s/^[\s\r\n\0-\x1F]*//g;
  return $S;
}
##################################################
#                                       v2019.12.8
# Same as the RTRIM$() function in QBASIC.
# Removes whitespace from the right side of
# string and returns a new string.
# Usage STRING = RTRIM(STRING)
#
sub RTRIM
{
  my $S = defined $_[0] ? $_[0] : '';
  $S =~ s/[\s\r\n\0-\x1F]*$//g;
  return $S;
}
##################################################
#                                       v2019.8.25
# Removes whitespace from before and after STRING.
# Whitespace is here defined as any byte whose
# ASCII value is less than 33. That includes
# tabs, esc, null, vertical tab, new lines, etc.
# Usage: STRING = Trim(STRING)
#
sub Trim
{
  my $S = defined $_[0] ? $_[0] : '';
  $S =~ s/^[\s\r\n\0-\x1F]*|[\s\r\n\0-\x1F]*$//g;
  return $S;
}
##################################################
# This function compresses a portion of code that
# does not contain any quotes, regex, or comments.
# Usage: STRING = Compress(STRING)
#
sub Compress
{
  my $S = defined $_[0] ? $_[0] : '';
  $S =~ s/\s+/ /g;            # Reduce large gaps
  $S = TrimOperators(Trim($S));
  return $S;
}
##################################################
# This function trims whitespace from
# before and after a list of perl operators.
# Usage: STRING = TrimOperators(STRING)
#
sub TrimOperators
{
  my $S = defined $_[0] ? $_[0] : '';
  my $c;
  my $OP = '+-,.|&*/<>?:;!%^()[]{}=$@';
  for (my $i = 0; $i < length($OP); $i++)
  {
    $c = substr($OP, $i, 1);
    $S =~ s/\s*\Q$c\E\s*/$c/g;
  }
  return $S;
}
##################################################
#                                       v2019.12.8
# Returns the first word from a string which may
# start with whitespace or new line characters.
# Usage: STRING = ExtractFirstWord(STRING)
#
sub ExtractFirstWord
{
  defined $_[0] or return '';
  my ($i, $P, $L) = (-1, -1, length($_[0]));
  $L or return '';
  while (++$i < $L)
  {
    if (vec($_[0], $i, 8) > 32)
    { $P >= 0 or $P = $i; }
    else
    { $P < 0 or last; }
  }
  return substr($_[0], $P, $i - $P);
}
##################################################
#
# Merges two adjoining elements of an array with
# a space in between if the two are both letters.
# Merge with no space if...
# Usage: STRING = MergeOutput(ARRAY)
#
sub MergeOutput
{
  @_ or return '';
  my @A = @_;
  my $OUTPUT = ' ';	# Just a placeholder
  my $ENDS_WITH_NAME;
  my $STARTS_WITH_NAME;
  for (my $i = 0; $i < @A; $i++)
  {
    # Take last character of previous element
    $ENDS_WITH_NAME = isName(vec($OUTPUT, length($OUTPUT) - 1, 8));

    # Take the first character of the next element
    $STARTS_WITH_NAME = isName(vec($A[$i], 0, 8));

    # If a line ends with a letter and the next line starts with
    # the word 'or' then we need to insert a space between the two
    # in order to keep the code functional.
    if ($ENDS_WITH_NAME && $STARTS_WITH_NAME)
    { $OUTPUT .= ' '; }

    $OUTPUT .= $A[$i];
  }
  return substr($OUTPUT, 1);	# Remove initial space
}
##################################################
# Tests if the ASCII character code is of a name.
# A name may consist of letters and underscore.
# Usage: INTEGER = isName(ASCII_CODE)
#
sub isName
{
  defined $_[0] or return 0;
  $_[0] > 64    or return 0;
  $_[0] < 123   or return 0;
  return ($_[0] < 91 || $_[0] == 95 || $_[0] > 96) ? 1 : 0;
}
##################################################
#
# This function reads the entire contents of a file
# in binary mode and returns it as a string. If an
# errors occur, an empty string is returned silently.
# A second argument will move the file pointer before
# reading. And a third argument limits the number
# of bytes to read.
# Usage: STRING = ReadFile(FILENAME, [START, [LENGTH]])
#
sub ReadFile
{
  my $NAME = defined $_[0] ? $_[0] : '';
  $NAME =~ tr/\"\0*?|<>//d;	# Remove special characters
  -e $NAME or return '';
  -f $NAME or return '';
  my $SIZE = -s $NAME;
  $SIZE or return '';
  my $LEN = defined $_[2] ? $_[2] : $SIZE;
  $LEN > 0 or return '';
  local *FH;
  sysopen(FH, $NAME, 0) or return '';
  binmode FH;
  my $POS = defined $_[1] ? $_[1] : 0;
  $POS < $SIZE or return '';
  $POS < 1 or sysseek(FH, 0, $POS);	# Move file ptr
  my $DATA = '';
  sysread(FH, $DATA, $LEN);		# Read file
  close FH;
  return $DATA;
}
##################################################
#
# Creates and overwrites a file in binary mode.
# Returns 1 on success or 0 if something went wrong.
# Usage: INTEGER = CreateFile(FILE_NAME, CONTENT)
#
sub CreateFile
{
  defined $_[0] or return 0;
  my $F = $_[0];
  $F =~ tr#\"\0*?|<>##d;	# Remove special characters
  length($F) or return 0;
  local *FH;
  open(FH, ">$F") or return 0;
  binmode FH;
  if (defined $_[1] ? length($_[1]) : 0) { print FH $_[1]; }
  close FH or return 0;
  return 1;
}
##################################################
#                                       v2019.12.7
# This function scans string S and replaces the
# first N occurrences of string A with string B
# and returns a new string. If N is -1 then only
# the last instance is replaced.
# Usage: STRING = Replace(STRING_S, STRING_A, [STRING_B, [N]])
#
sub Replace
{
  # First, we make sure that required arguments are available
  # and any special scenarios are handled correctly.

  defined $_[0] or return '';     # Missing arguments?
  defined $_[1] or return $_[0];  # Missing arguments?
  my $B = defined $_[2] ? $_[2] : '';    # Replace to --> $B
  my $N = defined $_[3] ? $_[3] : 0x7FFFFFFF;   # Get $N
  my ($LA, $LB) = (length($_[1]), length($B));  # Get string lengths

  # The search string must not be an empty string, or we exit.
  # The string that we search for must not be longer than
  # the string in which we search.
  ($N && $LA && $LA <= length($_[0])) or return $_[0];
  my ($LAST, $F, $X) = (0, 0, $_[0]);

  if ($N > 0x7FFFFFFE)
  {
    # If N was not provided, then that means we have to
    # replace every instance, so we'll use regex...

    my $A = $_[1];
    $X =~ s/\Q$A\E/$B/g;
    return $X;
  }

  if ($N < 0)
  {
    # If we get here, we must not replace every
    # instance, and we must go from right to left.

    $F = length($X);
    while (($F = rindex($X, $_[1], $F)) >= 0)
    {
      substr($X, $F, $LA) = $B;
      ++$N or last;
    }
    return $X;
  }

  if ($LA == $LB)
  {
    # In this case, output string will be the
    # same length as the input string.
    # We must not replace every instance,
    # and we search from left to right.

    while (($F = index($X, $_[1], $F)) >= 0)
    {
      substr($X, $F, $LA) = $B;
      $F += $LB;
      --$N or last;
    }
    return $X;
  }

  # In this final scenario, the output string will
  # NOT be the same length as the input string.
  # We must not replace every instance,
  # and we search from left to right.

  # For performance reasons, we build a new string.
  $X = '';
  while (($F = index($_[0], $_[1], $F)) >= 0)
  {
    $X .= substr($_[0], $LAST, $F - $LAST);
    $X .= $B;
    $F += $LA;
    $LAST = $F;
    --$N or last;
  }
  return $X . substr($_[0], $LAST);
}
##################################################
sub RemoveDuplicates { my %seen; grep !$seen{$_}++, @_; }
##################################################

__DATA__
_
 A
  B
   C   	
    DATA SECTION
    |

_______________

__END__  # End of file

END OF FILE