Okay, I rewrote the whole thing from scratch. This one works much better because it also replaces long variable and sub names with short ones...
Now my disclaimer is that I didn't spend too much time working on this, so it CAN and WILL break your script if it contains here documents and some other things. The resulting code works usually but not always. :P If you run this script, it creates a script called output.pl , and if you run that script, it will overwrite itself. But it runs without errors.
And it is reducing perl code from 13,997 bytes to 4,750 bytes! That's less than half of the original. And we're not even using gzip here.
#!/usr/bin/perl -w
use strict;
use warnings;
my $SCRIPT = ReadFile(__FILE__);
my $SHRUNK = CompressPerl($SCRIPT); # Compress script
CreateFile('output.pl', $SHRUNK);
my $ORIGINAL_SIZE = -s __FILE__;
my $SHRUNK_SIZE = length($SHRUNK);
my $DIFF = $ORIGINAL_SIZE - $SHRUNK_SIZE;
print $SHRUNK, "\n";
print '^' x 78;
print "\n\nFILE NAME : ", __FILE__;
print "\nFILE SIZE : $ORIGINAL_SIZE bytes";
print "\nSHRUNK SIZE : $SHRUNK_SIZE bytes";
if ($SHRUNK_SIZE < $ORIGINAL_SIZE)
{ print "\n\nYou could eliminate $DIFF bytes by compressing this scrip
+t.\n"; }
exit;
##################################################
# Takes a perl script string as input and returns
# condensed perl code.
# Usage: STRING = CompactPerl(STRING)
#
sub CompressPerl
{
defined $_[0] or return '';
my @LINES = split(/\n/, shift);
my $c;
my $p;
my $MODE;
my $START;
my $NOSPACE;
my $NAME;
my $DATA = 0;
my @VARS;
my @OUTPUT;
my $FIRST_LINE = lc($LINES[0]);
if (substr($FIRST_LINE, 0, 15) eq '#!/usr/bin/perl')
{
$OUTPUT[0] = shift(@LINES) . "\n"; # Skip shebang
}
foreach my $CODE (@LINES) # Process line by line
{
$c = 0;
$MODE = 0;
$START = 0;
$NOSPACE = '';
################################### PROCESS EOF MARKER
if (ExtractFirstWord($CODE) eq '__END__') { last; }
################################### PROCESS DATA SECTION
if ($DATA) { push(@OUTPUT, "\n" . RTRIM($CODE)); next; }
###############################################
# Process lines character by character
for (my $i = 0; $i <= length($CODE); $i++)
{
$p = $c; # Store previous character
# $NOSPACE is a look-back buffer that allows us to
# check what came before the current character.
# As the name suggests, $NOSPACE contains
# no whitespace and no line breaks.
if ($p > 32) { $NOSPACE .= chr($p); }
$c = vec($CODE, $i, 8); # Get current character
################################### PROCESS STRING QUOTES
if ($c == 34 || $c == 39) # 34 = " 39 = '
{ # Catch beginning of quote
if ($MODE == 0 && $p != 36) # Ignore $'
{
$MODE = $c;
push(@OUTPUT, Compress(substr($CODE, $START, $i - $START)));
$START = $i;
next;
} # Catch end of quote
if ($MODE == $c && $p != 92) # Ignore \'
{
push(@OUTPUT, substr($CODE, $START, $i - $START + 1));
$START = $i + 1;
$MODE = 0;
next;
}
}
if ($MODE == 34 || $MODE == 39) # Ignore everything between q
+uotes
{ next; }
################################### PROCESS COMMENTS
if ($c == 35) # 35 = #
{
if ($MODE == 0 && $p != 36) # Ignore $#
{
push(@OUTPUT, Compress(substr($CODE, $START, $i - $START)));
$MODE = -1;
last; # Ignore rest of the line
}
}
################################### PROCESS REGEX MATCH
if ($p == 61 && $c == 126) # Look for =~
{
if ($MODE == 0) # We skip lines with regex
{
$MODE = 126;
push(@OUTPUT, Compress(substr($CODE, $START, $i - $START)));
push(@OUTPUT, substr($CODE, $i) . "\n");
$CODE = '';
$START = 0;
last;
}
}
################################### COLLECT SUB AND VARIABLE NAM
+ES
if (isName($c) && $MODE == 0)
{
if ($p == 36 || $p == 64)
{
push(@OUTPUT, Compress(substr($CODE, $START, $i - $START)));
$START = $i;
$MODE = 85; # Expect a variable name to follow
next;
}
elsif (EndsWith($NOSPACE, 'sub'))
{
if ($p == 32 || $p == 9) # Previous char was space
{
push(@OUTPUT, Compress(substr($CODE, $START, $i - $START))
+);
$START = $i;
$MODE = 85; # Expect sub name to follow
next;
}
}
}
if ($MODE == 85) # Capture sub or variable name
{
if (!isName($c))
{
$NAME = Compress(substr($CODE, $START, $i - $START));
push(@OUTPUT, $NAME);
push(@VARS, $NAME);
$START = $i;
$MODE = 0;
next;
}
}
################################### DETECT __DATA__ SECTION
if ($c == 0) # End of line?
{
if ($MODE == 0 && index(substr($CODE, $START), '__DATA__') >=
+0)
{
push(@OUTPUT, Compress(substr($CODE, $START, $i - $START)) .
+ "\n");
push(@OUTPUT, substr($CODE, $i));
$CODE = '';
$DATA = 1;
next;
}
}
}
# Store rest of the line.
if ($MODE >= 0 && $START < length($CODE))
{
push(@OUTPUT, Compress(substr($CODE, $START)));
}
}
$DATA = MergeOutput(@OUTPUT);
# Remove short names
foreach (@VARS)
{ length($_) > 6 or $_ = ''; }
# Remove duplicate names from list of subs and variables.
@VARS = RemoveDuplicates(sort @VARS);
# print join("\n", @VARS);
# Try to replace sub names...
my $i = 0;
my $NEW_NAME;
for ($a = 0; $a < 26; $a++)
{
for ($b = 0; $b < 26; $b++)
{
$NEW_NAME = chr($a + 97) . chr($b + 65);
$i < @VARS or return $DATA;
$DATA = Replace($DATA, $VARS[$i++], $NEW_NAME);
}
}
}
##################################################
# v2019.11.23
# Returns true if string A ends with string B,
# otherwise returns false. This is case sensitive!
# Usage: INTEGER = EndsWith(STRING_A, STRING_B)
#
sub EndsWith
{
defined $_[0] or return 0;
defined $_[1] or return 1;
my $LA = length($_[0]);
my $LB = length($_[1]);
$LB or return 1;
$LA >= $LB or return 0;
$_[1] eq substr($_[0], $LA - $LB);
}
##################################################
# v2019.12.8
# Same as the LTRIM$() function in QBASIC.
# Removes whitespace from the left side of
# string and returns a new string.
# Usage STRING = LTRIM(STRING)
#
sub LTRIM
{
my $S = defined $_[0] ? $_[0] : '';
$S =~ s/^[\s\r\n\0-\x1F]*//g;
return $S;
}
##################################################
# v2019.12.8
# Same as the RTRIM$() function in QBASIC.
# Removes whitespace from the right side of
# string and returns a new string.
# Usage STRING = RTRIM(STRING)
#
sub RTRIM
{
my $S = defined $_[0] ? $_[0] : '';
$S =~ s/[\s\r\n\0-\x1F]*$//g;
return $S;
}
##################################################
# v2019.8.25
# Removes whitespace from before and after STRING.
# Whitespace is here defined as any byte whose
# ASCII value is less than 33. That includes
# tabs, esc, null, vertical tab, new lines, etc.
# Usage: STRING = Trim(STRING)
#
sub Trim
{
my $S = defined $_[0] ? $_[0] : '';
$S =~ s/^[\s\r\n\0-\x1F]*|[\s\r\n\0-\x1F]*$//g;
return $S;
}
##################################################
# This function compresses a portion of code that
# does not contain any quotes, regex, or comments.
# Usage: STRING = Compress(STRING)
#
sub Compress
{
my $S = defined $_[0] ? $_[0] : '';
$S =~ s/\s+/ /g; # Reduce large gaps
$S = TrimOperators(Trim($S));
return $S;
}
##################################################
# This function trims whitespace from
# before and after a list of perl operators.
# Usage: STRING = TrimOperators(STRING)
#
sub TrimOperators
{
my $S = defined $_[0] ? $_[0] : '';
my $c;
my $OP = '+-,.|&*/<>?:;!%^()[]{}=$@';
for (my $i = 0; $i < length($OP); $i++)
{
$c = substr($OP, $i, 1);
$S =~ s/\s*\Q$c\E\s*/$c/g;
}
return $S;
}
##################################################
# v2019.12.8
# Returns the first word from a string which may
# start with whitespace or new line characters.
# Usage: STRING = ExtractFirstWord(STRING)
#
sub ExtractFirstWord
{
defined $_[0] or return '';
my ($i, $P, $L) = (-1, -1, length($_[0]));
$L or return '';
while (++$i < $L)
{
if (vec($_[0], $i, 8) > 32)
{ $P >= 0 or $P = $i; }
else
{ $P < 0 or last; }
}
return substr($_[0], $P, $i - $P);
}
##################################################
#
# Merges two adjoining elements of an array with
# a space in between if the two are both letters.
# Merge with no space if...
# Usage: STRING = MergeOutput(ARRAY)
#
sub MergeOutput
{
@_ or return '';
my @A = @_;
my $OUTPUT = ' '; # Just a placeholder
my $ENDS_WITH_NAME;
my $STARTS_WITH_NAME;
for (my $i = 0; $i < @A; $i++)
{
# Take last character of previous element
$ENDS_WITH_NAME = isName(vec($OUTPUT, length($OUTPUT) - 1, 8));
# Take the first character of the next element
$STARTS_WITH_NAME = isName(vec($A[$i], 0, 8));
# If a line ends with a letter and the next line starts with
# the word 'or' then we need to insert a space between the two
# in order to keep the code functional.
if ($ENDS_WITH_NAME && $STARTS_WITH_NAME)
{ $OUTPUT .= ' '; }
$OUTPUT .= $A[$i];
}
return substr($OUTPUT, 1); # Remove initial space
}
##################################################
# Tests if the ASCII character code is of a name.
# A name may consist of letters and underscore.
# Usage: INTEGER = isName(ASCII_CODE)
#
sub isName
{
defined $_[0] or return 0;
$_[0] > 64 or return 0;
$_[0] < 123 or return 0;
return ($_[0] < 91 || $_[0] == 95 || $_[0] > 96) ? 1 : 0;
}
##################################################
#
# This function reads the entire contents of a file
# in binary mode and returns it as a string. If an
# errors occur, an empty string is returned silently.
# A second argument will move the file pointer before
# reading. And a third argument limits the number
# of bytes to read.
# Usage: STRING = ReadFile(FILENAME, [START, [LENGTH]])
#
sub ReadFile
{
my $NAME = defined $_[0] ? $_[0] : '';
$NAME =~ tr/\"\0*?|<>//d; # Remove special characters
-e $NAME or return '';
-f $NAME or return '';
my $SIZE = -s $NAME;
$SIZE or return '';
my $LEN = defined $_[2] ? $_[2] : $SIZE;
$LEN > 0 or return '';
local *FH;
sysopen(FH, $NAME, 0) or return '';
binmode FH;
my $POS = defined $_[1] ? $_[1] : 0;
$POS < $SIZE or return '';
$POS < 1 or sysseek(FH, 0, $POS); # Move file ptr
my $DATA = '';
sysread(FH, $DATA, $LEN); # Read file
close FH;
return $DATA;
}
##################################################
#
# Creates and overwrites a file in binary mode.
# Returns 1 on success or 0 if something went wrong.
# Usage: INTEGER = CreateFile(FILE_NAME, CONTENT)
#
sub CreateFile
{
defined $_[0] or return 0;
my $F = $_[0];
$F =~ tr#\"\0*?|<>##d; # Remove special characters
length($F) or return 0;
local *FH;
open(FH, ">$F") or return 0;
binmode FH;
if (defined $_[1] ? length($_[1]) : 0) { print FH $_[1]; }
close FH or return 0;
return 1;
}
##################################################
# v2019.12.7
# This function scans string S and replaces the
# first N occurrences of string A with string B
# and returns a new string. If N is -1 then only
# the last instance is replaced.
# Usage: STRING = Replace(STRING_S, STRING_A, [STRING_B, [N]])
#
sub Replace
{
# First, we make sure that required arguments are available
# and any special scenarios are handled correctly.
defined $_[0] or return ''; # Missing arguments?
defined $_[1] or return $_[0]; # Missing arguments?
my $B = defined $_[2] ? $_[2] : ''; # Replace to --> $B
my $N = defined $_[3] ? $_[3] : 0x7FFFFFFF; # Get $N
my ($LA, $LB) = (length($_[1]), length($B)); # Get string lengths
# The search string must not be an empty string, or we exit.
# The string that we search for must not be longer than
# the string in which we search.
($N && $LA && $LA <= length($_[0])) or return $_[0];
my ($LAST, $F, $X) = (0, 0, $_[0]);
if ($N > 0x7FFFFFFE)
{
# If N was not provided, then that means we have to
# replace every instance, so we'll use regex...
my $A = $_[1];
$X =~ s/\Q$A\E/$B/g;
return $X;
}
if ($N < 0)
{
# If we get here, we must not replace every
# instance, and we must go from right to left.
$F = length($X);
while (($F = rindex($X, $_[1], $F)) >= 0)
{
substr($X, $F, $LA) = $B;
++$N or last;
}
return $X;
}
if ($LA == $LB)
{
# In this case, output string will be the
# same length as the input string.
# We must not replace every instance,
# and we search from left to right.
while (($F = index($X, $_[1], $F)) >= 0)
{
substr($X, $F, $LA) = $B;
$F += $LB;
--$N or last;
}
return $X;
}
# In this final scenario, the output string will
# NOT be the same length as the input string.
# We must not replace every instance,
# and we search from left to right.
# For performance reasons, we build a new string.
$X = '';
while (($F = index($_[0], $_[1], $F)) >= 0)
{
$X .= substr($_[0], $LAST, $F - $LAST);
$X .= $B;
$F += $LA;
$LAST = $F;
--$N or last;
}
return $X . substr($_[0], $LAST);
}
##################################################
sub RemoveDuplicates { my %seen; grep !$seen{$_}++, @_; }
##################################################
__DATA__
_
A
B
C
DATA SECTION
|
_______________
__END__ # End of file
END OF FILE
| [reply] [d/l] |