#!/usr/bin/perl -w use strict; use warnings; my $SCRIPT = ReadFile(__FILE__); my $SHRUNK = CompressPerl($SCRIPT); # Compress script CreateFile('output.pl', $SHRUNK); my $ORIGINAL_SIZE = -s __FILE__; my $SHRUNK_SIZE = length($SHRUNK); my $DIFF = $ORIGINAL_SIZE - $SHRUNK_SIZE; print $SHRUNK, "\n"; print '^' x 78; print "\n\nFILE NAME : ", __FILE__; print "\nFILE SIZE : $ORIGINAL_SIZE bytes"; print "\nSHRUNK SIZE : $SHRUNK_SIZE bytes"; if ($SHRUNK_SIZE < $ORIGINAL_SIZE) { print "\n\nYou could eliminate $DIFF bytes by compressing this script.\n"; } exit; ################################################## # Takes a perl script string as input and returns # condensed perl code. # Usage: STRING = CompactPerl(STRING) # sub CompressPerl { defined $_[0] or return ''; my @LINES = split(/\n/, shift); my $c; my $p; my $MODE; my $START; my $NOSPACE; my $NAME; my $DATA = 0; my @VARS; my @OUTPUT; my $FIRST_LINE = lc($LINES[0]); if (substr($FIRST_LINE, 0, 15) eq '#!/usr/bin/perl') { $OUTPUT[0] = shift(@LINES) . "\n"; # Skip shebang } foreach my $CODE (@LINES) # Process line by line { $c = 0; $MODE = 0; $START = 0; $NOSPACE = ''; ################################### PROCESS EOF MARKER if (ExtractFirstWord($CODE) eq '__END__') { last; } ################################### PROCESS DATA SECTION if ($DATA) { push(@OUTPUT, "\n" . RTRIM($CODE)); next; } ############################################### # Process lines character by character for (my $i = 0; $i <= length($CODE); $i++) { $p = $c; # Store previous character # $NOSPACE is a look-back buffer that allows us to # check what came before the current character. # As the name suggests, $NOSPACE contains # no whitespace and no line breaks. if ($p > 32) { $NOSPACE .= chr($p); } $c = vec($CODE, $i, 8); # Get current character ################################### PROCESS STRING QUOTES if ($c == 34 || $c == 39) # 34 = " 39 = ' { # Catch beginning of quote if ($MODE == 0 && $p != 36) # Ignore $' { $MODE = $c; push(@OUTPUT, Compress(substr($CODE, $START, $i - $START))); $START = $i; next; } # Catch end of quote if ($MODE == $c && $p != 92) # Ignore \' { push(@OUTPUT, substr($CODE, $START, $i - $START + 1)); $START = $i + 1; $MODE = 0; next; } } if ($MODE == 34 || $MODE == 39) # Ignore everything between quotes { next; } ################################### PROCESS COMMENTS if ($c == 35) # 35 = # { if ($MODE == 0 && $p != 36) # Ignore $# { push(@OUTPUT, Compress(substr($CODE, $START, $i - $START))); $MODE = -1; last; # Ignore rest of the line } } ################################### PROCESS REGEX MATCH if ($p == 61 && $c == 126) # Look for =~ { if ($MODE == 0) # We skip lines with regex { $MODE = 126; push(@OUTPUT, Compress(substr($CODE, $START, $i - $START))); push(@OUTPUT, substr($CODE, $i) . "\n"); $CODE = ''; $START = 0; last; } } ################################### COLLECT SUB AND VARIABLE NAMES if (isName($c) && $MODE == 0) { if ($p == 36 || $p == 64) { push(@OUTPUT, Compress(substr($CODE, $START, $i - $START))); $START = $i; $MODE = 85; # Expect a variable name to follow next; } elsif (EndsWith($NOSPACE, 'sub')) { if ($p == 32 || $p == 9) # Previous char was space { push(@OUTPUT, Compress(substr($CODE, $START, $i - $START))); $START = $i; $MODE = 85; # Expect sub name to follow next; } } } if ($MODE == 85) # Capture sub or variable name { if (!isName($c)) { $NAME = Compress(substr($CODE, $START, $i - $START)); push(@OUTPUT, $NAME); push(@VARS, $NAME); $START = $i; $MODE = 0; next; } } ################################### DETECT __DATA__ SECTION if ($c == 0) # End of line? { if ($MODE == 0 && index(substr($CODE, $START), '__DATA__') >= 0) { push(@OUTPUT, Compress(substr($CODE, $START, $i - $START)) . "\n"); push(@OUTPUT, substr($CODE, $i)); $CODE = ''; $DATA = 1; next; } } } # Store rest of the line. if ($MODE >= 0 && $START < length($CODE)) { push(@OUTPUT, Compress(substr($CODE, $START))); } } $DATA = MergeOutput(@OUTPUT); # Remove short names foreach (@VARS) { length($_) > 6 or $_ = ''; } # Remove duplicate names from list of subs and variables. @VARS = RemoveDuplicates(sort @VARS); # print join("\n", @VARS); # Try to replace sub names... my $i = 0; my $NEW_NAME; for ($a = 0; $a < 26; $a++) { for ($b = 0; $b < 26; $b++) { $NEW_NAME = chr($a + 97) . chr($b + 65); $i < @VARS or return $DATA; $DATA = Replace($DATA, $VARS[$i++], $NEW_NAME); } } } ################################################## # v2019.11.23 # Returns true if string A ends with string B, # otherwise returns false. This is case sensitive! # Usage: INTEGER = EndsWith(STRING_A, STRING_B) # sub EndsWith { defined $_[0] or return 0; defined $_[1] or return 1; my $LA = length($_[0]); my $LB = length($_[1]); $LB or return 1; $LA >= $LB or return 0; $_[1] eq substr($_[0], $LA - $LB); } ################################################## # v2019.12.8 # Same as the LTRIM$() function in QBASIC. # Removes whitespace from the left side of # string and returns a new string. # Usage STRING = LTRIM(STRING) # sub LTRIM { my $S = defined $_[0] ? $_[0] : ''; $S =~ s/^[\s\r\n\0-\x1F]*//g; return $S; } ################################################## # v2019.12.8 # Same as the RTRIM$() function in QBASIC. # Removes whitespace from the right side of # string and returns a new string. # Usage STRING = RTRIM(STRING) # sub RTRIM { my $S = defined $_[0] ? $_[0] : ''; $S =~ s/[\s\r\n\0-\x1F]*$//g; return $S; } ################################################## # v2019.8.25 # Removes whitespace from before and after STRING. # Whitespace is here defined as any byte whose # ASCII value is less than 33. That includes # tabs, esc, null, vertical tab, new lines, etc. # Usage: STRING = Trim(STRING) # sub Trim { my $S = defined $_[0] ? $_[0] : ''; $S =~ s/^[\s\r\n\0-\x1F]*|[\s\r\n\0-\x1F]*$//g; return $S; } ################################################## # This function compresses a portion of code that # does not contain any quotes, regex, or comments. # Usage: STRING = Compress(STRING) # sub Compress { my $S = defined $_[0] ? $_[0] : ''; $S =~ s/\s+/ /g; # Reduce large gaps $S = TrimOperators(Trim($S)); return $S; } ################################################## # This function trims whitespace from # before and after a list of perl operators. # Usage: STRING = TrimOperators(STRING) # sub TrimOperators { my $S = defined $_[0] ? $_[0] : ''; my $c; my $OP = '+-,.|&*/<>?:;!%^()[]{}=$@'; for (my $i = 0; $i < length($OP); $i++) { $c = substr($OP, $i, 1); $S =~ s/\s*\Q$c\E\s*/$c/g; } return $S; } ################################################## # v2019.12.8 # Returns the first word from a string which may # start with whitespace or new line characters. # Usage: STRING = ExtractFirstWord(STRING) # sub ExtractFirstWord { defined $_[0] or return ''; my ($i, $P, $L) = (-1, -1, length($_[0])); $L or return ''; while (++$i < $L) { if (vec($_[0], $i, 8) > 32) { $P >= 0 or $P = $i; } else { $P < 0 or last; } } return substr($_[0], $P, $i - $P); } ################################################## # # Merges two adjoining elements of an array with # a space in between if the two are both letters. # Merge with no space if... # Usage: STRING = MergeOutput(ARRAY) # sub MergeOutput { @_ or return ''; my @A = @_; my $OUTPUT = ' '; # Just a placeholder my $ENDS_WITH_NAME; my $STARTS_WITH_NAME; for (my $i = 0; $i < @A; $i++) { # Take last character of previous element $ENDS_WITH_NAME = isName(vec($OUTPUT, length($OUTPUT) - 1, 8)); # Take the first character of the next element $STARTS_WITH_NAME = isName(vec($A[$i], 0, 8)); # If a line ends with a letter and the next line starts with # the word 'or' then we need to insert a space between the two # in order to keep the code functional. if ($ENDS_WITH_NAME && $STARTS_WITH_NAME) { $OUTPUT .= ' '; } $OUTPUT .= $A[$i]; } return substr($OUTPUT, 1); # Remove initial space } ################################################## # Tests if the ASCII character code is of a name. # A name may consist of letters and underscore. # Usage: INTEGER = isName(ASCII_CODE) # sub isName { defined $_[0] or return 0; $_[0] > 64 or return 0; $_[0] < 123 or return 0; return ($_[0] < 91 || $_[0] == 95 || $_[0] > 96) ? 1 : 0; } ################################################## # # This function reads the entire contents of a file # in binary mode and returns it as a string. If an # errors occur, an empty string is returned silently. # A second argument will move the file pointer before # reading. And a third argument limits the number # of bytes to read. # Usage: STRING = ReadFile(FILENAME, [START, [LENGTH]]) # sub ReadFile { my $NAME = defined $_[0] ? $_[0] : ''; $NAME =~ tr/\"\0*?|<>//d; # Remove special characters -e $NAME or return ''; -f $NAME or return ''; my $SIZE = -s $NAME; $SIZE or return ''; my $LEN = defined $_[2] ? $_[2] : $SIZE; $LEN > 0 or return ''; local *FH; sysopen(FH, $NAME, 0) or return ''; binmode FH; my $POS = defined $_[1] ? $_[1] : 0; $POS < $SIZE or return ''; $POS < 1 or sysseek(FH, 0, $POS); # Move file ptr my $DATA = ''; sysread(FH, $DATA, $LEN); # Read file close FH; return $DATA; } ################################################## # # Creates and overwrites a file in binary mode. # Returns 1 on success or 0 if something went wrong. # Usage: INTEGER = CreateFile(FILE_NAME, CONTENT) # sub CreateFile { defined $_[0] or return 0; my $F = $_[0]; $F =~ tr#\"\0*?|<>##d; # Remove special characters length($F) or return 0; local *FH; open(FH, ">$F") or return 0; binmode FH; if (defined $_[1] ? length($_[1]) : 0) { print FH $_[1]; } close FH or return 0; return 1; } ################################################## # v2019.12.7 # This function scans string S and replaces the # first N occurrences of string A with string B # and returns a new string. If N is -1 then only # the last instance is replaced. # Usage: STRING = Replace(STRING_S, STRING_A, [STRING_B, [N]]) # sub Replace { # First, we make sure that required arguments are available # and any special scenarios are handled correctly. defined $_[0] or return ''; # Missing arguments? defined $_[1] or return $_[0]; # Missing arguments? my $B = defined $_[2] ? $_[2] : ''; # Replace to --> $B my $N = defined $_[3] ? $_[3] : 0x7FFFFFFF; # Get $N my ($LA, $LB) = (length($_[1]), length($B)); # Get string lengths # The search string must not be an empty string, or we exit. # The string that we search for must not be longer than # the string in which we search. ($N && $LA && $LA <= length($_[0])) or return $_[0]; my ($LAST, $F, $X) = (0, 0, $_[0]); if ($N > 0x7FFFFFFE) { # If N was not provided, then that means we have to # replace every instance, so we'll use regex... my $A = $_[1]; $X =~ s/\Q$A\E/$B/g; return $X; } if ($N < 0) { # If we get here, we must not replace every # instance, and we must go from right to left. $F = length($X); while (($F = rindex($X, $_[1], $F)) >= 0) { substr($X, $F, $LA) = $B; ++$N or last; } return $X; } if ($LA == $LB) { # In this case, output string will be the # same length as the input string. # We must not replace every instance, # and we search from left to right. while (($F = index($X, $_[1], $F)) >= 0) { substr($X, $F, $LA) = $B; $F += $LB; --$N or last; } return $X; } # In this final scenario, the output string will # NOT be the same length as the input string. # We must not replace every instance, # and we search from left to right. # For performance reasons, we build a new string. $X = ''; while (($F = index($_[0], $_[1], $F)) >= 0) { $X .= substr($_[0], $LAST, $F - $LAST); $X .= $B; $F += $LA; $LAST = $F; --$N or last; } return $X . substr($_[0], $LAST); } ################################################## sub RemoveDuplicates { my %seen; grep !$seen{$_}++, @_; } ################################################## __DATA__ _ A B C DATA SECTION | _______________ __END__ # End of file END OF FILE