Beefy Boxes and Bandwidth Generously Provided by pair Networks
XP is just a number
 
PerlMonks  

Re: Perl script compressor

by clueless newbie (Curate)
on Dec 08, 2019 at 14:22 UTC ( [id://11109839] : note . print w/replies, xml ) Need Help??


in reply to Perl script compressor

perltidy --mangle --dac ... Here's your script after perltidy --mangle --dac
#!/usr/bin/perl -w use strict; use warnings; my$WRITE_FILE=0; my$SELF=ReadFile(__FILE__); my$SHRUNK=CompactPerl($SELF); my$ORIGINAL_SIZE=length($SELF); my$SHRUNK_SIZE=length($SHRUNK); my$DIFF=$ORIGINAL_SIZE-$SHRUNK_SIZE; print '>' x 78; print"\n",$SHRUNK; print '^' x 78; print"\n\nFILE NAME : ",__FILE__; print"\nFILE SIZE : $ORIGINAL_SIZE bytes"; print"\nSHRUNK SIZE : $SHRUNK_SIZE bytes"; WriteOutput($SHRUNK)if($WRITE_FILE); if($SHRUNK_SIZE<$ORIGINAL_SIZE){print"\n\nYou could eliminate $DIFF by +tes by compressing this script.\n";}print <<'END_OF_MESSAGE'; Wow, this script is amazing! * * * * * * * * * * * * * * * * Yes, it really is! :D END_OF_MESSAGE exit; sub CreateFile{defined$_[0]or return 0; my$F=$_[0]; $F=~tr#\"\0*?|<>##d; length($F)or return 0; local*FH; open(FH,">$F")or return 0; binmode FH; if(defined$_[1]?length($_[1]):0){print FH$_[1];}close FH or return 0; print"\n$F was saved.\n"; return 1;}sub ReadFile{my$NAME=defined$_[0]?$_[0]:''; $NAME=~tr/\"\0*?|<>//d; -e$NAME or return ''; -f$NAME or return ''; my$SIZE=-s$NAME; $SIZE or return ''; my$LEN=defined$_[2]?$_[2]:$SIZE; $LEN>0 or return ''; local*FH; sysopen(FH,$NAME,0)or return ''; binmode FH; my$POS=defined$_[1]?$_[1]:0; $POS<$SIZE or return ''; $POS<1 or sysseek(FH,0,$POS); my$DATA=''; sysread(FH,$DATA,$LEN); close FH; return$DATA;}sub Trim{defined$_[0]or return ''; my$L=length($_[0]); $L or return ''; my$P=0; while($P<=$L&&vec($_[0],$P++,8)<33){}for($P--;$P<=$L&&vec($_[0],$L--,8 +)<33;){}substr($_[0],$P,$L-$P+2)}sub WriteOutput{defined$_[0]or retur +n; length($_[0])or return; my$OUTPUT_FILENAME; for(my$i=1;$i<100;$i++){$OUTPUT_FILENAME="compact$i.pl"; -e$OUTPUT_FILENAME or return CreateFile($OUTPUT_FILENAME,$_[0]);}}sub +EndsWithChar{(@_==2&&defined$_[0]&&defined$_[1]&&length($_[1])&&lengt +h($_[0]))?(index($_[1],substr($_[0],length($_[0])-1))<0?0:1):0;}sub C +ompactLine{defined$_[0]or return ''; length($_[0])or return ''; my$OUTPUT=Trim($_[0]); $OUTPUT.="\n"; return$OUTPUT;}sub TrimOperators{defined$_[0]or return; length($_[0])or return; my$c; my$OP='+-,.|&*/<>?:;!%^()[]{}='; for(my$i=0;$i<length($OP);$i++){$c=substr($OP,$i,1); $_[0]=~s/\s*\Q$c\E\s*/$c/g;}}sub CompactPerl{defined$_[0]or return ''; my$CODE=$_[0]; $CODE=~tr|\r||d; my@A=split(/\n/,$CODE); my$OP='+-,.|&*/<>?:;!%^()[]{}='; my$STRIP; my$P; my$LINE; my$TRIMMED; my$END_MARKER=''; my$MULTILINE_STRING=0; my$STOP=0; $A[0].="\n"; for(my$i=1;$i<@A;$i++){$LINE=$A[$i]; $P=index($LINE,'#'); $STRIP=Trim(($P<0)?$LINE:substr($LINE,0,$P)); if($MULTILINE_STRING==0&&$STRIP=~m/\s*<<['"]+([_A-Za-z0-9]*)['"]+/){$E +ND_MARKER=$1;$MULTILINE_STRING=2;$A[$i].="\n";next;}if($MULTILINE_STR +ING==2){index($LINE,$END_MARKER)<0 or$MULTILINE_STRING=0;$A[$i].="\n" +;next;}if($MULTILINE_STRING==0&&index($STRIP,'qq{')>=0){$MULTILINE_ST +RING=1;}if($MULTILINE_STRING==1){if(index($LINE,'}')<0){$A[$i].="\n"; +}else{$MULTILINE_STRING=0;}next;}if($STRIP eq '__END__'){$STOP=1;$MUL +TILINE_STRING=-1;}if($STOP==1){$A[$i]='';next;}if($MULTILINE_STRING== +0&&$STRIP eq '__DATA__'){$A[$i]="\n__DATA__";$MULTILINE_STRING=3;}if( +$MULTILINE_STRING==3){$A[$i].="\n";next;}if(length($STRIP)==0){$A[$i] +='';next;}if($LINE=~m/[~\"\']+/){$A[$i]=CompactLine($LINE); next;}$TRIMMED=Trim($STRIP); $TRIMMED=~s/([a-z])\s+\(/$1\(/g; $TRIMMED=~s/([a-z])\s+\%/$1\%/g; $TRIMMED=~s/([a-z])\s+\$/$1\$/g; $TRIMMED=~s/([a-z])\s+\@/$1\@/g; TrimOperators($TRIMMED); EndsWithChar($TRIMMED,$OP)or$TRIMMED.=' '; $LINE or$LINE; $A[$i]=$TRIMMED;}return join('',@A);} # sub CompactPerl __DATA__ Everything after this point will not get compacted. It gets written .... AS IS. No change. __END__ # This is the end of file and whatever comes after this point gets discarded forever..... ............

Replies are listed 'Best First'.
Re^2: Perl script compressor
by harangzsolt33 (Chaplain) on Dec 08, 2019 at 19:17 UTC
    Yes, but.... Have you tried to run it after perltidy? If you run it, it prints this:

    You could eliminate 261 bytes by compressing this script.

    In other words, my compressor script was able to eliminate another 261 bytes after perltidy did its job. So, I am not even finished with this script, yet it was already able to "outperform" perltidy by yielding a smaller code. :D

      If you run your program on itself ... you get
      FILE SIZE : 8688 bytes SHRUNK SIZE : 4021 bytes
      If you run perltidy on your program the result is 3,701 bytes.

        If you run your program on itself ... you get

        FILE SIZE : 8688 bytes SHRUNK SIZE : 4021 bytes
        If you run perltidy on your program the result is 3,701 bytes.

        And if you use state of the art compression algorithms, you get even smaller files, without ANY loss of information:

        /tmp>gzip -9 < 11109831.pl > 11109831.pl.gzip /tmp>bzip2 -9 < 11109831.pl > 11109831.pl.bz2 /tmp>lzma -9 < 11109831.pl > 11109831.pl.lzma /tmp>dir 11* -rw-r--r-- 1 alex users 8690 Dec 8 21:45 11109831.pl -rw-r--r-- 1 alex users 3125 Dec 8 21:46 11109831.pl.bz2 -rw-r--r-- 1 alex users 3160 Dec 8 21:46 11109831.pl.gzip -rw-r--r-- 1 alex users 3064 Dec 8 21:46 11109831.pl.lzma /tmp>

        Alexander

        --
        Today I will gladly share my knowledge and experience, for there are no sweeter words than "I told you so". ;-)
        Okay, I rewrote the whole thing from scratch. This one works much better because it also replaces long variable and sub names with short ones...

        Now my disclaimer is that I didn't spend too much time working on this, so it CAN and WILL break your script if it contains here documents and some other things. The resulting code works usually but not always. :P If you run this script, it creates a script called output.pl , and if you run that script, it will overwrite itself. But it runs without errors.

        And it is reducing perl code from 13,997 bytes to 4,750 bytes! That's less than half of the original. And we're not even using gzip here.

        #!/usr/bin/perl -w use strict; use warnings; my $SCRIPT = ReadFile(__FILE__); my $SHRUNK = CompressPerl($SCRIPT); # Compress script CreateFile('output.pl', $SHRUNK); my $ORIGINAL_SIZE = -s __FILE__; my $SHRUNK_SIZE = length($SHRUNK); my $DIFF = $ORIGINAL_SIZE - $SHRUNK_SIZE; print $SHRUNK, "\n"; print '^' x 78; print "\n\nFILE NAME : ", __FILE__; print "\nFILE SIZE : $ORIGINAL_SIZE bytes"; print "\nSHRUNK SIZE : $SHRUNK_SIZE bytes"; if ($SHRUNK_SIZE < $ORIGINAL_SIZE) { print "\n\nYou could eliminate $DIFF bytes by compressing this scrip +t.\n"; } exit; ################################################## # Takes a perl script string as input and returns # condensed perl code. # Usage: STRING = CompactPerl(STRING) # sub CompressPerl { defined $_[0] or return ''; my @LINES = split(/\n/, shift); my $c; my $p; my $MODE; my $START; my $NOSPACE; my $NAME; my $DATA = 0; my @VARS; my @OUTPUT; my $FIRST_LINE = lc($LINES[0]); if (substr($FIRST_LINE, 0, 15) eq '#!/usr/bin/perl') { $OUTPUT[0] = shift(@LINES) . "\n"; # Skip shebang } foreach my $CODE (@LINES) # Process line by line { $c = 0; $MODE = 0; $START = 0; $NOSPACE = ''; ################################### PROCESS EOF MARKER if (ExtractFirstWord($CODE) eq '__END__') { last; } ################################### PROCESS DATA SECTION if ($DATA) { push(@OUTPUT, "\n" . RTRIM($CODE)); next; } ############################################### # Process lines character by character for (my $i = 0; $i <= length($CODE); $i++) { $p = $c; # Store previous character # $NOSPACE is a look-back buffer that allows us to # check what came before the current character. # As the name suggests, $NOSPACE contains # no whitespace and no line breaks. if ($p > 32) { $NOSPACE .= chr($p); } $c = vec($CODE, $i, 8); # Get current character ################################### PROCESS STRING QUOTES if ($c == 34 || $c == 39) # 34 = " 39 = ' { # Catch beginning of quote if ($MODE == 0 && $p != 36) # Ignore $' { $MODE = $c; push(@OUTPUT, Compress(substr($CODE, $START, $i - $START))); $START = $i; next; } # Catch end of quote if ($MODE == $c && $p != 92) # Ignore \' { push(@OUTPUT, substr($CODE, $START, $i - $START + 1)); $START = $i + 1; $MODE = 0; next; } } if ($MODE == 34 || $MODE == 39) # Ignore everything between q +uotes { next; } ################################### PROCESS COMMENTS if ($c == 35) # 35 = # { if ($MODE == 0 && $p != 36) # Ignore $# { push(@OUTPUT, Compress(substr($CODE, $START, $i - $START))); $MODE = -1; last; # Ignore rest of the line } } ################################### PROCESS REGEX MATCH if ($p == 61 && $c == 126) # Look for =~ { if ($MODE == 0) # We skip lines with regex { $MODE = 126; push(@OUTPUT, Compress(substr($CODE, $START, $i - $START))); push(@OUTPUT, substr($CODE, $i) . "\n"); $CODE = ''; $START = 0; last; } } ################################### COLLECT SUB AND VARIABLE NAM +ES if (isName($c) && $MODE == 0) { if ($p == 36 || $p == 64) { push(@OUTPUT, Compress(substr($CODE, $START, $i - $START))); $START = $i; $MODE = 85; # Expect a variable name to follow next; } elsif (EndsWith($NOSPACE, 'sub')) { if ($p == 32 || $p == 9) # Previous char was space { push(@OUTPUT, Compress(substr($CODE, $START, $i - $START)) +); $START = $i; $MODE = 85; # Expect sub name to follow next; } } } if ($MODE == 85) # Capture sub or variable name { if (!isName($c)) { $NAME = Compress(substr($CODE, $START, $i - $START)); push(@OUTPUT, $NAME); push(@VARS, $NAME); $START = $i; $MODE = 0; next; } } ################################### DETECT __DATA__ SECTION if ($c == 0) # End of line? { if ($MODE == 0 && index(substr($CODE, $START), '__DATA__') >= +0) { push(@OUTPUT, Compress(substr($CODE, $START, $i - $START)) . + "\n"); push(@OUTPUT, substr($CODE, $i)); $CODE = ''; $DATA = 1; next; } } } # Store rest of the line. if ($MODE >= 0 && $START < length($CODE)) { push(@OUTPUT, Compress(substr($CODE, $START))); } } $DATA = MergeOutput(@OUTPUT); # Remove short names foreach (@VARS) { length($_) > 6 or $_ = ''; } # Remove duplicate names from list of subs and variables. @VARS = RemoveDuplicates(sort @VARS); # print join("\n", @VARS); # Try to replace sub names... my $i = 0; my $NEW_NAME; for ($a = 0; $a < 26; $a++) { for ($b = 0; $b < 26; $b++) { $NEW_NAME = chr($a + 97) . chr($b + 65); $i < @VARS or return $DATA; $DATA = Replace($DATA, $VARS[$i++], $NEW_NAME); } } } ################################################## # v2019.11.23 # Returns true if string A ends with string B, # otherwise returns false. This is case sensitive! # Usage: INTEGER = EndsWith(STRING_A, STRING_B) # sub EndsWith { defined $_[0] or return 0; defined $_[1] or return 1; my $LA = length($_[0]); my $LB = length($_[1]); $LB or return 1; $LA >= $LB or return 0; $_[1] eq substr($_[0], $LA - $LB); } ################################################## # v2019.12.8 # Same as the LTRIM$() function in QBASIC. # Removes whitespace from the left side of # string and returns a new string. # Usage STRING = LTRIM(STRING) # sub LTRIM { my $S = defined $_[0] ? $_[0] : ''; $S =~ s/^[\s\r\n\0-\x1F]*//g; return $S; } ################################################## # v2019.12.8 # Same as the RTRIM$() function in QBASIC. # Removes whitespace from the right side of # string and returns a new string. # Usage STRING = RTRIM(STRING) # sub RTRIM { my $S = defined $_[0] ? $_[0] : ''; $S =~ s/[\s\r\n\0-\x1F]*$//g; return $S; } ################################################## # v2019.8.25 # Removes whitespace from before and after STRING. # Whitespace is here defined as any byte whose # ASCII value is less than 33. That includes # tabs, esc, null, vertical tab, new lines, etc. # Usage: STRING = Trim(STRING) # sub Trim { my $S = defined $_[0] ? $_[0] : ''; $S =~ s/^[\s\r\n\0-\x1F]*|[\s\r\n\0-\x1F]*$//g; return $S; } ################################################## # This function compresses a portion of code that # does not contain any quotes, regex, or comments. # Usage: STRING = Compress(STRING) # sub Compress { my $S = defined $_[0] ? $_[0] : ''; $S =~ s/\s+/ /g; # Reduce large gaps $S = TrimOperators(Trim($S)); return $S; } ################################################## # This function trims whitespace from # before and after a list of perl operators. # Usage: STRING = TrimOperators(STRING) # sub TrimOperators { my $S = defined $_[0] ? $_[0] : ''; my $c; my $OP = '+-,.|&*/<>?:;!%^()[]{}=$@'; for (my $i = 0; $i < length($OP); $i++) { $c = substr($OP, $i, 1); $S =~ s/\s*\Q$c\E\s*/$c/g; } return $S; } ################################################## # v2019.12.8 # Returns the first word from a string which may # start with whitespace or new line characters. # Usage: STRING = ExtractFirstWord(STRING) # sub ExtractFirstWord { defined $_[0] or return ''; my ($i, $P, $L) = (-1, -1, length($_[0])); $L or return ''; while (++$i < $L) { if (vec($_[0], $i, 8) > 32) { $P >= 0 or $P = $i; } else { $P < 0 or last; } } return substr($_[0], $P, $i - $P); } ################################################## # # Merges two adjoining elements of an array with # a space in between if the two are both letters. # Merge with no space if... # Usage: STRING = MergeOutput(ARRAY) # sub MergeOutput { @_ or return ''; my @A = @_; my $OUTPUT = ' '; # Just a placeholder my $ENDS_WITH_NAME; my $STARTS_WITH_NAME; for (my $i = 0; $i < @A; $i++) { # Take last character of previous element $ENDS_WITH_NAME = isName(vec($OUTPUT, length($OUTPUT) - 1, 8)); # Take the first character of the next element $STARTS_WITH_NAME = isName(vec($A[$i], 0, 8)); # If a line ends with a letter and the next line starts with # the word 'or' then we need to insert a space between the two # in order to keep the code functional. if ($ENDS_WITH_NAME && $STARTS_WITH_NAME) { $OUTPUT .= ' '; } $OUTPUT .= $A[$i]; } return substr($OUTPUT, 1); # Remove initial space } ################################################## # Tests if the ASCII character code is of a name. # A name may consist of letters and underscore. # Usage: INTEGER = isName(ASCII_CODE) # sub isName { defined $_[0] or return 0; $_[0] > 64 or return 0; $_[0] < 123 or return 0; return ($_[0] < 91 || $_[0] == 95 || $_[0] > 96) ? 1 : 0; } ################################################## # # This function reads the entire contents of a file # in binary mode and returns it as a string. If an # errors occur, an empty string is returned silently. # A second argument will move the file pointer before # reading. And a third argument limits the number # of bytes to read. # Usage: STRING = ReadFile(FILENAME, [START, [LENGTH]]) # sub ReadFile { my $NAME = defined $_[0] ? $_[0] : ''; $NAME =~ tr/\"\0*?|<>//d; # Remove special characters -e $NAME or return ''; -f $NAME or return ''; my $SIZE = -s $NAME; $SIZE or return ''; my $LEN = defined $_[2] ? $_[2] : $SIZE; $LEN > 0 or return ''; local *FH; sysopen(FH, $NAME, 0) or return ''; binmode FH; my $POS = defined $_[1] ? $_[1] : 0; $POS < $SIZE or return ''; $POS < 1 or sysseek(FH, 0, $POS); # Move file ptr my $DATA = ''; sysread(FH, $DATA, $LEN); # Read file close FH; return $DATA; } ################################################## # # Creates and overwrites a file in binary mode. # Returns 1 on success or 0 if something went wrong. # Usage: INTEGER = CreateFile(FILE_NAME, CONTENT) # sub CreateFile { defined $_[0] or return 0; my $F = $_[0]; $F =~ tr#\"\0*?|<>##d; # Remove special characters length($F) or return 0; local *FH; open(FH, ">$F") or return 0; binmode FH; if (defined $_[1] ? length($_[1]) : 0) { print FH $_[1]; } close FH or return 0; return 1; } ################################################## # v2019.12.7 # This function scans string S and replaces the # first N occurrences of string A with string B # and returns a new string. If N is -1 then only # the last instance is replaced. # Usage: STRING = Replace(STRING_S, STRING_A, [STRING_B, [N]]) # sub Replace { # First, we make sure that required arguments are available # and any special scenarios are handled correctly. defined $_[0] or return ''; # Missing arguments? defined $_[1] or return $_[0]; # Missing arguments? my $B = defined $_[2] ? $_[2] : ''; # Replace to --> $B my $N = defined $_[3] ? $_[3] : 0x7FFFFFFF; # Get $N my ($LA, $LB) = (length($_[1]), length($B)); # Get string lengths # The search string must not be an empty string, or we exit. # The string that we search for must not be longer than # the string in which we search. ($N && $LA && $LA <= length($_[0])) or return $_[0]; my ($LAST, $F, $X) = (0, 0, $_[0]); if ($N > 0x7FFFFFFE) { # If N was not provided, then that means we have to # replace every instance, so we'll use regex... my $A = $_[1]; $X =~ s/\Q$A\E/$B/g; return $X; } if ($N < 0) { # If we get here, we must not replace every # instance, and we must go from right to left. $F = length($X); while (($F = rindex($X, $_[1], $F)) >= 0) { substr($X, $F, $LA) = $B; ++$N or last; } return $X; } if ($LA == $LB) { # In this case, output string will be the # same length as the input string. # We must not replace every instance, # and we search from left to right. while (($F = index($X, $_[1], $F)) >= 0) { substr($X, $F, $LA) = $B; $F += $LB; --$N or last; } return $X; } # In this final scenario, the output string will # NOT be the same length as the input string. # We must not replace every instance, # and we search from left to right. # For performance reasons, we build a new string. $X = ''; while (($F = index($_[0], $_[1], $F)) >= 0) { $X .= substr($_[0], $LAST, $F - $LAST); $X .= $B; $F += $LA; $LAST = $F; --$N or last; } return $X . substr($_[0], $LAST); } ################################################## sub RemoveDuplicates { my %seen; grep !$seen{$_}++, @_; } ################################################## __DATA__ _ A B C DATA SECTION | _______________ __END__ # End of file END OF FILE