#!/usr/bin/perl -w

use strict;
use warnings;

# Demo: decode a sample C-style string and hex-dump the result.
# The demo is skipped when this file is loaded with require/do, so the
# functions below can be reused from other scripts. No explicit exit is
# needed: only sub definitions follow, so the program ends right after
# the demo when the file is run directly.
unless (caller) {
    my $STRING = '\r\n\t\tHello\tWorld\uFF00\\0\\n\\n\\!!\\"--->\\xf3\\"abc\\x4\\2\\8\\2';

    print "\n\n$STRING\n\n";

    $STRING = DecodeCString($STRING);

    HexDump($STRING);
}

##################################################
# String | v2022.12.31
# This function interprets a C style string and
# returns its value. A C string may contain escape
# sequences such as \r \n \t \0 \xFF \" and so on.
# This function decodes these escape sequences and
# returns the resulting string.
#
# Recognized escapes:
#   \a \b \t \n \v \f \r   control codes 7..13
#   \e                     ESC (0x1B)
#   \N \NN \NNN            1-3 octal digits -> one byte
#                          (values above 0377 keep
#                          only their low 8 bits)
#   \xH \xHH               1-2 hex digits -> one byte
#   \uH .. \uHHHH          1-4 hex digits -> 2 bytes,
#                          big-endian
#   \UH .. \UHHHHHHHH      1-8 hex digits -> 4 bytes,
#                          big-endian
#   \<any other char>      that character itself
#
# If an incomplete escape sequence is found, it
# will be ignored. For example, in "\x=Z" the
# "\x" should be followed by a hexadecimal number.
# In this case, "\x" will be ignored and "=Z"
# will be written to the output. A single backslash
# at the very end of the string is dropped as well.
# No error messages or warnings are produced.
#
# An undefined argument yields an empty string.
#
# Usage: STRING = DecodeCString(STRING)
#
sub DecodeCString {
    defined $_[0] or return '';

    # Work on a copy: $_[0] is an alias for the caller's variable and
    # must not be modified.
    my $text = $_[0];

    # One pass over every backslash. The digit groups are greedy, so
    # each escape takes as many valid digits as its kind allows. The
    # replacement text is never rescanned.
    $text =~ s{
        \\                            # an escape starts here
        (?:
            ( [0-7]{1,3} )            # group 1: octal digits
          | x ( [0-9A-Fa-f]{1,2} )    # group 2: hex digits of an x escape
          | u ( [0-9A-Fa-f]{1,4} )    # group 3: hex digits of a u escape
          | U ( [0-9A-Fa-f]{1,8} )    # group 4: hex digits of a U escape
          | ( . )                     # group 5: any other single character
        )?                            # nothing left: lone trailing backslash
    }{
        _DecodeEscape($1, $2, $3, $4, $5)
    }gsex;

    return $text;
}

# Private helper for DecodeCString: turns the captured parts of a single
# escape sequence into its replacement text. At most one argument is
# defined; when none is, the escape was a lone trailing backslash.
sub _DecodeEscape {
    my ($octal, $hex8, $hex16, $hex32, $char) = @_;

    # Mask octal values so that \400..\777 wrap to a byte silently
    # instead of triggering a "wrapped in pack" warning.
    return pack('C', oct($octal) & 0xFF) if defined $octal;
    return pack('C', hex($hex8))         if defined $hex8;
    return pack('n', hex($hex16))        if defined $hex16;
    return pack('N', hex($hex32))        if defined $hex32;
    return ''                            if !defined $char;

    my $pos = index('abtnvfrexuU', $char);
    return $char         if $pos < 0;     # unknown escape: keep the character
    return chr(7 + $pos) if $pos < 7;     # abtnvfr map to codes 7..13 in order
    return "\x1B"        if $pos == 7;    # e is ESC
    return '';                            # x, u or U without any hex digit
}

##################################################
# String | v2022.11.14
# This function prints the contents of a string
# in hexadecimal format and plain text along with
# the address, 16 bytes per row. Bytes outside the
# printable ASCII range (32..126) are shown as a
# dot in the text column. A second argument may be
# provided to limit the number of bytes to be
# printed; it is clamped to the range 0..length.
#
# Output goes to the currently selected handle,
# which is flushed after every row. The handle's
# autoflush setting is restored on return.
#
# Returns 0 if STRING is undefined, otherwise 1.
#
# Usage: HexDump(STRING, [LIMIT])
#
sub HexDump {
    defined $_[0] or return 0;

    my $length = length($_[0]);
    my $limit  = defined $_[1] ? $_[1] : $length;
    if ($limit > $length) { $limit = $length; }
    if ($limit < 0)       { $limit = 0; }

    # Autoflush only for the duration of the dump.
    local $| = 1;

    for (my $offset = 0; $offset < $limit; $offset += 16) {
        my $count = $limit - $offset;
        if ($count > 16) { $count = 16; }
        my $chunk = substr($_[0], $offset, $count);

        # Hex column: "XX XX ...", with a dash instead of a space
        # between the 8th and 9th byte (column 23 of this field).
        my $hex = join(' ', map { sprintf('%0.2X', $_) } unpack('C*', $chunk));
        if ($count > 8) { substr($hex, 23, 1) = '-'; }

        # Text column: everything outside 0x20..0x7E becomes a dot.
        (my $shown = $chunk) =~ tr/\x20-\x7E/./c;

        # Every row is exactly 80 characters including the leading
        # newline; short rows are padded with spaces.
        printf("\n %0.8X:  %-48s  %-16s ", $offset, $hex, $shown);
    }

    print "\n";
    return 1;
}

##################################################

# True value so the file can be loaded with require.
1;