#!/usr/bin/perl -- print "Content-Type: text/html\n\n"; print ""; print ""; print ""; &pair_split; # Results can be manipulated here print ""; exit; # Parse input -- Use POST method sub pair_split{ read(STDIN, $buffer, $ENV{'CONTENT_LENGTH'}); @pairs = split(/&/, $buffer); foreach $pair (@pairs) { local($name, $value) = split(/=/, $pair); if ($name =~ /\%/) {$strng = $name; &xxutf; $name = $sbuf;} if ($value =~ /\%/) {$strng = $value; &xxutf; $value = $sbuf;} $name =~ tr/+/ /; $name =~ tr/\0//d; $value =~ tr/+/ /; $value =~ tr/\0//d; $$name = $value; # Assign values to $names } } # Decode XX-encoded letters to UTF-8 sub xxutf { $l = length($strng); $ch = -1; $b = 0; $sumb = 0; for ($i = 0, $more = -1; $i < $l; $i++) { # Get next byte b from URL segment strng $ch = substr $strng, $i, 1; if ($ch eq '%') { $i++; $ch = substr $strng, $i, 1; $hb = ($ch =~ /[0-9]/) ? $ch - '0' : 10+(ord(lc$ch) - ord('a')) & 15; $i++; $ch = substr $strng, $i, 1; $lb = ($ch =~ /[0-9]/) ? $ch - '0' : 10+(ord(lc$ch) - ord('a')) & 15; $b = ($hb << 4) | $lb; } elsif ($ch eq '+') { $b = ' '; } else {$b = $ch}; # Decode byte b as UTF-8, sumb collects incomplete chars if (($b & 192) == 128) { # 10xxxxxx (continuation byte) $sumb = ($sumb << 6) | ($b & 63); # Add 6 bits to sumb if (--$more == 0) { $sumb = "&#" . $sumb; # Create UTF-8 encoding $sumb = $sumb . ";"; $sbuf = $sbuf . $sumb; # Add char to sbuf } } elsif (($b & 128) == 0) { # 0xxxxxxx (yields 7 bits) $sbuf = $sbuf . $b; # Store in sbuf } elsif (($b & 224) == 192) { # 110xxxxx (yields 5 bits) $sumb = $b & 31; $more = 1; # Expect 1 more byte } elsif (($b & 240) == 224) { # 1110xxxx (yields 4 bits) $sumb = $b & 15; $more = 2; # Expect 2 more bytes } elsif (($b & 248) == 240) { # 11110xxx (yields 3 bits) $sumb = $b & 7; $more = 3; # Expect 3 more bytes } elsif (($b & 252) == 248) { # 111110xx (yields 2 bits) $sumb = $b & 3; $more = 4; # Expect 4 more bytes } else { # if ((b & 0xfe) == 0xfc) $sumb = $b & 1; # 1111110x (yields 1 bit) $more = 5; # Expect 5 more bytes } # We don't test if the UTF-8 encoding is well-formed } }