#!/usr/bin/perl -w

use strict;  # Don't you dare write code without this line and the -w switch

# Purpose: rewrite the file named on the command line in place, turning the
# HTML character entities listed in %charcodes back into literal characters
# and undoing the link expansion of numeric array subscripts.
#
# Usage: script filename

# Map of HTML character entities to the literal characters they stand for.
my %charcodes = ( "&#091;" => "[",
                  "&#093;" => "]",
                  "&#91;"  => "[",
                  "&#93;"  => "]",
                  "&quot;" => "\"",
                  "&lt;"   => "<",
                  "&gt;"   => ">",
                  "&amp;"  => "&"
                );

# The file to fix up is the first command-line argument.
my $filename = shift @ARGV;
die "Usage: $0 filename\n" unless defined $filename;

# Using '+<' to open the file in update mode (read first, then rewrite).
# The three-argument form keeps the mode separate from the filename, so a
# name containing mode characters or surrounding whitespace is taken
# literally.

open(my $fh, '+<', $filename) or die "Can't open $filename in update mode: $!\n";

# Reading the entire file into the array

my @program_line = <$fh>;

foreach my $line (@program_line) {
	# The regex below might confuse some people new to perl,
	# so I'll do some explaining here.
	# You might think that I could use &.*; to match a hash key.
	# This fails for two reasons:
	#   1.  We might have a sub which is identified with an ampersand
	#   2.  If there is more than one semicolon after the ampersand,
	#       the regex will be "greedy" and will include the
	#       rightmost semicolon.  We can use &.*?; to try to force
	#       the regex to be lazy, but this could involve a lot of
	#       backtracking and make the regex less efficient.
	# &[^;&]{2,6}; is a good regex.  The negated character class guarantees
	# that we will only match 2 to 6 characters after the ampersand that
	# are neither semicolons nor ampersands (and we go out to six
	# characters in case this script is upgraded to translate things like
	# &eacute; to its accented character).
	# Excluding '&' from the class matters: without it, a stray ampersand
	# shortly before a real entity (e.g. "&x &lt;") would be matched as one
	# unknown "entity" and the real one would be skipped.

	# The right side of this substitution uses the ternary operator
	# ($x = ($a > $b) ? $a : $c) to substitute the hash value of the
	# character code if such a hash value exists; otherwise it substitutes
	# $1 back to itself.  This is not the most efficient way of doing this
	# as we have a null substitution, but it works.

	# The /e modifier makes the ternary operator executable.
	# The /g modifier makes the regex global (i.e. we will modify every
	# character code on a single line).

	$line =~ s/(&[^;&]{2,6};)/exists $charcodes{$1} ? $charcodes{$1} : $1/eg;

	# The following code will correct for URL expansion of code like
	# $some_hash_var[0] which gets posted with <PRE> tags rather
	# than <CODE> tags.  Don't use it for other URL substitutions
	# because it relies on Perlmonks specific syntax.
	# Only the link text (the digits) is captured; the anchor tag itself
	# is discarded.

	$line =~ s/<a href="[^"]+">(\d+)<\/a>/[$1]/g;
}

# Go back to start of file

seek($fh, 0, 0) or die "Seek failed on $filename: $!\n";
print {$fh} @program_line or die "Print failed on $filename: $!\n";

# Truncate the file at the current position so that, when the rewritten
# text is shorter than the original, no stale bytes remain at the end.

truncate($fh, tell($fh)) or die "Truncate failed on $filename: $!\n";
close($fh) or die "Close failed on $filename: $!\n";