#!/usr/bin/perl -w
use strict;    # Don't you dare write code without this line and the -w switch

# Rewrites, in place, the file named on the command line:
#   * HTML character codes (&lt; &gt; &amp; &quot; &#91; &#93; ...) are
#     turned back into the characters they stand for, and
#   * array subscripts that were posted as links (an anchor tag wrapped
#     around a bare number) are turned back into [N].
#
# Usage: perl <this script> FILENAME

# HTML character codes and the literal text each one stands for.  To
# translate further codes (e.g. &eacute; to an accented e) just add them
# here: the matching regex below is generated from these keys.
my %charcodes = (
    '&#91;'  => '[',
    '&#93;'  => ']',
    '&#091;' => '[',
    '&#093;' => ']',
    '&quot;' => '"',
    '&lt;'   => '<',
    '&gt;'   => '>',
    '&amp;'  => '&',
);

# You might think that &.*; could be used to match a character code.
# That fails for two reasons:
#   1. We might have a sub call which is identified with an ampersand.
#   2. If there is more than one semicolon after the ampersand, the regex
#      will be "greedy" and run out to the rightmost semicolon.
# Matching an alternation of exactly the known codes avoids both problems
# and never touches text we have no translation for, so no "substitute it
# back to itself" fallback is needed.  quotemeta protects the '#'.
my $charcode_alternation = join '|', map { quotemeta } sort keys %charcodes;
my $charcode_re          = qr/($charcode_alternation)/;

# fix_line($line)
# Returns a copy of $line with every known character code decoded and
# every digit-only anchor replaced by a bracketed subscript.  The input is
# not modified.
sub fix_line {
    my ($line) = @_;

    # One left-to-right pass with /g: text produced by a replacement is
    # not rescanned, so "&amp;lt;" becomes "&lt;" and not "<".
    $line =~ s/$charcode_re/$charcodes{$1}/g;

    # Corrects for URL expansion of code like $some_hash_var[0], which
    # gets posted with <A HREF></A> tags rather than <CODE></CODE> tags.
    # Don't use it for other URL substitutions: it only recognises an
    # anchor whose entire content is a number.
    # NOTE(review): the original PerlMonks-specific href pattern was not
    # recoverable; this matches any anchor's attributes -- confirm.
    $line =~ s{<a\s[^>]*>(\d+)</a>}{[$1]}gi;

    return $line;
}

# main($filename)
# Opens $filename in update mode, fixes every line, writes the result back
# from the start of the file and truncates whatever is left of the old
# content.  Dies with a message on any I/O failure.
sub main {
    my ($filename) = @_;

    die "Usage: $0 filename\n"
        unless defined $filename && length $filename;

    # Using '+<' to open the file in update mode.  Three-argument open
    # keeps odd characters in the filename from being read as a mode.
    open(my $fh, '+<', $filename)
        or die "Can't open $filename in update mode: $!\n";

    # Reading the entire file into the array, fixing each line on the way.
    my @program_line = map { fix_line($_) } <$fh>;

    # Go back to start of file
    seek($fh, 0, 0) or die("Seek failed on $filename: $!\n");
    print {$fh} @program_line or die("Print failed on $filename: $!\n");

    # truncate the file so we don't have excess garbage at the end
    truncate($fh, tell($fh)) or die("Truncate failed on $filename: $!\n");
    close($fh) or die("Close failed on $filename: $!\n");

    return;
}

# Run only when executed as a script, so that fix_line can be loaded and
# exercised on its own.
main(@ARGV) unless caller;

1;