#!perl
#
# perl2html
# 2007 Markus Holzer
#
# script for convertin Perl-Sourcecode to Html with
# syntax-highlighting
#
# call with sourcefile as first parameter
# if the second parameter is true, output contains line numbers
#
# never leave home without it
use warnings;
use strict;
use Data::Dumper;
# we need this to escape special html chars
use HTML::Entities;
# performs the config parsing from the DATA-section
use Config::IniFiles;
my $cfg = Config::IniFiles->new( -file => *DATA );
unless ( $cfg )
{
die join ("\n", @Config::IniFiles::errors) . "\n";
}
# performs the parsing of the sourcecode
use PPI;
# open the sourcefile
my $document = PPI::Document->new( $ARGV[0] );
#print Dumper ($document);
# see if we need to output line_numbers
my $line_number;
my $line_numbers;
my $line_number_format;
if ( $ARGV[1] )
{
open IN, $ARGV[0] or die "cannot count line numbers\n";
while () { $line_numbers++ };
close IN;
$line_number_format = "%0".length($line_numbers)."d";
}
unless ( $document )
{
die "Dokument could not be parsed\n";
}
# counter for skipping elements after a heredoc
my $skip_next;
# Routinen für die Behandlung der Spezialfälle
my %dispatch =
(
# ugly but necessary
# when a heredoc is parsed the elemtens after <<"FOO"
# and until the semicolon are siblings of the herdoc-element
# for the output to be correct we have to preread those elements
'Token::HereDoc' => sub
{
my ( $section, $node ) = @_;
#<<"HERE"
my $content = $node->{content};
# inner text of the heredoc
my $text = join "", @{$node->{_heredoc}};
#HERE
my $terminator = $node->{_terminator_line};
chomp $terminator;
my $skip = 0;
print encode($content);
# preread until we find the semicolon after <<"HERE"
while ( 1 )
{
$skip++;
$node = $node->next_sibling;
process_node ($node,1);
last
if $node->{content} eq ";"
}
print "
", encode("$text$terminator");
$skip_next = $skip;
}
);
print join ("\n", @{$cfg->{v}->{Html}->{header}}), "\n";
$line_number++,
print line_number($line_number)
if $line_numbers;
process_node($document);
print "\n", join ("\n", @{$cfg->{v}->{Html}->{footer}});
sub process_node
{
my $node = shift;
my $self = shift;
my @list;
if ( $self )
{
@list = ($node);
}
else
{
@list = @{$node->{children}}
if $node->{children};
}
for my $child ( @list )
{
if ( $skip_next )
{
$skip_next--;
next;
}
# what have we got here?
my $class = ref( $child );
$class =~ s/^PPI:://;
# opening element?
if ( $child->{start} )
{
out( "brackets", $child, "start");
}
# do we have a special case for this element?
if ( $dispatch{$class} )
{
# yes. use the dispatcher
$dispatch{$class}->( $class, $child );
}
else
{
# no, so standard ouptput
out ( $class, $child );
}
# process the children
process_node( $child );
# closing element?
if ( $child->{finish} )
{
out( "brackets", $child, "finish");
}
}
}
# Outputs an element as Html aus und formats it
# using the styles from the configuration
sub out
{
my ( $section, $node, $index ) = @_;
my $content;
# Index is true for opening and closing elements
# so we can use the correct bracket
# otherwise we use the elements text value
if ( $index )
{
$content = $node->{$index}->{content} || "";
}
else
{
# Text des Elements ausgeben
$content = $node->{content} || "";
}
$content = encode($content);
# is there a style for this element?
if ( $cfg->{v}->{$section} )
{
my $class = $cfg->{v}->{$section}->{ $content } ||
$cfg->{v}->{$section}->{ '*default' } ||
"";
print "$content";
}
else
{
print $content;
}
}
# encodes html entities and converts whitespace
sub encode
{
my $content = encode_entities(shift);
$content =~ s^ ^ ^msg;
if ( $line_numbers )
{
while ( $content =~ /\n/ )
{
$line_number++;
my $ln = line_number($line_number);
$content =~ s^\n^
$ln^ms;
}
}
else
{
$content =~ s^\n^
^msg;
}
$content =~ s^\t^ ^msg;
return $content;
}
sub line_number
{
my $ln = sprintf($line_number_format, shift);
return '$ln: ";
}
__DATA__
[brackets]
*default = color:teal;
[Token::Symbol]
*default = color:maroon;
[Token::ArrayIndex]
*default = color:maroon;
[Token::Cast]
*default = color:maroon;
[Token::Magic]
*default = color:maroon;
[Token::Quote::Double]
*default = color:purple;
[Token::Quote::Interpolate]
*default = color:purple;
[Token::Quote::Literal]
*default = color:purple;
[Token::Quote::Single]
*default = color:purple;
[Token::QuoteLike]
*default = color:purple;
[Token::QuoteLike::Backtick]
*default = color:purple;
[Token::QuoteLike::Command]
*default = color:purple;
[Token::QuoteLike::Readline]
*default = color:purple;
[Token::QuoteLike::Regexp]
*default = color:purple;
[Token::QuoteLike::Words]
*default = color:purple;
[Token::DashedWord]
*default = color:purple;
[Token::HereDoc]
*default = color:purple;
[Token::Data]
*default = color:purple;
[Token::Separator]
*default = color:purple;
[Token::End]
*default = color:purple;
[Token::Number]
*default = color:purple;
[Token::Structure]
*default = color:black;
[Token::Operator]
*default = color:lime;
and = color:lime;
cmp = color:lime;
eq = color:lime;
le = color:lime;
ge = color:lime;
ne = color:lime;
or = color:lime;
q = color:lime;
qq = color:lime;
qw = color:lime;
qx = color:lime;
tr = color:lime;
xor = color:lime;
[Token::Word]
*default = color:black;
AUTOLOAD = color:yellow;
BEGIN = color:yellow;
CORE = color:yellow;
DESTROY = color:yellow;
END = color:yellow;
STDERR = color:gray;
STDIN = color:gray;
STDOUT = color:gray;
local = color:blue;
my = color:blue;
package = color:blue;
return = color:blue;
sub = color:blue;
use = color:blue;
require = color:blue;
do = color:blue;
else = color:blue;
elsif = color:blue;
foreach = color:blue;
for = color:blue;
if = color:blue;
unless = color:blue;
until = color:blue;
continue = color:blue;
while = color:blue;
goto = color:blue;
import = color:blue;
last = color:blue;
next = color:blue;
no = color:blue;
our = color:blue;
redo = color:blue;
bless = color:red;
close = color:red;
closedir = color:red;
die = color:red;
eval = color:red;
exit = color:red;
grep = color:red;
map = color:red;
open = color:red;
opendir = color:red;
print = color:red;
splice = color:red;
split = color:red;
sysopen = color:red;
warn = color:red;
each = color:red;
values = color:red;
accept = color:red;
alarm = color:red;
atan2 = color:red;
bind = color:red;
binmode = color:red;
caller = color:red;
chdir = color:red;
chmod = color:red;
chomp = color:red;
chop = color:red;
chown = color:red;
chr = color:red;
chroot = color:red;
abs = color:red;
connect = color:red;
cos = color:red;
crypt = color:red;
dbmclose = color:red;
dbmopen = color:red;
defined = color:red;
delete = color:red;
dump = color:red;
endgrent = color:red;
endhostent = color:red;
endnetent = color:red;
endpwent = color:red;
endservent = color:red;
eof = color:red;
exec = color:red;
exists = color:red;
exp = color:red;
fcntl = color:red;
fileno = color:red;
flock = color:red;
fork = color:red;
format = color:red;
formline = color:red;
getc = color:red;
getgrent = color:red;
getgrgid = color:red;
getgrnam = color:red;
gethostbyaddr = color:red;
gethostbyname = color:red;
gethostent = color:red;
getlogin = color:red;
getnetbyaddr = color:red;
getnetbyname = color:red;
getnetent = color:red;
getpeername = color:red;
getpgrp = color:red;
getppid = color:red;
getpriority = color:red;
getprotobyname = color:red;
getprotobynumber = color:red;
getprotoent = color:red;
getpwent = color:red;
getpwnam = color:red;
getpwuid = color:red;
getservbyname = color:red;
getservbyport = color:red;
getservent = color:red;
getsockname = color:red;
getsockopt = color:red;
glob = color:red;
gmtime = color:red;
hex = color:red;
index = color:red;
int = color:red;
ioctl = color:red;
join = color:red;
keys = color:red;
kill = color:red;
lc = color:red;
lcfirst = color:red;
length = color:red;
link = color:red;
listen = color:red;
localtime = color:red;
log = color:red;
lstat = color:red;
mkdir = color:red;
msgctl = color:red;
msgget = color:red;
msgrcv = color:red;
msgsnd = color:red;
new = color:red;
oct = color:red;
ord = color:red;
pack = color:red;
pipe = color:red;
pop = color:red;
pos = color:red;
quotemeta = color:red;
read = color:red;
readdir = color:red;
readline = color:red;
readlink = color:red;
readpipe = color:red;
recv = color:red;
ref = color:red;
rename = color:red;
reset = color:red;
reverse = color:red;
rewinddir = color:red;
rindex = color:red;
rmdir = color:red;
scalar = color:red;
seek = color:red;
seekdir = color:red;
select = color:red;
semctl = color:red;
semget = color:red;
semop = color:red;
send = color:red;
setgrent = color:red;
sethostent = color:red;
setnetent = color:red;
setpgrp = color:red;
setpriority = color:red;
setprotoent = color:red;
setpwent = color:red;
setservent = color:red;
setsockopt = color:red;
shmctl = color:red;
shmget = color:red;
shmread = color:red;
shmwrite = color:red;
shutdown = color:red;
sin = color:red;
sleep = color:red;
socket = color:red;
socketpair = color:red;
sort = color:red;
sprintf = color:red;
printf = color:red;
sqrt = color:red;
srand = color:red;
rand = color:red;
stat = color:red;
shift = color:red;
study = color:red;
substr = color:red;
symlink = color:red;
syscall = color:red;
sysread = color:red;
system = color:red;
syswrite = color:red;
tell = color:red;
telldir = color:red;
time = color:red;
times = color:red;
truncate = color:red;
uc = color:red;
ucfirst = color:red;
umask = color:red;
undef = color:red;
unlink = color:red;
unpack = color:red;
unshift = color:red;
untie = color:red;
utime = color:red;
vec = color:red;
wait = color:red;
waitpid = color:red;
wantarray = color:red;
write = color:red;
[Token::Attribute]
*default = color:black;
[Token::Comment]
*default = color:green;
[Token::Label]
*default = color:black;
[Token::Pod]
*default = color:silver;
[Token::Prototype]
*default = color:navy;
[Token::Regexp::Match]
*default = color:olive;
[Token::Regexp::Substitute]
*default = color:olive;
[Token::Regexp::Transliterate]
*default = color:olive;
[LineNumber]
*default = color:olive;
[Html]
header=<