##
sub format {
my $self = shift;
my $source = shift;
$previndent = 0;
$prevtype = 0;
@indentstack = (0);
@rulestack = (0); # prime the pump with the null ruleset
html();
foreach ( split("\n", $source) ) {
($indent, $type, $line, $info) = classify_line($_);
$indent = $previndent if $type == $BLANK;
if ( not maybe_new_scope() ) {
while ( maybe_exit_scope() ) {
}
maybe_new_scope();
}
maybe_emit_markup();
emit($self->markup($line));
$previndent = $indent;
$prevtype = $type;
}
exit_scopes();
my $html = html();
return cleanup_html($html);
}
####
sub classify_line { # return (indent, type, modified_line, info)
local $_ = shift;
s/^(\s*)$// && return (length($1), $BLANK, $_, undef);
s/^(\s*\*\s+)// && return (length($1), $BULLET, $_, undef);
s/^(\s*(\d+)\.\s+)// && return (length($1), $NUMBER, $_, $2);
s/^(\s*)// && return (length($1), $PLAIN, $_, undef);
die "classify_line:\n\t" . $_ . "\n";
}
####
sub maybe_new_scope {
# if the "enter new scope" rule for the current scope fires then
# find a scope whose "can enter" fires
# push the scope
# fire the "on enter" rule for the new scope
my $ruleset = $rulesets{$rulestack[-1]};
my $enter_rule = $ruleset->{maybe_push_scope};
if ( &$enter_rule() ) {
print "$rulestack[-1].enter_new_scope true\n" if $DEBUG;
foreach my $rulesetname ( keys %rulesets ) {
$ruleset = $rulesets{$rulesetname};
my $can_enter_rule = $ruleset->{can_enter};
if ( &$can_enter_rule() ) {
print "$rulesetname.can_enter true\n" if $DEBUG;
my $on_enter_rule = $ruleset->{on_enter};
&$on_enter_rule();
push @rulestack, $rulesetname;
push @indentstack, $indent;
}
}
}
return 0;
}
sub maybe_exit_scope {
# if the "exit scope" rule for the current scope fires then
# fire the "on exit" rule for the current scope
# (pop)
my $ruleset = $rulesets{$rulestack[-1]};
my $maybe_exit_rule = $ruleset->{maybe_exit_scope};
if ( &$maybe_exit_rule() ) {
print "$rulestack[-1].maybe_exit_scope true\n" if $DEBUG;
my $on_exit = $ruleset->{on_exit};
&$on_exit();
$prevtype = 0; # reprime the pump for maybe_emit_markup
pop @rulestack;
pop @indentstack;
return 1;
}
return 0;
}
####
my %rulesets = (
# The "null" ruleset exists to force us into an initial scope
0 => {
can_enter =>
sub { },
on_enter =>
sub { },
maybe_emit_markup =>
sub { },
maybe_push_scope =>
sub { 1 },
maybe_exit_scope =>
sub { },
on_exit =>
sub { }
},
# The "Plain" (P) ruleset handles normal, unindented paragraphs
P => {
can_enter =>
sub { $indent == 0 },
on_enter =>
sub { },
maybe_emit_markup =>
sub {
if ( $type == $BLANK ) {
emit("
\n") if $prevtype == $BLANK;
}
elsif ( $type == $PLAIN ) {
emit("") if $prevtype != $PLAIN;
}
},
maybe_push_scope =>
sub { $indent > 0 },
maybe_exit_scope =>
sub { 0 },
on_exit =>
sub { }
},
# The "BQ" (Blockquote) ruleset handles indented paragraphs
BQ => {
can_enter =>
sub {
$type == $PLAIN &&
$indent > $indentstack[-1] &&
($indent - $indentstack[-1]) < 8 # hack for
},
on_enter =>
sub {
# emit("") if $prevtype == $BLANK;
emit("
\n");
$prevtype = 0; # prime pump for maybe_emit_markup
},
maybe_emit_markup =>
sub {
if ( $type == $BLANK ) {
# emit("
\n") if $prevtype == $BLANK;
}
elsif ( $type == $PLAIN ) {
emit("") if $prevtype == $BLANK; #TBD && $prevtype != 0;
}
},
maybe_push_scope =>
sub { $indent > $indentstack[-1] }, #TBD fix me for
maybe_exit_scope =>
sub {
$type == $BULLET ||
$type == $NUMBER ||
$indent < $indentstack[-1]
},
on_exit =>
sub { emit("
\n") }
},
# The "UL" (Unordered List) ruleset handles bullet lists
UL => {
can_enter =>
sub { $type == $BULLET &&
$indent > $indentstack[-1] &&
($indent - $indentstack[-1]) < 8 # hack for
},
on_enter =>
sub { emit("\n") },
maybe_emit_markup =>
sub {
emit("- ") if $type == $BULLET;
if ( $type == $BLANK ) {
emit("
") unless $prevtype == $BLANK;
emit("
");
}
},
maybe_push_scope =>
sub { $indent > $indentstack[-1] },
maybe_exit_scope =>
sub {
$type == $NUMBER ||
$indent < $indentstack[-1]
},
on_exit =>
sub { emit("
\n") }
},
# The "OL" (Ordered List) ruleset handles numbered lists
OL => {
can_enter =>
sub { $type == $NUMBER &&
$indent > $indentstack[-1] &&
($indent - $indentstack[-1]) < 8 # hack for
},
on_enter =>
sub { emit("\n") },
maybe_emit_markup =>
sub {
emit("- ") if $type == $NUMBER;
if ( $type == $BLANK ) {
emit("
") unless $prevtype == $BLANK;
emit("
");
}
},
maybe_push_scope =>
sub { $indent > $indentstack[-1] },
maybe_exit_scope =>
sub {
$type == $BULLET ||
$indent < $indentstack[-1]
},
on_exit =>
sub { emit("
\n") }
},
# The "PRE" (Preformatted) ruleset handles preformatted text
PRE => {
can_enter =>
sub { $indent > $indentstack[-1] && $indent >= 8 }, #TBD hack
on_enter => sub { emit("") },
maybe_emit_markup =>
sub { emit(" " x ($indent - 8)) if $indent > 8 },
maybe_push_scope =>
sub { 0 },
maybe_exit_scope =>
sub { $indent < $indentstack[-1] },
on_exit =>
sub { emit("\n") },
}
);
####
# The HTML that we've generated may need to be cleaned up. By deferring
# cleanup, the algorithms above can be simpler.
#
sub cleanup_html {
my $html = shift;
$html =~ s|
\n||sg;
$html =~ s|
\n||sg;
$html =~ s|()|$1|g;
$html =~ s|
\n||sg;
$html =~ s|\n\n|\n
|sg;
return $html;
}