Wraps paragraphs (runs of text separated by blank lines) in <p>...</p> and places <br /> before single newlines. It is smart about block elements such as <blockquote>; for example:

I quote Lord Pilaf:

<blockquote>
Blah blah blah.

Blah!
</blockquote>

becomes

<p>I quote Lord Pilaf:</p>

<blockquote>
<p>Blah blah blah.</p>

<p>Blah!</p>
</blockquote>

# Elements that may appear verbatim in user text; every other tag is escaped.
# NOTE(review): the original list read "i img i ol"; the duplicated "i" sits
# exactly where "li" belongs alphabetically, and <ol>/<ul> are useless without
# it, so "li" is assumed to be what was intended.
my %is_approved_element = map { $_ => 1 } qw(
    a b big blockquote br center cite code dd div dl dt em font hr i img li
    ol p pre samp small span strong sub sup table td th tr tt u ul
);

# Block-level elements: a pending <p> must not be opened in front of them.
my %is_block_element = map { $_ => 1 }
    qw(blockquote center div dl ol pre table ul);

# Elements whose content is passed through without <p>/<br /> insertion.
my %is_raw_element = map { $_ => 1 } qw(dl ol pre table ul);

# Elements that never have a closing tag and so are never pushed on the stack.
my %is_empty_element = map { $_ => 1 } qw(br img hr);

# render_text($str, $no_margin)
#
# Converts lightly marked-up plain text to XHTML:
#   * text separated by blank lines is wrapped in <p>...</p>;
#   * a single newline becomes "<br />\n" (except inside raw elements);
#   * approved tags pass through, all other "<" / ">" are escaped;
#   * bare "&" is escaped, existing entities are left alone;
#   * http/https/ftp/mailto URLs are auto-linked unless already inside <a>;
#   * <person>name</person> becomes a link built from $script_name and
#     user_id(name) -- both are defined outside this snippet;
#   * approved elements still open at the end of the text are closed.
#
# Parameters:
#   $str        the text to render (undef is treated as the empty string)
#   $no_margin  if true, the first and last <p> of the result receive inline
#               styles removing their outer margins
#
# Returns the rendered string; the empty string when there is no input.
#
# SECURITY(review): attributes of approved tags are copied through unchanged,
# so event handlers (onclick=...) and javascript: URLs in user input survive.
# Filter attributes before using this on untrusted text.
sub render_text {
    my ($str, $no_margin) = @_;
    $str = '' unless defined $str;

    my $buf = '';
    my @tag_stack;      # currently open approved elements, innermost first
    my $raw  = 0;       # nesting depth of raw elements
    my $nl   = 0;       # a single newline is pending (may become <br />)
    my $in_p = 0;       # an automatically opened <p> is still open

    # Escape ampersands that do not already start an entity.
    $str =~ s/&(?!\#?[a-zA-Z0-9]+;)/&amp;/g;

    $str =~ s{<person>([^<>]+)</person>}
             {sprintf(qq|<a href="%s?%s">%s</a>|, $script_name, user_id($1), $1)}gei;

    # Test length, not truth: a remaining "0" must still be processed.
    while (length $str) {
        my $out    = '';
        my $need_p = !$in_p && !$raw;

        if ($str =~ s/^([^<>\n]+)//) {
            # Plain text run.
            $out = $1;
            my $in_a = grep { $_ eq 'a' } @tag_stack;
            unless ($in_a) {
                $out =~ s{((?:http|https|ftp|mailto):\S+[a-zA-Z0-9/])}{
                    my $url = $1;
                    # A double quote inside the URL must not end the attribute.
                    (my $href = $url) =~ s/"/&quot;/g;
                    qq|<a href="$href">$url</a>|;
                }ge;
            }
        }
        elsif ($str =~ s/^<//) {
            if ($str =~ s/^(\/?)([a-zA-Z][a-zA-Z0-9\.-]*)(.*?)>//) {
                my $close      = $1;
                my $name       = lc $2;
                my $attributes = $3;

                if ($is_approved_element{$name}) {
                    # Only a closing tag that matches the innermost open
                    # element is honoured; stray ones are dropped.
                    my $closes_top = $close
                        && @tag_stack
                        && $tag_stack[0] eq $name;

                    if ($is_block_element{$name}) {
                        # A <p> tag produced by \n\n should go after a
                        # <blockquote> tag, not before it.
                        $need_p = 0;
                        if ($close && $in_p) {
                            $buf .= '</p>';
                            $in_p = 0;
                        }
                        if ($is_raw_element{$name}) {
                            if ($close) {
                                # Never let a stray close drive the depth
                                # negative (which would read as "raw").
                                $raw-- if $closes_top;
                            }
                            else {
                                $raw++;
                            }
                        }
                    }

                    if ($close) {
                        # Catch a pending \n so it doesn't get translated
                        # to <br />.
                        if ($nl) {
                            $out .= "\n";
                            $nl = 0;
                        }
                        if ($closes_top) {
                            $out .= "</$name>";
                            shift @tag_stack;
                        }
                    }
                    else {
                        $out .= "<$name$attributes>";
                        unless ($is_empty_element{$name}) {
                            unshift @tag_stack, $name;
                            # Keep newlines directly after an opening tag
                            # verbatim; use $1 only if the match succeeded.
                            if ($str =~ s/^(\n+)//) {
                                $out .= $1;
                            }
                        }
                    }
                }
                else {
                    # Unapproved tag: show it as text.
                    $attributes =~ s/</&lt;/g;
                    $out = "&lt;$close$name$attributes&gt;";
                }
            }
            else {
                $out = '&lt;';
            }
        }
        elsif ($str =~ s/^>//) {
            $out = '&gt;';
        }
        elsif ($str =~ s/^(\n{2,})//) {
            # Blank line(s): paragraph break.
            if ($in_p) {
                $buf .= '</p>';
                $in_p = 0;
            }
            $buf .= $1;
        }
        elsif ($str =~ s/^\n//) {
            if ($raw) {
                $buf .= "\n";
            }
            else {
                $nl = 1;
            }
        }

        if (length $out) {
            if ($nl) {
                $buf .= "<br />\n";
                $nl = 0;
            }
            if ($need_p) {
                $buf .= '<p>';
                $in_p = 1;
            }
            $buf .= $out;
        }
    }

    # Close whatever the author left open.
    for my $name (@tag_stack) {
        $buf .= "</$name>";
    }
    if ($in_p) {
        $buf .= '</p>';
    }

    if ($no_margin) {
        my $first_p = index($buf, '<p>');
        my $last_p  = rindex($buf, '<p>');

        # Nothing to style when the output contains no <p> at all.
        if ($first_p >= 0) {
            if ($first_p == $last_p) {
                substr($buf, $first_p + 2, 0, q| style="margin: 0"|);
            }
            else {
                # Patch the later tag first so $first_p stays valid.
                substr($buf, $last_p + 2, 0, q| style="margin-bottom: 0"|);
                substr($buf, $first_p + 2, 0, q| style="margin-top: 0"|);
            }
        }
    }

    return $buf;
}

Replies are listed 'Best First'.
Re: Text to XHTML
by Aristotle (Chancellor) on Sep 01, 2002 at 20:39 UTC
    You may want to have a look at HTML::FromText.

    Makeshifts last the longest.

Re: Text to XHTML
by widget (Initiate) on Sep 13, 2002 at 14:20 UTC