ayob has asked for the wisdom of the Perl Monks concerning the following question:

This code converts an HTML file to plain text (html2text). I want to know how to run this code under CGI so that it works as a web application.
#!/usr/bin/perl
# $Header: Penyemakan Ejaan (html2text) $
#
# html2text: parse HTML with HTML::Parser and print a plain-text rendering
# (headings, wrapped paragraphs, list items, preformatted text and tables)
# to standard output.
#
# Usage: html2text [-debug] [FILE ...]
# With no FILE arguments the HTML is read from standard input.
# NOTE: "--" stops argument processing; anything after it is ignored.

use strict;
use warnings;
use integer;    # all width arithmetic below is integer arithmetic
use HTML::Parser;
use CGI;

my $parser = HTML::Parser->new(
    'api_version' => 3,
    'start_h'     => [ \&start, "tagname, attr" ],
    'end_h'       => [ \&end,   'tagname' ],
    'text_h'      => [ \&text,  'dtext' ],
);

# Stack of saved $doc values, one per element currently being collected.
my @stack;

# $count is used to count section numbers within the document
# (each section being defined by an anchor).
my $count;

# $doc accumulates the text of the document.  It might be a string, or
# an array ref containing subsidiary $doc values or further array refs
# which contain a tag and a $doc value.
my $doc;

my $debug = 0;     # incremented by each -debug option
my $width = 76;    # output width in columns
my %names;         # anchor name => section number

# Tags whose content is collected as a string and emitted as one element.
my %is_text_block = map { $_ => 1 } qw(h1 h2 h3 p cite pre li);

# Tags that take part in table construction.
my %is_table_part = map { $_ => 1 } qw(table tr td th);

# Element types that format() word-wraps.
my %is_wrapped = map { $_ => 1 } qw(p cite h2 h3 li);

# FILE OUTPUT
# Copy the fixed input file to the fixed output file, then append a trailer
# line that starts with the number of lines copied.
{
    my $source_path = '/home/ayob/temp/test.txt';
    my $copy_path   = '/home/ayob/projekmajor/cc';

    open(my $source_fh, '<', $source_path) or die "Fail tidak ada\n";
    my @lines = <$source_fh>;
    close $source_fh;

    open(my $copy_fh, '>', $copy_path) or die "File does not exists\n";
    print {$copy_fh} @lines;

    # NOTE(review): nothing visible in this script ever assigns these three
    # values, so they are written as empty strings -- confirm where they are
    # supposed to come from.
    my ($operator, $nombor, $nama) = ('', '', '');
    my $line_total = scalar @lines;
    print {$copy_fh} "\n$line_total.\t$operator-$nombor\t$nama";
    close($copy_fh) or die "$0: closing $copy_path: $!\n";
}

# Return $value, or the empty string when it is undefined (debug output only).
sub shown {
    my ($value) = @_;
    return defined $value ? $value : '';
}

# Return the elements of $maybe_ref when it is an array ref, else nothing.
# Badly nested HTML can leave a string where an array ref is expected.
sub as_list {
    my ($maybe_ref) = @_;
    return @$maybe_ref if ref($maybe_ref) eq 'ARRAY';
    return;
}

# Append $item to the current $doc.  An undefined $doc becomes a new array;
# when $doc is a string (badly nested HTML) the item is dropped.
sub append_to_doc {
    my ($item) = @_;
    $doc = [] if !defined $doc;
    push(@$doc, $item) if ref($doc) eq 'ARRAY';
    return;
}

# restore the previous context, and return the old one
sub popit {
    my $finished = $doc;
    $doc = pop(@stack);
    print STDERR "POP[$#stack]: doc-> {", shown($doc), "}\n" if $debug;
    return $finished;
}

# handle a start tag
#   $tag   - tag name
#   $attrs - hash ref of the tag's attributes
sub start {
    my ($tag, $attrs) = @_;
    return if $tag eq "ul";

    if ($tag ne "a" && !defined $doc) {
        $doc = [];
    }

    if ($debug) {
        print STDERR "<$tag>\n";
        print STDERR map(" $_=$$attrs{$_}\n", keys %$attrs);
    }

    if ($is_text_block{$tag}) {
        # Collect this element's text in a fresh string.
        push(@stack, $doc);
        $doc = "";
        print STDERR "NEW[$#stack]: doc-> {$doc}\n" if $debug;
    }
    elsif ($is_table_part{$tag}) {
        push(@stack, $doc);
        if ($tag eq "td" || $tag eq "th") {
            # A cell starts undefined; its content decides what it becomes.
            undef $doc;
            print STDERR "NEW[$#stack]: doc-> {undef}\n" if $debug;
        }
        else {
            $doc = [];
            print STDERR "NEW[$#stack]: doc-> {$doc}\n" if $debug;
        }
    }
    elsif ($tag eq "a") {
        if (exists $$attrs{'href'} && $$attrs{'href'} =~ /^\#/) {
            # Link to an anchor in this document: number it on first sight.
            my $name = $$attrs{'href'};
            $name =~ s/^\#//;
            if (!exists $names{$name}) {
                $names{$name} = $count++;
            }
            text("[" . $names{$name} . "] ");
        }
        elsif (exists $$attrs{'name'}) {
            # Anchor definition: show its number if it was referenced earlier.
            my $name = $$attrs{'name'};
            if (exists $names{$name}) {
                text("[" . $names{$name} . "] ");
            }
        }
    }
    elsif ($tag eq "em") {
        text("_");
    }
    return;
}

# handle an end tag
#   $tag - tag name
sub end {
    my ($tag) = @_;
    print STDERR "</$tag>\n" if $debug;

    if ($is_text_block{$tag}) {
        my $content = popit();
        append_to_doc([$tag, $content]);
    }
    elsif ($tag eq "em") {
        text("_");
    }
    elsif ($tag eq "table") {
        my $table = popit();

        # The table is as wide as its widest row.
        my $cols = 0;
        for my $row (as_list($table)) {
            my @cells = as_list($row);
            $cols = scalar(@cells) if @cells > $cols;
        }
        append_to_doc(["table", $table, $cols]);
    }
    elsif ($tag eq "tr") {
        # XXX rowspan
        my $row = popit();
        append_to_doc($row);
    }
    elsif ($tag eq "td" || $tag eq "th") {
        # XXX colspan
        $doc = "" if !defined $doc;
        my $cell = popit();
        append_to_doc($cell);
    }
    return;
}

# handle text: appended only while $doc is a string (or still undefined)
sub text {
    my ($text) = @_;
    $doc = "" if !defined $doc;
    if (!ref $doc) {
        $doc .= $text;
        print STDERR "TXT[$#stack]: doc-> {$doc}\n" if $debug;
    }
    return;
}

# Word-wrap $value to $max_width columns and return the resulting lines.
# "li" items get a " *" bullet on the first line; every "h2" line is
# followed by a line of dashes of the same length.
sub wrap_words {
    my ($type, $value, $max_width) = @_;
    my @words = split(/\s+/, defined $value ? $value : '');
    my @out;
    my $line;           # line being built; undefined between lines
    my $x;              # current length of $line
    my $prefix = "";    # start of every continuation line

    if ($type eq "li") {
        $prefix = " ";
        $line   = " *";
        $x      = 2;
    }

    for my $word (@words) {
        # Flush the current line when the next word would not fit.
        if (defined $line && $x + 1 + length($word) > $max_width) {
            push(@out, $line);
            push(@out, '-' x length($line)) if $type eq 'h2';
            undef $line;
        }
        if (!defined $line) {
            $line = $prefix;
            $x    = length $prefix;
        }
        if ($x != 0) {
            $line .= " ";
            ++$x;
        }
        $line .= $word;
        $x += length $word;
    }

    if (defined $line) {
        push(@out, $line);
        push(@out, '-' x length($line)) if $type eq 'h2';
    }
    return @out;
}

# Render a table as boxed text and return the resulting lines.
#   $rows      - array ref of rows, each an array ref of cell $doc values
#   $cols      - number of columns (must be non-zero)
#   $max_width - width available for the whole table
sub format_table {
    my ($rows, $cols, $max_width) = @_;
    my @out;

    # for each row, fit into 1/N of the space available
    # XXX do something more sophisticated
    my $colwidth = ($max_width - 1) / $cols - 1;

    # this will be the maximum column width
    my @max;

    # Pass 1 measures the columns, pass 2 produces the output.
    for my $try (1, 2) {
        my $divider;
        my $total;
        if ($try == 2) {
            $total = -1;
            for my $w (@max) {
                $total += $w + 1;
            }
            push(@out, "," . ("-" x $total) . ".");
            $divider = "+";
            for my $w (@max) {
                $divider .= ("-" x $w) . "+";
            }
        }

        my $first = 1;
        for my $row (as_list($rows)) {
            if ($first) {
                $first = 0;
            }
            elsif ($try == 2) {
                push(@out, $divider);
            }

            # format each cell
            my @formatted;
            for my $cell (as_list($row)) {
                my @cell_lines = &format($cell, $colwidth);
                push(@formatted, \@cell_lines);
            }

            # pick apart the formatted cells row by row, and reassemble
            my $r = 0;
            for (;;) {
                my $line      = "|";
                my $keepgoing = 0;
                my $c         = 0;
                for my $cell (@formatted) {
                    my $part = "";
                    if ($r <= $#$cell) {
                        $part      = $$cell[$r];
                        $keepgoing = 1;
                    }
                    my $pl = length $part;
                    if ($try == 1) {
                        ++$pl if $pl < $colwidth;
                        if (!defined $max[$c] || $pl > $max[$c]) {
                            $max[$c] = $pl;
                        }
                    }
                    else {
                        $part .= " " x ($max[$c] - $pl) if $pl < $max[$c];
                        $line .= "$part|";
                    }
                    ++$c;
                }
                last if !$keepgoing;
                push(@out, $line) if $try == 2;
                ++$r;
            }
        }

        if ($try == 2) {
            push(@out, "`" . ("-" x $total) . "'");
        }
    }
    return @out;
}

# Format a $doc value as a list of text lines at most $max_width wide
# (single words longer than that are not broken).
#   $content   - a string, or an array ref of [type, value, cols] elements
#   $max_width - target line width
# "format" is also a Perl keyword, so this sub must be called as &format(...).
sub format {
    my ($content, $max_width) = @_;
    my @ret = ();

    # A bare string is formatted as a single paragraph.
    if (!ref $content) {
        return &format([["p", $content]], $max_width);
    }

    for my $e (as_list($content)) {
        next if ref($e) ne 'ARRAY';
        my ($type, $value, $cols) = @$e;
        next if !defined $type;

        if ($type eq "h1") {
            # Centred title underlined with dashes.
            $value = '' if !defined $value;
            $value =~ s/\s+/ /g;    # collapse every whitespace run
            $value =~ s/^ //;
            $value =~ s/ $//;
            my $len = length $value;
            push(@ret, "") if (@ret && $ret[-1] ne "");
            my $padding =
              ($len < $max_width ? (" " x (($max_width - $len) / 2)) : "");
            push(@ret, $padding . $value);
            push(@ret, $padding . ("-" x $len));
        }
        elsif ($is_wrapped{$type}) {
            # Paragraphs and list items get one blank line before them,
            # the other wrapped types get two.
            my @break = ($type eq "p" || $type eq "li") ? ("") : ("", "");
            push(@ret, @break) if (@ret && $ret[-1] ne "");
            push(@ret, wrap_words($type, $value, $max_width));
        }
        elsif ($type eq "pre") {
            push(@ret, "") if (@ret && $ret[-1] ne "");
            push(@ret, split(/\n/, defined $value ? $value : ''));
        }
        elsif ($type eq "table") {
            # A table without columns has nothing to draw, and its column
            # width computation would divide by zero.
            next if !$cols;
            push(@ret, "") if (@ret && $ret[-1] ne "");
            push(@ret, format_table($value, $cols, $max_width));
        }
    }
    return @ret;
}

# Parse one input (a file name or a reference to an open handle) and print
# its text rendering to standard output.  Dies if writing fails; warns and
# carries on if the input cannot be opened.
sub convert {
    my ($source) = @_;

    # Start every document from a clean state.
    $doc   = [];
    $count = 1;
    %names = ();
    @stack = ();

    if (!defined $parser->parse_file($source)) {
        warn "$0: $source: $!\n";
        return;
    }
    (print STDOUT map("$_\n", &format($doc, $width)))
      or die "$0: writing to stdout: $!\n";
    return;
}

my $files = 0;
while (@ARGV) {
    my $arg = shift @ARGV;
    if ($arg =~ /^-/) {
        last if $arg eq "--";
        if ($arg eq "-debug") {
            ++$debug;
        }
        else {
            die "$0: unknown option '$arg'\n";
        }
    }
    else {
        convert($arg);
        ++$files;
    }
}

# No file arguments: convert standard input instead.
if (!$files) {
    convert(\*STDIN);
}

(close STDOUT) or die "$0: closing stdout: $!\n";

update (broquaint): added <readmore> tag

Replies are listed 'Best First'.
Re: how html2text converter run in CGI
by PodMaster (Abbot) on Jan 30, 2003 at 11:33 UTC
    Do you simply wish to make this work in a CGI environment, doing the same thing it does now, except dumping diagnostic info as a webpage(or text), or what?

    For info on CGI, `perldoc -q CGI':


    MJD says you can't just make shit up and expect the computer to know what you mean, retardo!
    ** The Third rule of perl club is a statement of fact: pod is sexy.