code here #!/usr/bin/perl -wT # /home/sites/www.in-vacua.com/web/cgi-bin/Noumena1.pl -w use CGI::Carp qw(fatalsToBrowser); use strict; use CGI ':standard'; use lib '/.users/27/inv838/Template'; use LWP::Simple; use HTML::Parser; use vars qw($html); my $content; # Configurable variables for the script my %templates = (text => "text_output.html", url => "url_output.html"); # Initialise a new CGI object for parameter handling, etc. my $q = CGI->new; # Check to see if we have any input from the user. If so, # we go to process it. If not, we'll return a blank form if ($q->param('text')) { my $text = &process_text($q->param('text')); &output_template('text',$text); } elsif ($q->param('url')) { my $text = &process_url($q->param('url')); } else { print $q->redirect("/noumena.html"); } ## Subroutine Definitions # process_url: strip non-punctuation from html docs (harder) sub process_url { my ($url) = @_; my $content = get($url); #41 die "Couldn't get it!" unless defined $content; # Slightly ugly kludging to sort out internal document links # on sites that don't fully qualify (damn them all) if (!($url =~ m!^http://!)) { $url ="http://".$url; } $url =~ m!(http://(.*))/!; my $baseurl=$1 || $url; $content =~ s!href="/(.*)"!href="$baseurl/$1"!ig; $content =~ s!rel="/(.*)"!rel="$baseurl/$1"!ig; $content =~ s!src="/(.*)"!src="$baseurl/$1"!ig; # HTML::Parser is slightly odd - it uses a callback interface which throws # things back into this namespace. HTML::Parser->new(api_version => 3, handlers => [start => [\&_html_parser_tag, "text"], end => [\&_html_parser_tag, "text"], text => [\&_html_parser_text, "dtext"]], marked_sections => 1,)->parse($content); print $q->header; print $html; } # html_parser_text: handler to tell HTML::Parser what to do with text sections sub _html_parser_text { my ($text) = @_; $text =~ s!\w! !g; $html .= $text; } # html_parser_tag: handler to pass html tags unmolested back to HTML::Parser sub _html_parser_tag { my ($text) = @_; $html .= $text; } # output_template: use Template Toolkit to return data to the user sub output_template { my ($type, $text) = @_; print $q->header; my $template = Template->new; $template->process($templates{$type}, {text => $text}) || die $template->error(); }