comment on

Sorry as I said new... like today ;) here is the whole thing still working on the validation of tags etc but the code is untouched other than the encoded ampersands..

&parse_form;
print "Content-Type: text/html\n\n";
if(!open(FL, $page_model))
{print "Couldn't open page model\n"; exit(1);}
read FL,$model,-s $page_model;
close FL;
($model_top, $model_bot) = split /#####/, $model;
$extra = '';
$title = '';
@words = ();
if(exists $form{'words'})
{ $search_words = $form{'words'};
if($form{'wt'} eq 'be') {$search_wb = ' checked'; $search_ew = '';}
else {$search_wb = ''; $search_ew = ' checked';}
if($form{'bl'} eq 'an') {$search_bAND = ' checked'; $search_bOR = ''; 
+$search_bPHR = '';}
elsif($form{'bl'} eq 'ph') {$search_bAND = ''; $search_bOR = ''; $sear
+ch_bPHR = ' checked';}
else {$search_bAND = ''; $search_bOR = ' checked'; $search_bPHR = '';}
$wl = lc $search_words;
$wl =~ tr/a-z0-9/ /c;
$wl =~ s/(\A\s+)|(\s+\Z)//g;
@words = split /\s+/, $wl;
if($wl eq '' || $#words < 0)
{ $extra = "<font color=red>Please enter some words in the search box.
+</font><br>";
@words = ();
} else
{ $title = join ' ', 'Search results for', @words;
} $search_q = $ENV{'QUERY_STRING'};
$search_q =~ s/\&amp;pg=\d+//;
} else
{ $search_words = '';
$search_wb = ' checked';
$search_ew = '';
$search_bAND = '';
$search_bOR = ' checked';
$search_bPHR = '';
} if($title ne '') {$model_top =~ s~<title>(.+?)</title>~<title>$title
+</title>~si;}
print $model_top;
if($#words >= 0)
{ %pages = ();
$go = 1;
for($word = 0; $word <= $#words && $go; $word++)
{ ($i,$gs) = split //, $words[$word], 3;
$g = $grains[ord($gs)];
if(open(FL, "$index_loc/$i.$g"))
{ $found = 0;
$pat = "\\A$words[$word]";
if($search_wb eq '')
{ $pat .= '\\s';    
} %pages2 = ();
while(<FL>)
{ if(m/$pat/)
{ $found = 1;
($wd, $pg) = split / /, $_, 2;
@vp = split /#/, $pg;
$mul = ($#words + 1) - $word;
if($word > 0 && $search_bOR eq '')
{ for(@vp)
{ /\A(.+):(\d+?)\Z/;
if(exists $pages{"I$1"})
{ $pages2{"I$1"} = $pages{"I$1"} * ($2 * $mul);                       
+         
} } } else
{ for(@vp)
{ /\A(.+):(\d+?)\Z/;
if(exists $pages{"I$1"})
{ $pages{"I$1"} *= ($2 * $mul);
} else
{ $pages{"I$1"} = ($2 * $mul);
} } } } } if($word > 0 && $search_bOR eq '')
{ %pages = %pages2;
%pages2 = ();
} close FL;
} else
{ if($search_bAND ne '' || $search_bPHR ne '')
{ $pages = ();
$go = 0;        
} } } if($search_bPHR ne '' && $#words > 0)
{ if($search_wb ne '')
{ $pat = '\b' . join('\w*?\s+', @words);
} else
{ $pat = '\b' . join('\s+', @words) . '\b';
} @ps = keys %pages;
for $pg (@ps)
{ if($pg =~ m/I(.+)/)
{ if(open FL, "$docroot_disc/$1")
{ read FL,$pt,-s "$docroot_disc/$1";
close FL;
$pt =~ s/<.+?>//gs;
if(!($pt =~ m/$pat/gis))
{ delete $pages{$pg};
} } else
{ delete $pages{$pg};
} } } } @pages = sort {$pages{$b} <=> $pages{$a}} keys %pages;
$found = $#pages + 1;
$page_begin = 0;
$this_page = 0;
if($res_per_page != -1)
{ $end = ($res_per_page - 1);
if(exists $form{'pg'})
{ $this_page = $form{'pg'};
$this_page =~ tr/0-9//cd;
if($this_page eq '') {$this_page = 0}
$page_begin = $this_page * $res_per_page;
$end = ($page_begin + $res_per_page - 1);
} if($end > $#pages) {$end = $#pages}
@pages = @pages[$page_begin .. $end];
} $shown = $#pages + 1;
if($high_results)
{ $disp_root = "$page_script?$search_q&amp;d=";
} else
{ $disp_root = $docroot_web;
} if($#pages >= 0)
{ print "<p><strong>Search results ($found found, $shown shown)</stron
+g></p>\n";
$n = $page_begin + 1;
for $pg (@pages)
{ if($pg =~ m/(\w)(.+)/)
{ if($1 eq 'I')
{ $fn = $2;
if(open FL, "$docroot_disc/$fn")
{ read FL,$pt,1024;
close FL;
if($pt =~ m~<title>(.+?)</title>~gis)
{$title = $1}
else {$title = '(no title)'}
print qq~$n - <p><a href="$disp_root$fn"$link_target>$title</a></p>\n~
+;
} } else
{ print "(result type not known)<br />\n";
} } $n++;
} print "\n";
} else
{ print "<strong>No matching pages found.</strong></p>\n";
} if($res_per_page != -1)
{ if($found != $shown)
{ $pages = int(($found / $res_per_page) + 1);
print "<i>$pages pages</i> : <tt>\n";
if($this_page != 0)
{ $p = $this_page - 1;
print qq~ <p><a href="$search_script?$search_q&amp;pg=$p">&lt;</a></p>
+ ~;
} else
{ print "&lt; ";
} for($p = 0; $p < $pages; $p++)
{ $n = $p+1;
if($p == $this_page)
{ print " <strong>$n</strong> ";
} else
{ print qq~ <p><a href="$search_script?$search_q&amp;pg=$p">$n</a></p>
+ ~;
} } if($this_page != ($pages - 1))
{ $p = $this_page + 1;
print qq~ <p><a href="$search_script?$search_q&amp;pg=$p">&gt;</a></p>
+</tt>\n~;
} else
{ print " &gt;</tt>\n";
} } else
{ print "1 page of results only\n";
} } } print <<__EOF;
<form method="get" action="$search_script">
$extra
Search for:<br />
<input type=text name="words" value="$search_words" /><input type=subm
+it value="Search" /><br />
(put most important words first)<br />
Find: <input type=radio name="wt" value="be"$search_wb />Words beginni
+ng
<input type=radio name="wt" value="ew"$search_ew />Entire words<br>
Require: <input type=radio name="bl" value="an"$search_bAND />All word
+s
<input type=radio name="bl" value="or"$search_bOR />Any word
<input type=radio name="bl" value="ph"$search_bPHR />Phrase<br />
</form>
__EOF
print $model_bot;
exit(0);
sub parse_form
{ %form = ();
my $buffer;
if($ENV{'REQUEST_METHOD'} eq 'POST')
{ read(STDIN, $buffer, $ENV{'CONTENT_LENGTH'});
} else
{ $buffer = $ENV{'QUERY_STRING'};
} @pairs = split(/&amp;/, $buffer);
foreach $pair (@pairs)
{ ($name, $value) = split(/=/, $pair);
$value =~ tr/+/ /;
$value =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C", hex($1))/eg;
$form{$name} = $value;
} }
[download]

astroboy! You may have pointed me to the right place but it all went over my head sorry hope the rest of the code helps. For all I know the code may be way heavier than it needs to be, but it works fine just wont validate as XHTML 1.0 strict. Again thanks for your time! Nathan.

Janitored by Arunbear - added readmore tags

In reply to Re^2: Parsing un-encoded ampersand in XHTML by Anonymous Monk
in thread Parsing un-encoded ampersand in XHTML by Anonymous Monk

Posts are HTML formatted. Put <p> </p> tags around your paragraphs. Put <code> </code> tags around your code and data!

Titles consisting of a single word are discouraged, and in most cases are disallowed outright.

Read Where should I post X? if you're not absolutely sure you're posting in the right place.

Please read these before you post! —

Posts may use any of the Perl Monks Approved HTML tags:

a, abbr, b, big, blockquote, br, caption, center, col, colgroup, dd, del, details, div, dl, dt, em, font, h1, h2, h3, h4, h5, h6, hr, i, ins, li, ol, p, pre, readmore, small, span, spoiler, strike, strong, sub, summary, sup, table, tbody, td, tfoot, th, thead, tr, tt, u, ul, wbr

You may need to use entities for some characters, as follows. (Exception: Within code tags, you can put the characters literally.)

	For:		Use:
	&		`&`
	<		`<`
	>		`>`
	[		`[`
	]		`]`

Link using PerlMonks shortcuts! What shortcuts can I use for linking?

See Writeup Formatting Tips and other pages linked from there for more info.