package PNSearch;

# PNSearch -- one search hit (a "note") pulled from the markup database.
#
# NOTE(review): this listing was collapsed onto two physical lines and some
# angle-bracket tokens appear to have been stripped from it.  The line
# structure has been restored; every spot where a stripped token had to be
# reconstructed carries its own NOTE(review) and should be confirmed against
# the author's copy.  The "####" lines look like separators between excerpts
# from different files and are kept as they were.

use strict;
use warnings FATAL => qw(all);
use CGIutil;

our $Log = "PNSearch.log";

# Module-level side effect: route warnings to the log file.
$SIG{__WARN__} = \&logwarn;

# logwarn($message)
# Hands $message to CGIutil->logger together with the log file name.
sub logwarn {
    my ($message) = @_;
    CGIutil->logger($Log, $message);
}

# PNSearch->new($dbh, $ln, $terms, $fname)
#
# Builds the object that represents a note.
#   $dbh   - open read handle on a markup file; it is consumed as a side
#            effect, so $ln is counted from the handle's current position
#   $ln    - number of lines to read; the last one read is the note
#   $terms - stored unchanged in {terms} (the caller passes checkline()'s count)
#   $fname - file name, used only in the log message
#
# Returns the blessed object, or nothing (undef in scalar context) when the
# record cannot be parsed or the handle runs out before line $ln.
sub new {
    my ($class, $dbh, $ln, $terms, $fname) = @_;
    my $self = { terms => $terms };

    # Advance to the requested line.  A lexical is used so the caller's $_
    # is not clobbered.
    my $seen = 0;
    my $record;
    while (defined(my $line = <$dbh>)) {
        next unless ++$seen == $ln;
        $record = $line;
        last;
    }

    # Ran off the end of the file: no note to represent.
    return unless defined $record;

    # Strip the leading "anchor<|>date (title) <|>" header; what is left is
    # the body.  Test the substitution itself, not $1: capture variables are
    # only meaningful after a successful match.
    unless ($record =~ s/^([^>]+?)<\|>(.*?)\((.*?)\)\s*<\|>//) {
        CGIutil->logger($Log, "No href defined: $fname\n$record\n\n");
        return;
    }
    my ($date, $title) = ($2, $3);

    # After a successful match both captures are always defined (possibly
    # empty), so the fallbacks must test for emptiness to ever be used.
    $self->{date}  = length $date  ? $date  : "—";
    $self->{title} = length $title ? $title : "[no title]";

    # NOTE(review): $1 (the anchor field) is matched but never stored and
    # {href} is set to an empty string.  Presumably link markup was lost from
    # this literal -- confirm what it should contain.
    $self->{href} = "";
    $self->{body} = $record;

    return bless $self, ref($class) || $class;
}

# $note->hilight($term)
#
# Marks every match of $term (interpolated as a regex) in the text parts of
# {body} and in {title}.  The body is cut at each "<"; in each piece the part
# before the first ">" is tag content and is left alone, the part after it is
# text.  Text before the first "<" is left unmarked.
#
# NOTE(review): the "split /</, $self->{body}" expression and the opening
# "<em>" are reconstructions, inferred from the join("<", ...) and the
# closing "<\/em>" that are still present -- confirm.
sub hilight {
    my ($self, $term) = @_;

    # LIMIT -1 keeps trailing empty fields so the join below reproduces the
    # body exactly.
    my @chunks = split /</, $self->{body}, -1;
    foreach my $chunk (@chunks) {
        # LIMIT 2 keeps everything after the tag in one piece, including any
        # later ">" characters.
        my ($tag, $text) = split />/, $chunk, 2;
        next unless defined $text && length $text;    # tag only, no text
        $text =~ s/($term)/<em>$1<\/em>/g;
        $chunk = join(">", $tag, $text);
    }
    $self->{body} = join("<", @chunks);
    $self->{title} =~ s/($term)/<em>$1<\/em>/g;
}

1;

####

my @Notes;    # array of PNSearch objects

foreach my $file (@Files) {
    next unless (-f "$DBDir/$file" && !-z "$DBDir/$file");

    # scan text only database
    # each file represents one .html page, each line represents one whole note
    my $db;
    unless (open($db, '<', "$DBDir/$file")) {
        CGIutil->logger($Log, "!!Could not open $DBDir/$file: $!");
        next;
    }

    # array of arrays, 0 = line number 1 = terms found: qv. checkline() below
    my @lines = ();
    my $ln = 1;
    # NOTE(review): the read was an empty "while ()"; reading from the handle
    # opened just above is a reconstruction -- confirm.
    while (my $text = <$db>) {
        my @found = ($ln, checkline($text));
        push @lines, \@found if $found[1];
        $ln++;
    }
    close($db);

    # pull selected notes from markup database
    my $cur = 0;    # last line in db
    my $MUH;
    unless (open($MUH, '<', "$DBDir/markup/$file")) {
        CGIutil->logger($Log, "!!Can't open /markup/$file: $!");
        next;
    }
    foreach my $hit (@lines) {
        # new() reads forward from the handle's current position, so it is
        # given the distance from the previously fetched line.
        my $pns = PNSearch->new($MUH, $hit->[0] - $cur, $hit->[1], $file);
        push @Notes, $pns if ($pns);
        $cur = $hit->[0];
    }
    close($MUH);
}

# checkline($line)
#
# Counts how many entries of @Terms match $line after the first "<|>".
# nb: return value is the number of terms found, not the number of
# individual hits; ie, if there is only one search term, this will be 0 or 1
#
# NOTE(review): each term is interpolated as a regex.  If @Terms comes
# straight from user input it should be validated or quoted -- confirm.
sub checkline {
    my $line = pop;

    # anchor name is before first <|> (don't search that)
    return scalar grep { $line =~ /$Pfix<\|>.*?$_/ } @Terms;
}

####
# What follows looks like sample database rows, one note per line, kept
# verbatim (their markup appears to have been stripped as well).
#
# 30 April 2008 (Possession of <|>30 April 2008 (Possession of "extreme pornography") <|>SNIP
# 29 April 2008 (Labor Department and whistleblower law)<|>29 April 2008 (Labor Department and whistleblower law) <|>SNIP
# 29 April 2008 (Dalit woman refused treatment and dies)<|>29 April 2008 (Dalit woman refused treatment and dies) <|>SNIP
# 29 April 2008 (Veterans and suicide)<|>29 April 2008 (Veterans and suicide) <|>SNIP
# 28 April 2008 (Cluster bombs in Iraq)<|>28 April 2008 (Cluster bombs in Iraq)<|>SNIP