Gwalchmai has asked for the wisdom of the Perl Monks concerning the following question:
Please help.
#!/opt/perl5.8t/bin/perl ###################################################################### +########## # # # Program: distributedSearch.pl # # # # Programmer: Doug Bales # # # # Language: Perl 5.8 # # # # Version: 1.0.0 # # # # Release Date: June 10, 2003 # # # # Description: This script allows for the simulataneous searching of W +eb- # # based databases through the use of threads. The results are # # collected and displayed together. # # # # Updates: # # Date Name Description of update # # 08/04/03 JDB Added entry for European Fusion Development Agree +ment # # (EFDA). # # 08/06/03 JDB Added the following 5 databases located at the # # University of Manchester Institute of Science and # # Technology: # # Astro Physics # # Atmospheric Physics # # Condensed Matter # # Plasma Physics # # Theoretical Physics # # 08/15/03 JDB Added entries for Advanced Knowledge Technologies + and # # Australian National University # # # ###################################################################### +########## use threads; use threads::shared; use CGI qw/:standard/; # Arrays containing search key words. @title = param('keywords'); @abstract = param('keywords'); @author = qw( ); # Hash of search terms my %searchTerms = ( 'title' => [@title], 'abstract' => [@abstract], 'author' => [@author], ); # Maximum number of hits to return from any one site my $maxHits = param('hits'); # Array used by all threads to store hits my @searchResults : shared = undef; # Create threads $thr1 = threads->new( \&citeSeer ) if param('CiteSeer'); $thr2 = threads->new( \&spiresHEP ) if param('SLAC SPIRES-HEP'); $thr3 = threads->new( \&sfb288Preprints ) if param('SFB 288 Preprints'); $thr4 = threads->new( \&chalmers ) if param('Chalmers University of Technology'); $thr5 = threads->new( \&cleo ) if param('CLEO'); $thr6 = threads->new( \&efda ) if param('European Fusion Development Agreement'); $thr7 = threads->new( \&umist_AstroPhysics ) if param('UMIST - Astro Physics'); $thr8 = threads->new( \&umist_AtmosphericPhysics ) if param('UMIST - Atmospheric Physics'); $thr9 = threads->new( \&umist_CondensedMatter ) if param('UMIST - Condensed Matter'); $thr10 = threads->new( \&umist_PlasmaPhysics ) if param('UMIST - Plasma Physics'); $thr11 = threads->new( \&umist_TheoreticalPhysics ) if param('UMIST - Theoretical Physics'); $thr12 = threads->new( \&advancedKnowledgeTechnologies ) if param('Advanced Knowledge Technologies'); $thr13 = threads->new( \&australianNationalUniversity ) if param('Australian National University'); # Do not end program until all threads have finished $thr1->join() if param('CiteSeer'); $thr2->join() if param('SLAC SPIRES-HEP'); $thr3->join() if param('SFB 288 Preprints'); $thr4->join() if param('Chalmers University of Technology'); $thr5->join() if param('CLEO'); $thr6->join() if param('European Fusion Development Agreement'); $thr7->join() if param('UMIST - Astro Physics'); $thr8->join() if param('UMIST - Atmospheric Physics'); $thr9->join() if param('UMIST - Condensed Matter'); $thr10->join() if param('UMIST - Plasma Physics'); $thr11->join() if param('UMIST - Theoretical Physics'); $thr12->join() if param('Advanced Knowledge Technologies'); $thr13->join() if param('Australian National University'); # Display results + + &displayResults; ###################################################################### +########## # # # Subroutine: displayResults # # # # Programmer: Doug Bales # # # # Language: Perl 5.8 # # # # Version: 1.0.0 # # # # Release Date: June 10, 2003 # # # # Description: This subroutine displays the results returned by the th +reads. # # # # Updates: # # Date Name Description of update # # # ###################################################################### +########## sub displayResults { my $reportNo; my $title; my $author; my $publisher; my $source; my $keywords = param('keywords'); @searchResults = rank(@searchResults); print header, start_html( -title => 'Distributed Search Results', -background => 'images/happyface.gif' ), h1("Distributed Search Results"), h3("Search term(s): $keywords" +); foreach (@searchResults) { next unless ($_); ( $reportNo, $title, $author, $publisher, $source ) = split (/ +\t/); # Display print "Report No: ", $reportNo, "<br>" if ($reportNo); print "Title: ", $title, "<br>" if ($title); print "Author: ", $author, "<br>" if ($author); print "Publisher: ", $publisher, "<br>" if ($publisher); print "Source: ", $source, "<br><br>"; } print hr, a( { -href => './search.pl' }, "Return to search page." +), end_html; } ###################################################################### +########## # # # Subroutine: citeSeer # # # # Programmer: Doug Bales # # # # Language: Perl 5.8 # # # # Version: 1.0.0 # # # # Release Date: June 10, 2003 # # # # Description: This subroutine uses the individual site module to form +ulate a # # site specific search request, connect to and search the site, # # collect and return data. The subroutine placed the returned # # data in the shared array @searchResults. # # # # Updates: # # Date Name Description of update # # # ###################################################################### +########## sub citeSeer { use CiteSeer; CiteSeer::search( $maxHits, %searchTerms ); my @results = CiteSeer::getResults(); lock(@searchResults); push ( @searchResults, @results ); } ###################################################################### +########## # # # Subroutine: spiresHEP # # # # Programmer: Doug Bales # # # # Language: Perl 5.8 # # # # Version: 1.0.0 # # # # Release Date: June 10, 2003 # # # # Description: This subroutine uses the individual site module to form +ulate a # # site specific search request, connect to and search the site, # # collect and return data. The subroutine placed the returned # # data in the shared array @searchResults. # # # # Updates: # # Date Name Description of update # # # ###################################################################### +########## sub spiresHEP { use SpiresHEP; SpiresHEP::search( $maxHits, %searchTerms ); my @results = SpiresHEP::getResults(); lock(@searchResults); push ( @searchResults, @results ); } ###################################################################### +########## # # # Subroutine: sfb288Preprints # # # # Programmer: Doug Bales # # # # Language: Perl 5.8 # # # # Version: 1.0.0 # # # # Release Date: June 10, 2003 # # # # Description: This subroutine uses the individual site module to form +ulate a # # site specific search request, connect to and search the site, # # collect and return data. The subroutine placed the returned # # data in the shared array @searchResults. # # # # Updates: # # Date Name Description of update # # # ###################################################################### +########## sub sfb288Preprints { use SFB288Preprints; SFB288Preprints::search( $maxHits, %searchTerms ); my @results = SFB288Preprints::getResults(); lock(@searchResults); push ( @searchResults, @results ); } ###################################################################### +########## # # # Subroutine: chalmers # # # # Programmer: Doug Bales # # # # Language: Perl 5.8 # # # # Version: 1.0.0 # # # # Release Date: June 10, 2003 # # # # Description: This subroutine uses the individual site module to form +ulate a # # site specific search request, connect to and search the site, # # collect and return data. The subroutine placed the returned # # data in the shared array @searchResults. # # # # Updates: # # Date Name Description of update # # # ###################################################################### +########## sub chalmers { use Chalmers; Chalmers::search( $maxHits, %searchTerms ); my @results = Chalmers::getResults(); lock(@searchResults); push ( @searchResults, @results ); } ###################################################################### +########## # # # Subroutine: cleo # # # # Programmer: Doug Bales # # # # Language: Perl 5.8 # # # # Version: 1.0.0 # # # # Release Date: June 10, 2003 # # # # Description: This subroutine uses the individual site module to form +ulate a # # site specific search request, connect to and search the site, # # collect and return data. The subroutine placed the returned # # data in the shared array @searchResults. # # # # Updates: # # Date Name Description of update # # # ###################################################################### +########## sub cleo { use CLEO; CLEO::search( $maxHits, %searchTerms ); my @results = CLEO::getResults(); lock(@searchResults); push ( @searchResults, @results ); } ###################################################################### +########## # # # Subroutine: efda # # # # Programmer: Doug Bales # # # # Language: Perl 5.8 # # # # Version: 1.0.0 # # # # Release Date: August 4, 2003 # # # # Description: This subroutine uses the individual site module to form +ulate a # # site specific search request, connect to and search the site, # # collect and return data. The subroutine placed the returned # # data in the shared array @searchResults. # # # # Updates: # # Date Name Description of update # # # ###################################################################### +########## sub efda { use EFDA; EFDA::search( $maxHits, %searchTerms ); my @results = EFDA::getResults(); lock(@searchResults); push ( @searchResults, @results ); } ###################################################################### +########## # # # Subroutine: umist_AstroPhysics # # # # Programmer: Doug Bales # # # # Language: Perl 5.8 # # # # Version: 1.0.0 # # # # Release Date: August 6, 2003 # # # # Description: This subroutine uses the individual site module to form +ulate a # # site specific search request, connect to and search the site, # # collect and return data. The subroutine placed the returned # # data in the shared array @searchResults. # # # # Updates: # # Date Name Description of update # # # ###################################################################### +########## sub umist_AstroPhysics { use UMIST_AstroPhysics; UMIST_AstroPhysics::search( $maxHits, %searchTerms ); my @results = UMIST_AstroPhysics::getResults(); lock(@searchResults); push ( @searchResults, @results ); } ###################################################################### +########## # # # Subroutine: umist_AtmosphericPhysics # # # # Programmer: Doug Bales # # # # Language: Perl 5.8 # # # # Version: 1.0.0 # # # # Release Date: August 6, 2003 # # # # Description: This subroutine uses the individual site module to form +ulate a # # site specific search request, connect to and search the site, # # collect and return data. The subroutine placed the returned # # data in the shared array @searchResults. # # # # Updates: # # Date Name Description of update # # # ###################################################################### +########## sub umist_AtmosphericPhysics { use UMIST_AtmosphericPhysics; UMIST_AtmosphericPhysics::search( $maxHits, %searchTerms ); my @results = UMIST_AtmosphericPhysics::getResults(); lock(@searchResults); push ( @searchResults, @results ); } ###################################################################### +########## # # # Subroutine: umist_CondensedMatter # # # # Programmer: Doug Bales # # # # Language: Perl 5.8 # # # # Version: 1.0.0 # # # # Release Date: August 6, 2003 # # # # Description: This subroutine uses the individual site module to form +ulate a # # site specific search request, connect to and search the site, # # collect and return data. The subroutine placed the returned # # data in the shared array @searchResults. # # # # Updates: # # Date Name Description of update # # # ###################################################################### +########## sub umist_CondensedMatter { use UMIST_CondensedMatter; UMIST_CondensedMatter::search( $maxHits, %searchTerms ); my @results = UMIST_CondensedMatter::getResults(); lock(@searchResults); push ( @searchResults, @results ); } ###################################################################### +########## # # # Subroutine: umist_PlasmaPhysics # # # # Programmer: Doug Bales # # # # Language: Perl 5.8 # # # # Version: 1.0.0 # # # # Release Date: August 6, 2003 # # # # Description: This subroutine uses the individual site module to form +ulate a # # site specific search request, connect to and search the site, # # collect and return data. The subroutine placed the returned # # data in the shared array @searchResults. # # # # Updates: # # Date Name Description of update # # # ###################################################################### +########## sub umist_PlasmaPhysics { use UMIST_PlasmaPhysics; UMIST_PlasmaPhysics::search( $maxHits, %searchTerms ); my @results = UMIST_PlasmaPhysics::getResults(); lock(@searchResults); push ( @searchResults, @results ); } ###################################################################### +########## # # # Subroutine: umist_TheoreticalPhysics # # # # Programmer: Doug Bales # # # # Language: Perl 5.8 # # # # Version: 1.0.0 # # # # Release Date: August 6, 2003 # # # # Description: This subroutine uses the individual site module to form +ulate a # # site specific search request, connect to and search the site, # # collect and return data. The subroutine placed the returned # # data in the shared array @searchResults. # # # # Updates: # # Date Name Description of update # # # ###################################################################### +########## sub umist_TheoreticalPhysics { use UMIST_TheoreticalPhysics; UMIST_TheoreticalPhysics::search( $maxHits, %searchTerms ); my @results = UMIST_TheoreticalPhysics::getResults(); lock(@searchResults); push ( @searchResults, @results ); } ###################################################################### +########## # # # Subroutine: advancedKnowledgeTechnologies # # # # Programmer: Doug Bales # # # # Language: Perl 5.8 # # # # Version: 1.0.0 # # # # Release Date: August 15, 2003 # # # # Description: This subroutine uses the individual site module to form +ulate a # # site specific search request, connect to and search the site, # # collect and return data. The subroutine placed the returned # # data in the shared array @searchResults. # # # # Updates: # # Date Name Description of update # # # ###################################################################### +########## sub advancedKnowledgeTechnologies { use AdvancedKnowledgeTechnologies; AdvancedKnowledgeTechnologies::search( $maxHits, %searchTerms ); my @results = AdvancedKnowledgeTechnologies::getResults(); lock(@searchResults); push ( @searchResults, @results ); } ###################################################################### +########## # # # Subroutine: australianNationalUniversity # # # # Programmer: Doug Bales # # # # Language: Perl 5.8 # # # # Version: 1.0.0 # # # # Release Date: August 15, 2003 # # # # Description: This subroutine uses the individual site module to form +ulate a # # site specific search request, connect to and search the site, # # collect and return data. The subroutine placed the returned # # data in the shared array @searchResults. # # # # Updates: # # Date Name Description of update # # # ###################################################################### +########## sub australianNationalUniversity { use AustralianNationalUniversity; AustralianNationalUniversity::search( $maxHits, %searchTerms ); my @results = AustralianNationalUniversity::getResults(); lock(@searchResults); push ( @searchResults, @results ); } ###################################################################### +########## # # # Subroutine: rank # # # # Programmer: Doug Bales # # # # Language: Perl 5.8 # # # # Version: 1.0.0 # # # # Release Date: June 12, 2003 # # # # Description: This subroutine takes an array containing tab delimited + data # # and performs a reverse sort on that data. An array containing # # the sorted data is returned. # # # # Updates: # # Date Name Description of update # # # ###################################################################### +########## sub rank { my (@origArray) = @_; my @keywords = param('keywords'); my $keyword = ""; my @tempArray = undef; my @newArray = undef; my %hash = undef; my $temp = ""; my $score = 0; my $count = 0; foreach my $data (@origArray) { # Increment count $count++; # Reset score value to zero $score = 0; # Loop for each keyword foreach $keyword (@keywords) { # Score value for exact word matches $score += wordCount( $keyword, $data ) * .1; # Score value for plural word matches $temp = $keyword . "s"; $score += wordCount( $temp, $data ) * .01; $temp = $keyword . "es"; $score += wordCount( $temp, $data ) * .01; $temp = $keyword . "ies"; $score += wordCount( $temp, $data ) * .01; # Scored value for past or action matches $temp = $keyword . "ed"; $score += wordCount( $temp, $data ) * .001; $temp = $keyword . "ing"; $score += wordCount( $temp, $data ) * .001; } # Score value for placement in returned list $score *= ( 1 - ( $count * .0001 ) ); # Populate temp array with score and original information $data = $score . "\t" . $data; push ( @tempArray, $data ); } # Sort temp array @tempArray = reverse( sort (@tempArray) ); # Remove scores from sorted data foreach (@tempArray) { m/\A.*?\t(.*)/; push ( @newArray, $1 ) unless ( $1 =~ /\A\s*?\Z/ ); } return (@newArray); } sub wordCount { my ( $word, $data ) = @_; my $num = 0; while ( $data =~ m/\b$word\b/ig ) { $num++; } return ($num); }
update (broquaint): added <readmore> + <code> tags and ran through perltidy
|
|---|
| Replies are listed 'Best First'. | |
|---|---|
|
Re: Question about threads
by liz (Monsignor) on Aug 26, 2003 at 13:46 UTC | |
|
Re: Question about threads
by jeffa (Bishop) on Aug 26, 2003 at 14:17 UTC | |
|
Re: Question about threads
by benn (Vicar) on Aug 26, 2003 at 14:33 UTC |