Many thanks, poj. That got me across the line. The final version of my program appears below.
#!/usr/local/bin/perl
use strict;
use warnings;
use lib "$ENV{HOME}/mylib/lib/perl5";
use HTML::TreeBuilder;
use LWP::Simple;

# Program Name: top_speakers.pl
# Author: XXXXX
# Purpose: Parses the page
#   http://perlcourse.ecorp.net/conf-mirror/conferences.oreillynet.com/speakers.html
# and finds the speakers who had the most sessions and/or tutorials.
# Original code only found sessions or tutorials; adjusted code finds
# Sessions, Tutorials, BOFs & Panels to match the expected output per the
# project specification.

# Debugging switch - set to a positive integer to enable trace output.
my $DEBUG_FLAG = 0;

# URL of the speakers page we will parse.
my $URL = 'http://perlcourse.ecorp.net/conf-mirror/conferences.oreillynet.com/speakers.html';

# How many speakers to show in the final report.
my $TOP_COUNT = 3;

# Fetch the page. LWP::Simple::get returns undef on failure, so stop here
# rather than feeding undef to the parser.
my $html = get($URL);
die "Unable to fetch $URL\n" unless defined $html;

# Build the tree. parse_content() parses the whole string and signals
# end-of-document, which a bare parse() call does not do on its own.
my $tree = HTML::TreeBuilder->new;
$tree->parse_content($html);

# Speaker name => number of Sessions/Tutorials/BOFs/Panels found for them.
my %speakers;

# The speaker whose entry we are currently inside; maintained by find_speakers.
my $current_speaker;

# Visit every <a> element and let find_speakers update %speakers.
# find_speakers relies on seeing a speaker link before that speaker's
# session links, i.e. on look_down returning the links in document order.
for my $anchor ( $tree->look_down( _tag => 'a' ) ) {
    find_speakers($anchor);
}

# If in debug mode, print every speaker and their total.
if ($DEBUG_FLAG) {
    for my $name ( sort keys %speakers ) {
        print "$name = ($speakers{$name})\n";
    }
}

# Print the top speakers, highest count first, stopping after $TOP_COUNT.
# Technically, speakers with the same number of engagements should be
# weighted equally (equal third etc.), but this was not in the project
# requirements; ties are simply ordered by name so the output is repeatable.
my $counter = 0;
for my $key ( sort hashValueDescending ( keys %speakers ) ) {
    print "$key\t($speakers{$key})\n";
    $counter++;
    last if $counter == $TOP_COUNT;
}

# Delete the tree object to free up the memory (best practice).
$tree->delete;

# find_speakers - classify one <a> element and update the running totals.
#
# Parameters:
#   $element - the HTML::Element for an <a> tag.
#
# Side effects:
#   * A link whose parent is a <span> is treated as a speaker name: the name
#     is added to %speakers (count 0 if not already present) and becomes
#     $current_speaker.
#   * A link whose parent is a <b> whose text contains Session, Tutorial,
#     BOF or Panel increments the count of $current_speaker.
#
# Returns: nothing.
sub find_speakers {
    my ($element) = @_;

    my $parent = $element->parent;
    return unless defined $parent;    # a detached node cannot be classified

    my $text = $element->as_text;

    # A 'span' parent consistently delineates the speakers in this document.
    if ( $parent->tag eq 'span' ) {
        print "Speaker = $text\n" if $DEBUG_FLAG;

        # Only initialise on first sight, so a speaker link that appears
        # more than once does not wipe out an existing tally.
        $speakers{$text} = 0 unless exists $speakers{$text};

        $current_speaker = $text;
    }

    # A bold parent mentioning one of the engagement types marks a talk.
    elsif ( $parent->tag eq 'b'
        && $parent->as_text =~ /(Session|Tutorial|BOF|Panel)/ )
    {
        my $kind = $1;    # copy the capture before anything can clobber it
        print "$kind = $text\n" if $DEBUG_FLAG;

        # An engagement link seen before any speaker has nobody to credit.
        return unless defined $current_speaker;

        $speakers{$current_speaker}++;
    }

    return;
}

# hashValueDescending - sort comparator over keys of %speakers.
# Orders by count from highest down to lowest; equal counts fall back to
# ascending name order so the result does not depend on hash ordering.
sub hashValueDescending {
    return $speakers{$b} <=> $speakers{$a}
        || $a cmp $b;
}
In reply to Re^2: Parse HTML page for links and count by author
by Anonymous Monk
in thread Parse HTML page for links and count by author
by Anonymous Monk
| For: | Use: | ||
| & | &amp; | ||
| < | &lt; | ||
| > | &gt; | ||
| [ | &#91; | ||
| ] | &#93; |