#!/usr/local/bin/perl
use strict;
use warnings;
use lib "$ENV{HOME}/mylib/lib/perl5";
use HTML::TreeBuilder;
use LWP::Simple;
# Program Name: top_speakers.pl
# Author: XXXXX
# Purpose: Parses the page
#   http://perlcourse.ecorp.net/conf-mirror/conferences.oreillynet.com/speakers.html
# and finds the speakers who had the most sessions and/or tutorials.
# The original code only found sessions or tutorials; the code was adjusted
# to find Sessions, Tutorials, BOFs & Panels to match the expected output
# per the project specification.
# Debugging switch - set to a positive integer to enable diagnostic output.
my $DEBUG_FLAG = 0;

# URL of the speakers page to parse. Kept on a single line: a line break
# inside the quotes would become part of the URL.
my $URL = 'http://perlcourse.ecorp.net/conf-mirror/conferences.oreillynet.com/speakers.html';

# Fetch the page. LWP::Simple::get returns undef when the request fails,
# so stop with a clear message rather than parsing nothing.
my $html = get($URL);
die "Unable to fetch $URL\n" unless defined $html;

# Build the tree with HTML::TreeBuilder. eof() must follow parse() to
# signal that the whole document has been supplied.
my $tree = HTML::TreeBuilder->new;
$tree->parse($html);
$tree->eof;

# Hash of speaker name => number of engagements found for that speaker.
my %speakers;

# Speaker most recently seen by find_speakers; engagement links that follow
# are credited to this name.
my $current_speaker;

# Visit every <a> element. The counting happens as a side effect of the
# find_speakers callback. The call is made in list context on purpose:
# in scalar context look_down stops at the first match.
my @nodes = $tree->look_down( _tag => "a", \&find_speakers );

# In debug mode, print every speaker with their total number of
# Sessions, Tutorials, BOFs and Panels.
if ($DEBUG_FLAG) {
    foreach ( sort keys %speakers ) {
        print "$_ = ($speakers{$_})\n";
    }
}

# Print the speakers in descending order of engagements (highest first)
# and stop once three have been displayed.
# Technically, speakers with the same number of engagements should be
# weighted equally (equal third etc.), but this was not in the project
# requirements.
my $counter = 0;
foreach my $key ( sort hashValueDescending keys %speakers ) {
    print "$key\t($speakers{$key})\n";
    $counter++;
    last if $counter == 3;
}

# Delete the tree object to free up the memory (best practice).
$tree->delete;
# find_speakers subroutine - finds speakers, adds their name to the
# %speakers hash, then looks for Sessions, Tutorials, BOFs or Panels that
# the speaker is presenting and adds those to the total for each speaker
sub find_speakers {
    # Callback passed to look_down; called with one <a> element at a time.
    #
    # Parameters:
    #   $element - the element currently being examined.
    # Side effects:
    #   Updates the file-level %speakers hash and $current_speaker.
    # Returns:
    #   1 when the link was recognised as a speaker or as an engagement
    #   credited to a speaker; otherwise nothing (false).
    my ($element) = @_;

    my $parent = $element->parent;
    return unless defined $parent;    # a detached element has no context

    my $text = $element->as_text;

    # A link whose parent is a 'span' marks a speaker; this was consistent
    # throughout the document.
    if ( $parent->tag eq 'span' ) {
        print "Speaker = $text\n" if $DEBUG_FLAG;

        # Start the speaker at zero only on first sight, so a speaker link
        # that appears more than once does not wipe out an existing count.
        $speakers{$text} = 0 unless exists $speakers{$text};
        $current_speaker = $text;
        return 1;
    }

    # A link inside a bold element whose text mentions one of our criteria
    # (Session, Tutorial, BOF or Panel) is an engagement.
    if ( $parent->tag eq 'b'
        && $parent->as_text =~ /(Session|Tutorial|BOF|Panel)/ )
    {
        print "$1 = $text\n" if $DEBUG_FLAG;

        # An engagement seen before any speaker cannot be attributed;
        # skip it rather than counting it under an undefined key.
        return unless defined $current_speaker;

        $speakers{$current_speaker}++;
        return 1;
    }

    return;
}
# hashValueDescending subroutine - sort comparator that orders the keys of
# %speakers in descending numerical order of their counts, from highest
# down to lowest
sub hashValueDescending {
    # Sort comparator for use as: sort hashValueDescending LIST
    # Compares the %speakers keys held in sort's $a and $b.
    #
    # Returns a negative, zero or positive number: higher counts sort
    # first, and equal counts are ordered by name (ascending) so the
    # result is the same on every run instead of following hash order.
    return $speakers{$b} <=> $speakers{$a}
        || $a cmp $b;
}
# End of top_speakers.pl