use strict;
use warnings;
use HTML::TreeBuilder;
# Words and short phrases that count as "good words".  Entries are lower-case
# and may contain spaces, apostrophes or a trailing hyphen, so they are
# written as quoted strings rather than with qw().
my @goodWordsList = (
    "mhm", "right", "well", "yeah", "sure", "good", "ah",
    "okay", "yep", "hm", "definitely", "alright", "'m'm", "oh",
    "my", "god", "wow", "uhuh", "exactly", "yup", "mkay",
    "i see", "ooh", "cool", "uh", "fine", "true", "hm'm",
    "hmm", "yes", "absolutely", "great", "um", "so", "mm",
    "weird", "ye-", "i mean", "i know", "i think so", "huh", "yay",
    "maybe", "eh", "obviously", "correct", "awesome", "really", "interesting",
);

# Membership lookup: every list entry maps to 1, so a phrase can be tested
# with a simple $goodwords{$phrase}.
my %goodwords = map { $_ => 1 } @goodWordsList;
# Build the HTML tree from this script's DATA filehandle (the data section
# itself is not visible in this part of the file).
my $root = HTML::TreeBuilder->new();
$root->parse_file(*DATA);

# Speaker attributes keyed by speaker name.  For each speaker:
#   info    => the raw text that follows the <strong> name element
#   <key>   => <value> for every "key: value" item found in that text
# NOTE(review): an item whose key is literally "info" overwrites the raw
# text entry -- confirm the input never contains one.
my %speakers;

# Parse out speaker attributes
for my $strong ($root->look_down(_tag => 'strong')) {
    my $name = $strong->as_text();

    # In scalar context right() returns only the immediate right sibling:
    # a plain string for a text node, an element object, or undef when the
    # <strong> is the last child of its parent.
    my $info = $strong->right();

    # An element sibling would otherwise be stringified as an object
    # address; use its text content instead.
    $info = $info->as_text() if ref $info;

    $speakers{$name}{info} = $info;

    # No sibling at all: keep the speaker entry but there is nothing to split.
    next unless defined $info;

    # Items are separated by ';'; the '$' alternative strips trailing
    # whitespace from the final item.
    for my $param (split /\s*(?:;\s*|$)/, $info) {
        # Optional leading ':' (left over from "Name: ..."), then "key: value".
        my ($key, $value) = $param =~ /^:?\s*([^:]*):\s*(.*)/;

        # Fragments without a "key:" part do not match; skip them rather
        # than storing an entry under an undefined key.
        next unless defined $key;

        $speakers{$name}{$key} = $value;
    }
}
# Container for the analysis results; the visible code does not fill it yet.
my %stats;

# Do the analysis: visit each <p> element found in the parsed tree.
for my $paragraph ($root->look_down(_tag => 'p')) {
    my $text = $paragraph->as_text();

    # Speaker label: the earliest run of word characters that is directly
    # followed by a colon.  $name is undef when the paragraph has none.
    my ($name) = $text =~ /(\w+):/;

    # Perform analysis on paragraph here
}