#!perl
use strict;
use HTML::TreeBuilder 2.97;
use LWP::UserAgent;

# get_headlines($url)
#
# Fetches the page at $url, parses it as HTML, and collects every <a>
# element whose "class" attribute matches /title/.
#
# Returns a list of two-element array refs: [ href, link text ].
# Returns an empty list (after emitting a warning) if the HTTP request
# does not succeed.  Dies with "What URL?" if no (true) URL is passed.
# Also warns when fewer than 6 links were found, since that suggests the
# page layout no longer matches what this scraper expects.
sub get_headlines {
  my ($url) = @_;
  $url or die "What URL?";

  my $response = LWP::UserAgent->new->request(
    HTTP::Request->new( GET => $url )
  );
  unless ($response->is_success) {
    warn "Couldn't get $url: ", $response->status_line, "\n";
    return;
  }

  # Hand the parser decoded characters rather than raw bytes, so that
  # non-ASCII headline text is not encoded a second time when it is later
  # printed through an encoding layer.  decoded_content returns undef when
  # decoding fails; fall back to the raw body in that case.
  my $html = $response->decoded_content;
  $html = $response->content unless defined $html;

  my $tree = HTML::TreeBuilder->new();
  $tree->parse($html);
  $tree->eof;

  my @out;
  foreach my $link (
    $tree->look_down(
      '_tag', 'a',
      sub {
        # Many anchors carry no class attribute at all; treat that as
        # "no match" instead of applying the regex to undef.
        my $class = $_[0]->attr('class');
        return( (defined $class && $class =~ /title/) ? 1 : 0 );
        # Earlier heuristic, kept for reference: a link whose only child
        # is a <b> element.
#        my @c = $_[0]->content_list;
#        @c == 1 and ref $c[0] and $c[0]->tag eq 'b';
      }
    )
  ) {
    push @out, [ $link->attr('href'), $link->as_text ];
  }

  warn "Odd, fewer than 6 stories in $url!" if @out < 6;
  $tree->delete;    # release the parse tree explicitly
  return @out;
}

# Write a per-section headline summary to yahoo.txt: for each section, a
# "<section>: <n> stories" line, then one indented line per headline text,
# then a blank line.
my $out_file = 'yahoo.txt';
open my $out, '>:encoding(UTF-8)', $out_file
  or die "Couldn't open $out_file for writing: $!";
foreach my $section (qw[tech science health world entertainment]) {
  my @links = get_headlines(
    "https://uk.news.yahoo.com/$section/"
  );
  print {$out}
    $section, ": ", scalar(@links), " stories\n",
    map(("  ", $_->[1], "\n"), @links), "\n"
    or die "Couldn't write to $out_file: $!";
}
# Buffered write errors only surface at close time, so check it.
close $out or die "Couldn't close $out_file: $!";