#!perl
use strict;
use warnings;
use HTML::TreeBuilder 2.97;
use LWP::UserAgent;

# get_headlines($url)
#
# Fetches $url with an HTTP GET, parses the response body as HTML, and
# collects every <a> element whose "class" attribute matches /title/.
#
# Returns: a list of array refs, one per matching link, each holding
#          [ href attribute, link text ].
#          An empty list (after a warning) if the request did not succeed.
# Dies:    if no (or a false) URL is passed.
# Warns:   if fewer than 6 matching links were found.
sub get_headlines {
    my $url = $_[0] || die "What URL?";

    my $response = LWP::UserAgent->new->request(
        HTTP::Request->new( GET => $url )
    );
    unless ( $response->is_success ) {
        warn "Couldn't get $url: ", $response->status_line, "\n";
        return;
    }

    # Parse decoded characters rather than raw bytes: the caller prints the
    # link text through a ':utf8' layer, so undecoded bytes would come out
    # double-encoded.  decoded_content returns undef when it cannot decode
    # the body, in which case we fall back to the raw content.
    my $html = $response->decoded_content;
    $html = $response->content unless defined $html;

    my $tree = HTML::TreeBuilder->new();
    $tree->parse($html);
    $tree->eof;

    my @out;
    foreach my $link (
        $tree->look_down(
            '_tag', 'a',
            sub {
                # Many anchors carry no class attribute at all; guard
                # against matching on undef.
                my $class = $_[0]->attr('class');
                return ( defined $class && $class =~ /title/ ) ? 1 : 0;

                # Alternative criterion, left disabled as in the original:
                # my @c = $_[0]->content_list;
                # @c == 1 and ref $c[0] and $c[0]->tag eq 'b';
            }
        )
      )
    {
        push @out, [ $link->attr('href'), $link->as_text, ];
    }

    warn "Odd, fewer than 6 stories in $url!" if @out < 6;

    # Release the parse tree explicitly before returning.
    $tree->delete;
    return @out;
}

# Driver: for each news section, fetch its headlines and append a short
# report (section name, story count, one line per headline) to yahoo.txt.
my $out_file = 'yahoo.txt';
open my $out, '>:utf8', $out_file or die "$!";

foreach my $section (qw[tech science health world entertainment]) {
    my @links = get_headlines("https://uk.news.yahoo.com/$section/");
    print {$out} $section, ": ", scalar(@links), " stories\n",
        map( ( " ", $_->[1], "\n" ), @links ), "\n";
}

# Buffered write errors only surface at close, so check it.
close $out or die "Couldn't close $out_file: $!";