#! /usr/bin/perl

# Scrapes per-channel TV listings from www.tvguide.co.uk and prints each
# programme's time, title, description, category and rating to STDOUT.
#
# Flow: fetch the index page, collect every "channellisting.asp?ch=N" link
# whose title attribute yields a channel name and a Freeview number, then
# fetch each channel page and feed it through an HTML::Parser whose
# handlers (start/end/text) drive a small state machine.

use strict;
use warnings;

use HTML::Parser;
use WWW::Mechanize;

# Parser state shared by the start/end/text handlers.
#   $status : 0 = waiting for the date heading,
#             1 = waiting for a programme time,
#             2 = inside a programme entry.
#   $status2: position inside a programme entry.  Even values wait for a
#             start tag carrying the class listed in %class_for_state; odd
#             values mean "inside that element" (text is collected for
#             1 = title, 3 = description, 7 = category); 10 waits for the
#             numeric rating that closes the entry.
my ($status, $status2) = (0, 0);
my ($channel, $channel_number, $date, $time, $title, $description, $category, $rating);

# Handle used by database().  Nothing in this file connects it, so it must
# be assigned before database() is called.
# NOTE(review): the resultset/find_or_create calls look like DBIx::Class --
# confirm and wire up the connection here.
my $schema;

# CSS class the start handler expects for each even value of $status2.
my %class_for_state = (
    0 => 'programmeheading',
    2 => 'programmetext',
    4 => 'tvchannel',
    6 => 'programmetext',
    8 => 'programmetext',
);

my $parse = HTML::Parser->new(
    api_version => 3,
    text_h      => [ \&text,  "dtext" ],
    start_h     => [ \&start, "tagname, attr" ],
    end_h       => [ \&end,   "tagname" ],
);

#$parse->parse_file("guide.html");

my $mech = WWW::Mechanize->new();
$mech->agent_alias('Linux Mozilla');
$mech->get('http://www.tvguide.co.uk/?systemid=3');

my @channels = $mech->find_all_links(url_regex => qr/channellisting\.asp\?ch=/i);

foreach my $link_obj (@channels) {
    my $attrs      = $link_obj->attrs || {};
    my $link_title = $attrs->{'title'};
    next unless defined $link_obj->url and defined $link_title;

    # The pattern expects a line break directly after "[Channel Numbers";
    # a failed match leaves both variables undefined.
    ($channel, $channel_number) = $link_title =~
        m/\[Click for (.*?) full day TV listings\].*?\[Channel Numbers\n.*?Freeview:(\d+)/;
    my ($channel_id) = $link_obj->url =~ m/channellisting\.asp\?ch=(\d+)/i;
    next unless defined $channel and defined $channel_number and defined $channel_id;

    # Pause between requests so the site is not hammered.
    sleep 5;

    # No backslash before '?': inside single quotes it would be kept
    # literally and end up in the requested URL.
    my $url = 'http://www.tvguide.co.uk/channellisting.asp?ch=' . $channel_id;
    $mech->get($url);

    print $channel . ' ' . $channel_id . "\n";
    print $link_obj->url . "\n";

    # Each channel page is an independent document: start from a clean
    # state, and signal end-of-document so buffered text is flushed and
    # the parser can be reused for the next page.
    reset_parser_state();
    $parse->parse($mech->content);
    $parse->eof;

    print "\n\n\n";
}

exit;

# Puts the state machine and every collected field back to "nothing seen".
sub reset_parser_state {
    ($status, $status2) = (0, 0);
    undef $date;
    undef $time;
    undef $title;
    undef $description;
    undef $category;
    undef $rating;
    return;
}

# Appends $text to the scalar referenced by $field_ref, treating an
# undefined field as empty.
sub append_text {
    my ($field_ref, $text) = @_;
    ${$field_ref} = '' unless defined ${$field_ref};
    ${$field_ref} .= $text;
    return;
}

# HTML::Parser start-tag handler.
#   $tag  - tag name (unused)
#   $attr - hash ref of the tag's attributes
# While inside a programme entry, advances $status2 from an even state to
# the following odd one when the tag's class is the one expected next.
sub start {
    my ($tag, $attr) = @_;

    return if !$attr->{'class'} or $status != 2;
    return unless exists $class_for_state{$status2}
        and $attr->{'class'} eq $class_for_state{$status2};

    $status2++;
    return;
}

# HTML::Parser end-tag handler.
#   $tag - tag name
# A closing </span> inside a programme entry ends the element currently
# being collected: odd states 1,3,5,7,9 advance to the next even state.
sub end {
    my ($tag) = @_;

    return unless $tag eq 'span' and $status == 2;

    $status2++ if $status2 % 2 and $status2 <= 9;
    return;
}

# HTML::Parser text handler (receives decoded text).
# Routes each text chunk to the field selected by the current state and,
# once the numeric rating arrives, prints the finished programme and
# rewinds the state machine to wait for the next programme time.
sub text {
    my ($text, $atr) = @_;

    return if !defined $text;

    # Only the first whitespace run is collapsed, so whitespace-only
    # chunks become a single space and are skipped.
    $text =~ s/\s+/ /;
    return if $text eq ' ';

    print $text . "\n" if !$status and $text =~ /HTTP\/1\.\d \d+ /;

    if (!$status and $text =~ /^\w+, \w+ \d\d, \d{4}$/) {
        append_text(\$date, $text);
        print $text . "\n\n\n";
        ++$status;
    }
    elsif ($status == 1 and $text =~ /\d{1,2}:\d\d\w\w/) {
        append_text(\$time, $text);
        ++$status;
    }
    elsif ($status == 2 and $status2 == 1) {
        append_text(\$title, $text);
    }
    elsif ($status == 2 and $status2 == 3) {
        append_text(\$description, $text);
    }
    elsif ($status == 2 and $status2 == 7) {
        append_text(\$category, $text);
        #print $text."\n";
    }
    elsif ($status == 2 and $status2 == 9 and $text eq 'Rating') {
        # The "Rating" label itself carries no data.
        #print $text."\n";
    }
    elsif ($status == 2 and $status2 == 10 and $text =~ /^\d+/) {
        append_text(\$rating, $text);

        # An element with no text leaves its field undefined; substitute
        # a placeholder so the report below never prints undef.
        for my $field_ref (\$title, \$description, \$category) {
            ${$field_ref} = 'unknown' if !defined ${$field_ref};
        }

        print $time . "\n" . $title . "\n" . $description . "\n"
            . $category . "\n" . $rating . "\n\n";

        undef $time;
        undef $title;
        undef $description;
        undef $category;
        undef $rating;
        ($status, $status2) = (1, 0);
    }

    return;
}

# Stores one programme through $schema unless a programme with the same
# name, date and time already exists.
#   $channel, $date, $time, $title, $description, $category, $rating
#       - the programme's fields; the channel number is read from the
#         file-level $channel_number.
# Returns 1 when a row was created, 0 when the programme already existed.
# Dies if $schema has not been connected.
sub database {
    my ($channel, $date, $time, $title, $description, $category, $rating) = @_;

    die "database(): \$schema has not been connected\n" unless defined $schema;

    my $program_exists = $schema->resultset('Programmes')->count({
        'name' => $title,
        'date' => $date,
        'time' => $time,
    });
    return 0 if $program_exists;

    my $category_id = $schema->resultset('Categories')->find_or_create({
        name => $category,
    })->id;

    my $channel_id = $schema->resultset('Channels')->find_or_create({
        name => $channel,
        chan => $channel_number,
    })->id;

    print $title . ' ' . $time . " being added!\n";

    my $new = $schema->resultset('Programmes')->create({
        name        => $title,
        description => $description,
        date        => $date,
        category_id => $category_id,
        channel_id  => $channel_id,
        rating      => $rating,
    });

    # The time column is set through its accessor after creation, as in
    # the original code.  NOTE(review): presumably a workaround -- confirm
    # whether it can simply be passed to create().
    $new->time($time);
    $new->update;

    return 1;
}

#CREATE TABLE categories (
#id INTEGER PRIMARY KEY,
#name TEXT,
#colour TEXT
#);
#CREATE TABLE channels (
#id INTEGER PRIMARY KEY,
#name TEXT,
#chan INTEGER,
#logo TEXT
#);
#CREATE TABLE programmes (
#id INTEGER PRIMARY KEY,
#name TEXT,
#description TEXT,
#date TEXT,
#time TEXT,
#rating INTEGER,
#channel_id INTEGER,
#category_id INTEGER
#);