#!/usr/bin/perl -w
use strict;
use Tk;
require Tk::Pane;
use Proc::Killfam;
use Tk::ROText;
use LWP::UserAgent;
use LWP::Simple;
use HTML::LinkExtor;
use URI::URL;
use MIME::Base64;
use threads;
use threads::shared;

# A small Tk front end that samples YouTube categories, downloads the
# videos as .flv files in a background thread and plays them in an
# embedded mplayer window.
#
# It will work on Linux (X11) only, and you need an mplayer that can
# play .flv files.
#
# Downloaded files are named by concatenating the page title and the
# video id, so you can tell what they are more easily.
#
# Start the app; if any .flv files are present in the working dir they
# will be listed.  Otherwise press the "Select Category" button and
# select one or more categories to sample.  The downloader then fetches
# the videos in the background.

# The thread code must be set up before any Tk code is used, to avoid
# Tk thread-safety problems.
my $thread_die : shared;    # set to 1 to make the worker thread exit
my $thread_go  : shared;    # 1 while downloading is enabled
my @selected   : shared;    # queue of video page URLs still to fetch
my @ready      : shared;    # files that are available for playing
$thread_die = 0;
$thread_go  = 0;
@selected   = ();
@ready      = ();
my $thread = threads->new( \&work );
###################################################

$|++;

my @links;                  # a global filled by the LinkExtor callback
my $ua            = LWP::UserAgent->new();
my @selected_cats = ();     # category page URLs clicked in the popup
my @ready_lb;               # local non-thread copy of @ready, to keep
                            # Tk from segfaulting on a shared array

$SIG{INT}  = sub { close_it_up() };
$SIG{PIPE} = 'IGNORE';      # writing to a dead mplayer must not kill us

my $mpg_in = shift || '';   # file to play, from the command line

# mplayer is started on init.mpg, which is created if you don't
# supply one
make_init() if !-e 'init.mpg';

# Set up local files to play.  Newly downloaded files are pushed onto
# @ready; if there are no .flv files in the dir, the first completed
# download will play upon arrival.
@ready    = <*.flv>;
@ready_lb = @ready;

my $cur_play;               # file most recently sent to mplayer
my $title;                  # text shown in the title label
my $playing = 0;            # set once the user plays a file from the list;
                            # suppresses auto-play of the first download
my $timer;                  # repeat timer: auto-play first download
my $timer1;                 # repeat timer: refresh the listbox
my $mplayer_fh;             # write end of the pipe to mplayer (slave mode)

my $mw = MainWindow->new( -background => 'black' );
$mw->geometry('530x650+20+30');

# NOTE(review): the event sequences were empty strings in the file as
# received; reconstructed from the on-screen help text
# ("q or esc -> exit") -- confirm.
$mw->Tk::bind( '<Escape>', sub { close_it_up() } );
$mw->Tk::bind( '<q>',      sub { close_it_up() } );

my $cframe1 = $mw->Frame( -background => 'black' )->pack( -fill => 'x' );
my $cframe2 = $mw->Frame( -background => 'black' )->pack( -fill => 'x' );

my $canv = $mw->Scrolled(
    'Canvas',
    -bg                 => 'black',
    -borderwidth        => 0,
    -highlightthickness => 0,
    -relief             => 'sunken',
    -width              => 500,
    -height             => 400,
    -scrollregion       => [ 0, 0, 500, 400 ],
    -scrollbars         => 'osoe',
)->pack();

my $contWidth  = 500;
my $contHeight = 400;

## this Frame is needed for including the window in Tk::Canvas
my $Container = $canv->Frame( -container => 1 );
my $xtid      = $Container->id();

# The id comes back as a hex string; mplayer's -wid is given the
# decimal value.
my $xtId = hex $xtid;

my $dcontitem = $canv->createWindow(
    10, 10,
    -anchor => 'nw',
    -window => $Container,
    -width  => $contWidth,
    -height => $contHeight,
    -state  => 'hidden',
    -tags   => ['viewport'],
);

my $pid;                    # pid returned by the pipe open in start_player
my @options = (
    '-slave', '-loop 0',
    '-zoom',
    "-x $contWidth",
    "-y $contHeight",
    '-really-quiet',
    "-wid $xtId",
);

my $loadu_but = $cframe1->Button(
    -text    => 'Select Category',
    -padx    => 0,
    -command => sub { get_url() },
)->pack( -side => 'left', -padx => 10 );

$cframe1->Button(
    -text    => "Exit",
    -padx    => 0,
    -command => sub { close_it_up() },
)->pack( -side => 'right', -padx => 10 );

$cframe1->Button(
    -text    => "Stop Download",
    -padx    => 0,
    -command => sub {
        $thread_go = 0;
        # stop updating the list; the timer only exists once a
        # download has been started
        $timer1->cancel if $timer1;
    },
)->pack( -side => 'right', -padx => 10 );

my $sound = 1;
$cframe1->Checkbutton(
    -text       => 'Sound',
    -background => 'lightblue',
    -variable   => \$sound,
    -command    => \&set_sound,
)->pack( -fill => 'y' );

$cframe2->Label(
    -text => 'SpaceBar->Pause q or esc -> exit left and right arrow keys navigate',
    -background => 'black',
    -foreground => 'lightgreen',
)->pack( -expand => 1 );

$cframe2->Label(
    -textvariable => \$title,
    -background   => 'black',
    -foreground   => 'lightblue',
)->pack( -expand => 1 );

start_player();
play($mpg_in) if $mpg_in ne '';

$mw->Label(
    -text       => 'Left Click Select...Right Click Play',
    -background => 'black',
    -foreground => 'green',
)->pack( -fill => 'x', -expand => 1 );

my $lb = $mw->Scrolled(
    'Listbox',
    -scrollbars   => 'osow',
    -listvariable => \@ready_lb,
    -background   => 'white',
)->pack( -fill => 'x', -expand => 1 );

# NOTE(review): the event sequence was an empty string in the file as
# received; reconstructed from the "Right Click Play" label -- confirm.
$lb->bind(
    '<Button-3>',
    sub {
        my ($index) = $lb->curselection;
        return unless defined $index;    # nothing selected yet
        my $file = $lb->get($index);
        $playing = 1;    # set flag that we are playing a local file
        play($file);
    }
);

MainLoop();
#########################################################

# get_url()
# Fetches the YouTube category portal, groups the category links by
# category number and shows them in a popup window as clickable
# hyperlinks.  Clicked links are collected in @selected_cats (via
# printme); the popup's 'Done Selecting' button calls start_downloads.
sub get_url {

    my %cats = (
        1  => 'Arts &Animation',
        2  => 'Autos & Vehicles',
        23 => 'Comedy',
        24 => 'Entertainment',
        10 => 'Music',
        25 => 'News & Blogs',
        22 => 'People',
        15 => 'Pets & Animals',
        26 => 'Science & Technology',
        17 => 'Sports',
        19 => 'Travel & Places',
        20 => 'Video Games'
    );

    my $url_top = "http://youtube.com/categories_portal";

    @links = get_links($url_top);

    # keep links like http://youtube.com/categories_portal?c=1&e=1
    my $lead = 'http://youtube.com/categories_portal?c=';
    my $rx   = qr/\Q$lead\E(.*)/;

    # remove dups (compared as plain strings)
    my %seen;
    @links = grep { /$rx/ && !$seen{$_}++ } map { "$_" } @links;

    # sort the links according to category number
    my %result_list = ();
    my $rx1         = qr/\Q$lead\E(\d+).*/;
    foreach my $link (@links) {
        # skip links without a numeric category rather than filing
        # them under a stale capture from a previous match
        next unless $link =~ /$rx1/;
        push @{ $result_list{$1} }, $link;
    }

    my @hyperlinks = ();
    foreach my $key ( sort { $a <=> $b } keys %result_list ) {
        my $name = defined $cats{$key} ? $cats{$key} : "Category $key";
        push @hyperlinks, "$name\n";
        push @hyperlinks, map { "$_\n" } @{ $result_list{$key} };
    }

    # popup
    my $tl = MainWindow->new();
    $tl->title("Select YouTube Categories");

    $tl->fontCreate(
        'big',
        -family => 'arial',
        -weight => 'bold',
        -size   => int( -18 * 18 / 14 )
    );

    my $t = $tl->Scrolled(
        'ROText',
        -width      => 80,
        -height     => 30,
        -background => 'white',
    )->pack();

    my $ebutton = $tl->Button(
        -text    => 'Done Selecting',
        -command => sub {
            $tl->destroy;
            # now start to process links and download
            # videos in the work thread
            start_downloads();
        }
    )->pack();

    # add colors
    $t->tagConfigure( 'tag1', -foreground => 'red', -font => 'big' );

    # Each URL gets its own tag ("tag000", "tag001", ... via string
    # increment) so it can be highlighted and bound individually.
    # NOTE(review): the four event sequences below were empty strings
    # in the file as received; reconstructed from the relief/cursor
    # each handler applies -- confirm.
    my $tag = "tag000";
    foreach my $entry (@hyperlinks) {
        chomp( my $line = $entry );
        foreach my $piece ( split /(http:\S+)/, $line ) {
            if ( $piece =~ /http:\S+/ ) {
                $t->insert( 'end', $piece, $tag );
                $t->tagConfigure( $tag, -foreground => 'blue' );
                $t->tagBind( $tag,
                    '<Any-Enter>' =>
                      [ \&manipulate_link, $tag, 'raised', 'hand2' ] );
                $t->tagBind( $tag,
                    '<Any-Leave>' =>
                      [ \&manipulate_link, $tag, 'flat', 'xterm' ] );
                $t->tagBind( $tag,
                    '<Button-1>' => [ \&manipulate_link, $tag, 'sunken' ] );
                $t->tagBind( $tag,
                    '<ButtonRelease-1>' =>
                      [ \&manipulate_link, $tag, 'raised', undef, \&printme ]
                );
                $tag++;
            }
            else {
                $t->insert( 'end', $piece, 'tag1' );
            }
        }
        $t->insert( 'end', "\n" );
    }
}
##############################################

# get_links($url)
# Downloads $url, collects the attribute values of every <a> tag (see
# callback) into the global @links, makes them absolute against the
# response's base URL and returns the resulting list.
sub get_links {
    my $url_in = shift;

    @links = ();    # a global, filled by callback()

    # Make the parser.  Unfortunately, we don't know the base yet (it
    # might be different from $url_in), so no base is given here and
    # the links are expanded after the response has arrived.
    my $p = HTML::LinkExtor->new( \&callback );

    # Request document and parse it as it arrives
    my $res = $ua->request( HTTP::Request->new( GET => $url_in ),
        sub { $p->parse( $_[0] ) } );

    # Expand all URLs to absolute ones
    my $base = $res->base;
    @links = map { url( $_, $base )->abs } @links;

    return @links;
}
#################################################

# callback($tag, %attr)
# HTML::LinkExtor callback: appends the link attribute values of <a>
# tags to the global @links.
sub callback {
    my ( $tag, %attr ) = @_;
    return if $tag ne 'a';    # we only look closer at <a ...>
    push @links, values %attr;
}
####################################################

# printme($text_widget, $link_text, $tag)
# Action run when a hyperlink is clicked: recolors the link and records
# its URL in @selected_cats.  The URLs are processed later, in the
# 'Done Selecting' callback.
sub printme {
    my ( $text, $link, $tag ) = @_;
    $text->tagConfigure( $tag, -foreground => 'yellow',
        -background => 'black' );
    push @selected_cats, $link;
}
#######################################################

# manipulate_link($text_widget, $tag, $relief, $cursor, $after, @extra)
# Gives visual feedback on a hyperlink as the mouse interacts with it,
# and optionally schedules $after->($text_widget, $link_text, $tag,
# @extra) to run 100 ms later.
sub manipulate_link {
    my ( $text, $tag, $relief, $cursor, $after, @extra ) = @_;

    # configure the relief (to simulate a button press)
    $text->tagConfigure( $tag, -relief => $relief, -borderwidth => 1 );

    # change the cursor between hand and xterm
    $text->configure( -cursor => $cursor ) if $cursor;

    # and schedule the specified action to run "soon"
    if ($after) {
        my ($link) = $text->get( $text->tagRanges($tag) );
        $mw->after( 100, [ $after, $text, $link, $tag, @extra ] );
    }
}
########################################################

# get_file()
# Shows a file-open dialog for media files and returns the chosen path.
sub get_file {
    my @types =
      ( [ "mpeg,mp3,avi", [qw/.mpg .mpeg .mp3 .avi .flv/] ], );
    my $file = $mw->getOpenFile( -filetypes => \@types );
    return $file;
}
###########################################################

# set_sound()
# Checkbutton callback: restarts mplayer with or without '-nosound'
# according to $sound and resumes the current file.
sub set_sound {
    stop();

    @options = grep { $_ ne '-nosound' } @options;
    unshift @options, '-nosound' unless $sound == 1;

    start_player();
    play($cur_play);
}
##############################################################

# start_player()
# Starts mplayer (with @options, on init.mpg) at the end of a pipe so
# commands can be written to it, and makes the video viewport visible.
# Dies if the pipe cannot be opened.
sub start_player {
    $pid = open( $mplayer_fh, '|-',
        "mplayer @options init.mpg >/dev/null 2>&1" )
      or die "cannot start mplayer: $!\n";

    $canv->itemconfigure( $dcontitem, -state => 'normal' );
}
##############################################################

# play($file)
# Tells mplayer to load $file (init.mpg when none is given) and shows
# the file's base name in the window title and the title label.
sub play {
    my $mpg = shift || 'init.mpg';

    syswrite( $mplayer_fh, "loadfile $mpg\n" );
    $cur_play = $mpg;

    my $filename = substr( $mpg, rindex( $mpg, "/" ) + 1 );
    $mw->configure( -title => $filename );
    $title = $filename;
}
###################################################################

# stop()
# Shuts mplayer down: asks it to quit, hides the viewport, kills the
# process family and closes the pipe.  Does nothing if no player has
# been started.
sub stop {
    return unless $mplayer_fh;

    syswrite( $mplayer_fh, "quit\n" );
    $canv->itemconfigure( $dcontitem, -state => 'hidden' );
    killfam( 9, $pid );
    close $mplayer_fh;
    undef $mplayer_fh;
}
#################################################################

# close_it_up()
# Stops the player, tells the worker thread to exit, waits for it and
# terminates the program.
sub close_it_up {
    stop();
    $thread_die = 1;
    $thread->join;
    exit;
}
##############################################################################

# start_downloads()
# Collects the video page links from every selected category page,
# queues them in @selected and, if there is anything to fetch, enables
# the worker thread and starts the auto-play and list-refresh timers.
sub start_downloads {

    # links look like "http://youtube.com/watch?v=EkTpUxh8Vxc"
    my $lead = 'http://youtube.com/watch?v=';
    my $rx   = qr/\Q$lead\E(.*)/;

    foreach my $page (@selected_cats) {

        # keep video links only, without dups; push plain strings,
        # since objects cannot be stored in a shared array
        my %seen;
        push @selected,
          grep { /$rx/ && !$seen{$_}++ } map { "$_" } get_links($page);
    }

    # start downloading thread if needed
    return unless @selected;
    $thread_go = 1;

    # do not stack timers when downloads are started more than once
    $timer->cancel  if $timer;
    $timer1->cancel if $timer1;

    # timer to autoplay first download
    $timer = $mw->repeat(
        500,
        sub {
            if ( scalar @ready > 0 ) {
                play( $ready[0] ) if !$playing;
                $timer->cancel;
            }
        }
    );

    # update download list (hack for tk thread safety)
    # use 10 seconds to allow time to make selections
    # definitely a hack :-)
    $timer1 = $mw->repeat(
        10000,
        sub {
            @ready_lb = @ready;
            $lb->update;
        }
    );
}
#############################################################

# work()
# Body of the background thread.  While $thread_go is set it takes
# video page URLs off @selected, works out the title and the .flv
# download URL from the page, saves the video as
# "<title>--<video_id>.flv" and pushes that name onto @ready.  It idles
# while there is nothing to do and returns once $thread_die is set.
sub work {
    my $ua = LWP::UserAgent->new();

    # don't buffer the prints, to make the status update
    $| = 1;

    while (1) {
        return if $thread_die;

        my $urlin = $thread_go ? shift @selected : undef;
        unless ( defined $urlin ) {
            # disabled, or queue empty: sleep .1 second and look again
            select( undef, undef, undef, .1 );
            next;
        }

        print "starting next page retrieval\n";
        my $content = get($urlin);
        unless ( defined $content ) {
            warn "could not retrieve $urlin\n";
            next;
        }
        print "done page retrieval\n";

        # get human friendly title for video
        # NOTE(review): the opening <title> was missing from the regex
        # in the file as received; restored -- confirm.
        my ($title) = $content =~ /<title>(.*)<\/title>/;
        $title = 'untitled' unless defined $title;

        # put underscores for spaces in title
        $title =~ tr/ /_/;

        # regex for 2 key text strings which identify the video file;
        # the second one is unique for each download attempt
        my ( $video_id, $token ) =
          $content =~ /player2\.swf\?video_id=([^&]+)&.*t=([^&]+)&/;
        unless ( defined $video_id && defined $token ) {
            warn "no video reference found in $urlin\n";
            next;
        }
        print $video_id, "\n", $token, "\n";

        # add title to filename for ease of identification, plus a
        # .flv extension; the name comes from the web page, so keep
        # path separators out of it
        my $infile = $title . '--' . $video_id . '.flv';
        $infile =~ tr{/}{_};

        # e.g. http://www.youtube.com/get_video?video_id=p_YMigZmUuk&t=OEgs...
        my $get_url =
            'http://www.youtube.com/get_video?video_id='
          . $video_id . '&t='
          . $token;
        print "gettin video file $get_url\n";

        my $out;
        unless ( open $out, '>', $infile ) {
            warn "cannot write $infile: $!\n";
            next;
        }
        binmode $out;

        print "Fetching $get_url\n";
        my $received_size = 0;
        my $request_time  = time;
        my $last_update   = 0;

        # Called by LWP for every chunk of the response body.
        my $on_chunk = sub {
            my ( $data, $response, $protocol ) = @_;

            my $total_size = $response->header('Content-Length') || 0;
            $received_size += length $data;
            print {$out} $data;

            # dying inside the content callback makes LWP abort the
            # request and return to the caller
            die "download interrupted\n" if !$thread_go || $thread_die;

            # this is to make the status only update once per second
            my $time_now = time;
            return
              unless $time_now > $last_update
              or $received_size == $total_size;
            $last_update = $time_now;

            print "\rReceived $received_size bytes";
            printf " (%i%%)", ( 100 / $total_size ) * $received_size
              if $total_size;
            printf " %6.1f/bps",
              $received_size / ( ( $time_now - $request_time ) || 1 )
              if $received_size;
        };

        $ua->get(
            $get_url,
            ':content_cb'     => $on_chunk,
            ':read_size_hint' => 8192,
        );
        print "\n";

        close $out or warn "error closing $infile: $!\n";
        return if $thread_die;

        print "$infile done\n";
        push @ready, $infile;
    }
}
###################################################################

# make_init()
# Writes the small built-in clip that mplayer is started on to
# init.mpg in the working directory.  Dies if the file cannot be
# written.
sub make_init {

    my $init_mpg = '
UklGRvYUAABBVkkgTElTVOgRAABoZHJsYXZpaDgAAABAQg8AfJIAAAAAAAAQCQAAAwAAAAAAAAAB
AAAAAAAQAMAAAACQAAAAAAAAAAAAAAAAAAAAAAAAAExJU1SQEAAAc3RybHN0cmg4AAAAdmlkc0RJ
VlgAAAAAAAAAAAAAAAABAAAAAQAAAAAAAAADAAAAAAAQAP////8ARAEAAAAAAMAAkABzdHJmKAAA
ACgAAADAAAAAkAAAAAEAGABESVZYAEQBAAAAAAAAAAAAAAAAAAAAAABKVU5LFBAAAAQAAAAAAAAA
MDBkYwpVTksEAQAAb2RtbGRtbGj4AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABMSVNUwgIAAG1vdmkw
MGRjbAEAAAAAAbABAAABtYkTAAABAAAAASAAxI2IAA0GBBIUYwAAAbJGRm1wZWcwLjQuOGI0Njgw
AH8AAAGzABAHAAABthYPGC/bfxtt/G238bbfxtt/G238bbfxtt/G238bbfxtt/G238bbfxtt/G23
8bbfxtt/G238bbfxtt/G238bbfxtt/G238bbfxtt/G238bbfxtt/G238bbfxtt/G238bbfxtt/G2
38bbfxtt/G238bbfxtt/G238bbfxtt/G238bbfxtt/G238bbfxtt/G238bbfxtt/G238bbfxtt/G
238bbfxtt/G238bbfxtt/G238bbfxtt/G238bbfxtt/G238bbfxtt/G238bbfxtt/G238bbfxtt/
G238bbfxtt/G238bbfxtt/G238bbfxtt/G238bbfxtt/G238bbfxtt/G238bbfxtt/G238bbfxtt
/G238bbfxtt/G238bbfxtt/G238bbfxtt/G2378wMGRjIwEAAAAAAbZrgRC+5wX3OC+5wX3OC+5w
X3OC+5wX3OC+5wX3OC+5wX3OC+5wX3OC+5wX3OC+5wX3OC+5wX3OC+5wX3OC+5wX3OC+5wX3OC+5
wX3OC+5wX3OC+5wX3OC+5wX3OC+5wX3OC+5wX3OC+5wX3OC+5wX3OC+5wX3OC+5wX3OC+5wX3OC+
5wX3OC+5wX3OC+5wX3OC+5wX3OC+5wX3OC+5wX3OC+5wX3OC+5wX3OC+5wX3OC+5wX3OC+5wX3OC
+5wX3OC+5wX3OC+5wX3OC+5wX3OC+5wX3OC+5wX3OC+5wX3OC+5wX3OC+5wX3OC+5wX3OC+5wX3O
C+5wX3OC+5wX3OC+5wX3OC+5wX3OC+5wX3OC+5wX3OC+5wX3OC+5wX3OfwAwMGRjFQAAAAAAAbZr
AR//////////////////fwBpZHgxMAAAADAwZGMQAAAABAAAAGwBAAAwMGRjAAAAAHgBAAAjAQAA
MDBkYwAAAACkAgAAFQAAAA==';

    open( my $fh, '>', 'init.mpg' ) or die "cannot write init.mpg: $!\n";
    binmode $fh;    # the decoded clip is binary data
    print {$fh} decode_base64($init_mpg);
    close $fh or die "error closing init.mpg: $!\n";
}