#!/fellow/monks.pl
#!/usr/bin/perl -w
# ======================================================== #
#  This script is FREEWARE. You are allowed to use this
#  code in any way, commercial or non-commercial, as long
#  as credit is given to me. I still own the copyright.
#
#  Even though every attempt has been made to ensure this
#  script is bug free, problems sometimes do come up. I
#  can not be held responsible for any damage or disruption
#  this script may cause you. The code is provided open
#  and free. It is assumed that you will at least attempt
#  to test the code first, before running it on any system.
#  Use the code at your own risk. If you don't agree, then
#  don't use it. It's as simple as that.
#
#  Feel free to leave any comments about the script on the
#  www.massyn.net website. I trust that you will be able
#  to use the script to your benefit.
#
#  Phil Massyn
#  http://www.massyn.net
#  December 31st, 2006
#  Sydney, AUSTRALIA
#                                                    _
#   _ __ ___   __ _ ___ ___ _   _ _ __    _ __   ___| |_
#  | '_ ` _ \ / _` / __/ __| | | | '_ \  | '_ \ / _ \ __|
#  | | | | | | (_| \__ \__ \ |_| | | | |_| | | |  __/ |_
#  |_| |_| |_|\__,_|___/___/\__, |_| |_(_)_| |_|\___|\__|
#                           |___/
#
# ======================================================== #
# Phil's Podder - a simple perl podcast aggregator
#
# This script requires wget. I find wget is very good at
# getting files downloaded. If you don't have wget, do
# a Google search for it, and get it installed.
#
# The script is loosely based on BashPodder.
# http://linc.homeunix.org:8080/scripts/bashpodder/
# I just found that, first of all, BashPodder relied
# heavily on Unix commands. I've consolidated all of
# that using perl commands. I also found BashPodder
# to be very cruel when running it the first time. It
# will download everything. It also tends to download
# everything again, should it crash half way through a
# download.
#
# The $config parameter points to the text file containing
# the list of feeds to check. You maintain this file.
#
# The $log parameter points to the log file, that contains
# all URLs already downloaded.
#
# $datadir points to the directory where the podcasts will
# be downloaded to.
#
# Usage
# ------------------------------------------------------
#   philpodder.pl            download every new enclosure
#   philpodder.pl nograb     mark every enclosure as already
#                            downloaded without fetching it
#   philpodder.pl add <url>  mark the feed's current enclosures
#                            as downloaded and add the feed to
#                            the config file
#
# Revision update
# ------------------------------------------------------
# 1.1 - 2007/02/24
# A huge thanks to Foonley from the Whirlpool forums for
# some suggestions on how to clean up the code, and to
# fix some issues. For one, I've added strict.
#
# 1.2 - 2007/05/18
# Added .busy files (so I don't update it by accident)
#
# 1.3 - 2007/07/04
# Added ignoring of # in config file
#
# 1.4 - 2007/07/20
# Added reset command line option
#
# 1.5 - 2007/07/28
# Changed .busy to be .$$.busy, to help with duplicate downloads
#
# 1.6 - 2007/08/21
# Removed reset (because that's what nograb does!).
# Added .$$.mp3 if the file already exists
#
# ====================================================== #

use strict;
use warnings;

# Sent to the server as the User-Agent string on every wget call.
my $version = "massyn.net philpodder/1.6 (" . $^O . ")";

my $config = "$0.conf";
my $log    = "$0.logs";

# For Linux, this is the path I use -- adjust it accordingly...
my $datadir = "/home/phil/www/tmp/podcast";

# If you're on a Windows machine, you need to provide that directory
#my $datadir = "c:/temp";

# Run mode: '' (download), 'nograb' or 'add'.  A missing, empty or "0"
# first argument counts as "no mode given".
my $mode = $ARGV[0] || '';

## Check if the datadir exists...
die "Data directory $datadir does not exist...\n" unless -d $datadir;
die "No config file found ($config)...\n"         unless -f $config;

if ($mode eq '' || $mode eq 'nograb') {
    # Walk every feed listed in the config file.  In 'nograb' mode
    # handle_url() only records the enclosures instead of fetching them.
    open(my $in, '<', $config)
        or die "Cannot read config file $config: $!\n";
    while (my $feed = <$in>) {
        chomp $feed;
        $feed =~ s/\r$//;                # tolerate CRLF config files
        next if $feed =~ /^#/;           # comment line
        next if $feed =~ /^\s*$/;        # blank line: nothing to fetch
        parse_xml($feed);
    }
    close $in;
}
elsif ($mode eq 'add') {
    ## If we're adding a new podcast, add it to our config file
    ## if it were read successfully
    die "Usage: $0 add <feed-url>\n"
        unless defined $ARGV[1] && length $ARGV[1];
    if (parse_xml($ARGV[1]) == 0) {
        add_to_db($config, $ARGV[1]);
    }
}
else {
    die "Unknown parameter.";
}

exit(0);

# ============================================== #
# parse_xml($feed_url)
#
# Fetches the feed with wget and passes the URL of every <enclosure>
# tag found in it to handle_url().
#
# Returns 0 when the feed was fetched, 1 when wget failed.
sub parse_xml {
    my ($podcast) = @_;

    print "$podcast\n";

    # get the XML file...  List-form pipe open: the feed URL never passes
    # through a shell, and "--" stops wget treating a URL that starts
    # with "-" as an option.
    my $pid = open(my $wget, '-|',
        'wget', '--timeout', '10', '-t', '3', '-U', $version,
        '-q', '-O', '-', '--', $podcast);
    if (!$pid) {
        print " ERROR reading the XML feed!\n";
        return 1;
    }
    my $xml = do { local $/; <$wget> };
    $xml = '' unless defined $xml;
    close $wget;                         # reaps wget and sets $?

    if ($? != 0) {
        print " ERROR reading the XML feed!\n";
        return 1;
    }

    # go through each tag, by splitting on the > character.  Reason : one
    # podcast feed had everything in one line, so a line-by-line scan
    # didn't work as expected.
    foreach my $line (split(/>/, $xml)) {
        $line =~ tr/\r/\n/;
        $line =~ tr/'/"/;

        # if the tag contains <enclosure and url=", grab the URL in
        # there.  podomatic for example puts the length just after
        # enclosure, so url= is not necessarily the first attribute.
        if ($line =~ /<enclosure/ && $line =~ /\burl="([^"\n]+)"/i) {
            handle_url($1);
        }
    }
    return 0;
}

# ------------------------------------------------ #
# handle_url($enclosure_url)
#
# Attempts to download the URL.  If it is already listed in the log
# file it is skipped.  In 'nograb' and 'add' mode the URL is only
# recorded in the log, never downloaded.
sub handle_url {
    my ($url) = @_;

    # is the url in the log file?
    if (is_in_db($log, $url) eq "YES") {
        #print " Skipping : $url\n";
        return;
    }

    if ($mode eq '') {
        # get the file name from the URL
        my $file = substr($url, rindex($url, "/") + 1);
        my $busy = "$datadir/$file." . $$ . ".busy";

        print " Downloading : $url\n";

        # List-form system(): neither the URL nor the file name taken
        # from the feed is ever interpreted by a shell.
        my $status = system('wget', '-t', '10', '--timeout', '20',
            '-U', $version, '-c', '-q', '-O', $busy, '--', $url);

        if ($status == 0) {
            add_to_db($log, $url);

            my $newfile = $file;
            if (-e "$datadir/$file") {
                # Name already taken: slot the process id in before the
                # extension, or append it when there is no extension.
                my $id = $$;
                $newfile .= ".$id"
                    unless $newfile =~ s/(\.[^.\s]+)$/.$id$1/;
            }
            rename($busy, "$datadir/$newfile")
                or print " WARNING : Could not rename $busy to $datadir/$newfile: $!\n";
        }
        else {
            print " WARNING : Could not download $url!!\n";
        }
    }
    elsif ($mode eq "nograb" || $mode eq "add") {
        print " Ignoring : $url\n";
        add_to_db($log, $url);
    }
    else {
        die "Unknown parameter\n";
    }
}

# ----------------------------------------------- #
# These two functions are just a silly way to implement a single field,
# single table database.  They are used mainly for the logs.  They
# ensure that only unique fields are added to the database.

# add_to_db($db_file, $entry)
#
# Appends $entry as a new line to $db_file unless an identical line is
# already there.  A write failure is reported on STDERR but is not
# fatal, so the rest of the run carries on.
sub add_to_db {
    my ($db, $entry) = @_;

    return if is_in_db($db, $entry) eq "YES";

    my $out;
    if (!open($out, '>>', $db)) {
        warn "Cannot append to $db: $!\n";
        return;
    }
    print {$out} "$entry\n";
    close $out or warn "Cannot write to $db: $!\n";
    return;
}

# is_in_db($db_file, $entry)
#
# Returns the string "YES" when a line of $db_file equals $entry exactly,
# otherwise "NO".  A file that cannot be opened (for example the log
# before the first run) counts as empty.
sub is_in_db {
    my ($db, $entry) = @_;

    open(my $in, '<', $db) or return "NO";
    while (my $row = <$in>) {
        chomp $row;
        if ($row eq $entry) {
            close $in;
            return "YES";
        }
    }
    close $in;
    return "NO";
}
Thanks!
|\/| _. _ _ ._
www. | |(_|_>_>\/| | .net
/
The more I learn the more I realise I don't know.
In reply to philpodder.pl - my own podcast catcher by Massyn
| For: | Use: | ||
| & | &amp; | ||
| < | &lt; | ||
| > | &gt; | ||
| [ | &#91; | ||
| ] | &#93; |