Beefy Boxes and Bandwidth Generously Provided by pair Networks
Syntactic Confectionery Delight

download mp3s listed in RSS feed

by blahblahblah (Priest)
on Jan 27, 2007 at 02:36 UTC ( [id://596817]=sourcecode: print w/replies, xml ) Need Help??
Category: web stuff
Author/Contact Info Joe Cullin blahblahblah
Description: Scans WFMU's MP3 archive RSS Feed for certain show titles, and then downloads those shows.

There's no particular reason to use POE::Component::RSSAggregator rather than XML::RSS::Feed, other than the fact that I heard about the POE version first and was interested in trying something in POE. (Thanks again, everyone, for helping me get around the problems caused by my out-of-date POE in the thread "POE::Component::RSSAggregator breaks LWP::Simple::get".)

Also, I heartily recommend this station to everyone!

use strict;
use warnings;
use POE qw(Component::RSSAggregator);


# Regex alternation of show-name fragments, built by joining the fragments
# with '|' so the result can be interpolated straight into a match. It is
# meant to be tested against the show title parsed out of each headline.
my $wantedShowsPattern = join ('|',
                   'Laura Cantrell',
                   'mister c',
                   'billy jam', # will get "unshackled..." too?
                   'coffee',    # does this feed include coffee2go also?
                   'Dave Emory',

                   # Candidates that are not enabled yet:
                   # 7 Second Delay?
                   # Ken's show?
                   # Pseu Braun?
                   # Irwin (calypso 2-3PM)?
);

# Directory the downloaded MP3 files are written into. Keep the trailing
# slash: the file name is appended to this string directly.
my $downloadDir = 'E:/wfmu/';

# Feeds handed to the aggregator. Each element must be ONE hash ref,
# because every element of @feeds is posted as a separate 'add_feed'
# request; a flat key/value list would be posted one string at a time.
my @feeds = (
    {
          # TODO: the feed address is blank in this listing; fill in the
          # WFMU MP3 archive RSS feed URL before running.
          url   => "",
          name  => "wfmu_mp3",
          delay => 3600,   # presumably seconds between polls -- confirm
    },
);

# Fail fast at startup if the download directory is unusable, instead of
# discovering it in the middle of a download.
-d $downloadDir or die("download dir $downloadDir must be created.\n");
-w $downloadDir or die("download dir $downloadDir must be writable.\n");

# Create the session whose handlers drive the aggregator: _start sets the
# aggregator up, handle_feed receives its callbacks.
# NOTE(review): the enclosing POE::Session->create(...) call and the kernel
# run line were missing from this listing; they are restored here following
# the usage shown in the POE::Component::RSSAggregator documentation.
POE::Session->create(
             inline_states => {
                       _start      => \&init_session,
                       handle_feed => \&handle_feed,
             },
);

# Enter the POE event loop; returns once no sessions remain.
POE::Kernel->run();

# POE _start handler: creates the RSS aggregator and registers the feeds.
#
# The aggregator object is kept in the session heap under 'rssagg'.
# Its callback is a postback to this session's 'handle_feed' event.
# Every element of @feeds is then posted to it as an 'add_feed' request.
sub init_session {
  my ( $kernel, $heap, $session ) = @_[ KERNEL, HEAP, SESSION ];
  $heap->{rssagg} = POE::Component::RSSAggregator->new(
        alias    => 'rssagg',
        debug    => 1,
        callback => $session->postback("handle_feed"),
        tmpdir   => 'f:/cgi/wfmu/',   # NOTE(review): hard-coded path; must exist on this machine
  );
  $kernel->post( 'rssagg', 'add_feed', $_ ) for @feeds;
}

# POE event handler reached through the postback registered in
# init_session; the feed object arrives as the first element of ARG1.
#
# Prints a timestamp banner and every headline returned by
# late_breaking_news. Headlines whose parsed show title matches
# $wantedShowsPattern are announced and passed to processUrl.
sub handle_feed {
    my $feed = $_[ARG1]->[0];
    printf "\n========= %s ===============\n", scalar(localtime);
    for my $headline ( $feed->late_breaking_news ) {
      print $headline->headline() . "\n";

      # parseHeadline does not hand back a hash ref for every input (an
      # empty headline returns nothing), so verify before dereferencing.
      my $parsed = parseHeadline($headline->headline());
      next unless ref $parsed eq 'HASH';

      # NOTE(review): the match on this line was cut off after "m/" in the
      # listing. Matching against $wantedShowsPattern, case-insensitively,
      # is a reconstruction -- confirm the pattern and flags.
      next unless $parsed->{'show'} =~ m/$wantedShowsPattern/i;

      print "\n----- DOWNLOADING ... ---------------------\n";
      print "     url:  " . $headline->url() . "\n";

      # NOTE(review): the download call itself was missing from the listing;
      # processUrl is the only downloader defined in this file.
      processUrl($headline->url());

      print "\n";
    }
}

# Downloads the MP3 that an .m3u playlist URL points to.
#
#   $url - playlist address; rejected unless it contains ".m3u".
#
# The playlist is fetched and its content is used as the MP3's URL. The
# file is stored in $downloadDir under the last segment of that URL's
# path, unless a file of that name already exists. Returns nothing; every
# problem is reported on STDOUT and the download is skipped.
sub processUrl {
  my $url = shift;
  if ($url !~ /\.m3u/i) {
    print "Invalid playlist url?\n";
    return;
  }

  use LWP::Simple;
  print "retrieving m3u file...\n";
  my $mp3Url = LWP::Simple::get($url);

  # get() yields undef when the request fails; check that before the value
  # is interpolated or matched.
  if (!defined $mp3Url) {
    print "Either the get failed or the content is unusable?\n";
    return;
  }
  print "mp3 url:  $mp3Url\n";
  if ($mp3Url !~ /mp3$/s) {
    print "Either the get failed or the content is unusable?\n";
    return;
  }

  # Use the last path segment of the MP3 URL as the local file name.
  use URI;
  my $uriObj = URI->new($mp3Url);
  my $uriPath = $uriObj->path();
  use File::Basename;
  my $baseFileName = basename($uriPath);

  if ($baseFileName eq '') {
    print "Botched processing of filename?\n";
    return;
  }

  my $mp3File = $downloadDir . $baseFileName;
  print "file: $mp3File\n";

  # Never overwrite: a file of this name is taken to be an earlier download.
  if (-e $mp3File) {
    print "File already exists!\n";
    return;
  }

  print "SAVING MP3 TO FILE...\n";
  my $responseCode = getstore($mp3Url, $mp3File);

  # getstore() returns the HTTP status code; only report success for 2xx.
  if (!is_success($responseCode)) {
    print "download failed (HTTP status $responseCode)\n";
    return;
  }
  print "done saving.\n";
  return;
}

# Extracts the show title from a feed headline.
#
# Returns early with nothing when the headline is the empty string.
# Otherwise strips the leading "WFMU MP3 Archive: " prefix, applies a second
# substitution that yields three captures ($mon, $mday, $year), and returns
# what is left of the headline as { show => ... }.
#
# NOTE(review): this listing is incomplete. The sub's braces, the body of
# the s{...} substitution, the declaration of $mon/$mday/$year and whatever
# separated the return from the "parse error" message are not present here.
# Restore them from the original before relying on this sub.
sub parseHeadline
  my $headline = shift;
  return if $headline eq '';

  $headline =~ s/^WFMU\sMP3\sArchive:\s+//;
  if ($headline =~ s{
    ($mon, $mday, $year) = ($1, $2, $3);
    my $show = $headline;
    return {'show' => $show};

    # (I might eventually do something with the date, but not yet.)
    print "parse error on headline?\n  ( $_[0] )\n";


Log In?

What's my password?
Create A New User
Domain Nodelet?
Node Status?
node history
Node Type: sourcecode [id://596817]
and the web crawler heard nothing...

How do I use this? | Last hour | Other CB clients
Other Users?
Others taking refuge in the Monastery: (4)
As of 2024-04-24 11:58 GMT
Find Nodes?
    Voting Booth?

    No recent polls found