Beefy Boxes and Bandwidth Generously Provided by pair Networks
laziness, impatience, and hubris
 
PerlMonks  

Lyrics Fetcher

by saskaqueer (Friar)
on Apr 16, 2004 at 00:25 UTC ( [id://345593]=sourcecode: print w/replies, xml ) Need Help??
Category: Audio Related Programs
Author/Contact Info /msg saskaqueer
Description:

Locates and downloads lyrics to (hopefully) any song. Simple object interface so you can roll your own. I threw this together because the Lyrics::Fetcher:: modules on CPAN are horrible. The regexes used to extract the lyrics are of course fragile and are likely to break upon any changes to the lyric sites :) I updated the method in which the lyrics are extracted. It is now one general regex, so it should stand the test of time slightly better. It will still break (partially) if any of the lyrics sites modify their templates. I'll try to keep this up to date as maintenance is needed.

There is no guarantee that the first match returned will be the correct song. That is why I use multiple sites: it gives you a fighting chance. Chances are you will receive some lyrics with additional info at the top (songwriter, etc.) that I can't filter out easily, so you get stuck with some extras once in a while.

# sample program - all available methods are used

#!c:/perl/bin/perl -w
# Interactive driver for LyricFinder: repeatedly asks for an artist and a
# song title and pages through the lyric candidates that were found.
$| = 1;    # unbuffered STDOUT, so prompts appear before we block on STDIN

use strict;
use LyricFinder;

# External program used to display the downloaded lyrics.
# 'more' exists on both win32 and *nix; swap in 'less', /vim?/, 'notepad',
# or anything else that accepts a file name argument.
my $viewer = 'more';

# A single finder object is reused for every search.
my $lf = LyricFinder->new;

# This loop never terminates on its own; find_lyrics() is responsible for
# ending the program.
while (1) {
    find_lyrics();
}


# Prompt for an artist and a song title, search for matching lyric pages and
# show each usable candidate in the external viewer, asking after every one
# what to do next.
#
# Uses the file-level $lf (LyricFinder object) and $viewer (viewer command).
#
# Commands at the ">> " prompt (only the first letter is examined):
#   n - show the next candidate
#   r - abandon these results and return to the caller (which re-prompts
#       for a new artist/song)
#   q - quit the program
# Anything else re-displays the prompt.
#
# End-of-file on STDIN at any prompt ends the program; otherwise the caller's
# endless loop would spin forever on undefined input.
sub find_lyrics {
    print "Artist: ";
    my $artist = <STDIN>;
    exit(0) unless defined $artist;
    chomp $artist;

    print "Song Title: ";
    my $song = <STDIN>;
    exit(0) unless defined $song;
    chomp $song;

    my $search = $lf->search(50, $artist, $song);

    print "\nFound ", $search->count(), " possible results!\n";

    PAGE:
    while ( my $page = $search->fetch() ) {
        if ( $page->error() ) {
            print "Fetch for ", $page->source(), " failed (",
                $page->error(), ")\n";
            next PAGE;
        }

        print "Fetch from ", $page->source(), " successfull (200).\n\n";
        sleep 2;    # give the user a moment to read the status line

        # The viewer is an external program, so hand the lyrics over through
        # a scratch file that is removed as soon as the viewer returns.
        open( my $fh, '>', '.lyrics' ) or die "open failed: $!";
        print $fh $page->lyrics();
        close($fh) or die "close failed: $!";

        system "$viewer .lyrics";
        unlink '.lyrics';

        # Keep asking until a recognised command is entered.
        while (1) {
            print "\n\n>> ";
            my $cmd = <STDIN>;
            exit(0) unless defined $cmd;
            chomp $cmd;

            exit(0) if $cmd =~ /^[qQ]/;

            if ( $cmd =~ /^[rR]/ ) {
                # Return directly: the "No more lyrics" message below is only
                # true when the result list has actually been exhausted.
                print "\n";
                return;
            }

            next PAGE if $cmd =~ /^[nN]/;
        }
    }

    print "\nNo more lyrics found for given artist and song.\n\n";
}




# LyricFinder.pm - all of the code below goes into this one file (LyricFinder.pm)!
package LyricFinder;

use URI::Escape;
use LWP::UserAgent;

# Domains of the lyric sites this module accepts. search() keeps only those
# result links whose URL starts with http:// followed by one of these
# domains (optionally preceded by "www.").
my @sites = qw(
    sing365.com
    musicsonglyrics.com
    lyricsdepot.com
    123lyrics.net
    lyricsfreak.com
    azlyrics.com
    elyrics.net
);

# Constructor. Takes no arguments besides the class name.
#
# Returns a LyricFinder object holding a single LWP::UserAgent (30 second
# timeout, browser-like User-Agent string) in its _ua slot; search() uses
# that agent and hands it on to the result object it creates.
sub new {
    my ($class) = @_;

    my $self = {
        _ua => LWP::UserAgent->new(
            timeout => 30,

            # The User-Agent value must not contain a line break: it is sent
            # as an HTTP header, so each literal stays on one source line.
            agent   => 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1) '
                     . 'Opera 7.23 [en]',
        ),
    };

    return( bless($self, $class) );
}

# Run a Google search for "lyrics <terms>" and collect the result links that
# point at one of the known lyric sites (file-level @sites).
#
# Parameters:
#   $count - number of results to request from Google (the "num" parameter)
#   @terms - search words (e.g. artist and song title); joined with spaces
#            and URI-escaped
#
# Returns a LyricsFinder::Search object wrapping the matching links, in the
# order they appeared in the result page.
#
# Dies with the HTTP status line when the search request is not successful.
sub search {
    my ($self, $count, @terms) = @_;

    my $res = $self->{_ua}->get(
        'http://www.google.com/search?q=lyrics%20' .
        uri_escape( join(' ', @terms) ) . '&num=' . $count .
        '&ie=UTF-8&oe=UTF-8&hl=en&btnG=Google%20Search',

        Referer => 'http://www.google.com/'
    );

    die("Unable to retrieve search results: ${\$res->status_line()}\n")
        unless ( $res->is_success() );

    # Build one anchored pattern for all accepted sites. The domains are
    # quotemeta'd so that "." only matches a literal dot, and the host must
    # end right after the domain so that look-alike hosts such as
    # "azlyrics.com.example.org" are rejected.
    my $site_alternation = join( '|', map { quotemeta } @sites );
    my $wanted_link = qr{\Ahttp://(?:www\.)?(?:$site_alternation)(?:[/:?\#]|\z)};

    # The capture pulls the (unquoted) href value out of each result entry.
    my @links = grep { $_ =~ $wanted_link }
                $res->content() =~ m!<p class=g><a href=([^>]+)>!g;

    return( LyricsFinder::Search->new($self->{_ua}, \@links) );
}


package LyricsFinder::Search;

# A queue of candidate lyric-page links. Each fetch() call removes the
# first remaining link and turns it into a LyricsFinder::Page.

# new($ua, \@links)
#   $ua    - user agent object passed on to every page that is fetched
#   $links - array reference of page URLs; it is stored as-is (not copied)
#            and is consumed by fetch()
sub new {
    my $class = shift;
    my ($agent, $link_list) = @_;

    my %state = (
        _ua    => $agent,
        _links => $link_list,
    );

    return bless \%state, $class;
}

# Number of links that have not been fetched yet.
sub count {
    my ($self) = @_;

    return scalar @{ $self->{_links} };
}

# Remove the next link from the queue and return a LyricsFinder::Page built
# from it; returns undef once the queue is empty.
sub fetch {
    my $self = shift;

    my $next_uri = shift @{ $self->{_links} };
    return undef unless defined $next_uri;

    return LyricsFinder::Page->new( $self->{_ua}, $next_uri );
}


package LyricsFinder::Page;

use HTML::Entities;

# Fetch one lyric page and try to extract the lyrics from it.
#
# Parameters:
#   $ua  - user agent object; its get($uri) method is called once
#   $uri - URL of the page to download
#
# Always returns a LyricsFinder::Page object. Exactly one of these is set:
#   _lyrics - the extracted text, on success
#   _error  - the HTTP status code when the request failed, or the string
#             'no lyrics found' when nothing usable could be extracted
# _source holds the host name taken from $uri (without a leading "www.");
# it is undef when $uri does not look like http://host/...
sub new {
    my ($class, $ua, $uri) = @_;

    my $source = ($uri =~ m!^http://(?:www\.)?([^/]+)/!)[0];

    my $self = {
        _ua     => $ua,
        _uri    => $uri,
        _source => $source
    };

    my $res = $self->{_ua}->get($uri);
    my $lyrics;

    unless ( $res->is_error() ) {
        $lyrics = $res->content();
        $lyrics = '' unless defined $lyrics;

        $lyrics =~ y!\r!!d;

        # Normalise "smart" apostrophes to a plain one: \342\200\231 is the
        # UTF-8 encoding of U+2019, \221 and \222 are the Windows-1252 curly
        # single quotes. NOTE(review): the reason for \306 is not visible
        # here -- presumably another mis-encoded apostrophe; confirm.
        $lyrics =~ s!(?:\342\200\231|\306|\221|\222)!'!g;

        # <br> becomes a newline, comments vanish, and every remaining tag
        # becomes a \001 separator ...
        $lyrics =~ s!\n?<\s*br\s*(?:/\s*)?>\n?!\n!ig;
        $lyrics =~ s!<\!\-\-.*?\-\->!!gs;
        $lyrics =~ s!<([^>]+)>!\001!gs;

        # ... so that the page splits into tag-free chunks. Heuristic: the
        # longest chunk is taken to be the lyrics.
        ($lyrics) = sort { length($b) <=> length($a) } split(/\001/, $lyrics);

        # An empty page yields no chunk at all; only tidy up what exists.
        if ( defined $lyrics ) {
            # Trim surrounding whitespace plus a stray leading "]" or
            # trailing "[".
            $lyrics =~ s!\A\s*(?:\]\s*)?!!;
            $lyrics =~ s!(?:\s*\[)?\s*\z!!;
            $lyrics = decode_entities($lyrics);
        }
    }

    if ( $res->is_error() ) {
        $self->{_error} = $res->code();
    }
    elsif ( not defined($lyrics)
            or $lyrics eq ''
            # Presumably these markers mean the longest chunk was script,
            # CSS or site boilerplate rather than lyrics.
            or $lyrics =~ /hotlog_js/
            or $lyrics =~ /td \{/
            or $lyrics =~ /All lyrics are property/
          ) {
        $self->{_error} = 'no lyrics found';
    }
    else {
        $self->{_lyrics} = $lyrics;
    }

    return( bless($self, $class) );
}

# Read-only accessors for the fields stored by new().

# URL the page was requested from.
sub uri {
    my ($self) = @_;
    return $self->{_uri};
}

# Value of the _error field; undef when it was never set.
sub error {
    my ($self) = @_;
    return $self->{_error};
}

# Value of the _source field.
sub source {
    my ($self) = @_;
    return $self->{_source};
}

# Value of the _lyrics field; undef when it was never set.
sub lyrics {
    my ($self) = @_;
    return $self->{_lyrics};
}

1;
Replies are listed 'Best First'.
Re: Lyrics Fetcher
by Jaap (Curate) on Apr 16, 2004 at 08:18 UTC
    Does this work with WinAmp and/or unix equivalents? If so: how? I see it reads from STDIN.

      And voila, here is a winamp version! Simply download the winamp plugin, install Winamp::Control and then here's the script. The controls are pretty easy to figure out. Press 'r' (after exiting the 'notepad' program) to grab the next song's lyrics, 'n' to load a different version of the lyrics, and 'q' to quit.

      #!c:/perl/bin/perl -w
      # Winamp variant of the sample program: the artist and song title are
      # read from the ID3 tag via Winamp::Control instead of from STDIN.
      $| = 1;

      use strict;
      use LyricFinder;
      use Winamp::Control;

      # External program used to display the downloaded lyrics.
      my $viewer = 'notepad';

      my $winamp = Winamp::Control->new(
          host   => '127.0.0.1',
          port   => 8080,
          passwd => 'our_secret'
      );

      die "Winamp and/or the httpQ plugin do not seem to be running."
          unless ( $winamp->getversion() );

      my $lf = LyricFinder->new();

      # NOTE(review): when a search yields no usable page, find_lyrics()
      # returns without prompting, so this loop immediately searches again.
      find_lyrics() while 1;

      # Look up lyrics for the artist/song reported by Winamp and show each
      # usable candidate in the viewer.
      #
      # Commands at the ">> " prompt (only the first letter is examined):
      #   n - show the next candidate for the same song
      #   r - return to the caller, which re-reads the tag and searches again
      #   q - quit the program
      # End-of-file on STDIN also quits.
      sub find_lyrics {
          my $artist = $winamp->getid3tag_artist();
          my $song   = $winamp->getid3tag_songname();

          my $search = $lf->search(50, $artist, $song);

          print "\nFound ", $search->count(), " possible results!\n";

          PAGE:
          while ( my $page = $search->fetch() ) {
              if ( $page->error() ) {
                  print "Fetch for ", $page->source(), " failed (",
                      $page->error(), ")\n";
                  next PAGE;
              }

              print "Fetch from ", $page->source(), " successfull (200).\n\n";
              sleep 2;

              # Hand the lyrics to the viewer through a scratch file.
              open( my $fh, '>', '.lyrics' ) or die "open failed: $!";
              print $fh $page->lyrics();
              close($fh) or die "close failed: $!";

              system "$viewer .lyrics";
              unlink '.lyrics';

              # Keep asking until a recognised command is entered.
              while (1) {
                  print "\n\n>> ";
                  my $cmd = <STDIN>;
                  exit(0) unless defined $cmd;
                  chomp $cmd;

                  exit(0) if $cmd =~ /^[qQ]/;

                  if ( $cmd =~ /^[rR]/ ) {
                      # Return directly so the "No more lyrics" message is
                      # only shown when the results really are exhausted.
                      print "\n";
                      return;
                  }

                  next PAGE if $cmd =~ /^[nN]/;
              }
          }

          print "\nNo more lyrics found for given artist and song.\n\n";
      }

      It does indeed read from STDIN: you have to pass the artist and song title manually. You could very easily take XMMS::Remote or Winamp::Control and incorporate its methods to extract the artist/song title of the currently playing song.

Log In?
Username:
Password:

What's my password?
Create A New User
Domain Nodelet?
Node Status?
node history
Node Type: sourcecode [id://345593]
help
Chatterbox?
and the web crawler heard nothing...

How do I use this?Last hourOther CB clients
Other Users?
Others browsing the Monastery: (7)
As of 2024-04-23 10:13 GMT
Sections?
Information?
Find Nodes?
Leftovers?
    Voting Booth?

    No recent polls found