#!/usr/bin/perl
#
# Scrape the first page of a Twitter search and print it to STDOUT as an
# RSS 2.0 feed.
#
# Usage: <script> SEARCH_TERM
#
# Exactly one argument (the search term) is required.  The script dies if
# the argument count is wrong or if the search page cannot be fetched.

use strict;
use warnings;
use utf8;
use 5.10.0;

use Data::Dumper;
use Readonly;
use HTML::TreeBuilder::XPath;
use LWP::Simple;
use POSIX qw(strftime setlocale LC_TIME);

binmode STDOUT, ':encoding(UTF-8)';

# RSS dates must carry English day/month names; strftime's %a and %b follow
# LC_TIME, so pin it to the C locale.
setlocale(LC_TIME, 'C');

Readonly my $BASEURL => 'https://twitter.com';
Readonly my $USAGE   => "$0 : make an rss of a twitter search";

die $USAGE unless @ARGV == 1;

my $term     = $ARGV[0];
my $term_uri = uri_escape_term($term);    # safe inside a URL query string
my $term_xml = xml_escape($term);         # safe inside XML character data

my $content = get("$BASEURL/search?q=$term_uri&src=typd");
die "Couldn't get search results" unless defined $content;

my @items;

my $tree = HTML::TreeBuilder::XPath->new;
$tree->parse($content);
$tree->eof;    # tell the parser the document is complete before querying it

my $tweets = $tree->findnodes( '//li' . class_contains('js-stream-item') );

for my $li (@$tweets) {
    my $tweet = $li->findnodes(
        './div' . class_contains('tweet') . '/div' . class_contains('content')
    )->[0];

    # Stream items that do not have the expected tweet markup are skipped
    # instead of aborting the whole run on an undefined node.
    next unless defined $tweet;

    my $header = $tweet->findnodes(
        './div' . class_contains('stream-item-header')
    )->[0];
    next unless defined $header;

    # The tweet text becomes both the item title and its description.  It is
    # XML-escaped at output time, so it no longer has to be blanked out.
    my $body = $tweet->findvalue( './p' . class_contains('tweet-text') );

    my $fullname = $header->findvalue(
        './a/strong' . class_contains('fullname')
    );
    my $username = '@' . $header->findvalue(
        './a/span' . class_contains('username') . '/b'
    );

    # Both the permalink and the epoch timestamp hang off the same anchor.
    my $timestamp_anchor = './small' . class_contains('time')
                         . '/a' . class_contains('tweet-timestamp');
    my $uri       = $BASEURL . $header->findvalue( $timestamp_anchor . '/@href' );
    my $timestamp = $header->findvalue( $timestamp_anchor . '/span/@data-time' );

    # Only format a date when an integer timestamp was actually found;
    # otherwise localtime('') would warn and yield a 1970 date.
    my $pub_date = $timestamp =~ /\A\d+\z/
        ? strftime( '%a, %d %b %Y %H:%M:%S %z', localtime($timestamp) )
        : '';

    push @items, {
        username    => $username,
        fullname    => $fullname,
        link        => $uri,
        guid        => $uri,
        title       => $body,
        description => $body,
        timestamp   => $timestamp,
        pubDate     => $pub_date,
    };
}
$tree->delete;

# now print as an rss feed
#
# NOTE(review): the element names below were reconstructed from the text that
# survived in the file (title / link / description / language / ttl for the
# channel; title / description / pubDate / guid / link for each item) --
# confirm against the intended feed layout.
my $channel_link = xml_escape("$BASEURL/search?q=$term_uri");

print <<"ENDHEAD";
<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0">
<channel>
<title>Twitter Search / $term_xml</title>
<link>$channel_link</link>
<description>Twitter search for: $term_xml.</description>
<language>en-us</language>
<ttl>40</ttl>
ENDHEAD

for my $item (@items) {
    my $title       = xml_escape("$item->{username}: $item->{title}");
    my $description = xml_escape( $item->{description} );
    my $guid        = xml_escape( $item->{guid} );
    my $link        = xml_escape( $item->{link} );

    # pubDate is left out when the tweet carried no usable timestamp.
    my $pub_date_line = length $item->{pubDate}
        ? "<pubDate>$item->{pubDate}</pubDate>\n"
        : '';

    print <<"ENDITEM";
<item>
<title>$title</title>
<description>$description</description>
$pub_date_line<guid>$guid</guid>
<link>$link</link>
</item>
ENDITEM
}

print <<"ENDRSS";
</channel>
</rss>
ENDRSS

# Build an XPath predicate that matches elements whose space-separated
# "class" attribute contains $classname as a whole token.
#
#   $classname - class token to look for
#
# Returns the predicate string, including the surrounding [ ].
sub class_contains {
    my ($classname) = @_;
    return "[contains(concat(' ',normalize-space(\@class),' '),' $classname ')]";
}

# Escape the five XML special characters so $text can be placed inside
# element content or an attribute value.  An undefined value is treated as
# the empty string.
#
# Returns the escaped copy; the argument is not modified.
sub xml_escape {
    my ($text) = @_;
    $text //= '';
    $text =~ s/&/&amp;/g;    # must run first so later entities are not re-escaped
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    $text =~ s/"/&quot;/g;
    $text =~ s/'/&apos;/g;
    return $text;
}

# Percent-encode $text for use as a URL query value.  Every byte outside the
# RFC 3986 unreserved set (A-Z a-z 0-9 - . _ ~) becomes %XX.  Character
# strings are converted to UTF-8 bytes first.
#
# Returns the encoded copy; the argument is not modified.
sub uri_escape_term {
    my ($text) = @_;
    $text //= '';
    utf8::encode($text) if utf8::is_utf8($text);
    $text =~ s/([^A-Za-z0-9\-._~])/sprintf '%%%02X', ord $1/ge;
    return $text;
}