#!/usr/bin/perl
#
# Builds an RSS 2.0 feed from the HTML files found below a document root
# and prints it to standard output.
#
# Based almost entirely on John Bokma's script from 2005:
#   http://johnbokma.com/
#   http://johnbokma.com/perl/rss-web-feed-builder.html

use strict;
use warnings;

use POSIX;
use XML::RSS;
use File::Find;
use Getopt::Long;
use HTML::TreeBuilder;

my $domain;
my $dir;
my $title;
my $description;

GetOptions(
    "dir=s"    => \$dir,
    "domain=s" => \$domain,
    "title=s"  => \$title,
    "desc=s"   => \$description,
) or show_help();

(defined $dir and defined $domain) or show_help();
-d $dir or die "Not a directory: $dir\n";

# Drop trailing slashes (but keep a bare "/") so that the prefix removed
# from each file name below has a predictable length.
$dir =~ s{/+\z}{} unless $dir =~ m{\A/+\z};

# The channel needs a title and a description; fall back to sensible
# values instead of passing undef to XML::RSS.
$title       = $domain unless defined $title;
$description = $title  unless defined $description;

my ($file_history, $files) = fetch_files($dir);

my $rss = XML::RSS->new(version => '2.0');
$rss->channel(
    title       => $title,
    link        => "https://$domain/",
    description => $description,
    pubDate     => format_rfc822_date(time),  # Thu, 23 Aug 1999 07:00:00 GMT
);
$rss->add_module(prefix => 'dc', uri => 'http://purl.org/rss/1.0/modules/dc/');

foreach my $file (@$files) {
    my ($item_title, $item_description) = get_file_meta($file);

    # Path of the file relative to the document root, without a leading
    # slash, so the link never contains "//" after the domain.
    my $relative_path = substr $file, length $dir;
    $relative_path =~ s{\A/+}{};

    my $link = "https://$domain/$relative_path";

    # Link to the directory rather than to its index page. The look-behind
    # keeps names such as "reindex.html" intact.
    $link =~ s{(?<=/)index\.html?\z}{};

    my $modified_time = $file_history->{$file};

    $rss->add_item(
        title       => $item_title,
        link        => $link,
        description => $item_description,
        # RSS 2.0 wants an RFC 822 date in pubDate ...
        pubDate     => format_rfc822_date($modified_time),
        # ... while Dublin Core dates are W3CDTF. XML::RSS adds the "dc:"
        # prefix itself, so the key is the bare element name.
        dc          => { date => format_date_time($modified_time) },
    );
}

print $rss->as_string;

#
#
# PRIVATE METHODS

# fetch_files($dir)
#
# Recursively collects every plain file below $dir whose name ends in
# ".htm" or ".html".
#
# Returns a two-element list:
#   - a hash reference mapping each file name (as reported by File::Find)
#     to its modification time in epoch seconds;
#   - an array reference with the same file names sorted by modification
#     time, oldest first.
sub fetch_files {
    my ($dir) = @_;

    # Start from an empty hash so the caller always gets a valid reference,
    # even when no HTML file is found.
    my %mtime_of;

    find(
        sub {
            return unless -f $_;
            return unless /\.html?\z/;

            # "_" reuses the stat data gathered by the -f test above.
            $mtime_of{$File::Find::name} = (stat _)[9];
        },
        $dir
    );

    # Sort the files on modification time, ascending.
    my @file_names = sort { $mtime_of{$a} <=> $mtime_of{$b} } keys %mtime_of;

    return (\%mtime_of, \@file_names);
}

# show_help()
#
# Prints the usage message and exits with status 1. The message goes to
# STDERR because STDOUT is normally redirected into the feed file.
sub show_help {
    print STDERR <<"HELP";
Usage: $0 --dir DIR --domain DOMAIN [--title TITLE] [--desc DESC] > index.rss

Options:
    --dir       Path to the document root
    --domain    Domain name
    --title     Title of feed
    --desc      Description of feed
HELP
    exit 1;
}

# format_date_time($time)
#
# Formats an epoch time as a W3CDTF / ISO 8601 UTC timestamp,
# e.g. "2005-08-23T07:00:00Z".
sub format_date_time {
    my ($time) = @_;

    my @time = gmtime $time;

    return sprintf "%4d-%02d-%02dT%02d:%02d:%02dZ",
        $time[5] + 1900, $time[4] + 1, $time[3],
        $time[2], $time[1], $time[0];
}

# format_rfc822_date($time)
#
# Formats an epoch time as an RFC 822 date in GMT, as used by RSS 2.0
# pubDate elements, e.g. "Thu, 23 Aug 1999 07:00:00 GMT".
#
# The day and month names are spelled out here rather than taken from
# strftime, whose %a/%b output follows the current locale and whose %Z
# reports the local zone name even for a gmtime() list.
sub format_rfc822_date {
    my ($time) = @_;

    my @day_names   = qw(Sun Mon Tue Wed Thu Fri Sat);
    my @month_names = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec);

    my @time = gmtime $time;

    return sprintf "%s, %02d %s %04d %02d:%02d:%02d GMT",
        $day_names[ $time[6] ], $time[3], $month_names[ $time[4] ],
        $time[5] + 1900, $time[2], $time[1], $time[0];
}

# get_file_meta($file_name)
#
# Parses an HTML file and returns a two-element list (title, description):
#   - title is the text of the first <title> element, or 'N/A' when the
#     document has none;
#   - description is the text of the first <p> element, falling back to
#     the title when the document has no paragraph.
sub get_file_meta {
    my ($file_name) = @_;

    my $root = HTML::TreeBuilder->new;

    # parse_file returns undef when the file cannot be opened; report it
    # and carry on with the 'N/A' placeholders.
    defined $root->parse_file($file_name)
        or warn "Cannot parse $file_name: $!\n";

    my $title_element = $root->look_down(_tag => 'title');
    my $title = defined $title_element ? $title_element->as_text : 'N/A';

    # $title is already 'N/A' when there is no <title>, so it serves as the
    # fallback in both cases.
    my $p_element = $root->look_down(_tag => 'p');
    my $description = defined $p_element ? $p_element->as_text : $title;

    # Free the parse tree explicitly.
    $root->delete;

    return ($title, $description);
}