#!/usr/bin/perl -w
##
#
# parses bugtraq off security-focus.com.
# mainly for dynamic website use.
#
# dusty hall
#
##
use strict;
use URI::URL;
use LWP::Simple;
use HTML::Parse;
use HTML::TableExtract;
# Fetch the Bugtraq archive index, locate the table whose headers are
# Date / Subject / Author, and for every link found in a row's Subject cell
# write one line "<date> <lowercased link text>" to security-focus.html.
my $outfile = 'security-focus.html';
my $bugtraq = "http://www.security-focus.com/archive/1";

# Download first: LWP::Simple::get returns undef on any failure, and we do
# not want a failed fetch to truncate a previously generated output file.
my $html = get($bugtraq);
defined $html
    or die "could not fetch $bugtraq";

open(my $out, '>', $outfile)
    or die "cannot open $outfile for writing: $!";

# keep_html => 1 leaves the cell markup intact so the links inside the
# Subject cell can still be extracted below.
my $extract = HTML::TableExtract->new(
    headers   => [qw(Date Subject Author)],
    keep_html => 1,
);
$extract->parse($html);

for my $state ($extract->table_states) {
    for my $row ($state->rows) {
        # Cells are indexed as 0 = Date, 1 = Subject -- assumes columns come
        # back in the order the headers were listed; TODO confirm.
        my $date    = defined $row->[0] ? $row->[0] : '';
        my $subject = $row->[1];
        next unless defined $subject;

        my $tree = HTML::Parse::parse_html($subject);

        # extract_links yields [ $url, $element, ... ] entries; only the
        # <a> element itself is needed to recover the link text.
        # NOTE(review): the link target is never written out; only the date
        # and the lowercased link text are emitted.
        for my $found (@{ $tree->extract_links('a') }) {
            my $anchor = $found->[1];

            # content() is undef for an empty element, so guard before
            # indexing into it.
            my $children = $anchor->content;
            my $first    = $children ? $children->[0] : undef;

            # The first child may itself be an element (e.g. <b>...</b>);
            # take its text rather than printing a stringified reference.
            my $description
                = !defined $first ? ''
                : ref $first      ? $first->as_text
                :                   $first;

            $description =~ tr/A-Z/a-z/;
            print {$out} "$date $description\n";
        }

        # Release the parse tree explicitly; older HTML::Tree versions hold
        # circular references that are not freed otherwise.
        $tree->delete;
    }
}

# Buffered write errors only surface at close time.
close($out)
    or die "error closing $outfile: $!";