| Category: | Data Processing, Utilities |
| Author/Contact Info | hossman |
| Description: | ISFDB is an extremely useful database of information about SF Publications. The database is "Publication" centric (as opposed to Author Centric, or Title centric), and has some handy search & listing pages, but one thing that's not readily available is a list of all the publications that contain titles by a particular Author. (So you know what books to buy.) Fortunately, all of the data/software that powers the ISFDB is available as part of the ISFDB Open Data Project. If you download the compiled database files (or the raw data files and the tools to compile them) this script will take care of it for you. |
#!/usr/local/bin/perl
use warnings;
use strict;
use vars qw(%opts $author_rx $pubtype_rx %pubmap %titlemap);
use Getopt::Std;
# usage([$err])
#
# Print an optional error message followed by the help text stored after
# __END__ (read through the DATA handle), then terminate the program.
#
#   $err - optional message printed before the help text.
#
# When an error message is supplied the output goes to STDERR and the
# process exits with status 1, so callers/shell scripts can detect the
# failure.  Without a message (plain -h) the help goes to STDOUT and the
# exit status is 0.  Never returns.
sub usage {
    my ($err) = @_;
    $err = '' unless defined $err;

    my $failed = length($err) ? 1 : 0;
    my $out    = $failed ? \*STDERR : \*STDOUT;

    # <DATA> in list context yields every line of the help text.
    print {$out} $err, <DATA>;
    exit $failed;
}
# Command-line handling.
#   -d DIR     directory containing titles.dbase and pubs.dbase (mandatory)
#   -a STRING  substring to look for in the author field (mandatory)
#   -t REGEX   regex the publication-type field must match (default: any)
#   -h         print the usage text and exit
$opts{t} = '.*';    # default: accept every publication type
getopts('ha:d:t:', \%opts) or usage("Invalid Args.\n");
usage() if $opts{h};
usage("Missing Mandatory Args.\n")
    unless defined $opts{d} and defined $opts{a};

# The author argument is matched literally: \Q...\E quotes any regex
# metacharacters it contains.
$author_rx = qr/\Q$opts{a}\E/;

# The pubtype argument is a genuine, user-supplied regex.  Compile it inside
# eval so a malformed pattern is reported through usage() instead of
# aborting with a bare regex-compilation error.
$pubtype_rx = eval { qr/$opts{t}/ };
usage("Invalid pubtype regex '$opts{t}': $@") unless defined $pubtype_rx;
# Open both compiled database files for reading.  Three-argument open keeps
# odd characters in the -d directory from being interpreted as an open mode,
# and the failure messages name the path and the OS error ($!).
open TITLES, '<', "$opts{d}/titles.dbase"
    or die "couldn't open titles.dbase file '$opts{d}/titles.dbase': $!";
open PUBS, '<', "$opts{d}/pubs.dbase"
    or die "couldn't open pubs.dbase file '$opts{d}/pubs.dbase': $!";
# Pass 1: scan titles.dbase and build %pubmap, which maps each publication
# code to the list of raw title records (by a matching author) that the
# publication contains.
while (my $line = <TITLES>) {
    chomp $line;

    # A LIMIT of -1 keeps empty fields (split drops trailing empty fields by
    # default).  Using the pipe count as the limit instead would leave the
    # record's trailing '|' glued onto the last field.
    my @fields = split /\|/, $line, -1;

    # Field 1 is matched against the author pattern; field 5 is a
    # comma-separated list of publication codes.  Treat fields missing from
    # blank/short records as empty rather than warning about undef.
    my $author   = defined $fields[1] ? $fields[1] : '';
    my $pubcodes = defined $fields[5] ? $fields[5] : '';

    next unless $author =~ $author_rx;

    for my $pubcode (split /,/, $pubcodes) {
        # push autovivifies the array ref on first use.
        push @{ $pubmap{$pubcode} }, $line;
    }
}
# Pass 2: scan pubs.dbase.  For every publication that is referenced from
# %pubmap and whose type matches the pubtype pattern, emit one output record
# per contained title into %titlemap (keyed by title so the output can be
# sorted by title later).
while (my $line = <PUBS>) {
    chomp $line;

    # A LIMIT of -1 keeps empty fields (split drops trailing empty fields by
    # default).  Using the pipe count as the limit instead would leave the
    # record's trailing '|' glued onto the last field.
    my @fields = split /\|/, $line, -1;

    # Field 0 is the publication code used as the %pubmap key; field 8 is
    # matched against the pubtype pattern.  Missing fields in blank/short
    # records are treated as empty rather than warning about undef.
    my $pubcode = defined $fields[0] ? $fields[0] : '';
    my $pubtype = defined $fields[8] ? $fields[8] : '';

    next unless exists $pubmap{$pubcode};
    next unless $pubtype =~ $pubtype_rx;

    for my $titleline (@{ $pubmap{$pubcode} }) {
        # The title is the first field of the stored title record.
        my ($title) = split /\|/, $titleline, 2;
        $title = '' unless defined $title;

        # Output record: title, then the raw pub record, then the raw title
        # record.  No separator is added between $line and $titleline; the
        # records are concatenated exactly as read.
        push @{ $titlemap{$title} }, "$title|$line$titleline\n";
    }
}
# Emit the collected records grouped by title, titles in default (string)
# sort order; records under one title keep the order in which they were
# collected.
for my $sorted_title (sort keys %titlemap) {
    my $records = $titlemap{$sorted_title};
    print $_ for @{$records};
}
__END__
Usage:
isfdb-list-author.pl -d dbase_compiled_dir -a auth_substr [-t pubtype_regex ]
Examples:
isfdb-list-author.pl -d ~/isfdb/dbase.compiled -a Asimov
isfdb-list-author.pl -d ~/isfdb/dbase.compiled -a 'G. Harry Stine^Lee Correy'
isfdb-list-author.pl -d ~/isfdb/dbase.compiled -a Asimov -t 'hc|pb'
isfdb-list-author.pl -d ~/isfdb/dbase.compiled -a Asimov -t '^$'
Output:
One record per title/pub combo that matches the specified criteria. The
1st field is the title, after that comes all of the fields about the
publication, and then all of the fields about the title. In standard ISFDB
compiled DB format there is a trailing | after the last field.
|
|
|
|---|