Public Scratchpad
for ww:
If you like, you may enter your location here. Whatever you enter here is displayed at the top of your home node with some other information about you. If you don't enter anything, your home node will show n/a for location.
Note: This field accepts HTML, but is not processed for PerlMonks markup; so, for example, shortcuts are not converted into links.
See also User's Bio (below) for other options related to Location.
Your bio can contain anything you want it to: Information about you, information about your grandmother, your favorite links, helpful tips for making tomato soup. You enter this information as HTML text. See Writeup Formatting Tips and Perl Monks Approved HTML tags for more information about what you can enter here.
Other sites which use HTML comments (<!-- location:latitude:DD.MM.SS,longitude:-DD.MM.SS -->) entered into your Bio to display information about Monks include PMplanet and Google Earth Monks.
These sites replace jcwren's Monk Maps and Big Monk Map, as described in Put yourself on the Monk Map, (jcwren) RE: Monks' Maps!, and More useless Monk Stats, but use HTML comments in exactly the same way to get your location. See PMplanet for formatting details.
For more information about PMplanet, please refer to PMplanet has a new home. Google Earth Monks is discussed in detail at Google Earth Monks.
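To make the comment format concrete, here is a minimal sketch in Perl (not PMplanet's or Google Earth Monks' actual code) that matches the documented Bio comment with a regular expression. The coordinates in the sample data are made up for illustration.

#!/usr/bin/perl
# Sketch only: pull the location comment (format documented above) out of
# a Bio's HTML. The sample comment under __DATA__ uses made-up coordinates.
use strict;
use warnings;

my $bio = do { local $/; <DATA> };

if ($bio =~ /<!--\s*location:latitude:(-?\d+\.\d+\.\d+),longitude:(-?\d+\.\d+\.\d+)\s*-->/) {
    print "latitude: $1, longitude: $2\n";
}
else {
    print "no location comment found\n";
}

__DATA__
<p>Some bio text about tomato soup.</p>
<!-- location:latitude:51.30.26,longitude:-0.07.39 -->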
$ finger coke@cmu.edu
[cmu.edu]
(Email addressed to 'coke@cmu.edu' will not be delivered to the following user because it only matches on a last name, not a computer login name.)
name: Coke
e-mail: coke@ece.cmu.edu
[ Forwarding as "coke@ece.cmu.edu" ]
[ece.cmu.edu]
Login: coke                              Name: Coke
Account Directory: /afs/ece/usr/coke     Shell: /bin/true
Last login Wed Jan 17 09:39 2007 (EST) on pts/0 from livia.ece.cmu.e
No mail.
No Plan.
#!/usr/bin/perl -w
#
# Hack to query and report from www.lexfn.com
#
# This code is free software; you can redistribute it and/or
# modify it under the same terms as Perl itself.
#
# by rik - ora@rikrose.net
#

######################
# support stage      #
######################

use strict;
use Getopt::Std qw(getopts);
use LWP::Simple qw(get);
use URI::Escape qw(uri_escape uri_unescape);
use HTML::TokeParser;

sub usage ( ) {
    print "usage: lexfn [options] word1 [word2]
options available:
 -s Synonymous    -a Antonym        -b Birth Year
 -t Triggers      -r Rhymes         -d Death Year
 -g Generalizes   -l Sounds like    -T Bio Triggers
 -S Specialises   -A Anagram of     -k Also Known As
 -c Comprises     -o Occupation of
 -p Part of       -n Nationality
 or -x for all
word1 is mandatory, but some searches require word2\n\n"
}

######################
# parse stage        #
######################

# grab arguments, and put them into %args hash, leaving nonarguments
# in @ARGV for us to process later (where word1 and word2 would be).
# if we don't have at least one argument, we die with our usage.
my %args;
getopts('stgScparlAonbdTkx', \%args);
if (@ARGV > 2 || @ARGV == 0) { usage( ); exit 0; }

# turn both our words into queries.
$ARGV[0] =~ s/ /\+/g;
$ARGV[1] ||= "";
if ($ARGV[1]) { $ARGV[1] =~ s/ /\+/g; }

# begin our URL construction with the keywords.
my $URL = "http://www.lexfn.com/l/lexfn-cuff.cgi?sWord=$ARGV[0]".
          "&tWord=$ARGV[1]&query=show&maxReach=2";

# now, let's figure out our command-line arguments. each
# argument is associated with a relevant search at LexFN,
# so we'll first create a mapping to and fro.
my %keynames = (
    s => 'ASYN', t => 'ATRG', g => 'AGEN', S => 'ASPC',
    c => 'ACOM', p => 'APAR', a => 'AANT', r => 'ARHY',
    l => 'ASIM', A => 'AANA', o => 'ABOX', n => 'ABNX',
    b => 'ABBX', d => 'ABDX', T => 'ABTR', k => 'ABAK'
);

foreach my $key (keys %keynames) {
    $keynames{$key} = lc($keynames{$key});
    $keynames{$key} = ucfirst($keynames{$key});
}

# if we want everything (all matches),
# then add them to our arguments hash,
# in preparation for our URL.
use Data::Dumper; #die Dumper(\%keynames);
if (defined($args{'x'}) && $args{'x'} == 1) {
    foreach my $arg (qw/s t g l S c p a r l A o n b d T k/) {
        $args{$arg} = 1;   # in preparation for URL.
    }
    delete $args{'x'};     # x means nothing to LexFN.
}

# build the URL from the flags we want.
foreach my $arg (keys %args) {
    $URL .= '&' . $keynames{$arg} . '=on' if $args{$arg} == 1;
}

######################
# request stage      #
######################

# and download it all for parsing.
my %table = (
    'sounds like'          => $args{l},
    'comprises'            => $args{c},
    'rhymes with'          => $args{r},
    'is more general than' => $args{g},
    'is a kind of'         => $args{p},
    'is an anagram of'     => $args{A},
    'triggers'             => $args{t},
    'is a synonym of'      => $args{s},
);

my $content = get($URL) or die $!;

my %tags = ();
foreach my $line (split /\n/, $content) {
    my ($tag) = $line =~ /ALT\=\"([\w ]+)\"/;
    next unless ($tag);
    $tags{$tag} = $table{$tag};
}

######################
# extract stage      #
######################

# with the data sucked down, pass it off to the parser.
my $stream = HTML::TokeParser->new( \$content ) or die $!;

# skip the form on the page, then it's the first <b>
# after the form that we start extracting data from.
my $tag = $stream->get_tag("/form");
while ($tag = $stream->get_tag("b")) {
    my $line = '';
    $line .= $stream->get_trimmed_text("/b") . " ";
    $tag   = $stream->get_tag("img");
    $line .= $tag->[1]{alt} . " ";
    my $ok = $tags{$tag->[1]{alt}} ? 1 : 0;
    $tag   = $stream->get_tag("a");
    $line .= $stream->get_trimmed_text("/a") . "\n";
    print $line if ($ok);
}

exit 0;
Resources:

planetscape, Apr 24, 2006 at 09:45 UTC:
In the pedagogues' inbox, Scott7477 writes: "What subject would you most like to see a tutorial for posted to PM? Thx Scott"
Anyone have any preferences? I'd simply suggest he pick a subject he is competent in and comfortable with writing... Either that, or perhaps we should commission him to take on ww's earlier request:
ww says "though slightly OT, suggest a compilation re translation of M$Word to civilized files might be useful; cf: holli's node et seq in Re^2: "Demoronizer" in Best of the Best Users in Perlmonks site and those I mention below."Thoughts?
Clean up your Web pages with HTML TIDY
Charlie's Tidy Add-ons, including a Perl Wrapper
Demoronizer - A perl script to sanitize Microsoft's HTML
Demoronizer - Correct Moronic Microsoft HTML
Word HTML 2 Formatting Objects
WH2FO is a Java application that processes HTML output created by Word 2000 and transforms it into an XML content file and an XSL stylesheet file. From these files, a standard XSLT processor can produce a file containing only XSL-FO markup. You can also apply a stylesheet that converts the XML back into HTML, discarding all the extra markup added by Word. Using an XSL-FO renderer, such as FOP, you can also render your document into PDF.
GrandFather's PerlMonks Editor - on CPAN: PMEdit-001.000104-1.pl
HTML::Parser has several example programs, such as hstrip.pl, which might be helpful; a minimal tag-stripping sketch in the same spirit appears below
OpenOffice.org - free office suite and MSOffice alternative
Use OpenOffice to save in HTML format. OpenOffice creates much cleaner HTML, and the resulting file may still be run through HTML Tidy or a script such as hstrip.pl.
Arachnophilia (offsite)
There may well be more resources out there, but this list should get you started. I suggest examining each alternative's usage, sample (short) input and output, and a breakdown of the pros and cons...
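As referenced above, here is a minimal sketch of stripping a page down to its text with HTML::TokeParser (the same module the lexfn script uses). It is not hstrip.pl and makes no attempt at Tidy-style cleanup; the file name striptext.pl is just a placeholder, and the input would be whatever HTML Word or OpenOffice produced.

#!/usr/bin/perl
# Minimal sketch, not hstrip.pl: print only the text content of an HTML
# file, dropping all tags. Crude on purpose -- it will also print the
# contents of <script> and <style> blocks.
use strict;
use warnings;
use HTML::TokeParser;

my $file = shift or die "usage: striptext.pl file.html\n";
my $p = HTML::TokeParser->new($file) or die "Can't parse $file: $!";

while (my $token = $p->get_token) {
    print $token->[1] if $token->[0] eq 'T';   # "T" tokens are plain text
}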
See also:
"Not yet organized" Stuff:
EOF