Category: Information retrieval
Author/Contact Info Juerd <juerd@juerd.nl>
Description: The Nederlandse Spoorwegen (Dutch Railways) decided only MSIE 4+ users may visit their site. Other javascript capable browsers get an annoying message.
Someone (nohup) made ns.sh, a shell script that retrieves information using lynx. I took his idea, implemented it in Perl and improved it a little. Please note that I did not implement all the features his script has.
Perhaps I should have used HTML::Parser or some table extractor, but this is easier :)

Of course, if the NS change their site again, this script will probably be broken.

Update (200201091407+0100)
New version, takes command line parameters or hides prompts with -p. Also no longer uses STDERR.
No-one mentioned $t[$#t] which should of course be written as $t[-1], which makes me wonder if this node is being read :)
#!/usr/bin/perl -w

use v5.6; # Require perl 5.6.0 or newer
use strict;
use POSIX;

# Based on nohup's ns.sh

$|++;

our $VERSION = '1.23';

my $defaultfrom = 'Dordrecht';

sub mydie {
    print @_;
    exit 255;
}

sub input {
    my ($prompt, $default, $pattern, $force) = @_;
    my ($input, @input);
    if (defined $force){
        $force = $default if $force eq '';
        unless (@input = $force =~ $pattern){
            mydie "'$force' is an invalid value for '$prompt'.\n";
        }
        return @input;
    }
    do{
        print "$prompt [$default]: ";
        $input = <STDIN>;
        mydie "Aborted.\n" if not defined $input;
        chomp $input;
        $input = $default if $input eq '';
    } until @input = $input =~ $pattern;
    return @input;
}

my @param = @ARGV && $ARGV[0] eq '-p' ? <STDIN> : @ARGV;
chomp @param;
if (@param and @param != 5) {
    print <<"    EOU";
    Dutch Railways train schedule information, version $VERSION.
    Made by Juerd <juerd\@juerd.nl>
    Use this at your own risk.

        Usage: $0
        Usage: echo -e "\$FROM\\n\$TO\\n\$DATE\\n\$TIME\\n\$DEPARTURE"
+ | $0 -p
        Usage: $0 \$TO \$FROM \$DATE \$TO \$DEPARTURE

        Example: $0 'Rotterdam Centaal' 'Den Helder' '' 17:00 A
        (Empty parameters take defaults, date/time default to 
current local.)
    EOU
    exit 1;
}

my ($from) = input('From', $defaultfrom, qr/^(.+)$/, $param[0]);
my ($to)   = input('To', '', qr/^(.+)$/, $param[1]);
my ($dd, $mm, $yyyy) =
    input(
        'Date (dd/mm/yyyy)',
        strftime('%d/%m/%Y', localtime),
        qr/^(3[01]|[012]?\d)\W(1[012]|0?\d)\W((?:\d\d)?\d\d)?$/,
        $param[2]
    );
my ($hh, $MM) =
    input(
        'Time',
        strftime('%H:%M', localtime),
        qr/^(2[0-3]|[01]?\d)\W?([0-5]\d)$/,
        $param[3]
    );
my ($da) = input('(D)eparture or (A)rrival', 'D', 
qr/^([VDAvda01])/, $param[4]);$da = 0 + !!($da =~ /[Aa1]/);

$yyyy = "20$yyyy" if length($yyyy) == 2;

###

my $page;
my $tries = 0;

{
    $tries++;
    s/(\W)/sprintf('%%%02x', ord $1)/eg for $from, $to;
    my $url = 'http://www.ns.nl/cgi-ns/nsbaliecgi?' .
        'state=0%3A0%3A0%3A0%3A0%3A0%3A0%3A0%3A0%3A0&' .
        'actie=ok&datumbutton=anders&fromlocmode=1&tolocmode=1&' .
        "from=$from&to=$to&" .
        "daytraveldate=$dd&monthtraveldate=$mm&yeartraveldate=$yyyy&" 
+.
        "hourtraveltime=$hh&minutetraveltime=$MM&tijdbutton=$da";

    eval q{
        use LWP::Simple;
        $page = get($url);
        mydie "Couldn't retrieve webpage.\n" unless $page;
    };
    if ($@){
        mydie "LWP (libwww-perl) is not installed, and lynx cannot be 
+used.\n"
            unless ($ENV{PS1} or $ENV{prompt});
        $page = `lynx -source '$url'`;
        mydie "LWP (libwww-perl) is not installed, and lynx could not 
+be used.\n"
            unless $page;
    }
    if ($tries < 2 and $page =~ /NAME="(?:from|to)"\s+type="hidden"\s*
+VALUE=""/) {
        ($from) = $page =~ /NAME="fromstationselected"\s+type="hidden"
+\s+VALUE=".*:([^\"]+)"/;
        ($to)   = $page =~ /NAME="tostationselected"\s+type="hidden"\s
++VALUE=".*:([^\"]+)"/;
        print "Not found. Trying with from='$from' and to='$to'.\n";
        redo;
    }
}

###

my $tfound = 0;
my @t;

print "\n\n" unless @param;

OUTER: for (split /\r?\n/, $page){
    $1 eq 'Postcode' ? mydie "Vague output\n" : print "$1\n" if 
/<font [^>]*>([^<]*)/;
    $tfound = 1 if /id="?train1div"?/i;
    if ($tfound){
        if (m,</div>,) {
            $tfound = 0;
            my @max = (0)x6;
            for my $t (@t){
                my $i = -1;
                for (@$t){
                    $i++;
                    s/&nbsp;//, s/^\s+|\s+$//g;
                    $max[$i] = length if length > $max[$i];
                }
            }
            for (@t){
                my $z = -1;
                print join ' ', map {
                    $z++;
                my $i = -1;
                for (@$t){
                    $i++;
                    s/&nbsp;//, s/^\s+|\s+$//g;
                    $max[$i] = length if length > $max[$i];
                }
            }
            for (@t){
                my $z = -1;
                print join ' ', map {
                    $z++;
                    $_ . (' ' x ($max[$z] - length))
                } @$_;
                print "\n";
            }
            last OUTER;
        };
        push @t, [] if m,<tr>,;
        #print "\n" if m,</tr>,;
        push @{ $t[-1] }, $1 if />([A-Z0-9a-z][^<]*)/ and
                                $1 ne 'Treinroute' and $1 ne 
'Tussenstations';
    }

}