| Category: | Information retrieval |
| Author/Contact Info | Juerd <juerd@juerd.nl> |
| Description: | The Nederlandse Spoorwegen (Dutch Railways) decided only MSIE 4+ users may visit their site. Other javascript capable browsers get an annoying message. Someone (nohup) made ns.sh, a shell script that retrieves information using lynx. I took his idea, implemented it in Perl and improved it a little. Please note that I did not implement all the features his script has. Perhaps I should have used HTML::Parser or some table extractor, but this is easier :) Of course, if the NS change their site again, this script will probably be broken. Update (200201091407+0100) New version, takes command line parameters or hides prompts with -p. Also no longer uses STDERR. No-one mentioned $t[$#t] which should of course be written as $t[-1], which makes me wonder if this node is being read :) |
#!/usr/bin/perl -w
use v5.6; # Require perl 5.6.0 or newer
use strict;
use POSIX;
# Based on nohup's ns.sh
$|++;
our $VERSION = '1.23';
my $defaultfrom = 'Dordrecht';
sub mydie {
print @_;
exit 255;
}
sub input {
my ($prompt, $default, $pattern, $force) = @_;
my ($input, @input);
if (defined $force){
$force = $default if $force eq '';
unless (@input = $force =~ $pattern){
mydie "'$force' is an invalid value for '$prompt'.\n";
}
return @input;
}
do{
print "$prompt [$default]: ";
$input = <STDIN>;
mydie "Aborted.\n" if not defined $input;
chomp $input;
$input = $default if $input eq '';
} until @input = $input =~ $pattern;
return @input;
}
my @param = @ARGV && $ARGV[0] eq '-p' ? <STDIN> : @ARGV;
chomp @param;
if (@param and @param != 5) {
print <<" EOU";
Dutch Railways train schedule information, version $VERSION.
Made by Juerd <juerd\@juerd.nl>
Use this at your own risk.
Usage: $0
Usage: echo -e "\$FROM\\n\$TO\\n\$DATE\\n\$TIME\\n\$DEPARTURE"
+ | $0 -p
Usage: $0 \$TO \$FROM \$DATE \$TO \$DEPARTURE
Example: $0 'Rotterdam Centaal' 'Den Helder' '' 17:00 A
(Empty parameters take defaults, date/time default to
current local.)
EOU
exit 1;
}
my ($from) = input('From', $defaultfrom, qr/^(.+)$/, $param[0]);
my ($to) = input('To', '', qr/^(.+)$/, $param[1]);
my ($dd, $mm, $yyyy) =
input(
'Date (dd/mm/yyyy)',
strftime('%d/%m/%Y', localtime),
qr/^(3[01]|[012]?\d)\W(1[012]|0?\d)\W((?:\d\d)?\d\d)?$/,
$param[2]
);
my ($hh, $MM) =
input(
'Time',
strftime('%H:%M', localtime),
qr/^(2[0-3]|[01]?\d)\W?([0-5]\d)$/,
$param[3]
);
my ($da) = input('(D)eparture or (A)rrival', 'D',
qr/^([VDAvda01])/, $param[4]);$da = 0 + !!($da =~ /[Aa1]/);
$yyyy = "20$yyyy" if length($yyyy) == 2;
###
my $page;
my $tries = 0;
{
$tries++;
s/(\W)/sprintf('%%%02x', ord $1)/eg for $from, $to;
my $url = 'http://www.ns.nl/cgi-ns/nsbaliecgi?' .
'state=0%3A0%3A0%3A0%3A0%3A0%3A0%3A0%3A0%3A0&' .
'actie=ok&datumbutton=anders&fromlocmode=1&tolocmode=1&' .
"from=$from&to=$to&" .
"daytraveldate=$dd&monthtraveldate=$mm&yeartraveldate=$yyyy&"
+.
"hourtraveltime=$hh&minutetraveltime=$MM&tijdbutton=$da";
eval q{
use LWP::Simple;
$page = get($url);
mydie "Couldn't retrieve webpage.\n" unless $page;
};
if ($@){
mydie "LWP (libwww-perl) is not installed, and lynx cannot be
+used.\n"
unless ($ENV{PS1} or $ENV{prompt});
$page = `lynx -source '$url'`;
mydie "LWP (libwww-perl) is not installed, and lynx could not
+be used.\n"
unless $page;
}
if ($tries < 2 and $page =~ /NAME="(?:from|to)"\s+type="hidden"\s*
+VALUE=""/) {
($from) = $page =~ /NAME="fromstationselected"\s+type="hidden"
+\s+VALUE=".*:([^\"]+)"/;
($to) = $page =~ /NAME="tostationselected"\s+type="hidden"\s
++VALUE=".*:([^\"]+)"/;
print "Not found. Trying with from='$from' and to='$to'.\n";
redo;
}
}
###
my $tfound = 0;
my @t;
print "\n\n" unless @param;
OUTER: for (split /\r?\n/, $page){
$1 eq 'Postcode' ? mydie "Vague output\n" : print "$1\n" if
/<font [^>]*>([^<]*)/;
$tfound = 1 if /id="?train1div"?/i;
if ($tfound){
if (m,</div>,) {
$tfound = 0;
my @max = (0)x6;
for my $t (@t){
my $i = -1;
for (@$t){
$i++;
s/ //, s/^\s+|\s+$//g;
$max[$i] = length if length > $max[$i];
}
}
for (@t){
my $z = -1;
print join ' ', map {
$z++;
my $i = -1;
for (@$t){
$i++;
s/ //, s/^\s+|\s+$//g;
$max[$i] = length if length > $max[$i];
}
}
for (@t){
my $z = -1;
print join ' ', map {
$z++;
$_ . (' ' x ($max[$z] - length))
} @$_;
print "\n";
}
last OUTER;
};
push @t, [] if m,<tr>,;
#print "\n" if m,</tr>,;
push @{ $t[-1] }, $1 if />([A-Z0-9a-z][^<]*)/ and
$1 ne 'Treinroute' and $1 ne
'Tussenstations';
}
}
|
|
|
|---|