#!/usr/bin/perl
#
# Print the news items of one day from the germnews.de archive, pairing the
# item found under archive/dn/ with the same-numbered item under archive/gn/.
# The dn item is emitted inside <EN>, the gn item inside <DE>.
#
# Usage: <script> YEAR MONTH DAY

use strict;
use warnings;

use LWP::Simple;
use Regexp::Bind qw(global_bind);

my ($year, $mon, $mday) = @ARGV[0..2];
die "Usage: $0 YEAR MONTH DAY\n"
    if grep { !defined($_) || $_ !~ /\A\d+\z/ } $year, $mon, $mday;

# Regexp::Bind template with embedded field names: (?#<field>pattern).
# Single-quote delimiters keep Perl from interpolating anything in it.
#
# NOTE(review): the copy this was restored from had lost the text in front of
# "\d+)" and the <number>/<title> field names (the code below reads the fields
# number, title and text, so those names are required).  The "<A NAME=" prefix
# is a best guess -- confirm against a real archive page.
# NOTE(review): the copy read "< tr>"; "<\s*tr>" accepts both that spelling
# and "<tr>" -- confirm which one the pages actually use.
my $template = qr'<A NAME=(?#<number>\d+)>(?#<title>\w.+?)</B><BR><BR>.+?<table width=75%>\n<\s*tr><td>\n?(?#<text>.+?)\n?(?:<BR>){3}.+?</table>'s;

# url($edition): archive URL of the requested date for one edition directory
# ("dn" or "gn").  The edition is passed as a sprintf argument rather than
# interpolated into the format string.
sub url {
    my ($edition) = @_;
    return sprintf 'http://www.germnews.de/archive/%s/%04d/%02d/%02d.html',
        $edition, $year, $mon, $mday;
}

# get_url(): fetch both editions of the requested date.
# Returns an array reference [ $dn_html, $gn_html ].
# Dies if either page cannot be retrieved (LWP::Simple::get returns undef).
sub get_url {
    my @pages;
    for my $edition (qw(dn gn)) {
        my $address = url($edition);
        my $html    = get($address);
        die "Could not fetch $address\n" unless defined $html;
        push @pages, $html;
    }
    return \@pages;
}

# regexp_bind($pages): apply $template to every page in the array reference.
# Returns one array reference per page; slot N of it holds [ title, text ]
# for the item whose "number" field is N.
sub regexp_bind {
    my ($pages) = @_;
    return map {
        my @items;
        for my $record (global_bind($_, $template)) {
            next unless defined $record->{number};
            $items[ $record->{number} ] = [ $record->{title}, $record->{text} ];
        }
        \@items;
    } @{$pages};
}

# output($en, $de): format one pair of [ title, text ] items and return the
# resulting string.
#
# NOTE(review): the copy this was restored from left <TITLE> and <NEWS>
# unclosed; the closing tags were added here.  The two texts may originally
# have been wrapped in tags of their own -- confirm the intended format.
sub output {
    my ($en, $de) = @_;
    return <<"END_NEWS";
<NEWS>
<TITLE>
<EN> $en->[0] </EN>
<DE> $de->[0] </DE>
</TITLE>
$en->[1]
$de->[1]
</NEWS>
END_NEWS
}

my ($english, $german) = regexp_bind(get_url());

# An item is printed only when both editions have it, so there is no point in
# walking past the end of the shorter list.
my $last = $#{$english} < $#{$german} ? $#{$english} : $#{$german};

for my $number (0 .. $last) {
    my ($en, $de) = ($english->[$number], $german->[$number]);
    next unless $en && $de && $en->[0] && $de->[0];
    print output($en, $de);
}