#!/usr/bin/perl -w
# web_diff.pl - fetch a page and print a diff of its text against the
# copy stored by the previous run.  See the POD after __END__ for details.
use strict;
use warnings;
use LWP::Simple;
use Text::Diff;
use HTML::Strip;
require 5.008_000;

# Directory in which fetched pages are kept between runs.
my $STORE = "/home/charlie/diffs";

# One stripper is shared by both documents; _stripped_text() resets it
# with eof() after each parse so no tag state carries over.
my $hs = HTML::Strip->new();

die "Usage: $0 <URL_TO_DIFF>" unless @ARGV == 1;
my $url = $ARGV[0];

# 'nice' URL: drop a leading http:// and turn every '/' into '_' so the
# result is usable as a single file name inside $STORE.
my $n_url = $url;
$n_url =~ s{^http://}{};
$n_url =~ s{/}{_}g;

# Path of the copy kept between runs.  When one already exists the fresh
# download goes to "<path>.new" so the two can be compared.
my $current  = "$STORE/$n_url";
my $store_as = -e $current ? "$current.new" : $current;

if (is_success(getstore($url, $store_as))) {
    # Nothing to compare on the very first fetch (the download *is* the
    # stored copy).  The original compared against "$STORE/$url", which
    # never matched, so the file was diffed against and renamed onto itself.
    unless ($store_as eq $current) {
        # NOTE(review): "from" is the newly fetched page and "to" is the
        # previously stored one, i.e. the diff reads new -> old.  Kept as
        # in the original; swap the two if old -> new is what is wanted.
        my $from = _stripped_text($store_as);
        my $to   = _stripped_text($current);

        my $diff = diff \$from, \$to;
        print $diff;

        # The fresh download becomes the stored copy for the next run.
        rename $store_as, $current
            or warn "Could not rename $store_as to $current: $!";
    }
}
else {
    warn "Storing $store_as failed. Life sucks."
}

# _stripped_text($path)
#   Reads the file at $path, joins its lines with single spaces, removes
#   HTML markup with the shared HTML::Strip object and squeezes runs of
#   spaces/tabs into one space.
#   Returns the resulting text; dies if the file cannot be opened.
sub _stripped_text {
    my ($path) = @_;

    # Three-argument open with a lexical handle: the file name is derived
    # from the URL given on the command line, so it must never be
    # interpreted as an open mode or a pipe.
    open my $in, '<', $path or die "Cannot open $path: $!";
    my @lines = <$in>;
    close $in;

    my $text = $hs->parse(join ' ', @lines);
    $hs->eof;    # reset parser state before the next document

    # tr/// takes a character list, not a regex: the original "[ \t]"
    # also turned literal '[' and ']' into spaces.
    $text =~ tr/ \t/ /s;

    return $text;
}

__END__

=head1 NAME

web_diff.pl

=head2 VERSION

0.1

=head1 SYNOPSIS

diff text from a page retrieved off interweb and page stored locally

=head1 DESCRIPTION

  Retrieve and store a page locally
  If we have a previously stored local copy,
    Compare retrieved and local page
      Strip html from them
      Print a diff (empty if the stripped text is identical)

=head2 OPTIONS

=over

=item C<URL TO DIFF>

This isn't sanitized properly; this code is not for use by people you
don't trust implicitly :-)

=back

=head1 REQUIREMENTS

=over

=item Perl >= 5.8.0 (not tested on earlier versions)

=item HTML::Strip

=item Text::Diff

=item LWP::Simple

=back

=head1 COPYRIGHT AND LICENCE

Copyright (C)2006 Charlie Harvey

This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2 of the License, or (at your
option) any later version.

This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
Public License for more details.

You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.

Also available on line: http://www.gnu.org/copyleft/gpl.html

=head1 SEE ALSO

=cut
In reply to web_diff.pl by ciderpunx
| For: | Use: | ||
| & | &amp; | ||
| < | &lt; | ||
| > | &gt; | ||
| [ | &#91; | ||
| ] | &#93; |