| Category: | Web |
| Author/Contact Info | Adrian Howard |
| Description: | Easy way to find particular Examples (see the mod for more info):
Update:: As mirod correctly points out this code does not do a good job on some XML code. Don't use it for that (unless you understand where it will break). 'XML' removed from the title. For those who don't realise this will also break on PRE tags, et al. For me this is a feature not a bug :-) |
#! /usr/bin/perl
########
# tgrep - A grep for XML/HTML tags. See POD at bottom for docs
#
# Copyright (C) 2000-2002 Adrian Howard
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
# USA
########
use strict;
use warnings;
use Getopt::Long;
use Pod::Usage;
use Fatal qw(open close rename);
use File::Find;
my $OPTIONS = {};
GetOptions( $OPTIONS,
'attr=s@',
'noattr=s@',
'set=s@',
'help|?',
'nobackup',
'first',
) or pod2usage(2);
pod2usage(2) if $OPTIONS->{help};
my $TAG = shift @ARGV || pod2usage({-verbose => 1, -message => 'need a
+ tag to search for'});
pod2usage({-verbose => 1, -message => "search for tag $TAG in what?"})
+ unless @ARGV;
sub has_attribute {
my ($tag, $attr) = @_;
my ($name, $value) = split(/=/, $attr);
my $has_attribute = 0;
if (defined($value)) {
if ($value eq '') {
$has_attribute = 1 if $tag =~ m/\s+$name\s*=\s*(['"])$valu
+e\1/si;
} else {
$has_attribute = 1 if $tag =~ m/\s+$name\s*=\s*(['"]?)$val
+ue\1/si;
};
} else {
$has_attribute = 1 if $tag =~ m/\s+$name[^\w]/si;
};
return($has_attribute);
};
sub tag_matches {
my $tag = shift;
foreach my $attr (@{$OPTIONS->{attr}}) {
return(0) unless has_attribute($tag, $attr);
};
foreach my $attr (@{$OPTIONS->{noattr}}) {
return(0) if has_attribute($tag, $attr);
};
return(1);
};
sub move_original {
my $file = shift;
my $backup = $file . "~";
my $i='';
while (-e "$backup$i") { $i++ };
rename $file, "$backup$i";
return("$backup$i");
};
sub read_file {
my $file = shift;
local $/ = undef;
open(FILE, $file);
my $content = <FILE>;
close(FILE);
return($content);
};
sub clear_attribute_from_tag {
my ($tag, $name) = @_;
$tag =~ s/\s+$name\s*=\s*\w+?\b//gsi;
$tag =~ s/\s+$name\b(\s*=\s*(['"]).*?\2)?//gsi;
return($tag);
};
sub add_attribute_to_tag {
my ($tag, $name, $value) = @_;
my $attr = "$name";
if (defined($value)) {
$attr .= qq!="$value"!;
};
$tag =~ s/(<\w+)/$1 $attr/;
return($tag);
};
sub set_tag_attributes {
my $tag = shift;
return($tag) unless tag_matches($tag);
foreach my $attr (@{$OPTIONS->{set}}) {
my ($name, $value) = split(/=/, $attr);
$tag = clear_attribute_from_tag($tag, $name);
$tag = add_attribute_to_tag($tag, $name, $value);
};
return($tag);
};
sub replace_matches_in_file {
my $file = shift;
my $content = read_file($file);
$content =~ s/(<$TAG[^>]*>?)/set_tag_attributes($1)/sige;
my $original_file = move_original($file);
die "file $file still exists - this should not happen!\n" if -e $f
+ile;
open(FILE, ">$file");
print FILE $content;
close(FILE);
if ($OPTIONS->{nobackup}) {
unlink $original_file;
} else {
print "$file -> $original_file\n";
};
};
sub find_tag {
my $file = $_;
return unless -f $file;
return if $file =~ m/~\d*$/;
my $found;
open(FILE, $file);
LINE: while (my $line = <FILE>) {
next unless $line =~ m/<$TAG/si;
MATCH: while ($line =~ m/<$TAG/si) {
my $line_number = $.;
until ($line =~ m/<$TAG.*?>/) {
my $next_line = <FILE>;
last unless $next_line;
$line .= $next_line;
};
my $tag;
($tag, $line) = ($line =~ m/(<$TAG[^>]*>?)(.*)/si);
next MATCH unless tag_matches($tag);
chomp($tag);
$tag =~ s!([^\x20-\x7E])!sprintf("\\x%02X", ord($1))!gse;
print "$File::Find::name : $line_number : $tag\n";
$found=1;
last LINE if $OPTIONS->{first};
};
};
close(FILE);
replace_matches_in_file($file) if $found && exists $OPTIONS->{set}
+;
};
find(\&find_tag, @ARGV);
__END__
=head1 NAME
tgrep - A grep for XML/HTML tags.
=head1 SYNOPSIS
tgrep [options] tag file ...
List all occurances of the tag in the specified files. Directories are
recursed into.
Options:
-attr ATTR Return tags only if they contain attribute
-noattr ATTR Return tags only if they do not contain attrib
+ute
-set ATTR Replace the attribute in the found tags
-nobackup Don't create backup copies of the original fi
+le
-help Brief help message
-first Only return first match in a file
-? Same as -help
ATTR can be "ATTR" or "ATTR=VALUE". VALUE in -attr or -noattr is treat
+ed as a regular expression.
By default backup files are created of the form FILENAME~, FILENAME~1,
+ FILENAME~2, etc. when you
alter attributes using -set. Files named in this style will never be a
+ffected by tgrep.
Using -first will only show the first match in each file. -set will st
+ill alter all matching tags.
Using -first will increase speed for updates slightly.
Examples:
% tgrep -noattr alt img htdocs
(find all IMG tags without ALT text)
% tgrep -attr width=60 -attr height=40 img htdocs
(find IMG tags whose WIDTH is 60 and HEIGHT is 40)
% tgrep -noattr 'class=.*sidebar' p htdocs
(find paragraph tags whose class doesn't end with "sidebar")
% tgrep -noattr alt -set alt="" -attr src=".*/1x1.gif" img htdocs
(set the ALT text of all "1x1.gif" IMGs without existing ALT text
+to "")
=cut
|
|
|
|---|
| Replies are listed 'Best First'. | |
|---|---|
|
Re: tgrep - A grep for XML/HTML tags
by larsen (Parson) on Dec 01, 2002 at 15:01 UTC | |
|
Re: tgrep - A grep for XML/HTML tags
by mirod (Canon) on Dec 02, 2002 at 14:19 UTC | |
by adrianh (Chancellor) on Dec 02, 2002 at 14:42 UTC |