#! /usr/bin/perl ######## # tgrep - A grep for XML/HTML tags. See POD at bottom for docs # # Copyright (C) 2000-2002 Adrian Howard # # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public # License as published by the Free Software Foundation; either # version 2 of the License, or (at your option) any later version. # # This library is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # Lesser General Public License for more details. # # You should have received a copy of the GNU Lesser General Public # License along with this library; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 # USA ######## use strict; use warnings; use Getopt::Long; use Pod::Usage; use Fatal qw(open close rename); use File::Find; my $OPTIONS = {}; GetOptions( $OPTIONS, 'attr=s@', 'noattr=s@', 'set=s@', 'help|?', 'nobackup', 'first', ) or pod2usage(2); pod2usage(2) if $OPTIONS->{help}; my $TAG = shift @ARGV || pod2usage({-verbose => 1, -message => 'need a tag to search for'}); pod2usage({-verbose => 1, -message => "search for tag $TAG in what?"}) unless @ARGV; sub has_attribute { my ($tag, $attr) = @_; my ($name, $value) = split(/=/, $attr); my $has_attribute = 0; if (defined($value)) { if ($value eq '') { $has_attribute = 1 if $tag =~ m/\s+$name\s*=\s*(['"])$value\1/si; } else { $has_attribute = 1 if $tag =~ m/\s+$name\s*=\s*(['"]?)$value\1/si; }; } else { $has_attribute = 1 if $tag =~ m/\s+$name[^\w]/si; }; return($has_attribute); }; sub tag_matches { my $tag = shift; foreach my $attr (@{$OPTIONS->{attr}}) { return(0) unless has_attribute($tag, $attr); }; foreach my $attr (@{$OPTIONS->{noattr}}) { return(0) if has_attribute($tag, $attr); }; return(1); }; sub move_original { my $file = shift; my $backup = $file . "~"; my $i=''; while (-e "$backup$i") { $i++ }; rename $file, "$backup$i"; return("$backup$i"); }; sub read_file { my $file = shift; local $/ = undef; open(FILE, $file); my $content = ; close(FILE); return($content); }; sub clear_attribute_from_tag { my ($tag, $name) = @_; $tag =~ s/\s+$name\s*=\s*\w+?\b//gsi; $tag =~ s/\s+$name\b(\s*=\s*(['"]).*?\2)?//gsi; return($tag); }; sub add_attribute_to_tag { my ($tag, $name, $value) = @_; my $attr = "$name"; if (defined($value)) { $attr .= qq!="$value"!; }; $tag =~ s/(<\w+)/$1 $attr/; return($tag); }; sub set_tag_attributes { my $tag = shift; return($tag) unless tag_matches($tag); foreach my $attr (@{$OPTIONS->{set}}) { my ($name, $value) = split(/=/, $attr); $tag = clear_attribute_from_tag($tag, $name); $tag = add_attribute_to_tag($tag, $name, $value); }; return($tag); }; sub replace_matches_in_file { my $file = shift; my $content = read_file($file); $content =~ s/(<$TAG[^>]*>?)/set_tag_attributes($1)/sige; my $original_file = move_original($file); die "file $file still exists - this should not happen!\n" if -e $file; open(FILE, ">$file"); print FILE $content; close(FILE); if ($OPTIONS->{nobackup}) { unlink $original_file; } else { print "$file -> $original_file\n"; }; }; sub find_tag { my $file = $_; return unless -f $file; return if $file =~ m/~\d*$/; my $found; open(FILE, $file); LINE: while (my $line = ) { next unless $line =~ m/<$TAG/si; MATCH: while ($line =~ m/<$TAG/si) { my $line_number = $.; until ($line =~ m/<$TAG.*?>/) { my $next_line = ; last unless $next_line; $line .= $next_line; }; my $tag; ($tag, $line) = ($line =~ m/(<$TAG[^>]*>?)(.*)/si); next MATCH unless tag_matches($tag); chomp($tag); $tag =~ s!([^\x20-\x7E])!sprintf("\\x%02X", ord($1))!gse; print "$File::Find::name : $line_number : $tag\n"; $found=1; last LINE if $OPTIONS->{first}; }; }; close(FILE); replace_matches_in_file($file) if $found && exists $OPTIONS->{set}; }; find(\&find_tag, @ARGV); __END__ =head1 NAME tgrep - A grep for XML/HTML tags. =head1 SYNOPSIS tgrep [options] tag file ... List all occurances of the tag in the specified files. Directories are recursed into. Options: -attr ATTR Return tags only if they contain attribute -noattr ATTR Return tags only if they do not contain attribute -set ATTR Replace the attribute in the found tags -nobackup Don't create backup copies of the original file -help Brief help message -first Only return first match in a file -? Same as -help ATTR can be "ATTR" or "ATTR=VALUE". VALUE in -attr or -noattr is treated as a regular expression. By default backup files are created of the form FILENAME~, FILENAME~1, FILENAME~2, etc. when you alter attributes using -set. Files named in this style will never be affected by tgrep. Using -first will only show the first match in each file. -set will still alter all matching tags. Using -first will increase speed for updates slightly. Examples: % tgrep -noattr alt img htdocs (find all IMG tags without ALT text) % tgrep -attr width=60 -attr height=40 img htdocs (find IMG tags whose WIDTH is 60 and HEIGHT is 40) % tgrep -noattr 'class=.*sidebar' p htdocs (find paragraph tags whose class doesn't end with "sidebar") % tgrep -noattr alt -set alt="" -attr src=".*/1x1.gif" img htdocs (set the ALT text of all "1x1.gif" IMGs without existing ALT text to "") =cut