#! /usr/bin/perl ######## # tgrep - A grep for XML/HTML tags. See POD at bottom for docs # # Copyright (C) 2000-2002 Adrian Howard # # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public # License as published by the Free Software Foundation; either # version 2 of the License, or (at your option) any later version. # # This library is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # Lesser General Public License for more details. # # You should have received a copy of the GNU Lesser General Public # License along with this library; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 # USA ######## use strict; use warnings; use Getopt::Long; use Pod::Usage; use Fatal qw(open close rename); use File::Find; my $OPTIONS = {}; GetOptions( $OPTIONS, 'attr=s@', 'noattr=s@', 'set=s@', 'help|?', 'nobackup', 'first', ) or pod2usage(2); pod2usage(2) if $OPTIONS->{help}; my $TAG = shift @ARGV || pod2usage({-verbose => 1, -message => 'need a + tag to search for'}); pod2usage({-verbose => 1, -message => "search for tag $TAG in what?"}) + unless @ARGV; sub has_attribute { my ($tag, $attr) = @_; my ($name, $value) = split(/=/, $attr); my $has_attribute = 0; if (defined($value)) { if ($value eq '') { $has_attribute = 1 if $tag =~ m/\s+$name\s*=\s*(['"])$valu +e\1/si; } else { $has_attribute = 1 if $tag =~ m/\s+$name\s*=\s*(['"]?)$val +ue\1/si; }; } else { $has_attribute = 1 if $tag =~ m/\s+$name[^\w]/si; }; return($has_attribute); }; sub tag_matches { my $tag = shift; foreach my $attr (@{$OPTIONS->{attr}}) { return(0) unless has_attribute($tag, $attr); }; foreach my $attr (@{$OPTIONS->{noattr}}) { return(0) if has_attribute($tag, $attr); }; return(1); }; sub move_original { my $file = shift; my $backup = $file . "~"; my $i=''; while (-e "$backup$i") { $i++ }; rename $file, "$backup$i"; return("$backup$i"); }; sub read_file { my $file = shift; local $/ = undef; open(FILE, $file); my $content = <FILE>; close(FILE); return($content); }; sub clear_attribute_from_tag { my ($tag, $name) = @_; $tag =~ s/\s+$name\s*=\s*\w+?\b//gsi; $tag =~ s/\s+$name\b(\s*=\s*(['"]).*?\2)?//gsi; return($tag); }; sub add_attribute_to_tag { my ($tag, $name, $value) = @_; my $attr = "$name"; if (defined($value)) { $attr .= qq!="$value"!; }; $tag =~ s/(<\w+)/$1 $attr/; return($tag); }; sub set_tag_attributes { my $tag = shift; return($tag) unless tag_matches($tag); foreach my $attr (@{$OPTIONS->{set}}) { my ($name, $value) = split(/=/, $attr); $tag = clear_attribute_from_tag($tag, $name); $tag = add_attribute_to_tag($tag, $name, $value); }; return($tag); }; sub replace_matches_in_file { my $file = shift; my $content = read_file($file); $content =~ s/(<$TAG[^>]*>?)/set_tag_attributes($1)/sige; my $original_file = move_original($file); die "file $file still exists - this should not happen!\n" if -e $f +ile; open(FILE, ">$file"); print FILE $content; close(FILE); if ($OPTIONS->{nobackup}) { unlink $original_file; } else { print "$file -> $original_file\n"; }; }; sub find_tag { my $file = $_; return unless -f $file; return if $file =~ m/~\d*$/; my $found; open(FILE, $file); LINE: while (my $line = <FILE>) { next unless $line =~ m/<$TAG/si; MATCH: while ($line =~ m/<$TAG/si) { my $line_number = $.; until ($line =~ m/<$TAG.*?>/) { my $next_line = <FILE>; last unless $next_line; $line .= $next_line; }; my $tag; ($tag, $line) = ($line =~ m/(<$TAG[^>]*>?)(.*)/si); next MATCH unless tag_matches($tag); chomp($tag); $tag =~ s!([^\x20-\x7E])!sprintf("\\x%02X", ord($1))!gse; print "$File::Find::name : $line_number : $tag\n"; $found=1; last LINE if $OPTIONS->{first}; }; }; close(FILE); replace_matches_in_file($file) if $found && exists $OPTIONS->{set} +; }; find(\&find_tag, @ARGV); __END__ =head1 NAME tgrep - A grep for XML/HTML tags. =head1 SYNOPSIS tgrep [options] tag file ... List all occurances of the tag in the specified files. Directories are recursed into. Options: -attr ATTR Return tags only if they contain attribute -noattr ATTR Return tags only if they do not contain attrib +ute -set ATTR Replace the attribute in the found tags -nobackup Don't create backup copies of the original fi +le -help Brief help message -first Only return first match in a file -? Same as -help ATTR can be "ATTR" or "ATTR=VALUE". VALUE in -attr or -noattr is treat +ed as a regular expression. By default backup files are created of the form FILENAME~, FILENAME~1, + FILENAME~2, etc. when you alter attributes using -set. Files named in this style will never be a +ffected by tgrep. Using -first will only show the first match in each file. -set will st +ill alter all matching tags. Using -first will increase speed for updates slightly. Examples: % tgrep -noattr alt img htdocs (find all IMG tags without ALT text) % tgrep -attr width=60 -attr height=40 img htdocs (find IMG tags whose WIDTH is 60 and HEIGHT is 40) % tgrep -noattr 'class=.*sidebar' p htdocs (find paragraph tags whose class doesn't end with "sidebar") % tgrep -noattr alt -set alt="" -attr src=".*/1x1.gif" img htdocs (set the ALT text of all "1x1.gif" IMGs without existing ALT text +to "") =cut

In reply to tgrep - A grep for HTML tags by adrianh

Title:
Use:  <p> text here (a paragraph) </p>
and:  <code> code here </code>
to format your post, it's "PerlMonks-approved HTML":



  • Posts are HTML formatted. Put <p> </p> tags around your paragraphs. Put <code> </code> tags around your code and data!
  • Titles consisting of a single word are discouraged, and in most cases are disallowed outright.
  • Read Where should I post X? if you're not absolutely sure you're posting in the right place.
  • Please read these before you post! —
  • Posts may use any of the Perl Monks Approved HTML tags:
    a, abbr, b, big, blockquote, br, caption, center, col, colgroup, dd, del, details, div, dl, dt, em, font, h1, h2, h3, h4, h5, h6, hr, i, ins, li, ol, p, pre, readmore, small, span, spoiler, strike, strong, sub, summary, sup, table, tbody, td, tfoot, th, thead, tr, tt, u, ul, wbr
  • You may need to use entities for some characters, as follows. (Exception: Within code tags, you can put the characters literally.)
            For:     Use:
    & &amp;
    < &lt;
    > &gt;
    [ &#91;
    ] &#93;
  • Link using PerlMonks shortcuts! What shortcuts can I use for linking?
  • See Writeup Formatting Tips and other pages linked from there for more info.