sourcecode
ambrus
<code>
#!/usr/local/bin/perl
=head1 NAME
cgrep - print lines matching a pattern
=head1 SYNOPSIS
B<cgrep> [I<options>] I<PATTERN> [I<FILES>]
=head1 DESCRIPTION
B<Cgrep> searches for I<PATTERN> in text files, and prints matching lines.
I<PATTERN> can be a perl regular expression, or a fixed string.
B<Cgrep> reads all files specified in I<FILES>, or stdin if no I<FILES>
are given. Directories are ignored.
Every line is matched with I<PATTERN> separately. I<PATTERN>
is interpreted as a perl regular expression by default, but as a fixed
string if the B<-F> flag is set. In normal operation mode, B<cgrep> prints
matching lines. Exit code is 0 if any matches are found, 1 if no matches
were found.
=head1 OPTIONS
Long options can be abbreviated to a unique prefix, and can be
negated with a B<no-> prefix. A double hyphen stops option processing,
allowing you to use filenames starting with a minus.
=over
=item B<-A> I<NUM1>, B<--after-context=>I<NUM1>;
B<-B> I<NUM2>, B<--before-context=>I<NUM2>
Print context lines around matches, as with B<-C>.
These options, however, set the number of context lines
after and before matches to I<NUM1> and I<NUM2> respectively.
If only one of the options is given, context lines will
be printed only after or only before the match resp.
=item B<-C> I<NUM>, B<--context=>I<NUM>
Print I<NUM> lines of context before and after each match.
Also, print a line with a lone dash between continuous chunks
of lines.
If any of B<-n>, B<-h>, B<-p>, B<-P> is enabled, the separator
before the line and the prefixes will be a colon on
matching lines, and a hyphen in context lines.
=item B<-e> I<PATTERN>, B<--pattern=>I<PATTERN>
A different way to specify I<PATTERN>.
If this option is used, all non-option arguments are filenames.
If not used, the first non-option argument is the I<PATTERN>.
=item B<-E>, B<--perl-regexp>
Interpret I<PATTERN> as a perl regexp. This is the default.
Perl regexps are NFA, see L<perlre(1)> for details of syntax and semantics.
=item B<-F>, B<--fixed-strings>
Interpret I<PATTERN> as fixed string, not a regexp.
=item B<-h>, B<--with-no-filename>
Do not print filename before each line printed.
This is the default if there is only one I<FILE> given.
=item B<-H>, B<--with-filename>
Print filename before each line. This is the default if
more than one I<FILES> are given.
=item B<-i>, B<--fold-case>
Interpret I<PATTERN> case insensitively. Works with both
regexp pattern and fixed strings.
=item B<-l>, B<--files-with-match>
Print the list of files that have any matching lines.
Stop reading the file at the first match.
This suppresses normal operation mode, that is, printing
matching lines. The options B<-ABChHnpP> do not make
sense in combination with B<-l>.
=item B<-L>, B<--files-without-match>
Prints the lists of files having no matching lines.
Similar to B<-l> otherwise.
=item B<-n>, B<--line-number>
Print line number before each line printed.
=item B<-p>, B<--show-function>
Prints current function name before each line.
=item B<-P> I<REGEXP>, B<--function-regexp=>I<REGEXP>
Defines how function names are found. Implies B<-p>.
Lines matching I<REGEXP> are taken to be function
headers. I<REGEXP> is a perl regexp, unaffected by
B<-F>, B<-i>, B<-v>. The last matching capture in it
should return the function name.
The default regexp is C<^(\w+)>, suitable for C.
=item B<-s>, B<--no-messages>
Do not print warnings about files that can't be opened.
=item B<-q>, B<--silent>
Do not print anything, just give the return value.
Stops execution at very first match.
=item B<-v>, B<--invert-match>
Inverts the sense of the match, that is, searches for
lines not matching I<PATTERN>.
=back
=head1 SEE ALSO
L<egrep(1)>, L<perlre(1)>
=head1 AUTHOR
<ambrus@math.bme.hu>
=cut
use warnings;
use strict;
use Getopt::Long;
use IO::Handle;
{
# State shared by every sub in this block.  parseopts() fills in the
# option-derived values; main() and process_file() maintain the rest.
#   $cregexp         compiled search pattern matched against each line
#   @filenames       non-option arguments left after the pattern is taken
#   $complement      true when the sense of the match is inverted (-v)
#   $found_any       set to 1 once any line has been selected
#   $match           code ref called for every selected or context line
#   $endfile         code ref called after the last line of a file, unless
#                    nextfile() abandoned the file first
#   $print_filename, $print_lineno, $print_func
#                    which prefixes print_match() writes before a line
#   $before_context, $after_context
#                    number of context lines kept before / after a match
#   $context         true when either context count is positive
#   $hush_messages   true when warnings are to be suppressed (-s)
#   $func_cregexp    compiled pattern that recognises function header lines
#   $file, $filename handle and name of the input currently being read
#   $anyoutput       true once print_match() has printed in context mode
#   $lastoutput      line number last printed in context mode; reset to -1
#                    at the start of each file
my($cregexp, @filenames, $complement, $found_any, $match, $endfile,
$print_filename, $print_lineno, $before_context, $after_context,
$context, $hush_messages, $print_func, $func_cregexp,
$file, $filename, $anyoutput, $lastoutput);
# Program driver: parses the command line, then feeds every input to
# process_file().  Standard input is read when no file names are given.
# Files that cannot be opened are reported (unless -s) and skipped;
# directories are skipped silently.
#
# Returns a false value when at least one line was selected and a true
# value otherwise, so the result can be handed straight to exit().
sub main {
    parseopts();
    $found_any = 0;

    # A single undef entry stands for "read standard input".
    @filenames or @filenames = (undef);

    for my $n (@filenames) {
        if (defined $n) {
            $filename = $n;
            open $file, "<", $n or do {
                $hush_messages or warn qq[error opening file "$n": $!];
                next;
            };
        }
        else {
            # Give standard input a printable label so that -H, -l and -L
            # never print an undefined value.
            $filename = "(standard input)";
            $file     = *STDIN;
        }

        if (-d $file) {
            # Directories are ignored; release the handle before moving on.
            close $file;
            next;
        }

        process_file();
        close $file;
    }

    return !$found_any;
}
# Scans the currently open input ($file) line by line and hands selected
# lines, plus any requested context lines, to the $match handler.
#
# The handler receives (line text, line number, current function name,
# flag); the flag is 1 for a selected line and 0 for a context line.
# A handler may call nextfile(), which leaves the READ block early and
# thereby skips the $endfile call at its end.
sub process_file {
    my ($func, @before);
    my $after = 0;    # after-context lines still owed to the last match
    $lastoutput = -1;

    READ: {
        while (defined(my $line = <$file>)) {
            chomp $line;

            # Remember the most recent function header seen so far; $+ is
            # the last capture group that took part in the match.
            if ($print_func and $line =~ /$func_cregexp/) {
                $func = $+;
            }

            my $hit = $line =~ /$cregexp/;
            if ($hit xor $complement) {
                $found_any = 1;
                if ($context) {
                    # Flush the buffered before-context, explicitly flagged
                    # as context (0), then arm the after-context counter.
                    for my $held (@before) {
                        $match->(@{$held}[0, 1, 2], 0);
                    }
                    @before = ();
                    $after  = $after_context;
                }
                $match->($line, $file->input_line_number, $func, 1);
            }
            elsif ($context) {
                if (0 < $after) {
                    $match->($line, $file->input_line_number, $func, 0);
                    $after--;
                }
                else {
                    # Keep a sliding window of at most $before_context lines.
                    push @before, [$line, $file->input_line_number, $func];
                    $before_context < @before and shift @before;
                }
            }
        }
        $endfile->();
    }
}
# Abandons the rest of the current file by jumping out of the READ block
# in process_file().  Because the end-of-file handler is called at the
# end of that block, it is skipped as well.
sub nextfile {
# Leaving a subroutine through "last" normally raises an "exiting"
# warning; it is intentional here, so silence it.
no warnings "exiting";
last READ;
}
# Default line handler: prints one line with the enabled prefixes.
# Arguments: line text, line number, current function name (may be
# undef), and a flag that is true for a selected line and false for a
# context line.  Prefixes end in ":" on selected lines and "-" on
# context lines.  In context mode a "--" line is printed first whenever
# this line does not directly follow the previously printed one.
sub print_match {
    my ($text, $number, $function, $selected) = @_;

    if ($context) {
        if ($lastoutput != $number - 1 && $anyoutput) {
            print "--\n";
        }
        $anyoutput  = 1;
        $lastoutput = $number;
    }

    if ($print_filename) {
        print $filename, $selected ? ":" : "-";
    }
    if ($print_func and defined($function)) {
        print $function, $selected ? ":" : "-";
    }
    if ($print_lineno) {
        print $number, $selected ? ":" : "-";
    }
    print $text, "\n";
}
# Line handler for -l: on the first selected line, prints the name of
# the current file and abandons the rest of that file.  Context lines
# (fourth argument false) are ignored.
sub print_name_exit {
    my $selected = $_[3];
    return unless $selected;
    print $filename, "\n";
    nextfile();
}
# Line handler for -L: a selected line means this file must not be
# listed, so the rest of the file is abandoned, which also skips the
# end-of-file handler.  Context lines (fourth argument false) are ignored.
sub found_exit {
    my $selected = $_[3];
    return unless $selected;
    nextfile();
}
# Line handler for -q: terminates the whole program with exit status 0
# as soon as the first selected line is seen, without printing anything.
# Context lines (fourth argument false) are ignored and the handler
# simply returns.
sub found_exit_zero {
    my $selected = $_[3];
    return unless $selected;

    # Returning 0 here would only hand a value back to process_file(),
    # which ignores it and keeps reading; quiet mode has to stop now.
    exit 0;
}
# End-of-file handler for -L: prints the name of the current file on a
# line of its own.  It only runs when the file was read to the end.
sub print_filename {
    my @record = ($filename, "\n");
    return print @record;
}
# Default end-of-file handler: deliberately does nothing.
sub noop {
    return;
}
# Parses @ARGV and initialises the shared option state.
#
# Takes the pattern from -e, or failing that from the first non-option
# argument, and dies with a usage message when there is none.  The
# remaining arguments become @filenames.  Also compiles the search
# pattern, decides whether file names are printed by default (only when
# more than one file is given), enables context mode when a context
# count is positive, and selects the $match / $endfile handlers for the
# chosen output mode (-l, -L, -q, or normal printing).
sub parseopts {
    my ($regexp, $plain, $ignorecase);
    my $mode = "";
    ($before_context, $after_context) = (0, 0);
    $func_cregexp = qr/^(\w+)/;

    Getopt::Long::Configure("bundling", "gnu_compat", "prefix_pattern=(--|-)");
    GetOptions(
        "fixed-strings|F!", sub { $plain = $_[1] },
        "extended-regexp|perl-regexp|E!", sub { $plain = !$_[1] },
        "pattern|e=s", sub { $regexp = $_[1] },
        "invert-match|complement-match|v!", sub { $complement = $_[1] },
        "filename|with-filename|H!", sub { $print_filename = $_[1] },
        "with-no-filename|h!", sub { $print_filename = !$_[1] },
        # "igore-case" is a historical misspelling, kept as an alias so
        # that existing command lines continue to work.
        "ignore-case|igore-case|fold-case|y|i!", sub { $ignorecase = $_[1] },
        "line-number|n!", sub { $print_lineno = $_[1]; },
        "context|C=n", sub { $after_context = $before_context = $_[1] },
        "after-context|A=n", sub { $after_context = $_[1] },
        "before-context|B=n", sub { $before_context = $_[1] },
        "files-with-match|list|l!", sub { $mode = $_[1] ? "l" : "" },
        # The documented spelling is --files-without-match; the older
        # singular form stays available as an alias.
        "files-without-match|file-without-match|missing|L!",
            sub { $mode = $_[1] ? "L" : "" },
        "quiet|silent|q", sub { $mode = $_[1] ? "q" : "" },
        "no-messages|hush-messages|s", sub { $hush_messages = $_[1] },
        "show-function|function|p!", sub { $print_func = $_[1] },
        "function-regexp|P=s", sub { $func_cregexp = qr/$_[1]/; $print_func = 1 },
    );

    defined($regexp) or $regexp = shift @ARGV;
    defined($regexp) or die 'required argument missing; usage: cgrep [options] regexp [filenames]';

    # Warn about egrep-style word anchors.  Only a "<" or ">" preceded by
    # an odd number of backslashes counts; a bare angle bracket is an
    # ordinary character and must not trigger the warning.
    if (!$plain && $regexp =~ /(?<!\\)(?:\\\\)*\\[<>]/) {
        $hush_messages or warn 'warning: \< and \> are not special in perl regexen';
    }

    $plain and $regexp = quotemeta $regexp;
    my $mods = $ignorecase ? "i" : "";
    $cregexp = qr/(?$mods:$regexp)/;

    @filenames = @ARGV;
    defined($print_filename) or $print_filename = 1 < @filenames;

    if (0 < $before_context || 0 < $after_context) {
        $context = 1;
        $before_context ||= 0;
        $after_context  ||= 0;
    }

    $endfile = \&noop;
    if ($mode eq "l") {
        $match = \&print_name_exit;
    }
    elsif ($mode eq "L") {
        ($match, $endfile) = (\&found_exit, \&print_filename);
    }
    elsif ($mode eq "q") {
        $match = \&found_exit_zero;
    }
    elsif ($mode eq "") {
        $match = \&print_match;
    }
    else {
        die "internal error: invalid mode: $mode";
    }
}
# Run the program.  main() returns a false value when something was
# selected, so the exit status is 0 on success and 1 when nothing matched.
exit main();
}
__END__
</code>
<p>
<b>Important:</b> You can download the latest version of this program from [http://www.math.bme.hu/~ambrus/pu/cgrep]. That version has recursive search in a directory tree and other useful stuff. I won't update the old version here anymore.
<P>
This is an egrep clone that can display the
name of the function in which the matching line is in,
together with the line.
This feature is already present in gnu diff, and
I found it very useful for diffing C code,
so I wrote this to have it in grep too.
Use the -p or -P switch to enable the feature.
<p>
This uses perl regexps instead of posix regexps,
and is much slower than the real egrep.
<P>
<B>Update:</b> there appears to be a bug:
cgrep sometimes does not print the -- delimiters
in context mode. <B>Update 2:</b> More
precisely, there's no delimiter printed between
chunks in different files. Also [YuckFoo] has noted
that the -B switch does not work. I'll post a patch
asap.
<P>
<b>Update:</b> these bugs (and one more undocumented bug) have been fixed. You see the new code here, and I'll post the diff from the old code to the new code as a reply.
<p>
<b>Update</b> 2006 dec 17: see also [id://586862].
<p>
<b>Update</b> 2007 jun 5: see also [id://573020].
<p>
<b>Update</b> 2007 aug 29: the -q option is buggy. I will fix it in the version at [http://www.math.bme.hu/~ambrus/pu/cgrep] but not in the older version in this node. The bug is in the found_exit_zero function.
<p>
<b>Update</b> 2008 nov 14: see also [id://723699].
Text Processing
<P>
You can download the actual newest version of this program from [http://www.math.bme.hu/~ambrus/pu/cgrep].