The classic Perl challenge: run through a directory tree, search through files of a certain type, and find a certain type of thing in each file (in this case, all strings).
#!/usr/bin/perl -w
# Stringscan by Jeremy Price
# Call with: perl stringscan.pl "/full/directory/path/from/the/top"
#
# Run through a directory structure, pick the strings out of files, hash
# them, sort them and then print them out with references to the files they
# came from.
#
# Bugs: Will fail badly on files which include a ";" inside quotes, because
# a "string" is taken to be everything from a double quote up to (but not
# including) the next semicolon.
use strict;
use diagnostics;

# Download this from CPAN with: perl -MCPAN -e "install File::Recurse"
use File::Recurse;

# The directory to be scanned, supplied on the command line.
my $cd = $ARGV[0];
die "Usage: perl stringscan.pl \"/full/directory/path/from/the/top\"\n"
    unless defined $cd && length $cd;

my @dc;    # Directory contents: the path of every candidate file.
my %fl;    # String locations: string => [ files it was found in ].

# Filetype table: language name => regular expression that identifies the
# language by file extension.
my %ft = (
    'c'    => '\.h$|\.c$',
    'perl' => '\.pl$',
);

# An associative array, keyed by filetype, of regular expressions to find
# the strings. To add more, add another 'filetype' => 'regexp' pair (and a
# matching entry in %ft).
my %search = (
    'c'    => '(\"[^;]*)',
    'perl' => '(\"[^;]*)',
);

print "\nNow scanning for all files in directories below: $cd";

# Filter on the extension patterns (the VALUES of %ft). Filtering on the
# language names (the keys) would look for the literal text "c" or "perl" in
# the name and so skip files such as "foo.pl" or "foo.h".
# NOTE(review): assumes File::Recurse applies 'match' as a regular expression
# to file names -- confirm against the installed module's documentation.
my $wanted = join '|', map { $ft{$_} } sort keys %ft;
my %files = Recurse([$cd], { match => $wanted });

foreach my $dir (sort keys %files) {
    foreach my $file (@{ $files{$dir} }) {
        push @dc, "$dir/$file";
    }
}

FILE:
foreach my $path (@dc) {

    # Use native backslash separators on Windows only; on any other
    # platform rewriting the slashes would produce a path that cannot be
    # opened.
    $path =~ s{/}{\\}g if $^O eq 'MSWin32';

    # Try to detect the type of file we are examining.
    my $type = 'none';    # Default (fall through) option
    foreach my $candidate (sort keys %ft) {
        $type = $candidate if $path =~ /$ft{$candidate}/;
    }

    # Bail if we couldn't figure it out.
    next FILE if $type eq 'none';

    open(my $fh, '<', $path) or die "can't open file $path: $!";
    print "\n\nThe strings in $path\nvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv\n";

    # Undefine the record separator for the duration of the read so that
    # the whole file arrives in one scalar.
    my $filedata = do { local $/; <$fh> };
    close $fh;
    next FILE unless defined $filedata;

    # Apply the correct regex based on the file type and record each match.
    while ($filedata =~ /$search{$type}/g) {
        add_func($1, $path);
    }
}

# Output the hash, sorted by string so that the report is reproducible.
print "\n\nBelow is a list of all the strings found, and then the files they were found in\n";
foreach my $string (sort keys %fl) {
    print "\n\n\n\nThis is the string: \n$string";
    print "\n\nIt was found in these files:\n ";
    print join(",\n", @{ $fl{$string} });
}

# add_func($string, $file)
# Record that $string was found in $file by appending the file name to the
# list kept under that string in %fl. Files are scanned one at a time, so
# comparing against the most recently recorded file is enough to keep each
# file from being listed more than once per string.
sub add_func {
    my ($string, $file) = @_;
    my $found_in = $fl{$string} ||= [];
    push @{$found_in}, $file
        unless @{$found_in} && $found_in->[-1] eq $file;
    return;
}

Replies are listed 'Best First'.
Re: String Finder and Displayer
by fundflow (Chaplain) on Dec 09, 2000 at 20:21 UTC
    The following alias, which I call 'cGrep', searches recursively in all the C files: find . -type f \( -name "*.[cCh]" -o -name \*.cpp \) -print |xargs fgrep !* In tcsh you can set it up as:
    alias cGrep 'find . -type f \( -name "*.[cCh]" -o -name \*.cpp \) -print |xargs fgrep !*'
    and use it as:
    > cGrep '\"[^;]*'
    It is not doing exactly the same thing as your script, but I find it useful.