in reply to Re: Algorithm to search and replace on data file, based on a table file?
in thread Algorithm to search and replace on data file, based on a table file?


I like your idea of an option for regular expressions. I've used that concept below.

Here is the updated code, using your idea of testing for a match before substituting. It seems to do the trick.
my $replacecount = 0; my %linenums; my @keys_ordered = reverse sort { length $table_ref->{$a} <=> length $ +table_ref->{$b} } keys %{$table_ref}; LINE: while ( my $old_line = <INFILE> ) { my $new_line = $old_line; my $old_count = 0; my $new_count = 0; KEY: for my $key ( @keys_ordered ) { #if ($debug) { print "DEBUG: \$key = $key\n" } my $backtrack_line = $new_line; my $regex = ( $opt_regex ? $key : quotemeta($key) ); if ( $old_count = ( $old_line =~ /$regex/g) ) { $new_count = ( $new_line =~ s/$regex/$table_ref->{$key}/g +); if ( $new_count != $old_count ) { if ($debug) { print "DEBUG: Match count failure for k +ey $key on ($old_count/$new_count) on line $.: $old_line" } $new_count = $old_count; $new_line = $backtrack_line; next KEY; } if ( $debug ) { print "DEBUG: line $. replacing $key with $table_ref- +>{$key}\n"; $linenums{$.}++; } $replacecount += $new_count; } else { } # do nothing } print OUTFILE $new_line; next LINE; } if ( $debug ) { for my $linenum ( sort keys %linenums ) { if ( $linenums{$linenum} > 1 ) { print "DEBUG: WARNING: line $linenum had $linenums{$line +num} replacements!\n"; } } } print "Made $replacecount replacements in $output_file\n"; print "Done.\n\n";

Thanks!

- Chris Koknat
  • Comment on Re^2: Algorithm to search and replace on data file, based on a table file?
  • Download Code

Replies are listed 'Best First'.
Re^3: Algorithm to search and replace on data file, based on a table file?
by koknat (Sexton) on Oct 02, 2009 at 23:10 UTC
    Here is the latest copy of the code, in its entirety.
    It can handle multiple input files, gives usage information, and has several options.

    I kept the regex searching simple, so that it would work on any machine without additional libraries.
    #!/usr/bin/env perl use warnings; use strict; use Getopt::Long; # process command line options use Dumpvalue; # used for debugging hash data structures &usage if ( @ARGV < 3 ); sub usage { print "\n search_replace_table.pl by Chris Koknat special thanks to David Irving and Kenneth K on PerlMonks Purpose: Search and replace on files using a two-column table file Options: -table file Use this file for all search/replace. The format is as follows: search1 replace1 search2 replace2 search3 replace3 Example: foo BA BA BAH -reverse Use column 2 to search, and column 1 to replace Default is to use column 1 to search, and column 2 to replace -suffix The default output filename is the name of the input file, appende +d with _ Use this option to change the suffix -regex Treat the entries in the table file as Perl regular expressions (^ +,\$,[,],etc) Usage examples: search_replace_table.pl -table tablefile file search_replace_table.pl -table tablefile *.stil search_replace_table.pl -table tablefile file -suffix _fixed "; exit; } # Process options my $debug = 0; my $opt_reverse = 0; my $opt_tablefile = ""; my $opt_suffix = "_"; my $opt_regex = 0; Getopt::Long::GetOptions( "debug" => \$debug, "reverse" => \$opt_reverse, "table=s" => \$opt_tablefile, "suffix=s" => \$opt_suffix, "regex" => \$opt_regex, ) or exit; # Banner print "\n\n"; print " ###############################\n"; print " ### search_replace_table.pl ###\n"; print " ###############################\n\n"; # Read table file my $table_ref; if ( $opt_tablefile ) { $table_ref = read_table_file($opt_tablefile,$opt_reverse) } else { print "ERROR: You need to specify a table file with the -table op +tion\n"; print "Exiting.\n\n"; exit 1; } if ( $debug ) { print "\nDEBUG: Dumping \$table_ref:\n"; Dumpvalue->new->dumpValue(\$table_ref); } # Sort the keys early to save time my @keys_ordered = reverse sort { length $a <=> length $b } keys %{$ta +ble_ref}; if ( $debug ) { print "\nDEBUG: \@keys_ordered = @keys_ordered\n" } # Loop through input files while ( @ARGV ) { # $input_file and $output_filenames my $input_file = shift @ARGV; my $output_file = newFilename($input_file . $opt_suffix); if ( $debug ) { print "\nDEBUG: \$output_file = $output_file\n" } # Open input and output files open( "INFILE", "< $input_file" ) or die "ERROR: Cannot open input + file: $input_file\n\n"; print "Reading from input file: $input_file\n"; open( "OUTFILE", "> $output_file" ) or die "ERROR: Cannot open out +put file: $output_file\n\n"; print "Writing to output file: $output_file\n"; # Loop through input file my $replacecount = 0; LINE: while ( my $line = <INFILE> ) { if ($debug>=2) { print "DEBUG: $. $line"; } my $old_line = $line; my $new_line = $line; my $old_count = 0; my $new_count = 0; KEY: for my $key ( @keys_ordered ) { if ($debug) { print "DEBUG: \$key = $key\n" } my $regex = ( $opt_regex ? $key : quotemeta($key) ); if ( $debug ) { print "DEBUG: \$regex = $regex\n" } if ( $debug ) { print "DEBUG: \$old_line = $old_line" } my $backtrack_line = $new_line; my $tmp_line = $old_line; if ( $old_count = ( $tmp_line =~ s/$regex/$table_ref->{$ke +y}/g ) ) { if ( $debug ) { print "DEBUG: \$old_count = $old_coun +t\n" } $new_count = ( $new_line =~ s/$regex/$table_ref->{$key +}/g ); if ( $debug ) { print "DEBUG: \$new_count = $new_coun +t\n" } if ( $debug ) { print "DEBUG: \$new_line = $new_line" + } if ( $new_count != $old_count ) { if ($debug) { print "DEBUG: Match count failure f +or key $key on ($old_count/$new_count) on line $.: $old_line" } $new_count = $old_count; $new_line = $backtrack_line; next KEY; } if ( $debug ) { print "DEBUG: line $. replacing $key +with $table_ref->{$key}\n" } $replacecount += $new_count; } else { } # do nothing } print OUTFILE $new_line; next LINE; } print "Made $replacecount replacements in $output_file\n"; } print "\nDone.\n\n"; ########################### ### END OF MAIN PROGRAM ### ########################### # $ = read_table_file($filename,$opt_reverse) # Reads 2-column table mapping file, returns hash reference sub read_table_file { my $reverse = 0; my $table_file = $_[0]; if ( defined $_[1] and $_[1] == 1 ) { $reverse = 1; } my %map; open( "MAPFILE", "< $table_file" ) or die "ERROR: Cannot open fil +e for reading: $table_file\n\n"; print "Reading from table file $table_file\n"; while ( my $line = <MAPFILE> ) { # blank line or # comment if ( $line =~ /^\s*$/ or $line =~ /^\s*#/ ) { # do nothing } # search_term replace_term # xtals_in XTAL_SSIN elsif ( $line =~ /^\s*(\S+)\s+(\S+)/ ) { my ($key,$value) = ($1,$2); if ( $reverse ) { if ( defined $map{$value} ) { print "ERROR: Table file $table_file contains 2 e +ntries for $value in the right column!\n"; print " 1st entry was: $map{$value} $valu +e\n"; print " 2nd entry was: $key $value on li +ne $.\n"; print "Exiting.\n\n"; exit 1; } else { $map{$value} = $key; } } else { if ( defined $map{$key} ) { print "ERROR: Table file $table_file contains 2 e +ntries for $key in the left column!\n"; print " 1st entry was: $key $map{$key}\n" +; print " 2nd entry was: $key $value on li +ne $.\n"; print "Exiting.\n\n"; exit 1; } else { $map{$key} = $value; } } } # Unknown else { print "ERROR: Did not understand line in config file: $l +ine"; exit 1; } } return \%map; } # Return a valid filename that doesn't already exist # $ = newFilename($filename) sub newFilename { my $filename = $_[0]; while ( -e $filename ) { $filename .= "_"; } return $filename; }

    - Chris Koknat
      My friend Pierre found another way to solve the same problem using the Nedit text editor:

      The following Nedit macro does the same thing you described.
      I haven't tested it on a large file though (the macro below reads the entire file in memory...) This simple example takes a file1 with values to search for and replaces the contents of file2. Nedit has a nice C-like macro language (documented here:www.nedit.org/ftp/contrib/misc/nedit.pdf)

      file1
      value_to_search1 value_to_replace1
      value_to_search1 value_to_replace1
      etc

      A macro can be added to Nedit by selecting:

      Preferences->Default Menu->Customize Menu->Macro Menu

      A dialog window pops up, select New, name your macro add it in the macro editor and click on Apply. After it will appear in the Macro menu and can be executed from there.

      ========== nedit edit macro ====================

      t_print("Automatic Search & Replace","\n")
      file1 = read_file("/home/pierre/tmp/nedit/file1")
      file2= read_file("/home/pierre/tmp/nedit/file2")
      file1_array = split(file1,"\n")

      for(i=0;i<file1_array[]-1;i++) {
      line_array = split(file1_arrayi," ")
      file2= replace_in_string(file2,line_array[0],line_array1)
      }

      write_file(file2,"/home/pierre/tmp/nedit/file2_changed")

      =================================================

      I should note that Pierre's "Nedit" technique doesn't solve the
      foo bar
      ba BAH
      problem, but this may be a good enough solution for many cases.


      - Chris Koknat