in reply to Contextual find and replace large config file

Thanks again for your input everyone.

With your help I am now able to change a foreign datafile like:

# comment GlobalParam = 1 Object Type1 { Param1 = Foo NestedObject { Param 1 = Bar } # just another comment } # comment ObjectType2 { Param1 = Quz = z Param2 = 3 NestedObjectX { Param1 = Baz NestedObjectZ { Param1 = Baz } } NestedObjectY { Param1 = 5 } }

by applying a filter like:

[ [ # Filter { 'Object Type1' => { 'Param1' => [ "Foo" ], }, 'GlobalParam' => [ '1' ], # 'Junk' => [ 'more junk' ], # Will break the filter }, # Changes { 'Object Type1' => { 'NestedObject' => { 'Param 1' => "\"Box\"", }, }, } ], [ # Filter { 'Object Type1' => { 'Param1' => [ "Foo" ], 'NestedObject' => { 'Param 1' => [ "Box" ], }, }, # 'GlobalParam' => [ '2' ], # Will disable this filter, # but first filter is still # applied }, # Changes { 'Object Type1' => { 'NestedObject' => { 'Param 1' => "\$curVal . \" Baz\"", }, }, } ], [ # Filter { 'ObjectType2' => { 'Param2' => [ '1', '2', '3' ], }, }, # Changes { 'ObjectType2' => { 'NestedObjectY' => { 'Param1' => "\$curVal * 2", }, }, } ], ] ;

Which changes the configured paramaters into:

# comment GlobalParam = 1 Object Type1 { Param1 = Foo NestedObject { Param 1 = Box Baz } # just another comment } # comment ObjectType2 { Param1 = Quz = z Param2 = 3 NestedObjectX { Param1 = Baz NestedObjectZ { Param1 = Baz } } NestedObjectY { Param1 = 10 } }

edit 2019 Jan 07: Without further testing of this particular program I have removed a '^' from my $re_comment = qr/ ^ \h* \# [^\n]* \n / ; and qr/ (?<pre> ^\h* )because it was killing the performance of this program.

code if you want:

use warnings ; use strict ; use re '/msx' ; use Data::Compare qw/Compare/ ; use Data::Dumper ; # begin + horizontal space + # + Text + new line (consumes new line) my $re_comment = qr/ \h* \# [^\n]* \n / ; # horizontal and vertical space + context=Text (spaces allowed) + # horizontal and vertical space + { + horizontal space optionally # followed by new line (consumes new line) my $re_context = qr/ \s* (?<context> \w(?:[\w\h]*\w)? ) \s* \{ \h*\n* +/ ; # pre=horizontal space + name=Text + mid=h-space + '=' + h-space + # value=Text + post = horizontal space + new line (consumes new line) my $re_namevalue = qr/ (?<pre> \h* ) (?<name> [^\n=]+?) (?<mid> \h*=\h +* ) (?<value> [^\n]+? ) (?<post> \h*\n ) / ; # horizontal or vertical space + } + horizontal space + # optional new line (consumes new line) my $re_endblock = qr/ \s* \} \h*\n* / ; my $parseConfig = [ [ # Filter { 'Object Type1' => { 'Param1' => [ "Foo" ], }, 'GlobalParam' => [ '1' ], # 'Junk' => [ 'more junk' ], # Will break the filter }, # Changes { 'Object Type1' => { 'NestedObject' => { 'Param 1' => "\"Box\"", }, }, } ], [ # Filter { 'Object Type1' => { 'Param1' => [ "Foo" ], 'NestedObject' => { 'Param 1' => [ "Box" ], }, }, # 'GlobalParam' => [ '2' ], # Will disable this filter, # but first filter is still # applied }, # Changes { 'Object Type1' => { 'NestedObject' => { 'Param 1' => "\$curVal . \" Baz\"", }, }, } ], [ # Filter { 'ObjectType2' => { 'Param2' => [ '1', '2', '3' ], }, }, # Changes { 'ObjectType2' => { 'NestedObjectY' => { 'Param1' => "\$curVal * 2", }, }, } ], ] ; sub applyFilter { # Apply filter on given parameter set (non-recursive) my $filter = $_[0] ; # Hash reference of one filter. In case # filter is empty result is true my $test = $_[1] ; # Hash reference with parameter value pairs # and/or context (context will be ignored) my $result = 1 ; foreach my $filterItems ( keys %{$filter} ) { # Ignore context if ( ( ref $filter->{ $filterItems } ) ne 'HASH' ) { my $tmpResult = 0 ; # In case parameter does not exists in $tmpH then the # filter may have a bad configuration, however in other # contexts this parameter may exists, so no errors or # warnings. Since something is specified in this filter, # but it does not exists in this context the filter # silently fails if ( exists $test->{ $filterItems } ) { foreach my $filterItem ( @{$filter->{ $filterItems }} +) { if ( $test->{ $filterItems } eq $filterItem ) { $tmpResult = 1 ; } } } # TO DO: Make procedure more efficient by breaking out of # the foreach loop and return 0 immediately. E.g.: # if ( $tmpResult == 0 ) { # return 0 ; # } $result = $result && $tmpResult ; } } return $result ; } sub reduceConfigSets { # Create a new configuration-set selecting the filters by indexes # and changes to 'one level up' for a given context. # Synopsis: my $reducedParseConfig = # reduceConfigSets( $parseConfig, 'Object Type1', $indexes ) ; my $config = $_[0] ; # Hash ref my $selection = $_[1] ; # Context key my $indexes = $_[2] ; # Array Ref array with indexes my $newConfig = [] ; my $ix = -1 ; foreach ( @{$config} ) { ++$ix ; my $skip = 1 ; foreach( @{$indexes} ) { if ( $ix == $_ ) { $skip = 0 ; last ; } } next if ( $skip ) ; my $newFilter ; my $newChanges ; if ( exists $_->[0]->{ $selection } ) { # Note: no copies! $newFilter = $_->[0]->{ $selection } ; } if ( exists $_->[1]->{ $selection } ) { # Note: no copies! $newChanges = $_->[1]->{ $selection } ; } if ( $newFilter || $newChanges ) { push @$newConfig, [ $newFilter, $newChanges ] ; } } return $newConfig ; } ; my $data = do { local $/ ; <DATA> ; } ; sub processData { my ( $data, $position, $config ) = @_ ; # Refs pos( $$data ) = $position ; my $ident = 0 ; my $tmpH = {} ; while ( pos( $$data ) < length( $$data ) ) { if ( $$data =~ m{\G$re_comment}gc ) { # comment, nothing to do } elsif ( $$data =~ m{\G$re_context}gc ) { if ( $ident == 0 ) { foreach ( @{$config} ) { # If a change exists in config for this # context (change data) if ( exists $_->[1]->{ $+{ context } } ) { $tmpH->{ $+{ context } } = 1 ; } } } ++$ident ; } elsif ( $$data =~ m{\G$re_namevalue}gc ) { if ( $ident == 0 ) { foreach ( @{$config} ) { # If a parameter needs to be collected within the # current context (filter data) # or # If a change exists for this # parameter (change data) if ( ( exists $_->[0]->{ $+{ name } } ) || ( exist +s $_->[1]->{ $+{ name } } ) ) { $tmpH->{ $+{ name } } = $+{ value } ; } } } } elsif ( $$data =~ m{\G$re_endblock}gc ) { if ( $position == 0 ) { die "'}' with no opening '{'?" unless $ident ; } # if ( --$ident == -1 ) { # ? --$ident ; if ( $ident == -1 ) { last ; } } else { die "Failed to parse at: \"" . substr( $$data, pos $$data, 50 ) . "...\"" ; } } $ident = 0 ; # Should not be needed, but just to be safe pos( $$data ) = $position ; while ( pos( $$data ) < length( $$data ) ) { my $repl ; if ( $$data =~ m{\G$re_comment}gc ) { # comment, nothing to do print $repl // substr( $$data, $-[0], $+[0] - $-[0] ) ; } elsif ( $$data =~ m{\G$re_context}gc ) { # TO DO: Check if this actually is going to work # for a triple nested context # in case there is no filter on the second # context level # Though I do think that # if ( exists $tmpH->{ $+{context} } ) { # does the trick my $contextSelected = 0 ; my $useConfigItems = [] ; if ( exists $tmpH->{ $+{ context } } ) { # Parameter(s) needs to be checked within the # current context (filter data) my $changeAllowed = 0 ; my $configIndex = -1 ; foreach my $configItem ( @{$config} ) { ++$configIndex ; my $filter = $configItem->[0] ; my $tmpChangeAllowed = applyFilter( $filter, $tmpH + ) ; if ( $tmpChangeAllowed ) { $changeAllowed = 1 ; push @{$useConfigItems}, $configIndex ; } } if ( $changeAllowed ) { $contextSelected = 1 ; } } if ( $contextSelected ) { print $repl // substr( $$data, $-[0], $+[0] - $-[0] ) +; my $reducedParseConfig = reduceConfigSets( $config, $+ +{context}, $useConfigItems ) ; # Warning: $- and $+ variables can no longer be # used after this call processData( $data, pos( $$data ), $reducedParseConfig + ) ; } else { ++$ident ; print $repl // substr( $$data, $-[0], $+[0] - $-[0] ) +; } } elsif ( $$data =~ m{\G$re_namevalue}gc ) { # Name/value pair always exists inside $tmpH if ( exists $tmpH->{ $+{ name } } ) { foreach my $configItem ( @{$config} ) { # Cascading filters my $filter = $configItem->[0] ; # If there are filters left within this # context (no filter means: allow change) if ( applyFilter( $filter, $tmpH ) ) { # If a parameter needs to be changed within # the current context (change data) my $changes = $configItem->[1] ; foreach( keys %{$changes} ) { if ( ( ref $changes->{ $_ } ) ne 'HASH' ) +{ if ( $_ eq $+{name} ) { my $curVal = $tmpH->{ $+{name} } ; my $newValue ; my $instruction = "\$newValue = " +. $changes->{ $_ } ; eval( $instruction ) ; $repl = $+{ pre } . $+{ name } . $ ++{ mid } . $newValue . $+{ post } ; # Filter is cascading so update # the value inside the temp hash # as well $tmpH->{ $+{ name } } = $newValue +; } } } } } } print $repl // substr( $$data, $-[0], $+[0] - $-[0] ) ; } elsif ( $$data =~ m{\G$re_endblock}gc ) { print $repl // substr( $$data, $-[0], $+[0] - $-[0] ) ; # if ( --$ident == -1 ) # ? --$ident ; if ( $ident == -1 ) { return ; } } else { die "Failed to parse at: \"".substr( $$data, pos $$data, 5 +0 ) . "...\"" ; } } } processData( \$data, 0, $parseConfig ) ; __DATA__ # comment GlobalParam = 1 Object Type1 { Param1 = Foo NestedObject { Param 1 = Bar } # just another comment } # comment ObjectType2 { Param1 = Quz = z Param2 = 3 NestedObjectX { Param1 = Baz NestedObjectZ { Param1 = Baz } } NestedObjectY { Param1 = 5 } }