in reply to Re^2: regex causing segmentation fault (core dump)
in thread regex causing segmentation fault (core dump)

the regex is supposed to get... a number value which represent a specific interface and a digit or a string as the second value depends on what that series of digits represent in the beginning

I'm not sure I get all that, but it sounds like you should just focus on simplifying the process. Split the scalar up into lines, and look for matches one line at a time. I would expect that to simplify a lot of things for you. If you have trouble with that, tell us more about what you're actually trying to accomplish, show us a little bit of input data, what you want to extract from it, etc.

  • Comment on Re^3: regex causing segmentation fault (core dump)

Replies are listed 'Best First'.
Re^4: regex causing segmentation fault (core dump)
by Otogi (Beadle) on Apr 03, 2006 at 22:38 UTC
    Splitting it into lines is a major design change, although if I find I have no choice I will walk that route. I have simplified the program a lot to just be sure that nothing else is causing it like one of the modules (DBI,DBD::Oracle,Data::Dumper,Posix) it still segfaults here is a copy of the gdb debug output:

    #4989 0x080f6817 in S_regmatch (prog=0x841d350) at regexec.c:3784 #4990 0x080f7821 in S_regmatch (prog=0x841d358) at regexec.c:3254 #4991 0x080f6817 in S_regmatch (prog=0x841d350) at regexec.c:3784 #4992 0x080f7821 in S_regmatch (prog=0x841d358) at regexec.c:3254 #4993 0x080f6817 in S_regmatch (prog=0x841d350) at regexec.c:3784 #4994 0x080f7821 in S_regmatch (prog=0x841d358) at regexec.c:3254 #4995 0x080f6817 in S_regmatch (prog=0x841d350) at regexec.c:3784 #4996 0x080f7821 in S_regmatch (prog=0x841d358) at regexec.c:3254 #4997 0x080f6817 in S_regmatch (prog=0x841d350) at regexec.c:3784 #4998 0x080f7821 in S_regmatch (prog=0x841d358) at regexec.c:3254 #4999 0x080f6817 in S_regmatch (prog=0x841d350) at regexec.c:3784 #5000 0x080f7821 in S_regmatch (prog=0x841d358) at regexec.c:3254 #5001 0x080f6817 in S_regmatch (prog=0x841d350) at regexec.c:3784 #5002 0x080f7821 in S_regmatch (prog=0x841d358) at regexec.c:3254 #5003 0x080f6817 in S_regmatch (prog=0x841d350) at regexec.c:3784 #5004 0x080f7821 in S_regmatch (prog=0x841d358) at regexec.c:3254 #5005 0x080f6817 in S_regmatch (prog=0x841d350) at regexec.c:3784 #5006 0x080f7821 in S_regmatch (prog=0x841d358) at regexec.c:3254 #5007 0x080f6817 in S_regmatch (prog=0x841d350) at regexec.c:3784 #5008 0x080f7821 in S_regmatch (prog=0x841d358) at regexec.c:3254 #5009 0x080f6817 in S_regmatch (prog=0x841d350) at regexec.c:3784 #5010 0x080f7821 in S_regmatch (prog=0x841d358) at regexec.c:3254 #5011 0x080f6817 in S_regmatch (prog=0x841d350) at regexec.c:3784 #5012 0x080f7821 in S_regmatch (prog=0x841d358) at regexec.c:3254 #5013 0x080f6817 in S_regmatch (prog=0x841d350) at regexec.c:3784 #5014 0x080f7821 in S_regmatch (prog=0x841d358) at regexec.c:3254 #5015 0x080f6817 in S_regmatch (prog=0x841d350) at regexec.c:3784 #5016 0x080f7821 in S_regmatch (prog=0x841d358) at regexec.c:3254 #5017 0x080f6817 in S_regmatch (prog=0x841d350) at regexec.c:3784 #5018 0x080f7821 in S_regmatch (prog=0x841d358) at regexec.c:3254 #5019 0x080f6817 in S_regmatch (prog=0x841d350) at regexec.c:3784 #5020 0x080f7821 in S_regmatch (prog=0x841d358) at regexec.c:3254 #5021 0x080f6817 in S_regmatch (prog=0x841d350) at regexec.c:3784 #5022 0x080f7821 in S_regmatch (prog=0x841d358) at regexec.c:3254 #5023 0x080f6817 in S_regmatch (prog=0x841d350) at regexec.c:3784 #5024 0x080f7821 in S_regmatch (prog=0x841d360) at regexec.c:3254 #5025 0x080f7607 in S_regmatch (prog=0x841d344) at regexec.c:3104 #5026 0x080f3d2c in S_regtry (prog=0x841d300, startpos=0xb2939008 "3503 responses for oid .1.3.6.1.4.1.9.2.2.1.1 +: \n\t.1.3.6.1.4.1.9.2.2.1.1.1.1 = \"te/test test\" (OCTETSTR)\n\t.1. +3.6.1.4.1.9.2.2.1.1.1.2 = \"te/test test\" (OCTETSTR)\n\t.1.3.6.1.4.1 +.9.2.2.1.1.1.3 = \"Pac"...) at regexec.c:2185 #5027 0x080f3849 in Perl_regexec_flags (prog=0x841d300, stringarg=0xb2939008 "3503 responses for oid .1.3.6.1.4.1.9.2.2.1. +1: \n\t.1.3.6.1.4.1.9.2.2.1.1.1.1 = \"1test\" (OCTETSTR)\n\t.1.3.6.1. +4.1.9.2.2.1.1.1.2 = \"test\" (OCTETSTR)\n\t.1.3.6.1.4.1.9.2.2.1.1.1.3 + = \"Pac"..., strend=0xb295d493 ".1.1.107.22 = 0 (COUNTER)\n\t.1.3.6.1.4.1.9.2.2 +.1.1.107.23 = 1219 (COUNTER)\n\t.1.3.6.1.4.1.9.2.2.1.1.107.24 = 0 (CO +UNTER)\n\t.1.3.6.1.4.1.9.2.2.1.1.107.25 = 0 (COUNTER)\n\t.1.3.6.1.4.1 +.9.2.2.1.1.107.26 = 427"..., strbeg=0xb2939008 "3503 responses for oid .1.3.6.1.4.1.9.2.2.1.1: +\n\t.1.3.6.1.4.1.9.2.2.1.1.1.1 = \"test\" (OCTETSTR)\n\t.1.3.6.1.4.1. +9.2.2.1.1.1.2 = \"test" (OCTETSTR)\n\t.1.3.6.1.4.1.9.2.2.1.1.1.3 = \" +te"..., minend=0, sv=0x828c714, data=0x0, flags=2) at regexec.c:2012
    Here is the script simplified but produces the same error:

    #!/usr/local/bin/perl use DBI; use Data::Dumper; use strict; use POSIX; my $USER = ''; my $PASS = ''; my $HOST = ''; my $SID = ''; my $PROCESS_STAGE = 'SNMP Collection'; my $PROCESS_ID = 'Test PID 2'; my $PROCESS_TS = '2006-03-28-09-38-59-161895'; my $dbh = DBI->connect("dbi:Oracle:host=$HOST;sid=$SID", $USER,$PASS); my $sth = $dbh->prepare("SELECT a.process_id,a.process_ts,a.process_st +age,b.ekl_set,b.mapping FROM sys_ekl_ipt_001 a, cor_ekl_set_dfn b WHERE a.ekl_set = b.ekl_set"); $sth->execute; my ( $PATH_PREFIX, $PARENT_MAPPING, $PARENT_ELEMENT, $RULE, $DATA_NATURE, $TABLE_SUFFIX, $ELEMENT, $COLUMN_NAME, $PRIORITY_GLOBAL, $PRIORITY_LOCAL, $regex ); my ($PROCESS_ID,$PROCESS_TS,$EKL_SET,$MAPPING,$PROCESS_STAGE); my (%DATA,%OUTPUT,%TABLE,%leaves); #Data Structures my %REGEX; while (($PROCESS_ID,$PROCESS_TS,$PROCESS_STAGE,$EKL_SET,$MAPPING) = $s +th->fetchrow_array) { my $sth2 = $dbh->prepare("SELECT a.mapping, a.path_prefix,a.parent_m +apping, a.parent_element,a.rule,a.data_nature,a.tabl +e_suffix, b.element,b.column_name,b.priority_local,b.p +riority_global FROM cor_ekl_map a, cor_ekl_map_dfn b WHERE a.mapping = '$MAPPING' and a.mapping = + b.mapping"); $sth2->execute; while ( ($MAPPING,$PATH_PREFIX,$PARENT_MAPPING,$PARENT_ELEMENT,$RULE, +$DATA_NATURE,$TABLE_SUFFIX, $ELEMENT,$COLUMN_NAME,$PRIORITY_LOCAL,$PRIORITY_GLOBAL) = $st +h2->fetchrow_array) { my $table_name = join '_',('dat',$DATA_NATURE,$TABLE_SUFFIX); my $dth = $dbh->prepare("SELECT regex FROM cor_ekl_rul WHERE rule = '$RULE'"); $dth->execute; $regex = $dth->fetchrow_array; open (OUTFILE,">REGEX/$RULE"); print OUTFILE $regex; close (OUTFILE); $DATA {$MAPPING}{path_prefix} = $PATH_PREFIX; $REGEX {$MAPPING} = $RULE; $DATA {$MAPPING}{parent_element} = $PARENT_ELEMENT if defined ($P +ARENT_ELEMENT); $DATA {$MAPPING}{parent_mapping} = $PARENT_MAPPING if defined ($P +ARENT_MAPPING); $DATA {$MAPPING}{process_id} = $PROCESS_ID if defined ($PROCESS_I +D); $DATA {$MAPPING}{process_ts} = $PROCESS_TS if defined ($PROCESS_T +S); $DATA {$MAPPING}{process_stage} = $PROCESS_STAGE if defined ($PRO +CESS_STAGE); $DATA {$MAPPING}{table_name}= $table_name; push @{$DATA {$MAPPING}{element}}, $ELEMENT if defined ($ELEMENT) +; push @{$DATA {$MAPPING}{priority_local}},$PRIORITY_LOCAL if defin +ed ($PRIORITY_LOCAL); push @{$DATA {$MAPPING}{priority_global}},$PRIORITY_GLOBAL if def +ined ($PRIORITY_GLOBAL); push @{$DATA {$MAPPING}{column_name}},$COLUMN_NAME if defined ($C +OLUMN_NAME); my $primarykeys = $dbh->prepare ("SELECT column_name,column_type FR +OM cor_dat_col WHERE data_nature = '$DATA_NATURE' AND table_suffix = '$TABLE_SUFFIX' +"); $primarykeys->execute; my @pks = ('process_id','process_ts','process_stage'); my @columns; my ($column,$type); while (($column,$type) = $primarykeys->fetchrow) { push (@pks ,$column) if $type eq 'pk'; push (@columns,$column); } $TABLE{$table_name}{numofkeys} = scalar(@pks); my $TEST = $DATA{$PARENT_MAPPING}; my $child_mapping = $MAPPING; if ($PARENT_MAPPING ne '{N/A}') { if (!defined($TEST)) { my $sth3 = $dbh->prepare("SELECT a.mapping, a.path_prefix,a.p +arent_mapping, a.parent_element,a.rule,a.data_nature,a.tabl +e_suffix, b.element,b.column_name,b.priority_local,b.p +riority_global FROM cor_ekl_map a, cor_ekl_map_dfn b WHERE a.mapping = '$PARENT_MAPPING' and a.ma +pping = b.mapping"); $sth3->execute; while(($MAPPING,$PATH_PREFIX,$PARENT_MAPPING,$PARENT_ELEMENT, +$RULE,$DATA_NATURE,$TABLE_SUFFIX, $ELEMENT,$COLUMN_NAME,$PRIORITY_LOCAL,$PRIORITY_GLOBAL) = $st +h3->fetchrow_array) { my $dth = $dbh->prepare("SELECT regex FROM cor_ekl_rul WHERE rule = '$RULE'"); $dth->execute; $regex = $dth->fetchrow_array; open (OUTFILE,">REGEX/$RULE"); print OUTFILE $regex; close (OUTFILE); $DATA {$MAPPING}{path_prefix} = $PATH_PREFIX; $REGEX{$MAPPING} = $RULE; $DATA {$MAPPING}{parent_element} = $PARENT_ELEMENT; $DATA {$MAPPING}{parent_mapping} = $PARENT_MAPPING; $DATA {$MAPPING}{process_id} = $PROCESS_ID; $DATA {$MAPPING}{process_ts} = $PROCESS_TS; $DATA{$MAPPING}{process_stage} = $PROCESS_STAGE; $DATA {$MAPPING}{table_name} = join '_',('dat',$DATA_NATURE,$T +ABLE_SUFFIX); push @{$DATA {$MAPPING}{element}}, $ELEMENT; push @{$DATA {$MAPPING}{priority_local}}, $PRIORITY_LOCAL; push @{$DATA {$MAPPING}{priority_global}},$PRIORITY_GLOBAL; push @{$DATA {$MAPPING}{column_name}},$COLUMN_NAME; push @{$DATA {$MAPPING}{child_mappings}},$child_mapping; } } else { my $is_there = 0; foreach my $c (@{$DATA{$PARENT_MAPPING}{child_mappings}}) +{ if ($c eq $child_mapping) { $is_there = 1; } } if ($is_there == 0) { push @{$DATA{$PARENT_MAPPING}{child_mappings}},$child_map +ping; $is_there = 0;} } } undef($dth); } } undef($sth); $dbh->disconnect; undef ($dbh); print Dumper %DATA; foreach $MAPPING (keys %DATA) { my $one_fn = $DATA{$MAPPING}{path_prefix}; if ($one_fn ne '{N/A}') { $PROCESS_ID = $DATA{$MAPPING}{process_id}; $PROCESS_TS = $DATA{$MAPPING}{process_ts}; $PROCESS_STAGE = $DATA{$MAPPING}{process_stage}; opendir(TWO, "/shaw/data/link/$PROCESS_ID/$PROCESS_TS/$PROCESS_STAGE +/$one_fn") || die "Cannot open /shaw/data/link/$PROCESS_ID/$PROCESS_TS/$PROCE +SS_STAGE/$one_fn: $!"; my @two = grep(!/^\.\.?$/, readdir TWO); foreach my $two_fn (@two) { opendir(THREE, "/shaw/data/link/$PROCESS_ID/$PROCESS_TS/$PROCESS_STAG +E/$one_fn/$two_fn") || die "$!"; my @three = grep (!/^\.\.?$/, readdir THREE); my $three_fn = less(@three); my $TIMESTAMP = $three_fn; my $DEVICE = $two_fn; my $file = "/shaw/data/link/$PROCESS_ID/$PROCESS_TS/$PROCESS_STAGE +/$one_fn/$DEVICE/$TIMESTAMP"; open (INFILE,"<$file") || die "Cannot open $file: $!"; print "\n $file \n"; my $input =(); read INFILE, my $input,-s $file; close(INFILE); my $pmap = $DATA{$MAPPING}{parent_mapping}; if ($pmap eq '{N/A}') { #print "starting regex\n"; print $REGEX{$MAPPING}; open reg,"<REGEX/$REGEX{$MAPPING}"; read reg, my $regex,-s "REGEX/$REGEX{$MAPPING}"; print " \n REGEXP \n"; print Dumper $regex; $input =~/$regex/xg; # print Dumper @a; } } } } sub less { my (@a) = @_; my ($less,$value,$lessp,$val); foreach my $val (@a) { my $value = $val; my $lessp = $less; if(defined($less)){$lessp =~ s/-//g}; $val =~ s/-//g; if (!defined($less)||$lessp < $val) { $less = $value; } } return $less; }

    If you see anything wrong here that could be causing the segfault please let me know and I will investigate more. Lots of useless stuff has been added for testing.

      Well, that certainly is a lot of detail -- most of which doesn't really shed light on the basic problem. We do at least get to see that you really are pulling a regex out of a database, and applying it to the full content of a data file. I presume the regex at the top of the thread indicates what comes from the database, but you haven't shown us what is in the file, or how you built the regex that went into the database. That might matter.
      Splitting it into lines is a major design change

      I assume this is because of how the regexes are being loaded into the database. So what is the point of trying to store and use monster regexes this way? Is that really necessary? There is so much redundant stuff in that big regex, if you really need the match to extend over the entire data file (which I doubt), it would make more sense to construct most of the regex on the fly in the perl script, rather than storing it all in a table (numerous times, presumably, with minor, systematic variations). Maybe a different design would actually be better.

      As for simplifying the code, you still have farther to go on that. Preparing the DBI statement handles in advance and using placeholders would be a good start. Consider an idiom like this, especially for the queries that you are using over and over again:

      my @map_cols = qw( a.mapping a.path_prefix a.parent_mapping a.parent_element a.rule a.data_nature a.table_suffix b.element b.column_name b.priority_local b.priority_global ); my $map_sql = 'SELECT ' . join( ',', @map_cols ) . 'FROM cor_ekl_map a, cor_ekl_map_dfn b WHERE a.mapping = ? and a.mapping = b.mapping'; my $map_sth = $dbh->prepare( $map_sql ); my @proc_cols = qw(a.process_id a.process_ts a.process_stage b.ekl_set b.mapping); my $proc_sql = 'SELECT ' . join( ',', @proc_cols ) . 'FROM sys_ekl_ipt_001 a, cor_ekl_set_dfn b WHERE a.ekl_set = b.ekl_set'; my $proc_sth = $dbh->prepare( $proc_sql ); s/^[ab]\.// for ( @map_cols, @proc_cols ); # don't need table prefixe +s now my $regex_sth = $dbh->prepare( 'SELECT regex FROM cor_ekl_rul WHERE ru +le = ?' ); my $cdata_sth = $dbh->prepare( 'SELECT column_name column_type FROM cor_dat_col WHERE data_nature = ? AND table_suffix = ?' ); $proc_sth->execute; while ( my $proc_row = $proc_sth->fetchrow_arrayref ) { my %procdata; @procdata{@proc_cols} = @$proc_row; $map_sth->execute( $procdata{"mapping"} ); while ( my $map_row = $map_sth->fetchrow_arrayref ) { my %mapdata; @mapdata{@map_cols} = @$map_row; my $table_name = join( '_', 'dat', $mapdata{data_nature}, $mapdata{table_suffix} ); $regex_sth->execute( $mapdata{rule} ); my ( $regex ) = $regex_sth->fetchrow_array; # and so on... } }
      I think that going this direction will make your code a lot shorter, simpler, and easier to maintain. Boiling down the regexes to just the stuff that matters will help too. I suspect that you don't really need to store regexes in the database as all.