Corrupted storable string (binary v2.7) at ../../lib/Storable.pm (autosplit into ../../lib/auto/Storable/thaw.al) line 415, at /usr/share/perl5/IPC/Shareable.pm line 545 #### Munged shared memory segment (size exceeded?) at ./dup_finder2.pl line 45 Munged shared memory segment (size exceeded?) at ./dup_finder2.pl line 45 Object #3223600 should have been retrieved already at ../../lib/Storable.pm (autosplit into ../../lib/auto/Storable/thaw.al) line 415, at /usr/share/perl5/IPC/Shareable.pm line 545 Object #3223600 should have been retrieved already at ../../lib/Storable.pm (autosplit into ../../lib/auto/Storable/thaw.al) line 415, at /usr/share/perl5/IPC/Shareable.pm line 545 Corrupted storable string (binary v2.7) at ../../lib/Storable.pm (autosplit into ../../lib/auto/Storable/thaw.al) line 415, at /usr/share/perl5/IPC/Shareable.pm line 545 Corrupted storable string (binary v2.7) at ../../lib/Storable.pm (autosplit into ../../lib/auto/Storable/thaw.al) line 415, at /usr/share/perl5/IPC/Shareable.pm line 545 #### #!/usr/bin/perl use warnings; use strict; use Text::LevenshteinXS qw(distance); use Benchmark qw(timediff timestr); use IPC::Shareable qw(:all); use Parallel::ForkManager; my $t0 = Benchmark->new(); my %contacts; my $minimum_score = 80; my $max_childs = 5; my $glue = 1978; my $db = tie %contacts, 'IPC::Shareable', $glue, { create => 1, exclusive => 0, destroy => 1 } or die "cannot tie contacts\n"; # :WARNING:21-10-2010:arfreitas: child process must receive this $SIG{INT} = sub { die "$$ dying\n" }; # :WARNING:21-10-2010:arfreitas: must get registries AFTER creating the shared memory tie get_db_data(); my @control = keys(%contacts); print 'Starting processing data with maximum of ', $max_childs, ' childs', "\n"; my $manager = Parallel::ForkManager->new($max_childs); $manager->set_max_procs($max_childs); my $max_tries = 3; foreach my $id (@control) { $manager->start() and next; my $tries = 0; if ( exists( $contacts{$id} ) ) { while (1) { if ( $db->shlock(LOCK_EX) ) { print "child $$ got a lock\n"; # :BUG:21-10-2010:arfreitas: race condition when deleting keys of the tied hash my $testing = delete( $contacts{$id} ); my %cache = %contacts; unless ( $db->shunlock() ) { print "child $$ could not release the lock\n"; } else { print "child $$ released the lock\n"; } validate_contact( $id, $testing, \%cache ); last; } else { print "child $$ can't lock\n"; sleep 1; $tries++; last if ( $tries >= $max_tries ); } } } else { print "child $$: somebody deleted key $id\n"; } $manager->finish(); } $manager->wait_all_children(); my $t1 = Benchmark->new(); my $td = timediff( $t1, $t0 ); print "\nThe code took: ", timestr($td), "\n"; sub validate_contact { my $id = shift; my $testing = shift; my $cache_ref = shift; my $file = 'tmp/processing-' . $id . '.log'; my $source_name = $testing; my $counter = 0; foreach my $contact ( keys( %{$cache_ref} ) ) { my $dest_name = $cache_ref->{$contact}; my $dest_len = length($dest_name); $dest_len = 1 unless ( $dest_len > 0 ); my $distance = distance( $source_name, $dest_name ); my $score = 100 - ( ( $distance * 100 ) / $dest_len ); #how much equal is the current row with the original one if ( $score >= $minimum_score ) { print "$id looks like $contact\n"; } $counter++; } print "child $$ compared id $id with $counter contacts\n"; } sub get_db_data { my $file = 'contacts.txt'; open( my $in, '<:utf8', $file ) or die "cannot read $file: $!\n"; while (<$in>) { chomp; my @temp = split( /\|/, $_ ); my $id = splice( @temp, 1, 1 ); $contacts{$id} = $temp[0]; } close($in); print "OK\n"; return; }