in reply to Re: stitch together text file recovered by photorec/testdisk?
in thread stitch together text file recovered by photorec/testdisk?
This narrows the list to 68.6 MB by eliminating subsequent duplicates
#!/usr/bin/perl --
# Deduplicate files listed in a photorec/testdisk log.
#
# Reads qphotorec.log, computes the MD5 of every file named on a line that
# mentions "recup_dir", keeps only the first file seen for each distinct
# digest, and copies those files into ./myfinalrecup/ (created next to the
# log). The counts before and after deduplication are printed to STDOUT.
use strict;
use warnings;
use Path::Tiny qw/ path /;
use File::Find::Rule qw/ find /;
use autodie;
use Digest::MD5 qw( md5_hex );

# Indexes into each per-file record: [ filename, blocks, md5, seen-count ].
use constant FILENAME => 0;
use constant SEEN     => 3;

my $qphotoreclog = 'qphotorec.log';
$qphotoreclog = path( $qphotoreclog )->realpath;

# Work from the log's own directory so that relative names taken from the
# log (e.g. recup_dir.3/f8580464.txt) resolve, and so that the output
# directory is created beside the log.
chdir path( $qphotoreclog )->parent;

my $log = path( $qphotoreclog )->slurp_raw;

my @files;    # one record per log entry, in log order
my %seen;     # md5 => [ filenames with that content, in log order ]

while ( $log =~ m{^(.*?)[\r\n]*$}mg ) {
    my $line = $1;
    next if $line !~ /recup_dir/;

    # First whitespace-separated field is the file name; the remainder of
    # the line is kept verbatim as the block range.
    my ( $filename, $blocks ) = split ' ', $line, 2;

    # A log entry whose file is gone would otherwise abort the whole run
    # with an exception from slurp_raw; skip it and carry on.
    if ( !path( $filename )->is_file ) {
        warn "Skipping log entry, not a readable file: $filename\n";
        next;
    }

    my $md5 = md5_hex( path( $filename )->slurp_raw );
    push @{ $seen{$md5} }, $filename;

    # The seen-count is 1 for the first file with this digest, 2 for the
    # second, and so on.
    push @files, [ $filename, $blocks, $md5, scalar @{ $seen{$md5} } ];
}
undef $log;    # release the slurped log text

# dd(\@files );

print "Files before ", scalar(@files), "\n";

# Keep only the first occurrence of each distinct content.
@files = map { $_->[FILENAME] } grep { $_->[SEEN] == 1 } @files;

print "Files after ", scalar(@files), "\n";

# dd(\@files );

path( 'myfinalrecup' )->mkpath;

# NOTE(review): every file is copied into one flat directory, so two files
# with the same basename in different recup_dir.N directories would
# overwrite each other -- confirm the recovered names are unique.
for my $filename ( @files ) {
    path( $filename )->copy( 'myfinalrecup/' );
}

__END__
Files before 40330
Files after 6341
Can't really see a relationship between the blocks and the filename; there probably isn't one
[ "recup_dir.3/f8580464.txt", "70013087-70013102", "2615e08f437222995c7aab0569f015f3", 1, ], [ "C:/undelet/testdisk-7.0.win/recup_dir.3/f8580480.txt", "70013103-70013110", "6fb0dd36db299c9b713d5c622bf5b499", 1, ], ... [ "recup_dir.3/f8583480.txt", "70016103-70016118", "2615e08f437222995c7aab0569f015f3", 2, ],
|
|---|