#!/usr/bin/perl
# Writes duplicate_erase_script.sh: for every checksum that occurs more than
# once in the `checksums` table of checksum_db.sqlite, the first file name is
# reported as the group "leader" and every other name is listed as a
# commented-out `rm` line for a human to review and enable.
use strict;
use warnings;
use DBI;
use DateTime;
use Data::Dumper;

# RaiseError makes every failing DBI call die instead of returning undef
# silently, so the statement-level code below needs no per-call checks.
my $dbh = DBI->connect(
    "dbi:SQLite:dbname=checksum_db.sqlite", "", "",
    { RaiseError => 1, PrintError => 0 },
) or die "cannot open checksum_db.sqlite: $DBI::errstr";

my $script_path = "duplicate_erase_script.sh";
open my $script, '>', $script_path
    or die "cannot open $script_path for writing: $!";

# Returns the list of checksums that are shared by more than one row.
# Rows are grouped by checksum and counted; a group of one row cannot
# contain duplicates, so only groups with COUNT(*) > 1 are returned.
# NULL checksums are skipped because they cannot be looked up by equality.
sub get_unique_checksums {
    my $sql = "SELECT checksum FROM checksums"
            . " WHERE checksum IS NOT NULL"
            . " GROUP BY checksum HAVING COUNT(*) > 1;";
    my $checksums = $dbh->selectcol_arrayref($sql);
    return @{$checksums};
}

# Returns the list of file names stored with the given checksum.
#   $checksum - checksum value to look up.
# The value is passed as a bind parameter rather than interpolated into the
# SQL text, so quotes or other special characters in it cannot alter the query.
sub checksum2names {
    my ($checksum) = @_;
    my $sql   = "SELECT name FROM checksums WHERE checksum = ?;";
    my $names = $dbh->selectcol_arrayref($sql, undef, $checksum);
    return @{$names};
}

for my $checksum (get_unique_checksums()) {
    my @same_checksum = checksum2names($checksum);

    # Take one element of the group aside, making it the leader (the copy
    # that is kept); everything left in @same_checksum is a duplicate of it.
    my $leader = shift @same_checksum;
    next if !defined $leader;

    print {$script} "# duplicates of $leader follow:\n";

    # NOTE(review): names are written verbatim. The rm lines are emitted
    # commented out, but a name containing a newline would start a new,
    # uncommented line in the generated script - review before running it.
    for my $name (@same_checksum) {
        print {$script} "# rm $name\n";
    }
}

# Buffered write errors only surface at close, so check it.
close $script or die "cannot close $script_path: $!";
$dbh->disconnect;