#!/usr/bin/perl -w
use strict;
use Carp;
=head1 picthresher
C<picthresher.pl>
=head2 Info
Author: Michael Libby
Contact: michael@andsoforth.com
Copyright: 2001, And So Forth Internet Services
This is Free software under the GPL, see http://www.gnu.org/copyleft/
+for info.
=head2 Summary
This program takes jpg files from a specified directory and
assists the user in cataloging and storing the images using a SQL
database to keep everything tidy. The program assists in preventing
duplicates by assigning unique, but derived IDs to each image based on
an MD5 hash (it is possible that these will not be unique, but in
practice, a duplicate is unlikely).
The user can classify the image, designate it for archiving, or ban
the image. The program is built around the notion of harvesting images
from a variety of sources and needing to specify which ones should
stay "live" (i.e. on the hard-drive) and which ones should be set up
for writing to CD or other archive media. The program will
automatically delete duplicate and banned images (don't worry, it will
let you watch while it does this).
The program will also create the directories to correspond to the sort
categories if needed.
=head2 Preparation
You will need an RDBMS with a table for storing picture info. The
implementation here works with postgreSQL, but I imagine that any DB
for which there is a DBD::* module would work. The table needs to have
the following fields (you're on your own for creating this):
hex_id char[32] * stores the unique key. recommend building
an index on this field
height int * the height of the image
width int * the width of the image
status varchar * if you want to use a more efficient char[x]
type, make sure the code strips trailing spaces
times_seen int * not yet used, intended to store number of times
this image has been run through the thresher
location varchar * the path to the file
names varchar * all names this file has had when found by
this program. not implemented.
You will need to customize a few global variables to work with your
particular installation.
=cut
;
############################################################
#
# External Modules
use Tk;
use Tk::JPEG;
use DBI;
use DBD::Pg;
use Digest::MD5;
use File::Find;
use File::Copy;
use File::Basename;
############################################################
#
# Prepare Globals
my $ARCH_DIR = '/home/user/images';
my $SRC_DIR = "$ARCH_DIR/thresh_pile";
my $PREP_DIR = "$ARCH_DIR/cd_prep";
my $TOP_DIR = "$ARCH_DIR/favorites";
my @CATEGORIES = ( '01_category_one',
'02_category_two',
'03_category_three',
'04_category_four'
);
my %DB_VAR = ( dbname => 'database_name',
host => 'machine.domain',
user => 'username',
pass => 'password',
table => 'table_name',
key => 'hex_id', #must match this column in DB
);
my $DB_HANDLE = '';#_open_DB_connection();
my %IMAGE;
############################################################
#
# Prepare the main window
my $MW = MainWindow->new;
$MW->title( "Pic Thresher" );
_maximize_MW( $MW );
my $Menubar = $MW->Menu();
$MW->configure( -menu => $Menubar );
_fill_menubar( $Menubar );
my $Filename = get_next_jpg();
my $Filename_LINE = $MW->
Label( -textvariable => \$Filename )->
pack( -side => 'top', -anchor => 'n', -fill => 'x' );
my $Message = 'Initializing';
my $Message_Line = $MW->
Label( -textvariable => \$Message )->
pack( -side => 'top', -anchor => 'n', -fill => 'x' );
my $Image = $MW->
Label()->
pack( -side => 'top',
-anchor => 'center',
-fill => 'both',
-expand => 1,
);
my $Photo = $Image->
Photo( '-format' => 'jpeg',
-file => $Filename );
my $Sized_Photo;
resize_image();
MainLoop();
############################################################
#
# Tk-related initialization routines
sub _maximize_MW {
my $mw = shift;
my $max_width = $mw->screenwidth()-10;
my $max_height = $mw->screenheight()-55;
my $geostring = join '', $max_width, 'x', $max_height, '+0+0';
$mw->geometry($geostring);
}
sub _fill_menubar {
my $menubar = shift;
my $file_menu = $menubar->cascade( -label => '~Thresher', -tearoff
+ => 0 );
$file_menu->command( -label => 'Create Archive Dirs', -command =>
+\&create_archives );
$file_menu->command( -label => 'Quit', -command => \&quit );
my $archive_menu = $menubar->cascade( -label => '~Archive', -tearo
+ff => 0 );
my $highlight_menu = $menubar->cascade( -label => '~Favorites', -t
+earoff =>0 );
foreach my $category ( @CATEGORIES ) {
$archive_menu->command( -label => $category, -command => [\&ar
+chive_pic, "$category"] );
$highlight_menu->command( -label => $category, -command => [\&
+top_pic, "$category"] );
}
my $ban_menu = $menubar->cascade( -label => '~Ban', -tearoff => 0
+);
$ban_menu->command( -label => 'Ban Image', -command => \&ban_pic )
+;
}
############################################################
#
# Program flow controls
sub _open_DB_connection {
my $dbh = DBI->
connect( "DBI:Pg:dbname=$DB_VAR{'dbname'};" .
"host=$DB_VAR{'host'};",
$DB_VAR{'user'}, $DB_VAR{'pass'},
{ RaiseError => 1, AutoCommit => 1}
) or
confess( "Unable to connect to DB: $!\n" );
return $dbh;
}
sub quit {
my $msg = shift || 'Done threshing.';
print "$msg\n";
$DB_HANDLE->disconnect() or
confess( "Error disconnecting DB: $!\n" );
exit;
}
sub ban_pic {
populate_image_data();
store_image_data();
change_image_status( 'banned' );
delete_file( $Filename );
display_next_image();
}
sub top_pic {
my $category = shift;
archive_pic( $category, 'favorites' );
}
sub archive_pic {
my $category = shift;
my $status = shift || undef;
my $newpath = store_pic( $category, $PREP_DIR );
if( $status && $status eq 'favorites') {
store_pic( $category, $TOP_DIR );
}
populate_image_data( $newpath );
store_image_data();
change_image_status( $status ) if $status;
delete_file( $Filename );
display_next_image();
}
sub store_pic {
my $category = shift;
my $store_dir = shift;
my $filename = basename( $Filename );
my $newpath = "$store_dir/$category/$filename";
$newpath = check_filename( $newpath );
copy( $Filename, $newpath ) or
confess( "cannot copy $Filename" );
return $newpath;
}
############################################################
#
# File Handlers
sub check_filename {
my $newpath = shift;
$newpath =~ s/'//g;
while( stat $newpath ) {
my( $base, $dir, $ext ) = fileparse( $newpath, '\..*?' );
if( $base =~ m/^\d$/){
$base = "thresher_$base";
$newpath = "$dir/$base.$ext";
}
elsif ( $base =~ m/\w.+\d$/ ) {
$base =~ m/(.+)(\d+)$/;
my( $basetext, $basenum ) = ( $1, $2 );
$basenum = $basenum + 1;
$newpath = "$dir/$basetext$basenum.$ext";
}
else {
$base = $base . "_001";
$newpath = "$dir/$base.$ext";
}
}
return $newpath;
}
sub get_next_jpg {
opendir( JPG, "$SRC_DIR" ) or
confess( "Unable to open $SRC_DIR: $!\n" );
my @filenames = readdir( JPG );
closedir( JPG ) or
confess( "Unable to close $SRC_DIR: $!\n" );
foreach my $filename (@filenames) {
if( $filename =~ m;jpe*g$;i ) {
#Exit as soon as a valid file is found
return "$SRC_DIR/$filename";
}
}
#else no JPG files in $SRC_DIR
quit( "Out of images to thresh" );
}
sub create_archives {
#if these directories exist, this should not overwrite them
mkdir( $ARCH_DIR );
mkdir( $PREP_DIR );
mkdir( $TOP_DIR );
foreach my $new_dir ( @CATEGORIES ) {
mkdir( "$PREP_DIR/$new_dir" );
mkdir( "$TOP_DIR/$new_dir" );
}
}
sub delete_file {
#this routine needs to interact with the database someday
my $filename = shift;
unlink( $filename ) or
confess( "Cannot unlink $filename: $!\n" );
$Message = "Deleted $filename";
$MW->update();
}
sub create_hex_id {
my $filename = shift;
my $md5 = Digest::MD5->new;
open( IMAGE, "$filename" ) or
confess( "Unable to open $filename: $!\n" );
$md5->add( <IMAGE> );
close( IMAGE ) or
confess( "Unable to close $filename: $!\n" );
my $hex = $md5->hexdigest;
return $hex;
}
############################################################
#
# Image Handlers
sub clear_IMAGE_data {
my %temp_hash = ( 'hex_id' => '',
'height' => '',
'width' => '',
'status' => '',
'times_seen' => '',
'location' => '',
'names' => ''
);
return %temp_hash;
}
sub display_next_image {
$Filename = get_next_jpg;
$Photo->blank;
$Photo->configure( -file => $Filename );
$Photo->read( $Filename );
resize_image();
#$Message = 'Image Loaded';
verify_image();
}
sub verify_image {
my $hex_id = create_hex_id( $Filename );
if( get_DB_entry( $hex_id ) ) {
$Message = 'Deleting duplicate image';
delete_file( $Filename );
display_next_image();
}
}
sub populate_image_data {
my $location = shift || $Filename;
# the ' will mess up the SQL otherwise
# and should only be here if the file is being banned
# as it should have been eliminated earlier for stored files
$location =~ s/'//g;
my $hex_id = create_hex_id( $Filename );
%IMAGE = ( 'hex_id' => $hex_id,
'height' => $Photo->height(),
'width' => $Photo->width(),
'status' => 'archived',
'times_seen' => '1',
'location' => $location,
'names' => basename( $location )
);
}
sub resize_image {
my ($img_w, $img_h) = ($Photo->width, $Photo->height);
my $max_width = $MW->screenwidth();
my $max_height = $MW->screenheight() - 20;
my $xfactor = $img_w / $max_width;
my $yfactor = $img_h / $max_height;
my $intfactor = $xfactor > $yfactor ? int($xfactor) : int($yfactor
+);
$intfactor += 1;
unless( $Sized_Photo) {
$Sized_Photo = $Image->
Photo( '-format' => 'jpeg',
-file => $Filename );
}
$Sized_Photo->blank;
$Sized_Photo->copy( $Photo, -subsample => $intfactor, -shrink);
$Message = ( $Sized_Photo->width == $Photo->width ) ?
'image loaded: full-size' : "image loaded: reduced by $intfact
+or";
$Image->configure( -image => $Sized_Photo );
}
############################################################
#
# Database Handlers
sub exec_SQL {
my $sql = shift;
#print "Trying:\n$sql\n\n";
my $sth = $DB_HANDLE->prepare( $sql );
my $rv = $sth->execute() or
confess( "Cannot execute SQL : $!\n" );
return $sth;
}
sub get_DB_entry {
my $key_id = shift or
confess( "Missing parameter: $!\n" );
my $sql = "SELECT * FROM $DB_VAR{'table'} WHERE $DB_VAR{'key'} = '
+$key_id';";
my $sth = exec_SQL( $sql );
my @record = $sth->fetchrow_array;
return @record if $record[0];
return 0;
}
sub store_image_data {
my $sql = "INSERT INTO $DB_VAR{'table'} values ( " .
"'$IMAGE{'hex_id'}', " .
"$IMAGE{'height'}, " .
"$IMAGE{'width'}, " .
"'$IMAGE{'status'}', " .
"$IMAGE{'times_seen'}, " .
"'$IMAGE{'location'}', " .
"'$IMAGE{'names'}' );";
my $sth = exec_SQL( $sql );
}
sub change_image_status {
my $status = shift;
my $sql = "UPDATE $DB_VAR{'table'} SET status = '$status' " .
"WHERE hex_id = '$IMAGE{'hex_id'}';";
my $sth = exec_SQL( $sql );
}
|