#!/usr/bin/perl
# itmspgen.pl
# (C) 2012 Chris Lindensmith
# licensed under Perl5 license (http://dev.perl.org/licenses/)
#
# pulls book metadata from the books database and generates an itmsp package.
# main value of this is in doing bulk uploads of books to the ibookstore.
# the secondary value is in avoiding having to enter all the rights data country by
# country for 30+ countries. Uses crude but reasonably effective rules for price conversion:
# - base price is USD, finds price tier by rounding to the nearest whole dollar.
# - physical price is USD, and there's a hash for entering the price conversions
#   which is not necessarily the same as straight currency conversions.
#
# requires the user to put in a bunch of globals:
# $imprint, $publisher, $provider (get this by looking in one of your itmsp files)
# plus the database login information.
# %dbmap is for mapping your database columns to the itmsp tag names
#
# looks in the "$book_source_files" directory and below for the book file and the cover
# looks for the isbn in the filename to find the content file and cover.
# copies the book file and cover into a new directory (named for the $isbn.".itmsp")
# computes an md5 hash of each of the two files to put in the xml file
# MacOS takes care of identifying it as a package.
#
# the user can then open the itmsp file with iTunesProducer and all the metadata will be there
# simply check that the metadata are correct, make any adjustments, and then send to apple.
# if you have to make changes to the content or cover file, replace them in iTunes Producer
# and resave the itmsp file from producer.
# all file uploading occurs from iTunes Producer.
#
# An itmsp package is just a directory with 3 files in it:
# the xml metadata (metadata.xml), the book content file (an epub), and the cover image
# the metadata file does care about whitespace in the values.
use strict;
use warnings;
use DBI;
use Data::Dumper;
use XML::LibXML;
use XML::LibXML::PrettyPrint;
use DateTime;
use Cwd;
use File::Find;
use File::Copy;
use File::Path;
use Digest::MD5 qw(md5_hex);

# ---------------------------------------------------------------------------
# User configuration. Every value below is read by the subs further down,
# so each declaration must sit on its own line (a trailing "#" comment would
# otherwise swallow the declarations that follow it).
# ---------------------------------------------------------------------------

# debug=1 mode turns prettyprint on for xml output.
# the prettyprint mode used should load correctly into iTunes Producer, but if there are problems,
# set debug=0 and it will spew straight linear text.
my $debug=1;

# database access information for dbi
my $dbuser='username';
my $dbpassword='password';
my $dbname='databasename';

# imprint and publisher (these could be moved to the db, but in my case they're not)
my $imprint='Your Imprint Here';
my $publisher='Publisher Name here';
my $provider='ProviderName'; # get this from an itmsp file you made with Producer.

# root of the directory tree searched (recursively) for epub and cover files
my $book_source_files="/full/path/to/the/directory/that/contains/your/books/and/images";

# File::Find callbacks take no user arguments, so the isbn being searched for
# and the file names found are exchanged through these file-level variables.
my $global_isbn;        #used for passing isbn to File::Find
my $global_coverfname;  #to return from File::Find
my $global_contentname; #to return from File::Find

# packages are created in the directory the script is started from
my $dest_dir=getcwd();

# mapping of itunes names (key) to database column names (value).
# edit the values to match your db.
my %dbmap=(
    'isbn'=>'e_isbn13',
    'title'=>'title',
    'printisbn'=>'print_isbn',
    'bisac_codes'=>'BISAC_subs',
    'description'=>'description',
    'year'=>'copyright',
    'eprice'=>'amazon_price',
    'author_sort_name'=>'author',
    'print_price'=>'print_price'
);

# values written verbatim into every <product> element
my $cleared_for_sale='true';
my $drmfree='true';

# territory code => currency used for the physical list price in that territory
my %country_currency=(
    'AU'=>'AUD', 'AT'=>'EUR', 'BE'=>'EUR', 'BG'=>'EUR',
    'CA'=>'CAD', 'CY'=>'EUR', 'CZ'=>'EUR', 'DK'=>'DKK',
    'EE'=>'EUR', 'FI'=>'EUR', 'FR'=>'EUR', 'DE'=>'EUR',
    'GR'=>'EUR', 'HU'=>'EUR', 'IE'=>'EUR', 'IT'=>'EUR',
    'LV'=>'EUR', 'LT'=>'EUR', 'LU'=>'EUR', 'MT'=>'EUR',
    'NL'=>'EUR', 'NO'=>'NOK', 'PL'=>'EUR', 'PT'=>'EUR',
    'RO'=>'EUR', 'SK'=>'EUR', 'SI'=>'EUR', 'ES'=>'EUR',
    'SE'=>'SEK', 'CH'=>'CHF', 'GB'=>'GBP', 'US'=>'USD'
);

# the conversion rate is for the price of the print version and should be
# based on print pricing in the country, which may not correspond directly to
# currency conversions.
my %conversion_rate=(
    'AUD'=>1.1,
    'EUR'=>0.8,
    'CAD'=>1.0,
    'DKK'=>6.0,
    'NOK'=>6.0,
    'SEK'=>6.6,
    'CHF'=>1.0,
    'GBP'=>0.7,
    'USD'=>1.0
);

# set up the database connection and slurp the rows
my $dbh = DBI->connect("DBI:mysql:$dbname", $dbuser, $dbpassword)
    or die "Could not connect to database: $DBI::errstr";
my $sth=$dbh->prepare("Select * from metadata where e_ISBN13>0")
    or die "Could not prepare metadata query: ".$dbh->errstr;
$sth->execute
    or die "Could not run metadata query: ".$sth->errstr;

#iterate through the resulting rows, one itmsp package per row
while (my $result=$sth->fetchrow_hashref()){
    #print STDERR Dumper($result);
    create_itmsp($result);
}
$dbh->disconnect;

# this is where most of the action happens
#   generate the metadata.XML
#   find the content file and copy it to a new directory
#   find the cover image and copy it to a new directory
#   compute md5 hashes of the two asset files
#   write the metadata file to the new directory
# Takes one row of the metadata table as a hashref (column name => value).
# The package directory is "$dest_dir/<isbn>.itmsp"; it is removed again when
# no cover or no epub could be found for the isbn.
# Dies if the package directory cannot be created or metadata.xml cannot be written.
sub create_itmsp{
    my $hashref=shift;
    my $isbn=$hashref->{$dbmap{'isbn'}};
    my $package_dir="$dest_dir/$isbn.itmsp";

    my $newdoc=XML::LibXML::Document->new('1.0', "UTF-8");
    $newdoc->setStandalone(0); # set standalone "no"

    # set up the package
    my $newroot=XML::LibXML::Element->new("package");
    $newroot->setAttribute("xmlns","http://apple.com/itunes/importer/publication");
    $newroot->setAttribute("version","publication4.3");
    $newdoc->setDocumentElement($newroot);

    #set up the provider (fixed as always us)
    $newroot->appendTextChild("provider",$provider);

    #create the book node and insert it in the package node
    my $booknode=XML::LibXML::Element->new("book");
    $booknode->appendTextChild("vendor_id",$isbn);
    $newroot->addChild($booknode);

    # add the metadata
    $booknode->addChild(build_metadata($hashref));

    # "mkdir PATH || die" never dies because || binds to the path string;
    # use the low-precedence "or". A directory left over from an earlier run is reused.
    unless (-d $package_dir){
        mkdir($package_dir) or die "Can't make directory $package_dir: $!\n";
    }

    #find the cover and the content and copy them
    $global_isbn=$isbn;
    # some books aren't available in epub, clearing the filename so we can tell
    $global_contentname=undef;
    $global_coverfname=undef;
    find(\&find_assets,$book_source_files);

    if($global_contentname && $global_coverfname){
        #if we found a cover and an epub, make the nodes with the files.
        $booknode->addChild(build_assets($isbn));
        if($debug==1){
            my $pp = XML::LibXML::PrettyPrint->new(
                indent_string => " ",
                element => {
                    compact => [qw/
                        vendor_id publication_type identifier title primary name
                        sort_name role language subject imprint publisher
                        preorder_previews publication_date provider number_of_pages
                        territory cleared_for_sale price_tier release_type
                        preorder_sales_start_date sales_start_date
                        physical_list_price drm_free file_name size checksum
                    /]
                }
            );
            $pp->pretty_print($newdoc); # modified in-place
        }
        # toFile's return value is only a status; do not print it to STDOUT.
        $newdoc->toFile("$package_dir/metadata.xml")
            or die "Can't write $package_dir/metadata.xml\n";
    }
    else{
        # if there's no epub (or no cover), delete the itmsp directory
        rmtree($package_dir);
    }
    return;
}

# Builds the <assets> node holding the cover ("artwork") and the epub ("full").
# Reads the file names left in $global_coverfname / $global_contentname by find_assets.
sub build_assets{
    my $isbn=shift;
    my $assets_node=XML::LibXML::Element->new("assets");
    $assets_node->addChild(add_asset($isbn,"artwork",$global_coverfname));
    $assets_node->addChild(add_asset($isbn,"full",$global_contentname));
    return $assets_node;
}

# Builds one <asset type="..."> node with file name, size in bytes and md5 checksum.
#   $isbn  - isbn naming the package directory
#   $type  - value of the "type" attribute ("artwork" or "full")
#   $fname - file name inside the package directory
sub add_asset{
    my ($isbn,$type,$fname)=@_;
    my $path="$dest_dir/$isbn.itmsp/$fname";

    my $asset_node=XML::LibXML::Element->new("asset");
    $asset_node->setAttribute("type",$type);

    my $file_node=XML::LibXML::Element->new("data_file");
    $asset_node->addChild($file_node);
    $file_node->appendTextChild("file_name",$fname);
    $file_node->appendTextChild("size", -s $path);
    $file_node->addChild(add_checksum($path));
    return $asset_node;
}

# Builds a <checksum type="md5"> node containing the hex md5 digest of $file.
# Dies if the file cannot be opened.
sub add_checksum{
    my $file=shift;
    my $checksum_node=XML::LibXML::Element->new("checksum");
    $checksum_node->setAttribute("type","md5");

    # lexical handle + 3-arg open: no global FILE handle, no mode injection
    # through the file name, and the handle is closed before returning.
    open(my $fh, '<', $file) or die "Can't open '$file': $!";
    binmode($fh);
    my $digest=Digest::MD5->new->addfile($fh)->hexdigest;
    close($fh);

    $checksum_node->appendText($digest);
    return $checksum_node;
}

# File::Find callback: $_ is the current file name, $File::Find::name its full path.
# Any file whose name contains $global_isbn followed by an image or epub extension
# is copied into the package directory (whitespace in the name replaced by "_")
# and its new name is recorded in $global_coverfname / $global_contentname.
# A name is only recorded when the copy succeeded.
sub find_assets{
    my $entry=$_;
    return unless -f $entry; # directories cannot be copied as assets

    my $package_dir="$dest_dir/$global_isbn.itmsp";

    # \Q..\E: the isbn comes from the database and must be matched literally
    if ($entry =~ /(\Q$global_isbn\E.*?\.(?:jpg|png|gif|jpeg))\b/i){
        my $cover=$1;
        $cover=~s/\s+/_/g; #itunes doesn't like spaces in filenames
        if (copy($File::Find::name,"$package_dir/$cover")){
            $global_coverfname=$cover;
        }
        else{
            warn "Can't copy cover $File::Find::name: $!\n";
        }
    }
    if ($entry =~ /(\Q$global_isbn\E.*?\.(?:epub))\b/i){
        my $content=$1;
        $content=~s/\s+/_/g;
        if (copy($File::Find::name,"$package_dir/$content")){
            $global_contentname=$content;
        }
        else{
            warn "Can't copy content $File::Find::name: $!\n";
        }
    }
    return;
}

# Builds the <metadata> node for one book from a metadata-table row (hashref).
# Optional parts (print isbn, author, bisac codes) are only added when the
# corresponding column holds a true value.
sub build_metadata{
    my $hashref=shift;
    my %metadata=%$hashref;
    my $isbn=$metadata{$dbmap{'isbn'}};
    my $title=$metadata{$dbmap{'title'}};

    my $metadata_node=XML::LibXML::Element->new("metadata");
    $metadata_node->appendTextChild("publication_type", "book");
    $metadata_node->addChild(build_identifiers("isbn13",$isbn));

    #is there a print version?
    if(my $print_isbn=$metadata{$dbmap{'printisbn'}}){
        $metadata_node->addChild(add_related($print_isbn));
    }
    $metadata_node->appendTextChild("title",$title);

    #add the author (if any)
    if(my $author=$metadata{$dbmap{'author_sort_name'}}){
        $metadata_node->addChild(add_contributors($author));
    }

    #add languages:
    $metadata_node->addChild(add_languages());

    #skip page count

    #add bisac codes. unfortunately they're all smushed together
    if (my $bisac_codes=$metadata{$dbmap{'bisac_codes'}}){
        $metadata_node->addChild(add_subjects($bisac_codes));
    }

    #add the description
    $metadata_node->addChild(add_description($isbn));

    #add the publisher information:
    $metadata_node->appendTextChild("imprint",$imprint);
    $metadata_node->appendTextChild("publisher", $publisher);
    $metadata_node->appendTextChild("preorder_previews","true");
    $metadata_node->appendTextChild("publication_date",
                                    text_year($metadata{$dbmap{'year'}}));

    #build the territories and prices list
    $metadata_node->addChild(
        add_products($cleared_for_sale,
                     $metadata{$dbmap{'eprice'}},
                     $metadata{$dbmap{'print_price'}},
                     $drmfree)
    );
    return $metadata_node;
}

#generate the products node: one <product> per territory in %country_currency
#   $cleared_for_sale - text for <cleared_for_sale>
#   $eprice           - USD ebook price, turned into a price tier per territory
#   $print_price      - USD print price; when false no physical_list_price is written
#   $drm_free         - text for <drm_free>
sub add_products{
    my ($cleared_for_sale,$eprice,$print_price,$drm_free)=@_;
    my $todaystring=DateTime->today(time_zone=>'local')->ymd('-');
    my $products_node=XML::LibXML::Element->new("products");

    # sorted so the generated file is identical from run to run
    # (plain "keys" returns the territories in random order)
    foreach my $territory (sort keys %country_currency){
        my $currency=$country_currency{$territory};
        my $product_node=XML::LibXML::Element->new("product");
        $products_node->addChild($product_node);
        $product_node->appendTextChild("territory",$territory);
        $product_node->appendTextChild("cleared_for_sale",$cleared_for_sale);
        $product_node->appendTextChild("price_tier",get_price_tier($territory,$eprice));
        $product_node->appendTextChild("release_type","digital-only");
        $product_node->appendTextChild("sales_start_date",$todaystring);
        #preorder_sales and sales_start would go here
        if ($print_price){
            my $phys_price_node=XML::LibXML::Element->new("physical_list_price");
            $phys_price_node->setAttribute("currency",$currency);
            # two decimals: the raw product is a float such as 15.992
            $phys_price_node->appendText(
                sprintf("%.2f",$conversion_rate{$currency}*$print_price));
            $product_node->addChild($phys_price_node);
        }
        $product_node->appendTextChild("drm_free",$drm_free);
    }
    return $products_node;
}

#gets the price tier for USD by rounding.
#get the price tier for other currencies by adding 2 to the US price tier
#based on approximate exchange rates 10/18/12
#   $key    - territory code ('US' gets the plain USD tier)
#   $eprice - ebook price in USD; a missing price is treated as 0
# returns the integer tier
# NOTE(review): a price that rounds to 0 still becomes tier 2 outside the US -
# confirm that is intended for free titles.
sub get_price_tier{
    my ($key,$eprice)=@_;
    # avoid "uninitialized value" arithmetic when the price column is NULL/empty
    $eprice=0 unless defined $eprice && length $eprice;
    my $tier=int($eprice+0.5); # the USD tier
    $tier+=2 if $key ne 'US';
    return $tier;
}

#generate the year node text in appropriate format: "<year>-01-01"
sub text_year{
    my $year=shift;
    return "$year-01-01";
}

# pull the description from the existing itunes data because it will meet the length limit.
# Looks the text up in itunes_data by isbn13 and returns a <description format="html"> node;
# the node is left empty when no row exists for the isbn.
# Dies when the query cannot be prepared or executed.
sub add_description{
    my $isbn=shift;
    my $sth=$dbh->prepare("select description from itunes_data where isbn13=?")
        or die "Could not prepare description query: ".$dbh->errstr;
    $sth->execute($isbn)
        or die "Could not fetch description for $isbn: ".$sth->errstr;
    my ($desc)=$sth->fetchrow_array();
    $sth->finish; # only the first row is used
    $desc='' unless defined $desc;

    my $desc_node=XML::LibXML::Element->new("description");
    $desc_node->setAttribute("format","html");
    $desc_node->appendText($desc);
    return $desc_node;
}

# my BISAC codes are all combined in one cell, along with their text description
# they need to be split, and just the codes pulled out.
# $bisac_codes is a ";"-separated list; each entry contributes the first
# 3-word-character + 6-digit code found in it. Entries without a code are
# skipped, and the first code actually emitted is marked primary.
sub add_subjects{
    my $bisac_codes=shift;
    my $subjects=XML::LibXML::Element->new("subjects");
    my $emitted=0;
    foreach my $entry (split /;/,$bisac_codes){
        next unless $entry =~ /(\w{3}\d{6})/i; # no code => no empty <subject>
        my $code=$1;
        my $subject=XML::LibXML::Element->new("subject");
        $subject->setAttribute("scheme","bisac");
        $subject->setAttribute("primary","true") if $emitted++==0;
        $subject->appendText($code);
        $subjects->addChild($subject);
    }
    return $subjects;
}

# Builds the <languages> node; the main language is fixed to "eng".
sub add_languages{
    my $languages=XML::LibXML::Element->new("languages");
    my $language=XML::LibXML::Element->new("language");
    $language->setAttribute("type","main");
    $language->appendText("eng");
    $languages->addChild($language);
    return $languages;
}

#only handles single contributor right now
#need to make it sort out multiples.
# Builds the <contributors> node with a single primary contributor (role "author").
#   $author_sort_name - name in "Last, First" form, optionally carrying "Ph.D."
# <sort_name> receives the argument unchanged. <name> is the display form
# "First Last", with ", Ph.D." appended when the sort name contained "Ph.D.".
# A value without a comma (single or corporate name) is used as the display name
# as-is instead of relying on capture variables from a failed match.
sub add_contributors{
    my $author_sort_name=shift;

    # separate the degree from the name so one pattern handles both cases
    my $plain_name=$author_sort_name;
    my $has_phd=($plain_name =~ s/[\s,]*Ph\.D\.//) ? 1 : 0;
    $plain_name =~ s/^\s+//;
    $plain_name =~ s/\s+$//;

    my $author_name=$plain_name;
    # everything before the first comma is the surname, so multi-word
    # surnames ("Van Buren, Martin") are kept whole
    if ($plain_name =~ /^([^,]+?)\s*,\s*(.+)$/){
        $author_name="$2 $1";
    }
    $author_name.=", Ph.D." if $has_phd;

    my $contribs_node=XML::LibXML::Element->new("contributors");
    my $contrib_node=XML::LibXML::Element->new("contributor");
    $contribs_node->addChild($contrib_node);
    $contrib_node->appendTextChild("primary","true");
    $contrib_node->appendTextChild("name",$author_name);
    $contrib_node->appendTextChild("sort_name",$author_sort_name);

    my $roles_node=XML::LibXML::Element->new("roles");
    $roles_node->appendTextChild("role","author");
    $contrib_node->addChild($roles_node);
    return $contribs_node;
}

# if there's a print isbn, put together the related node:
# <related_content><related_item type="print-equivalent">
#   <identifier scheme="isbn13">$print_isbn</identifier>
sub add_related{
    my $print_isbn=shift;

    my $identifier=XML::LibXML::Element->new("identifier");
    $identifier->setAttribute("scheme","isbn13");
    $identifier->appendText($print_isbn);

    my $related_item=XML::LibXML::Element->new("related_item");
    $related_item->setAttribute("type","print-equivalent");
    $related_item->addChild($identifier);

    my $related_node=XML::LibXML::Element->new("related_content");
    $related_node->addChild($related_item);
    return $related_node;
}

# Builds <identifiers> holding one <identifier scheme="$idscheme">$idvalue</identifier>.
sub build_identifiers{
    my ($idscheme,$idvalue)=@_;

    my $identifier=XML::LibXML::Element->new("identifier");
    $identifier->setAttribute("scheme",$idscheme);
    $identifier->appendText($idvalue);

    my $identifiers=XML::LibXML::Element->new("identifiers");
    $identifiers->addChild($identifier);
    return $identifiers;
}