#!/usr/bin/perl
#
# Packs a list of album directories into fixed-size "buckets" using
# Algorithm::Bucketizer and prints the resulting layout.
#
# Input is read from the __DATA__ section at the end of this file, one
# record per line with TAB-separated fields:
#
#     name <TAB> size-in-bytes <TAB> path
#
# Paths that appear to belong together (see @patterns) are merged into a
# single bucketizer item so that they land in the same bucket, as long as
# the merged group still fits into one bucket.  Records without a path are
# not packed; their sizes are only summed up and reported at the end.

use strict;
use warnings;

use Algorithm::Bucketizer;
use Number::Format qw(:subs);

# Each pattern's first capture is a grouping key.  Two consecutive paths (in
# sorted order) are grouped when any one pattern yields the same non-empty
# key for both of them.
my @patterns = (
    qr/(.*)\(*dis[ck]\s\d+\)*$/i,
    qr/(.*)cd\s*\d+$/,
    qr/^([^-]+\s\-\s).*$/,
);

# Capacity of one bucket, in bytes.
# NOTE(review): presumably the usable size of the target medium -- confirm.
my $mediasize = 4.4 * 1000 * 1024 * 1024;
print "Buckets are ", format_bytes($mediasize), " each.\n";

# Create a bucketizer
my $bucketizer = Algorithm::Bucketizer->new(
    bucketsize => $mediasize,
    algorithm  => "retry",
);

# commit_group($bucketizer, \@paths, $size)
#
# Reports the group on STDOUT and hands it to the bucketizer as ONE item.
# The item name is every path followed by a TAB; the report loop at the
# bottom splits it on TABs again to list the individual paths.
#
# Dies when add_item() returns a false value, i.e. the group could not be
# placed.  $! is deliberately not used in the message: the bucketizer does
# not report failures through errno.
sub commit_group {
    my ( $packer, $paths, $size ) = @_;

    my $count = scalar @{$paths};
    my $item  = join '', map { "$_\t" } @{$paths};

    print "COMMITING $count ($size): $item\n";
    $packer->add_item( $item, $size )
        or die "Cannot place group of $count path(s) totalling $size bytes "
        . "(bucket size is $mediasize bytes)";

    return;
}

my %sizes;               # path => size in bytes
my $skipped_size = 0;    # total size of records that have no path

while ( my $record = <DATA> ) {
    chomp $record;
    $record =~ s/\r\z//;    # tolerate CRLF line endings

    # The first field (the display name) is not used.
    my ( undef, $asize, $path ) = split /\t/, $record;
    $asize = 0  unless defined $asize;
    $path  = '' unless defined $path;

    print $path, "\n";
    if ( $path eq "" ) {
        $skipped_size += $asize;
    }
    else {
        $sizes{$path} = $asize;
    }
}
print "\n";

my @group_paths;         # paths collected for the group being built
my $group_size = 0;      # combined size of @group_paths
my @previous_key;        # per pattern: grouping key of the preceding path

for my $path ( sort keys %sizes ) {
    my $size        = $sizes{$path};
    my $joins_group = 0;

    for my $i ( 0 .. $#patterns ) {
        my ($key) = $path =~ $patterns[$i];
        my $previous = $previous_key[$i];

        if (   defined $key
            && $key ne ""
            && defined $previous
            && $key eq $previous )
        {
            $joins_group = 1;
        }
        $previous_key[$i] = $key;
    }

    # override the pattern matching, if we're about to create an unfileable lump
    if ( $group_size + $size > $mediasize ) {
        $joins_group = 0;
    }

    # This path starts a new group: flush the one collected so far.
    if ( !$joins_group && @group_paths ) {
        commit_group( $bucketizer, \@group_paths, $group_size );
        @group_paths = ();
        $group_size  = 0;
    }

    push @group_paths, $path;
    $group_size += $size;
}

# Flush the trailing group; there is none when the input held no paths.
commit_group( $bucketizer, \@group_paths, $group_size ) if @group_paths;

$bucketizer->optimize( algorithm => "random", maxtime => 60 );

for my $bucket ( $bucketizer->buckets() ) {
    print "\n\n# Bucket ", $bucket->serial(), " has ",
        format_bytes( $bucket->level() ), " in it.\n";

    for my $item ( $bucket->items() ) {
        # split() drops the empty field after the item's trailing TAB.
        print " ", join( "\n ", split( /\t/, $item ) ), "\n";
    }
    print "\n";
}

print "\n\nWe skipped ", format_bytes($skipped_size), " of Misc.\n\n";

__DATA__
The Creatures - A bestiary of	85907653	/var/music/WholeAlbums2/The Creatures - A bestiary of
The Cure - The Head On The Door	54978322	/var/music/WholeAlbums2/The Cure - The Head On The Door
The Disposable Heroes Of Hiphoprisy - Hypocracy Is The Greatest Luxury	99314506	/var/music/WholeAlbums2/The Disposable Heroes Of Hiphoprisy - Hypocracy Is The Greatest Luxury
The Hives - Your New Favourite Band	33943504	/var/music/WholeAlbums2/The Hives - Your New Favourite Band
The Jimi Hendrix Experience - Electric Ladyland	110201733	/var/music/WholeAlbums2/The Jimi Hendrix Experience - Electric Ladyland
Prince - Emancipation - Disc 1	78945501	/var/music/WholeAlbums2/Prince - Emancipation - Disc 1
Prince - Emancipation - Disc 2	80622538	/var/music/WholeAlbums2/Prince - Emancipation - Disc 2
Prince - Emancipation - Disc 3	77673429	/var/music/WholeAlbums2/Prince - Emancipation - Disc 3
Talking Heads - Stop Making Sense	1992195	/var/music/WholeAlbums2/Talking Heads - Stop Making Sense
Talking Heads - The Name Of This Band Is Talking Heads	125134992	/var/music/WholeAlbums2/Talking Heads - The Name Of This Band Is Talking Heads
Tracy Chapman - Matters of the Heart	63415367	/var/music/WholeAlbums2/Tracy Chapman - Matters of the Heart
Turin Brakes - the Optimist LP	52981417	/var/music/WholeAlbums2/Turin Brakes - the Optimist LP
The Vines - Highly Evolved	127977590	/var/music/WholeAlbums2/The Vines - Highly Evolved
The Wannadies - Bagsy Me	47384936	/var/music/WholeAlbums2/The Wannadies - Bagsy Me