#!/usr/bin/perl
#
# id3normalizer - index the id3 tags (artist, album, track, title) of mp3
# files found under one or more directories, cache the index on disk, and
# report on it: stats, unique tag values, files by tag value, groups of
# similar tag values (YAML), and suggested shell commands for missing tags.
#
# See usage() at the bottom for the command line options.
use strict;
use warnings;
use vars qw($VERSION @IN %_IN @FILES $ABS_CACHE $BASE @TAG_NAMES $DATA);
use LEOCHARRE::CLI2 ':all' ,'qsrt:a:Sz:x';
use LEOCHARRE::Dir ':all';
use Cache::File;
use MP3::Tag;
#use Smart::Comments '###';
$VERSION = sprintf "%d.%02d", q$Revision: 1.3 $ =~ /(\d+)/g;
@TAG_NAMES = qw/artist album track title/; # the tags we care about

# Print a message to STDERR unless -q (quiet) was given. Always returns 1 so
# it can be chained with 'and'.
sub say {
    $opt_q and return 1;
    warn "@_\n";
    1
}

BEGIN {
    $BASE = "$ENV{HOME}/.id3normalizer";
    -d $BASE or mkdir $BASE or die("Cant make '$BASE', $!");
    $ABS_CACHE = "$BASE/cache";
    $_IN{"$ENV{HOME}/music"}++; # ~/music is always searched
    ### base : $BASE
    ### cache : $ABS_CACHE
}

my $cache = Cache::File->new( cache_root => $ABS_CACHE );
debug( "Cache initialized");

# @IN is where to look for files
if ( argv_dirs_count() ){
    my @dirs = argv_dirs();
    debug("adding @dirs");
    $_IN{$_}++ for @dirs;
}
# sorted so that directory order (and so the output) is stable between runs
@IN = sort keys %_IN;
debug("in: @IN");

debug("getting list of mp3s..");
@FILES = abs_mp3s();
my $count = scalar @FILES;
say("Got $count mp3 files.\n");

$opt_r and DATA_reload() and exit;

# load DATA the main data by tags/files
DATA_load();

# each of these returns immediately unless its option was given,
# and exits the program once it has produced its output
stats();
find_by_tag_value();
show_file_info();
show_all_unique_tag(); # like all artists, or whatever
run_similarity_report();
suggest_for_missing_tags();

die("No args\n");

# by using simplest cache..
#for my $tag_name (@TAG_NAMES){
#   my $data = _files_by_tag($tag_name);
#   $DATA{$tag_name} = $data;
#}


# If file arguments were given, print the tag info of every one whose name
# ends in 'mp3' (case insensitive), then exit.
sub show_file_info {
    argv_files_count() or return;
    my @files = grep { /mp3$/i } argv_files();
    debug();
    for my $abs (@files){
        _show_tags_for_file($abs);
    }
    exit;
}

# With -t TAG and no -a VALUE: print every unique value of TAG, one per line,
# followed by the total, then exit.
sub show_all_unique_tag {
    $opt_t or return;
    $opt_a and return; # not if we have a att val

    my @uniques = _unique_tag_values($opt_t);
    my $count = scalar @uniques;
    print "$_\n" for @uniques;
    printf "\ntotal: %s\n", $count;
    exit;
}

#, ok.. how many of each?
# With -s: print cache location, search paths, total file count and, per tag,
# the number of unique values and of files whose value is 'unknown'; then exit.
sub stats {
    $opt_s or return;

    print "cache: $ABS_CACHE\n";
    print "path: $_\n" for @IN;
    printf "total mp3s: %s\n\n", (scalar @FILES);

    for my $tag_name (sort keys %$DATA){
        my @uniques = _unique_tag_values( $tag_name );
        my $count = scalar @uniques;

        # scalar context: _files_by_tag_value() hands back an array ref
        my $unknowns = _files_by_tag_value($tag_name, 'unknown');
        my $ucount = scalar @$unknowns;

        printf "Unique %s: %s\nFiles missing tag value: %s\n\n", $tag_name, $count, $ucount;
    }
    exit;
}

# Print the path of one file followed by its 'tag: value' lines.
# Prints nothing if no tag info could be obtained for the file.
sub _show_tags_for_file {
    my $abs = shift; # must be abs path

    # 'return', not 'next': this sub has no loop of its own to continue
    my $info = _id3_info($abs) or return;

    print "$abs\n";
    for my $tag (sort keys %$info){
        printf "%s: '%s'\n", $tag, $info->{$tag};
    }
    print "\n";
}

# With -x: for the artist and album tags, print one 'id3tag' shell command per
# file whose tag value is 'unknown', guessing the value from the directory
# names between the search directory and the file; then exit.
#   artist <- first subdirectory under the search directory
#   album  <- second subdirectory (an 'Artist Name - ' prefix is dropped)
sub suggest_for_missing_tags { # as sh commands
    $opt_x or return;
    require String::Prettify;

    TAG: for my $tag_name (sort keys %$DATA){

        # skip track and title tags..
        # (written as one condition: 'A or B and next' would only test B)
        next TAG if $tag_name eq 'title' or $tag_name eq 'track';

        my @unknowns = _files_by_tag_value($tag_name, 'unknown');
        my $ucount = scalar @unknowns;
        $ucount or next TAG;

        printf "# Tag: %s\n# files missing tag: %s\n# suggestions:\n", $tag_name, $ucount;

        FILE: for my $abs (@unknowns){
            my $suggestion = '';

            if ($tag_name eq 'artist'){
                # if inside abs music, what's the next dir..
                # such as ~/music/element/..
                # and prettify that.
                my $subdir = _subdir_under_basedir($abs, 0);
                defined $subdir
                    and $suggestion = String::Prettify::prettify($subdir);
            }
            elsif ($tag_name eq 'album'){
                my $subdir = _subdir_under_basedir($abs, 1);
                if ( defined $subdir ){
                    # sometimes the album subdirs are named like..
                    # Artist Name - album name
                    # if so, keep only what follows the dash
                    if ( $subdir =~ m/[a-zA-Z0-9 ]+[\-](.+)/ ){
                        $subdir = $1;
                    }
                    $suggestion = String::Prettify::prettify($subdir);
                }
            }

            # never emit a command that would set the tag to an empty string
            unless ( defined $suggestion and length $suggestion ){
                printf "# no suggestion for %s\n", $abs;
                next FILE;
            }

            printf "id3tag --%s=%s %s\n",
                $tag_name, _sh_quote($suggestion), _sh_quote($abs);
        }
        print "\n\n";
    }
    exit;
}

# Return the name of the directory found $depth levels below a search
# directory (@IN) on the way to file $abs: depth 0 is the directory directly
# under the search directory, depth 1 the one below that.
# If several search directories contain $abs, the last one in @IN wins.
# Returns undef if no search directory yields such a subdirectory.
sub _subdir_under_basedir {
    my ($abs, $depth) = @_;
    my $skip = '(?:[^/]+/)' x $depth;
    my $subdir;

    BASEDIR: for my $basedir (@IN){
        # \Q..\E: directory names are literal text, not regex syntax
        $abs =~ m{^\Q$basedir\E} or next BASEDIR;

        # can we get a subdir...
        $abs =~ m{^\Q$basedir\E/${skip}([^/]+)/}
            or warn("cant get subdir for $abs") and next BASEDIR;
        $subdir = $1;
    }
    return $subdir;
}

# Quote a string as a single sh word: wrap it in single quotes and replace
# every embedded single quote with '\'' so the shell expands nothing in it.
sub _sh_quote {
    my $string = shift;
    defined $string or $string = '';
    $string =~ s/'/'\\''/g;
    return "'$string'";
}

# With -t TAG and -a VALUE: print the files whose TAG has VALUE, then exit.
sub find_by_tag_value {
    $opt_t and $opt_a or return;
    debug("Looking for files tag '$opt_t' value '$opt_a'\n");

    my @files = _files_by_tag_value($opt_t, $opt_a);
    my $count = scalar @files;
    debug("Got $count");

    print "$_\n" for @files;
    print "\n";
    exit;
}

# With -S: group the unique artist and album values by string similarity
# (threshold -z, default 0.7) and print the groups as YAML, then exit.
# Each group records its tag name, its values, the files and file count per
# value, and the values ordered from most to least files.
sub run_similarity_report {
    $opt_S or return;
    $opt_z ||= '0.7';
    my @report;

    require String::Similarity::Group;
    require YAML;

    TAG: for my $tag_name (qw/artist album/){ #should do title also?? not as important ?
        debug("doing $tag_name");

        my @uniques = _unique_tag_values($tag_name);
        my $count = scalar @uniques;
        debug("unique count: $count, grouping..");

        my @groups = String::Similarity::Group::groups( $opt_z, \@uniques );
        my $gcount = scalar @groups;
        debug("count of groups: $gcount");

        # ok, we need to have the list of files..
        GROUP: for my $group ( @groups ){
            my $groupdata = {
                tagname   => $tag_name,
                tagvalues => $group,
            };

            VALUE: for my $similar_tag_value ( @$group ){
                my @files = _files_by_tag_value( $tag_name, $similar_tag_value );
                $groupdata->{tagvalue}->{$similar_tag_value}->{files} = \@files;
                $groupdata->{tagvalue}->{$similar_tag_value}->{count} = scalar @files;
            }

            # which val has highest count ?
            my @order = sort {
                $groupdata->{tagvalue}->{$b}->{count}
                <=>
                $groupdata->{tagvalue}->{$a}->{count}
            } @$group;

            $groupdata->{tagvalues_by_order_of_occurrence} = \@order;
            $groupdata->{tagvalue_with_highest_occurrence} = $order[0];

            push @report, $groupdata;
        }
    }

    my $out = YAML::Dump(@report);
    print $out;
    exit;
}

# unused..
# do via: _files_by_tag_value( $tag_name, 'unknown' );
exit;

# get unique values of a tag..
# for example 'artist', what are the unique artist names found? etc
# Returns the sorted values as a list, or as an array ref in scalar context.
sub _unique_tag_values {
    my $tag_name = shift;
    $tag_name or die;

    # if artist, for example, how many?
    my @all = ( sort keys %{$DATA->{$tag_name}} );
    wantarray ? (@all) : \@all;
}

# if we want all the files that have artist= u2
# ALSO... if the tag value is 'unknown' !!! :-)
# Returns the files as a list, or as an array ref in scalar context; the
# result is empty when nothing is stored for that tag/value pair.
sub _files_by_tag_value {
    my ($tag_name, $value) = @_;
    $value or die;

    my $data = $DATA->{$tag_name}->{$value};
    unless ( $data ){
        debug("Had nothing for '$tag_name', value '$value'");
        # assigned unconditionally, whatever debug() returns, so the
        # dereference below never sees undef
        $data = [];
    }
    wantarray ? (@$data) : $data;
}

=pod

$DATA = {

   artist => {
      u2 => [ files ],
      unknown => [ files ],
   },

   track => {
      1 => [ files ],
      2 => [ files ],
   },

   title => {
      'fire' => [ files ],
      unknown => [ files ],
   },

   album => {
      'self titled' => [ files ],
      unknown => [ files ],
   },
}

=cut

# this analyzes all the mp3s on disk and stores
# by tags, which files are that..
# so that you can ask.. what are the artists present,
# which files have that artist
# which files do not have an artist, track.. etc.
# Uses the cached copy of DATA when there is one; otherwise builds it from
# @FILES and caches it. Returns $DATA.
sub DATA_load {
    debug();
    unless( $DATA = $cache->thaw( 'DATA' ) ){
        say("Reloading DATA..\n");

        TAG: for my $tag_name (@TAG_NAMES){
            my $data = _files_by_tag( $tag_name );
            $DATA->{$tag_name} = $data;
        }
        $cache->freeze( DATA => $DATA, 'never' ); # every day??
    }
    $DATA;
}

# Drop cached data so the next DATA_load() rebuilds it. Returns 1.
# NOTE(review): the Cache documentation lists clear() without a key argument
# (remove($key) is the per-key call), so this presumably empties the whole
# cache, per-file tag info included - confirm that is what -r should do.
sub DATA_reset {
    $cache->clear('DATA');
    1
}

# Throw away the cached data and rebuild it from the files on disk.
sub DATA_reload {
    say("Reloading DATA, scanning dirs for files, etc..\nThis may take a while..\n\n");
    DATA_reset();
    DATA_load();
    say("Done.\n");
}

# make hash, each key is tag info, each val is a array of files
# so.. like..
# {
#   $artist_name   => [ files ]
#   $artist_name_2 => [ files ]
# }
# Files for which no tag info could be obtained are left out.
sub _files_by_tag {
    my $tag_name = shift;
    $tag_name or die;
    debug("loading $tag_name..");

    my $data = {};

    for my $abs ( @FILES ){ ### Working===[%]
        my $tag_value;

        # by storing path/tag individual values..
        # $tag_value = _abs_to_tag( $abs, $tag_name );# WAY SLOW

        # by storing hashref of info..
        my $info = _id3_info($abs) or next;
        $tag_value = $info->{$tag_name};

        #$tag_value ||= "$tag_name\_unknown";
        push @{$data->{$tag_value}}, $abs;
    }
    debug("done loading $tag_name");
    $data;
}

# Return a hash ref with one entry per name in @TAG_NAMES for file $abs,
# taken from the cache when present, otherwise read with MP3::Tag and cached.
# A tag whose value is missing or false is stored as 'unknown'.
# Returns nothing if no MP3::Tag object could be created for the file.
sub _id3_info {
    my $abs = shift;
    $abs or die;

    my $info;
    unless ( $info = $cache->thaw( "_id3_info_$abs") ){
        debug("loading MP3::Tag...");
        my $tag = mp3_tag( $abs ) or return;

        my $auto = $tag->autoinfo;
        unless( $auto ){
            warn("Cant get autoinfo for $abs");
            $auto = {};
        }

        for my $tag_name (@TAG_NAMES){
            $info->{$tag_name} = ($auto->{$tag_name} || 'unknown');
        }
        $cache->freeze( "_id3_info_$abs", $info, 'never' );
    }
    $info;
}

# Return the paths of all regular files named *.mp3 (case insensitive) that
# find(1) reports under the directories in @IN.
# Dies if the find command cannot be started.
sub abs_mp3s {
    #File::Find::Rule->new;

    # list-form pipe open: no shell is involved, so directory names with
    # spaces or shell metacharacters reach find unchanged
    open( my $find, '-|', 'find', @IN, '-type', 'f', '-iname', '*.mp3' )
        or die("Cant run find: $!");
    my @found = <$find>;
    # close() status deliberately ignored; whatever find printed is still used
    close $find;

    chomp @found;
    return grep { length } @found;
}

# Return a new MP3::Tag object for file $abs; warns and returns nothing if
# one could not be created.
sub mp3_tag {
    my $abs = shift;
    $abs or die;

    my $tag;
    #unless( $tag = $cache->thaw( "mp3_tag_$abs" ) ){
    $tag = MP3::Tag->new($abs)
        or warn("cant instance MP3::Tag for '$abs'")
        and return;
    #   $cache->freeze( "mp3_tag_$abs", $tag, '10 minutes' );
    #}
    $tag;
}

=pod

# NOPE _ THIS IS SO SLOW

# ANOTHER WAY... would it be faster to store abs => tag data?
# usage:
#     my $artist = _abs_to_tag( './file.mp3', 'artist' );
sub _abs_to_tag {
   my($abs,$tag) = @_;
   $tag or die;

   my $value;
   unless( $value = $cache->get( "$tag\_$abs" ) ){
      my $tag = mp3_tag( $abs ) or return;
      my $info = $tag->autoinfo or return;
      $value = $info->{$tag};
      $cache->set("$tag\_$abs", $value, 'never');
   }
   $value;
}

=cut

exit;

# Return the help text for this program.
sub usage {
    qq{$0 [OPTION].. [DIR]..

   -d          debug
   -h          help
   -v          version
   -q          quiet
   -r          reload/update
   -s          stats
   -t string   tag name
   -a string   value
   -S          run similarity test for main tags, print as YAML
   -z float    similarity threshold
   -x          suggest for missing tags, shows sh commands to stdout

Any DIR arguments will be interpreted as the directory to look up mp3s in.
By default we do look inside ~/music

To clear cache, delete $ABS_CACHE dir.

If you want to find all files with artist=unknown
   $0 -t artist -a unknown

If you want to see all artists
   $0 -t artist

If you want to see the id3 tag info for files..
   $0 ./file.mp3

If you want to see which artists or albums etc are similar:
   $0 -t artist | xargs --delimiter=\\\\n gbs

}}

# What follows __END__ is non-code residue that came with the file; it is
# kept, but placed where perl never parses it.
__END__
/c>
id3normalizer_parse_report.pl :