Creates a dependency and statistics report of a directory with BASH scripts.
The script will generate a dependency report for each BASH script in the specified path.
Right now it calls 'rpm' to find the RPM package that owns each binary. If your system doesn't have RPM, just replace the line that calls it with an undef variable.
Guess I will make a test for it in the next version.
The current output looks like below for each script and in the end there is a summary for all scripts.
/etc/init.d/multipathd
|-- binary: basename
|-- binary: test
|-- teardown_slaves
|   |-- binary: pwd
|   |-- binary: sed
|   |-- binary: echo
|   |-- binary: readlink
|   |-- function: teardown_slaves
|   |-- binary: sed
|   `-- binary: echo
|-- binary: test
|-- binary: echo
|-- binary: touch
|-- binary: echo
|-- binary: echo
|-- binary: echo
`-- binary: echo

/etc/init.d/multipathd
    Code lines:     102
    Comment lines:  13
    Empty lines:    12
    Total lines:    127
    Function(s):
        teardown_slaves
    Uses function(s):
        teardown_slaves
    Uses binarie(s):
        basename
        echo
        info
        pwd
        readlink
        rm
        sed
        test
        touch
    Uses RPM(s):
        coreutils-5.97-19.el5
        info-4.8-14.el5
        sed-4.1.5-5.fc6
The summary looks as follows. This output is slightly cut down and is taken from CentOS 5.x running a report on "/etc/init.d".
-------------------------------------------
Duplicate local function names:
5    stop
5    start
2    status
2    restart
2    condrestart
1    do_restart_sanity_check
1    makedev
1    start_isdnlog
...
-------------------------------------------
Most used functions:
18   stop
12   start
6    restart
4    status
4    invoke_command
...
-------------------------------------------
Most used binaries:
422  echo
72   rm
57   touch
40   grep
36   test
30   awk
...
-------------------------------------------
Most used RPM's:
671  coreutils-5.97-19.el5
50   grep-2.5.1-54.2.el5
30   gawk-3.1.5-14.el5
19   util-linux-2.13-0.50.el5
10   sed-4.1.5-5.fc6
9    file-4.17-15.el5_3.1
4    xorg-x11-xfs-1.0.2-4
...
-------------------------------------------
Total code lines:       4438
Total comment lines:    989
Total empty lines:      705
Total lines:            6132
Total files:            51
Without further ado, here's the code in all its ugliness.
Updated to v0.9: solved some of the issues mentioned in the replies. Thanks again for the input.
#!/usr/bin/perl
# Bash Parser v0.9
#
# Creates a dependency and statistics report of a directory with BASH scripts.
#
# Copyright (c) 2009, Michael Persson (mickep76@mac.com)
# All rights reserved.
#
# Usage: bash_reporter directory
#
# For every BASH script below the directory the report lists the files it
# sources, the functions it defines and calls, the binaries (found via PATH)
# it uses and, when the 'rpm' command is available, the RPMs owning them.
# A summary over all scripts is printed at the end.
#
# Known limitations of the line based parser:
#   * match() only records a function when the opening brace stands in
#     column 0 of the line directly after the declaration.
#   * Only "source FILE" is recognised, not the ". FILE" form.
#   * Variables in sourced paths are expanded one level deep, using the last
#     assignment seen so far.

use strict;
use warnings;
use File::Glob qw(bsd_glob);

# Ignore functions
our %ignore_functions = (
#    'echo' => 1,
#    'true' => 1
);

# Ignore binaries
our %ignore_binaries = (
#    'sed'  => 1,
#    'awk'  => 1,
#    'echo' => 1,
#    'cut'  => 1
);

# BASH keywords 'compgen -k'
our %bash_keywords = map { $_ => 1 } qw(
    if then else elif fi case esac for select while until do done in
    function time
);

# A "source FILE" statement; the path may contain a $VARIABLE.
our $source_re = qr/^[ \t]*source +([\w\-\/\.\_\$]+)/;

# Global hashes
our %binaries_in_path;    # binary name  => full path of first PATH match
our %binaries_to_rpm;     # binary name  => owning RPM (undef = looked up, none)
our %all_sources;         # every sourced file that could be read
our %all_functions;       # functions known to the script currently scanned
our %dependencies;        # tree node    => list of child nodes
our %variables;           # variable     => raw right-hand side of assignment

# Global statistics hashes
our %stat_sources;
our %stat_functions;
our %stat_uses_binaries;
our %stat_uses_functions;
our %stat_uses_rpms;

# Global totals
our $tot_code_lines    = 0;
our $tot_comment_lines = 0;
our $tot_empty_lines   = 0;
our $tot_lines         = 0;
our $tot_files         = 0;

# Program entry point.
# Takes the command line arguments (exactly one directory), prints the
# per-file reports followed by the summary and returns the exit status 0.
# Dies with a usage message on wrong arguments.
sub main {
    my @args = @_;

    # Check arguments
    die "usage: bash_reporter directory\n" if @args != 1;
    my $path = $args[0];
    die "Please specify a valid directory\n" if not -d $path;

    # Get all binaries in the PATH
    get_binaries();

    # Recurse through all files in the path
    recurse($path);

    # Go through all sourced files. The key list is taken before the loop,
    # so sources discovered while reporting do not extend the iteration.
    foreach my $source (sort keys %all_sources) {
        match($source, 0);
    }

    # Print statistics
    print_stat_hash(\%stat_sources,        "Most sourced files:");
    print_stat_hash(\%stat_functions,      "Duplicate local function names:", 1);
    print_stat_hash(\%stat_uses_functions, "Most used functions:");
    print_stat_hash(\%stat_uses_binaries,  "Most used binaries:");
    print_stat_hash(\%stat_uses_rpms,      "Most used RPM's:");

    # Print totals
    print_separator();
    print "Total code lines:\t$tot_code_lines\n";
    print "Total comment lines:\t$tot_comment_lines\n";
    print "Total empty lines:\t$tot_empty_lines\n";
    print "Total lines:\t\t$tot_lines\n";
    print "Total files:\t\t$tot_files\n";

    return 0;
}

# Get all binaries in the PATH.
# Fills %binaries_in_path with every executable regular file. As in the
# shell, the first directory of PATH that provides a name wins.
sub get_binaries {
    my $search_path = defined $ENV{'PATH'} ? $ENV{'PATH'} : '';

    foreach my $directory (split /:/, $search_path) {
        # An empty PATH element means the current directory.
        $directory = '.' if $directory eq '';

        # bsd_glob() does not split the pattern on whitespace.
        foreach my $file (bsd_glob("$directory/*")) {
            next unless -f $file && -x $file;

            (my $name = $file) =~ s{.*/}{};
            $binaries_in_path{$name} = $file
                unless exists $binaries_in_path{$name};
        }
    }

    return;
}

# Recurse through all files in a given path.
#   $path - directory to scan
#   $seen - optional hash reference of directories already visited
#           ("device:inode" keys); protects against symlink loops
sub recurse {
    my ($path, $seen) = @_;
    $seen ||= {};

    my ($device, $inode) = stat $path;
    if (defined $inode && $inode) {
        return if $seen->{"$device:$inode"}++;
    }

    foreach my $entry (bsd_glob("$path/*")) {
        if (-d $entry) {
            recurse($entry, $seen);
        }
        elsif (-f $entry) {
            # Sockets, FIFOs and dangling symlinks are not scripts.
            match($entry, 1);
        }
    }

    return;
}

# Look up the RPM owning a binary.
#   $word - name of a binary present in %binaries_in_path
# Returns the package name, or undef when the binary is not owned by any
# package or the system has no 'rpm' command. Both outcomes are cached in
# %binaries_to_rpm so that 'rpm' runs at most once per binary.
sub rpm_for_binary {
    my $word = shift;

    if (!exists $binaries_to_rpm{$word}) {
        $binaries_to_rpm{$word} = _query_rpm($binaries_in_path{$word});
    }

    return $binaries_to_rpm{$word};
}

# Run "rpm -qf FILE" without a shell and return the package name or undef.
sub _query_rpm {
    my $binary = shift;

    # No RPM on this system: report no package instead of failing.
    return undef unless defined $binary && exists $binaries_in_path{'rpm'};

    # List form of the pipe open: the path is never interpreted by a shell.
    my $pid = open my $rpm_fh, '-|', $binaries_in_path{'rpm'}, '-qf', $binary;
    return undef unless $pid;

    my $output = do { local $/; <$rpm_fh> };
    close $rpm_fh;

    return undef unless defined $output;
    return undef if $output =~ /not owned/;
    return $output =~ /([\w\.\-\_]+)/ ? $1 : undef;
}

# Find sources, functions and binaries used in file and print its report.
#   $file          - file to scan
#   $check_if_bash - 1: silently skip the file unless its first line
#                    contains "bash"
# Resets %all_functions, %dependencies and %variables, updates the global
# statistics and totals. An unreadable file is skipped with a warning.
sub match {
    my ($file, $check_if_bash) = @_;
    $check_if_bash = 0 unless defined $check_if_bash;

    %all_functions = ();
    %dependencies  = ();
    %variables     = ();

    my %functions;
    my %sources;
    my %uses_binaries;
    my %uses_functions;
    my %uses_rpms;
    my %visited_sources;          # cycle guard shared by all get_sources() calls

    my $function;                 # function whose body is being read
    my $function_line;            # line number of its declaration
    my $text_block;               # active %%...%% marker, if any
    my $line_number   = 1;
    my $comment_lines = 0;
    my $empty_lines   = 0;

    my $fh;
    unless (open $fh, '<', $file) {
        warn "Failed to open file $file: $!\n";
        return;
    }

    while (my $line = <$fh>) {
        chomp $line;
        my $ignore_line = 0;

        # Check if file is a Bash script
        if ($check_if_bash == 1 && $line_number == 1 && $line !~ /bash/) {
            close $fh;
            return;
        }
        # Count comments and empty lines.
        # NOTE(review): %%...%% lines toggle a block whose lines are not
        # scanned; presumably embedded template text - confirm.
        elsif ($line =~ /(\%\%.*\%\%)/) {
            my $marker = $1;
            if (defined $text_block && $marker eq $text_block) {
                $text_block = undef;
            }
            else {
                $text_block = $marker;
            }
        }
        elsif ($line =~ /^[ \t]*#/) {
            $comment_lines++;
            $ignore_line = 1;
        }
        elsif ($text_block) {
            $ignore_line = 1;
        }
        elsif ($line eq '') {
            $empty_lines++;
        }
        # Get variable assignments
        elsif ($line =~ /([\w\.\-\_]+)=(.*)/) {
            $variables{$1} = $2;
        }
        # Get files sourced by script
        elsif ($line =~ $source_re) {
            my $source = expand_path($1);
            $sources{$source}++;
            $stat_sources{$source}++;
            push @{ $dependencies{$file} }, $source;
            get_sources($source, \%visited_sources);
        }
        # Get functions in script: remember the declaration ...
        elsif (($line =~ /function +([\w\.\-\_]+)/ || $line =~ /([\w\.\-\_]+)\(\)/)
            && !$bash_keywords{$1})
        {
            $function      = $1;
            $function_line = $line_number;
        }
        # ... and accept it if the very next line opens the body.
        elsif (defined $function && ($function_line + 1) == $line_number) {
            if ($line =~ /^\{/) {
                $functions{$function}++;
                $all_functions{$function}++;
                $stat_functions{$function}++;
                push @{ $dependencies{$file} }, $function;
            }
            else {
                $function      = undef;
                $function_line = undef;
            }
        }
        elsif ($line =~ /^\}/) {
            $function      = undef;
            $function_line = undef;
        }

        $line_number++;
        next if $ignore_line;

        # Get all binaries and functions called from script
        $line =~ s/\#.*//;

        # Calls made inside a function body hang below that function in the
        # dependency tree, everything else directly below the file.
        my $owner = defined $function ? $function : $file;

        foreach my $word (split /[`;| \t]+/, $line) {
            if ($all_functions{$word} && !$ignore_functions{$word}) {
                $uses_functions{$word}++;
                $stat_uses_functions{$word}++;
                push @{ $dependencies{$owner} }, "function: $word";
            }
            elsif (defined $binaries_in_path{$word} && !$ignore_binaries{$word}) {
                $uses_binaries{$word}++;
                $stat_uses_binaries{$word}++;
                push @{ $dependencies{$owner} }, "binary: $word";

                my $rpm = rpm_for_binary($word);
                if (defined $rpm) {
                    $uses_rpms{$rpm}++;
                    $stat_uses_rpms{$rpm}++;
                }
            }
        }
    }
    close $fh;

    my $total_lines = $line_number - 1;
    my $code_lines  = $total_lines - $comment_lines - $empty_lines;

    $tot_code_lines    += $code_lines;
    $tot_comment_lines += $comment_lines;
    $tot_empty_lines   += $empty_lines;
    $tot_lines         += $total_lines;
    $tot_files++;

    print_separator();
    print_dependencies($file);

    print "\n$file\n";
    printf "\tCode lines:\t%s\n",    $code_lines;
    printf "\tComment lines:\t%s\n", $comment_lines;
    printf "\tEmpty lines:\t%s\n",   $empty_lines;
    printf "\tTotal lines:\t%s\n",   $total_lines;

    print_hash(\%sources,        'Source(s):');
    print_hash(\%functions,      'Function(s):');
    print_hash(\%uses_functions, 'Uses function(s):');
    print_hash(\%uses_binaries,  'Uses binarie(s):');
    print_hash(\%uses_rpms,      'Uses RPM(s):');

    return;
}

# Get all functions for sourced files and place in all_functions.
#   $file - sourced file to read; files it sources are followed recursively
#   $seen - optional hash reference of files already followed, which stops
#           files that source each other from recursing forever
# A file that is missing or unreadable is skipped with a warning, because
# scripts often source optional files.
sub get_sources {
    my ($file, $seen) = @_;
    $seen ||= {};
    return if $seen->{$file}++;

    if (!-f $file) {
        warn "Skipping sourced file $file: not a regular file\n";
        return;
    }

    my $fh;
    unless (open $fh, '<', $file) {
        warn "Failed to open file $file: $!\n";
        return;
    }
    $all_sources{$file}++;

    my $function;
    while (my $line = <$fh>) {
        chomp $line;

        # Get files sourced by script
        if ($line =~ $source_re) {
            my $source = expand_path($1);
            push @{ $dependencies{$file} }, $source;
            get_sources($source, $seen);
        }
        # Get variable assignments
        elsif ($line =~ /([\w\.\-\_]+)=(.*)/) {
            $variables{$1} = $2;
        }
        # Get functions in script
        elsif ($line =~ /function +([\w\.\-]+)/ || $line =~ /([\w\.\-]+)\(\)/) {
            $function = $1;
        }
        elsif (defined $function && $line =~ /\{/ && !$bash_keywords{$function}) {
            $all_functions{$function}++;
        }
        elsif ($line =~ /\}/) {
            $function = undef;
        }
    }
    close $fh;

    return;
}

# Expand variables in path.
#   $path - path as written after "source", e.g. '$DIR/functions'
# Returns the path with every $NAME replaced by the value recorded in
# %variables. One pair of quotes around the value is removed; unknown
# variables expand to the empty string.
sub expand_path {
    my $path = shift;

    $path =~ s{\$(\w+)}{ _variable_value($1) }ge;

    return $path;
}

# Return the recorded value of a shell variable without surrounding quotes.
sub _variable_value {
    my $name = shift;

    my $value = $variables{$name};
    return '' unless defined $value;

    $value =~ s/^\s+//;
    $value =~ s/\s+$//;
    $value = $2 if $value =~ /^(["'])(.*)\1$/;

    return $value;
}

# Print separator
sub print_separator {
    print '-' x 100, "\n";
    return;
}

# Print hash.
#   $hash    - hash reference whose keys are listed in sorted order
#   $heading - heading printed above the keys; omitted for an empty hash
sub print_hash {
    my ($hash, $heading) = @_;

    my @names = sort keys %{$hash};
    print "\t$heading\n" if @names;
    print "\t\t$_\n" foreach @names;

    return;
}

# Print statistics hash.
#   $hash    - hash reference mapping a name to its count
#   $heading - heading of the section
#   $minimum - optional; only counts greater than this are listed (default 0)
# Rows are ordered by descending count, ties by name. A section without any
# row is not printed at all.
sub print_stat_hash {
    my ($hash, $heading, $minimum) = @_;
    $minimum = 0 unless defined $minimum;

    my @names =
        grep { $hash->{$_} > $minimum }
        sort { $hash->{$b} <=> $hash->{$a} || $a cmp $b }
        keys %{$hash};
    return unless @names;

    print_separator();
    print "$heading\n";

    # The name is passed as an argument: it may contain '%'.
    printf "%s\t%s\n", $hash->{$_}, $_ foreach @names;

    return;
}

# Print dependency tree.
#   $function  - node to print (file, function or "binary: x" leaf)
#   $padding   - optional prefix inherited from the parent rows
#   $where     - optional 'middle' or 'last': position among the siblings
#   $ancestors - optional hash reference of the nodes on the path from the
#                root; a child already on that path is not printed again,
#                which keeps cyclic dependencies from recursing forever
sub print_dependencies {
    my ($function, $padding, $where, $ancestors) = @_;
    $padding = '' unless defined $padding;
    $where   = '' unless defined $where;
    $ancestors ||= {};

    # Both paddings are as wide as the four character connector so that
    # the rows of every level line up.
    if ($where eq 'middle') {
        print "$padding|-- ";
        $padding .= '|   ';
    }
    elsif ($where eq 'last') {
        print "$padding`-- ";
        $padding .= '    ';
    }
    print "$function\n";

    my @children =
        grep { $_ ne $function && !$ancestors->{$_} }
        @{ $dependencies{$function} || [] };

    $ancestors->{$function} = 1;
    foreach my $index (0 .. $#children) {
        my $position = $index == $#children ? 'last' : 'middle';
        print_dependencies($children[$index], $padding, $position, $ancestors);
    }
    delete $ancestors->{$function};

    return;
}

# Run the report when executed as a script; stay passive when the file is
# loaded with require/do so the subroutines can be exercised on their own.
exit main(@ARGV) unless caller;

1;
In reply to Bash Parser by mickep76
| For: | Use: |
| & | &amp;amp; |
| < | &amp;lt; |
| > | &amp;gt; |
| [ | &amp;#91; |
| ] | &amp;#93; |