in reply to perldiag upgrade

ok, here it is.
pls tryit.

if you go code spelunking, try the -d option, it will tell you where the errs/warns are issued; file and line.

#!/usr/local/bin/dperl # -w use strict; use File::Find; use Getopt::Std; use diagnostics; use Data::Dumper; $Data::Dumper::Indent = 1; $Data::Dumper::Sortkeys = 1; # sub { shift <=> shift }; my %diags; # pairsof( =item(.*) => linenum) by scanning perldiag. +pod my %stmts; # source statement calls to Perl_croak(*) or Perl_warn +er(*) my ($ct_croak, $ct_diag); # controls extraction of info when already doc'd in perldiag our ($opt_p, $opt_s, $opt_e, $opt_u, $opt_d, $opt_r, $opt_x); sub usage { die<<EOH; usage: $0 [options] # scans perl source, finds diagnostics -p : dump perldiags -s : dump source statements which match to diagnostics -e : output existing diagnostics too -u : print unmatched croaks -d : dump info about the source of the diagnostics -r <pat>: only report stuff matching regex (ex: Win, POSIX) -x : print items dropped in match() EOH } main: { # i just like the indent. #$, = "\n"; getopts('pseudr:x') or usage; $ct_croak = $ct_diag = 0; # check pod open(my $fh, "./pod/perldiag.pod") or die "bad open on perldiag\n"; my $lnum = -1; while (my $line = <$fh>) { $lnum++; chomp $line; # print "$line\n"; if ($line =~ m/^=item (.*)$/) { $diags{$1} = $lnum; } } $ct_diag = keys %diags; print "found $ct_diag diagnostic msgs defined in perldiag.pod\n"; print "diags from perldiag.pod", Dumper {reverse %diags} if $opt_ +p; find({ wanted => \&lookin, no_chdir => 1}, '.'); print "found $ct_croak croaks in code\n"; if ($opt_s) { my (@found, %useddiags); foreach my $diag (keys %diags) { push my @new, grep /\Q$diag\E/, keys %stmts; if (@new) { push @found, @new; $useddiags{$diag} = \@new; } } print "\ndiags-which-matched ", Dumper(\%useddiags); # print sort @found; #delete @stmts{@found}; #print "\n\ncroaks w/o diag match\n", sort keys %stmts; } my %crks = %stmts; delete @crks{keys %diags}; if ($opt_u) { print "croaks unmatched by diag\n"; print Dumper([sort keys %crks]); } print "croaks unmatched by diags: ", scalar keys %stmts, "\n"; print "croaks unmatched by diags: ", scalar keys %crks, "\n"; #print "total matched: ", scalar keys %crks, "\n"; print "OK now do a SWAG\n"; # want hash, w the Perl_croak statement's fmt string as key, and # val is hash, w whole statement as key, and an array of "$file: # $line" items my %items; my @itempairs = map { (/.*?\"(.*)\".*/) ? ($1,$_) : () } sort keys %stmts; while (my $fmt = shift @itempairs) { my $statement = shift @itempairs; # store what we know about the croak push @{$items{$fmt}}, {$statement => $stmts{$statement}}; } if ($opt_r) { foreach (keys %items) { delete $items{$_} unless $_ =~ /$opt_r/o; } } if ($opt_d) { foreach (sort keys %items) { print "=item $_\n\n " . Dumper ($items{$_}) . "\n"; } } # match(\%items); markup_perldiag(\%items); print "done\n"; } sub match { my ($items) = @_; my %dropped; foreach my $k (keys %$items) { $dropped{$k} = delete $items->{$k} if &diagnostics::transmo($i +tems->{k}); } #print Dumper $items; print Dumper \%dropped if $opt_x; print "after crosscheck, have ", scalar keys %$items, " items \n"; } sub lookin { # File::Find callback # print "looking in $_\n"; return unless -f $_; return unless /[chml]$/; open(my $fh, $_) or die "bad open on $_\n"; # slurp so match works across newlines local $/ = undef; my ($fullbuf, $buf); $fullbuf = $buf = <$fh>; my $atline; # approx point of match while ($buf =~ m/(Perl_(croak|warn)\w*\s*\(.*?\).*?;)/smg) { # save match b4 finding line my $match = $1; # find line where call is made my ($where) = pos($buf) || 'unknown line'; my $sofar = $`; $atline = scalar (my @newlines = ($sofar =~ m/(\n)/sg)); $atline++; $ct_croak++; # clean up match chomp $match; #print "in $_: <$match>\n"; $match =~ s/^aTHX_\s*//; $match =~ s/\s+/ /smg; # copy before doing 2 different things with value my $matchitem = $match; # strip out all but double quoted 'fmt' string # this needs rework to handle concats $match =~ s/^ \"//; $match =~ s/ \"\s*$//; # do positive match to find concatenations # - commas either side delimit $matchitem =~ s/^.*?(\w* \s? ".*?" \s? \w*).*?$/$1/x; # try to store some useful info about where match occurred push @{$stmts{$match}}, "$_: $atline"; #push @{$croakargs{$match}}, $matchitem; } return; # way slow ($`), doesnt work like i hoped foreach my $match (keys %stmts) { # now find line-number. # start with original buffer, and search for specific match # just found above, then count newlines in the pre-match $fullbuf = /\Q$match/ms; my $pre = $`; my @lines = $pre =~ m/(\n)/msg; push @{$stmts{$match}}, "$_:" . scalar @lines; } } # get the sortable key for an item sub get_item_key { my($item) = @_; # remove POD formatting $item =~ s/[A-Z]<(.*?)>/$1/g; # remove printf-style escapes # note: be careful not to remove things like %hash $item =~ s/%(?:[scg]|lx|#o)//g; # remove all non-letter characters $item =~ tr/A-Za-z//cd; return lc $item; } sub markup_perldiag { my ($items) = @_; my $fh; # re-used several times, relies on tacit close b4 reopen open($fh, "./pod/perldiag.pod") or die "bad open on perldiag\n"; local $/ = undef; my $buf = <$fh>; # slurp it # pull all existing items into my ($header, %existing_items) = split (/^(?:=item (.*?)\n)/sm, $bu +f); # store items with new key thats stripped of all \W # this matches sort-order as doc'd in $header text # but play games to prevent items that differ only by \W # from collapsing into 1 hash element foreach my $k (sort keys %existing_items) { my $alpha = get_item_key($k); $alpha++ if exists $existing_items{$alpha}; $existing_items{$alpha} = [ "=item $k\n", delete $existing_ite +ms{$k} ]; } my %newitems; foreach my $k (keys %$items) { my $alpha = get_item_key($k); if (0) { $alpha =~ s/%\w+//g; $alpha =~ s/\W//g; } my $label = $k; $label =~ s/\\n/ /g; $label =~ s/\\"//g; #" balance quote for emacs $label =~ s/\\\\//g; $newitems{$alpha} = [ "=item $label\n", $items->{$k}[0] ]; } my %h = (%existing_items, %newitems); open($fh, "> ./pod/perldiag.pod.new") or die "bad open on perldiag\n"; open(my $fhd, "> ./pod/perldiag.pod.dump") or die "bad open on perldiag\n"; print $fh $header; foreach (sort keys %h) { # print original entry 1st; print $fh "$h{$_}->[0]"; print $fhd "$h{$_}->[0]"; if (exists $existing_items{$_}) { print $fh "$existing_items{$_}->[1]"; print $fhd "$existing_items{$_}->[1]"; } if (exists $newitems{$_} and (not exists $existing_items{$_} or $opt_e)) { my ($str,$dump) = extract_msg($_, $newitems{$_}); print $fh $str; print $fhd $str,$dump; } } return; } sub extract_msg { # given: # 1: an item-name (reduced for sorting) # 2: [ item-name (not reduced), { source-statement, [ @line-number +s] } # returns: a warning-cat-string (for incl into perldiag.pod), a du +mp-string my ($k, $v) = @_; my $item = $v->[0]; my ($call) = keys %{ $v->[1] }; my ($str,$dump); if ($call =~ /packWARN\(WARN_(\w+)\)/) { my $type = lc $1; $str = "\n(W $type)."; } elsif ($call =~ /packWARN2\(WARN_(\w+),\s*WARN_(\w+)\)/) { my $type = lc $1; my $type2 = lc $2; $str = "\n(W $type, $type2)."; } elsif ($call =~ /Perl_warn/) { $str = "\n(W)."; } elsif ($call =~ /Perl_croak/) { $str = "\n(F)."; } $dump = Data::Dumper->Dump ([$v->[1]], ['in-code-at']) . "\n"; if ($opt_r) { chomp $item; $item =~ s|::|/|; # only once $item =~ s/\(.*\)//; $item =~ s/=item usage: //; # yeah, not very general ! chomp $item; $str .= " See L<$item> for information, proper use."; } $str .= "\n\n"; return ($str,$dump); } __END__ # this doesnt produce anything useful if (0) { # do set differences %dgs = %diags; %crks = %stmts; my (@founddiags, @foundcroaks); @founddiags = grep defined $_, delete @dgs{keys %stmts}; @foundcroaks = grep defined $_, delete @crks{keys %diags}; print "\n\nunmatched diagnostics:\n", sort keys %dgs; print "\n\nunmatched croaks:\n", sort keys %crks; %diffs = (%dgs, %crks); foreach $k (sort keys %diffs) { # print "in $diffs{$k}: $k\n"; } print "\n\nmatched diagnostics:\n", sort @founddiags; print "\n\nmatched diagnostics:\n", Dumper (\@founddiags); print "\n\nmatched croaks:\n", sort @foundcroaks; print "\n\nmatched croaks:\n", Dumper(\@foundcroaks); }
=head1 Latent Problems It appears that a the reason (contributing at least) that diags dont match to croaks is because the printf substitutions that are there. The =items are loaded into (my) %diagnostics::transfmt, and values are initialized to a corresponding regex. %transfmt is used in diagnostics::transmo() by splainthis() to match against the actual error and produce the message. By using diagnostics, I think I can splainthis() on the croaks, thus eliminating the ones that are matched by a diagnostics entry.

Edit by castaway - changed pre tags to code tags

Replies are listed 'Best First'.
Re^2: perldiag upgrade - 1st test-snippet
by kidongrok (Acolyte) on Jun 09, 2005 at 14:25 UTC
    1st test-snippet to trigger perl error msg, courtesy Nick Clark on p5p.
    ./perl -e '$a = ${*STDOUT{IO}}' Bizarre copy of IO in sassign at -e line 1.
    Should it be possible to get the 'bizzare copy' error at all?
Re^2: perldiag upgrade
by kidongrok (Acolyte) on Jun 12, 2005 at 22:13 UTC
    heres a new, shorter, cleaner, less obfuscated version. It actually produces something reasonable.

    it doesnt presently figure out which perldiag entries are obsolete (ie no code issues them), but, well, its a start.

    #!/usr/local/bin/dperl # -w use strict; use File::Find; use Getopt::Std; use diagnostics; use Data::Dumper; use Text::Balanced qw( extract_delimited extract_bracketed ); $Data::Dumper::Indent = 1; $Data::Dumper::Sortkeys = 1; # sub { shift <=> shift }; my $header; # stores perldiag.pod preamble my %diagpod; # pairsof( =item (.*) => podtext) my %diags; # pairsof( =item (.*) => linenum) by scanning perldiag.p +od my %stmts; # source statement calls to Perl_croak(*) or Perl_warner +(*) my %diagpodidx; # my %warncat; # my (%diagcall, %diagcallidx, %diagcallfull); my %diaginfo; my ($ct_croak, $ct_diag); # controls extraction of info when already doc'd in perldiag our ($opt_p, $opt_s, $opt_i, $opt_u, $opt_d, $opt_c, $opt_U); sub usage { die<<EOH; usage: $0 [options] # scans perl source, finds diagnostics -p : dump perldiags -s : dump source statements which match to diagnostics -u : print unmatched calls -U : print unmatched calls (more info) -d : dump info about the source of the diagnostics -i : insert info into the pod about the source of the diagnostic +s -c : print code extracted EOH } my %formats = qw( IVdf "ld" UVuf "lu" UVof "lo" UVxf "lx" UVXf "lX" NVef "e" NVff "f" NVgf "g" SVf "s" ); my $format_re = join "|", keys %formats; main: { getopts('pseuUdci') or usage; $ct_croak = $ct_diag = 0; parsediagpod(); print "diags from perldiag.pod, numbered: ", Dumper {reverse %diag +s} if $opt_p; find({ wanted => \&lookin, no_chdir => 1}, '.'); print "found $ct_croak different calls in code\n"; my %useddiags; { my (@found); foreach my $diag (keys %diagpod) { push my @new, grep /\Q$diag\E/, keys %stmts; if (@new) { push @found, @new; $useddiags{$diag} = \@new; } } my $exacts = keys %useddiags; print "found $exacts calls matched by diag entry\n"; print "\ndiags-which-matched: ", Dumper(\%useddiags) if $opt_s; } print "calls unmatched by diag: ", Dumper([sort keys %stmts]) if $opt_u; print "calls unmatched by diag: ", Dumper(\%stmts) if $opt_U; print "statements unmatched by diags: ", scalar keys %stmts, "\n"; print "OK now do a SWAG\n"; diagify(%stmts); print "done: ", Dumper(\%diaginfo) if $opt_d; write_diag(); print "done\n"; } sub parsediagpod { open(my $fh, "./pod/perldiag.pod") or die "bad open on perldiag\n"; local $/ = undef; my $buf = <$fh>; # slurp it # pull all existing items and text into hash ($header, %diagpod) = split (/^(?:=item (.*?)\n)/sm, $buf); my @lines = split (/\n/, $buf); my ($lnum,$items); foreach my $line (@lines) { if ($line =~ m/^=item (.*)$/) { $diags{$1} = $lnum; $items++; } $lnum++; } my $ct_diag = keys %diags; print "found $ct_diag diagnostic msgs defined in perldiag.pod\n"; # check for repeats (low value, may trash) if ($ct_diag == $items) { print "theyre all different\n"; } else { print "there'a some overlap\n"; } foreach my $item (keys %diagpod) { my $k = item_sort_key($item); $diagpodidx{$k} = $item; } } sub lookin { # File::Find callback return unless -f $_; return unless /(.*)\.(c|xs)$/; return if $2 eq 'c' and -f "$1.xs"; open(my $fh, $_) or die "bad open on $_\n"; my $file = $_; my ($buf, @warner_lines, $atline); while (<$fh>) { # remember the line number where Perl_warner & Perl_croak are +used. push @warner_lines, $. if /Perl_(warn|croak)/; $buf .= $_; } close $fh; unless ($buf) { warn "file $file is apparently empty\n"; return; } while ($buf =~ m/(Perl_(croak|warn(?:\w*)))/smg) { my $func = $1; $atline = shift @warner_lines; $ct_croak++ if $2 eq 'croak'; my ($argstr, $remainder) = extract_bracketed($buf,'()'); # clean up the match $argstr =~ s/^aTHX_\s*//; $argstr =~ s/\s+/ /smg; if ($opt_c) { print "in $file, at line $atline:\n"; print "\t got $func $argstr\n" if $argstr; } push @{$stmts{"$func$argstr"}}, "$file: $atline"; } return; } sub item_sort_key { # create a key with which to sort diagnostics messages issued by p +erl # see the perldiag.pod intro/header for definition. my($item) = @_; my $old = $item; # remove POD formatting $item =~ s/[A-Z]<(.*?)>/$1/g; # remove printf-style escapes # note: be careful not to remove things like %hash $item =~ s/%([scgp]|lx|\#o)//g; # remove all non-letter characters $item =~ tr/A-Za-z//cd; $item = lc $item; # print "old: $old\nnew: $item\n"; return $item; } sub diagify { # massage a call into item and category my (%stmts) = @_; my $cat; foreach my $call (keys %stmts) { my $new = $call; if ($new =~ s/($format_re)/$formats{$1}/g) { # print "\nold: $call\nnew: $new\n"; } my $warncat; $new =~ s/\)$//; $new =~ s/^Perl_croak\(\s*.THX_ // and $warncat = 'F'; $new =~ s/^Perl_warn(er)?\(\s*.THX_ // and $warncat = 'W'; $new =~ s{packWARN\w*\((.*?)\),\s*}{ print STDERR "whats $new\n" unless $1; $warncat .= ' '.lc($1); $warncat =~ s/warn_//g; #print STDERR "handling: $warncat\t in $new " #if $warncat =~ /,/; $warncat; }e; my ($string,@strings); while ($string = extract_delimited($new, '"','\s*')) { $string =~ s/^\"(.*)\"$/$1/; push @strings, $string; } $new = join('',@strings); $new =~ s/\\\"/\"/g; #print "\nold: $call\nnew: $new\n"; my $sortkey = item_sort_key($new); $diagcall{$new} = delete $stmts{$call}; $diagcallidx{$sortkey} = $new; $diagcallfull{$sortkey} = $call; $warncat{$new} = $warncat; $diaginfo{$sortkey} = { diagmsg => $new, dwarningcat => $warncat, invocation => $call, whence => $diagcall{$new}, }; } } sub write_diag { # output all the data print $header; # # $diagpodidx{$k} = $item; my %idx = (%diagpodidx, %diagcallidx); foreach my $k (sort keys %idx) { my $indiag; if ($diagpodidx{$k}) { print "=item $diagpodidx{$k}\n"; print "$diagpod{$diagpodidx{$k}}"; $indiag = 1; } if (!$indiag and $diagcallidx{$k}) { print "=item $diagcallidx{$k}\n"; print "\n($warncat{$diagcallidx{$k}})\n\n"; print "info: ", Dumper($diaginfo{$k}), "\n" if $opt_i; } elsif ($indiag and $diagcallidx{$k}) { # check for warn-cat match print "correction?: $warncat{$diagcallidx{$k}}\n\n" unless ($diagpod{$diagpodidx{$k}} =~ /\($warncat{$diagcallidx{$k}}\)/); } } print "\n=back\n\n=cut\n"; } __END__ =head1 Latent Problems It appears that a the reason (contributing at least) that diags dont match to croaks is because the printf substitutions that are there. The =items are loaded into (my) %diagnostics::transfmt, and values are initialized to a corresponding regex. %transfmt is used in diagnostics::transmo() by splainthis() to match against the actual error and produce the message. By using diagnostics, I think I can splainthis() on the croaks, thus eliminating the ones that are matched by a diagnostics entry.