heres a new, shorter, cleaner, less obfuscated version. It actually produces something reasonable.

it doesnt presently figure out which perldiag entries are obsolete (ie no code issues them), but, well, its a start.

#!/usr/local/bin/dperl # -w use strict; use File::Find; use Getopt::Std; use diagnostics; use Data::Dumper; use Text::Balanced qw( extract_delimited extract_bracketed ); $Data::Dumper::Indent = 1; $Data::Dumper::Sortkeys = 1; # sub { shift <=> shift }; my $header; # stores perldiag.pod preamble my %diagpod; # pairsof( =item (.*) => podtext) my %diags; # pairsof( =item (.*) => linenum) by scanning perldiag.p +od my %stmts; # source statement calls to Perl_croak(*) or Perl_warner +(*) my %diagpodidx; # my %warncat; # my (%diagcall, %diagcallidx, %diagcallfull); my %diaginfo; my ($ct_croak, $ct_diag); # controls extraction of info when already doc'd in perldiag our ($opt_p, $opt_s, $opt_i, $opt_u, $opt_d, $opt_c, $opt_U); sub usage { die<<EOH; usage: $0 [options] # scans perl source, finds diagnostics -p : dump perldiags -s : dump source statements which match to diagnostics -u : print unmatched calls -U : print unmatched calls (more info) -d : dump info about the source of the diagnostics -i : insert info into the pod about the source of the diagnostic +s -c : print code extracted EOH } my %formats = qw( IVdf "ld" UVuf "lu" UVof "lo" UVxf "lx" UVXf "lX" NVef "e" NVff "f" NVgf "g" SVf "s" ); my $format_re = join "|", keys %formats; main: { getopts('pseuUdci') or usage; $ct_croak = $ct_diag = 0; parsediagpod(); print "diags from perldiag.pod, numbered: ", Dumper {reverse %diag +s} if $opt_p; find({ wanted => \&lookin, no_chdir => 1}, '.'); print "found $ct_croak different calls in code\n"; my %useddiags; { my (@found); foreach my $diag (keys %diagpod) { push my @new, grep /\Q$diag\E/, keys %stmts; if (@new) { push @found, @new; $useddiags{$diag} = \@new; } } my $exacts = keys %useddiags; print "found $exacts calls matched by diag entry\n"; print "\ndiags-which-matched: ", Dumper(\%useddiags) if $opt_s; } print "calls unmatched by diag: ", Dumper([sort keys %stmts]) if $opt_u; print "calls unmatched by diag: ", Dumper(\%stmts) if $opt_U; print "statements unmatched by diags: ", scalar keys %stmts, "\n"; print "OK now do a SWAG\n"; diagify(%stmts); print "done: ", Dumper(\%diaginfo) if $opt_d; write_diag(); print "done\n"; } sub parsediagpod { open(my $fh, "./pod/perldiag.pod") or die "bad open on perldiag\n"; local $/ = undef; my $buf = <$fh>; # slurp it # pull all existing items and text into hash ($header, %diagpod) = split (/^(?:=item (.*?)\n)/sm, $buf); my @lines = split (/\n/, $buf); my ($lnum,$items); foreach my $line (@lines) { if ($line =~ m/^=item (.*)$/) { $diags{$1} = $lnum; $items++; } $lnum++; } my $ct_diag = keys %diags; print "found $ct_diag diagnostic msgs defined in perldiag.pod\n"; # check for repeats (low value, may trash) if ($ct_diag == $items) { print "theyre all different\n"; } else { print "there'a some overlap\n"; } foreach my $item (keys %diagpod) { my $k = item_sort_key($item); $diagpodidx{$k} = $item; } } sub lookin { # File::Find callback return unless -f $_; return unless /(.*)\.(c|xs)$/; return if $2 eq 'c' and -f "$1.xs"; open(my $fh, $_) or die "bad open on $_\n"; my $file = $_; my ($buf, @warner_lines, $atline); while (<$fh>) { # remember the line number where Perl_warner & Perl_croak are +used. push @warner_lines, $. if /Perl_(warn|croak)/; $buf .= $_; } close $fh; unless ($buf) { warn "file $file is apparently empty\n"; return; } while ($buf =~ m/(Perl_(croak|warn(?:\w*)))/smg) { my $func = $1; $atline = shift @warner_lines; $ct_croak++ if $2 eq 'croak'; my ($argstr, $remainder) = extract_bracketed($buf,'()'); # clean up the match $argstr =~ s/^aTHX_\s*//; $argstr =~ s/\s+/ /smg; if ($opt_c) { print "in $file, at line $atline:\n"; print "\t got $func $argstr\n" if $argstr; } push @{$stmts{"$func$argstr"}}, "$file: $atline"; } return; } sub item_sort_key { # create a key with which to sort diagnostics messages issued by p +erl # see the perldiag.pod intro/header for definition. my($item) = @_; my $old = $item; # remove POD formatting $item =~ s/[A-Z]<(.*?)>/$1/g; # remove printf-style escapes # note: be careful not to remove things like %hash $item =~ s/%([scgp]|lx|\#o)//g; # remove all non-letter characters $item =~ tr/A-Za-z//cd; $item = lc $item; # print "old: $old\nnew: $item\n"; return $item; } sub diagify { # massage a call into item and category my (%stmts) = @_; my $cat; foreach my $call (keys %stmts) { my $new = $call; if ($new =~ s/($format_re)/$formats{$1}/g) { # print "\nold: $call\nnew: $new\n"; } my $warncat; $new =~ s/\)$//; $new =~ s/^Perl_croak\(\s*.THX_ // and $warncat = 'F'; $new =~ s/^Perl_warn(er)?\(\s*.THX_ // and $warncat = 'W'; $new =~ s{packWARN\w*\((.*?)\),\s*}{ print STDERR "whats $new\n" unless $1; $warncat .= ' '.lc($1); $warncat =~ s/warn_//g; #print STDERR "handling: $warncat\t in $new " #if $warncat =~ /,/; $warncat; }e; my ($string,@strings); while ($string = extract_delimited($new, '"','\s*')) { $string =~ s/^\"(.*)\"$/$1/; push @strings, $string; } $new = join('',@strings); $new =~ s/\\\"/\"/g; #print "\nold: $call\nnew: $new\n"; my $sortkey = item_sort_key($new); $diagcall{$new} = delete $stmts{$call}; $diagcallidx{$sortkey} = $new; $diagcallfull{$sortkey} = $call; $warncat{$new} = $warncat; $diaginfo{$sortkey} = { diagmsg => $new, dwarningcat => $warncat, invocation => $call, whence => $diagcall{$new}, }; } } sub write_diag { # output all the data print $header; # # $diagpodidx{$k} = $item; my %idx = (%diagpodidx, %diagcallidx); foreach my $k (sort keys %idx) { my $indiag; if ($diagpodidx{$k}) { print "=item $diagpodidx{$k}\n"; print "$diagpod{$diagpodidx{$k}}"; $indiag = 1; } if (!$indiag and $diagcallidx{$k}) { print "=item $diagcallidx{$k}\n"; print "\n($warncat{$diagcallidx{$k}})\n\n"; print "info: ", Dumper($diaginfo{$k}), "\n" if $opt_i; } elsif ($indiag and $diagcallidx{$k}) { # check for warn-cat match print "correction?: $warncat{$diagcallidx{$k}}\n\n" unless ($diagpod{$diagpodidx{$k}} =~ /\($warncat{$diagcallidx{$k}}\)/); } } print "\n=back\n\n=cut\n"; } __END__ =head1 Latent Problems It appears that a the reason (contributing at least) that diags dont match to croaks is because the printf substitutions that are there. The =items are loaded into (my) %diagnostics::transfmt, and values are initialized to a corresponding regex. %transfmt is used in diagnostics::transmo() by splainthis() to match against the actual error and produce the message. By using diagnostics, I think I can splainthis() on the croaks, thus eliminating the ones that are matched by a diagnostics entry.

In reply to Re^2: perldiag upgrade by kidongrok
in thread perldiag upgrade by kidongrok

Title:
Use:  <p> text here (a paragraph) </p>
and:  <code> code here </code>
to format your post, it's "PerlMonks-approved HTML":



  • Posts are HTML formatted. Put <p> </p> tags around your paragraphs. Put <code> </code> tags around your code and data!
  • Titles consisting of a single word are discouraged, and in most cases are disallowed outright.
  • Read Where should I post X? if you're not absolutely sure you're posting in the right place.
  • Please read these before you post! —
  • Posts may use any of the Perl Monks Approved HTML tags:
    a, abbr, b, big, blockquote, br, caption, center, col, colgroup, dd, del, details, div, dl, dt, em, font, h1, h2, h3, h4, h5, h6, hr, i, ins, li, ol, p, pre, readmore, small, span, spoiler, strike, strong, sub, summary, sup, table, tbody, td, tfoot, th, thead, tr, tt, u, ul, wbr
  • You may need to use entities for some characters, as follows. (Exception: Within code tags, you can put the characters literally.)
            For:     Use:
    & &amp;
    < &lt;
    > &gt;
    [ &#91;
    ] &#93;
  • Link using PerlMonks shortcuts! What shortcuts can I use for linking?
  • See Writeup Formatting Tips and other pages linked from there for more info.