comment on

heres a new, shorter, cleaner, less obfuscated version. It actually produces something reasonable.

it doesnt presently figure out which perldiag entries are obsolete (ie no code issues them), but, well, its a start.


#!/usr/local/bin/dperl
# -w


use strict;
use File::Find;
use Getopt::Std;
use diagnostics;
use Data::Dumper;
use Text::Balanced qw( extract_delimited extract_bracketed );

$Data::Dumper::Indent = 1;
$Data::Dumper::Sortkeys = 1;    # sub { shift <=> shift };

my $header;    # stores perldiag.pod preamble
my %diagpod;    # pairsof( =item (.*) => podtext)
my %diags;    # pairsof( =item (.*) => linenum) by scanning perldiag.p
+od


my %stmts;    # source statement calls to Perl_croak(*) or Perl_warner
+(*)
my %diagpodidx; #     
my %warncat;    #
my (%diagcall, %diagcallidx, %diagcallfull);
my %diaginfo;
 
my ($ct_croak, $ct_diag);

# controls extraction of info when already doc'd in perldiag
our ($opt_p, $opt_s, $opt_i, $opt_u, $opt_d, $opt_c, $opt_U);

sub usage {
    die<<EOH;
  usage: $0 [options] # scans perl source, finds diagnostics
      -p : dump perldiags
      -s : dump source statements which match to diagnostics
      -u : print unmatched calls
      -U : print unmatched calls (more info)
      -d : dump info about the source of the diagnostics
      -i : insert info into the pod about the source of the diagnostic
+s
      -c : print code extracted
EOH
}

my %formats = qw(
         IVdf            "ld"
         UVuf            "lu"
         UVof            "lo"
         UVxf            "lx"
         UVXf            "lX"
         NVef            "e"
         NVff            "f"
         NVgf            "g"
         SVf             "s"
    );
my $format_re = join "|", keys %formats;


main: {

    getopts('pseuUdci') or usage;
    $ct_croak = $ct_diag = 0;

    parsediagpod();
    print "diags from perldiag.pod, numbered: ", Dumper {reverse %diag
+s}
        if $opt_p;

    find({ wanted => \&lookin, no_chdir => 1}, '.');
    print "found $ct_croak different calls in code\n";

    my %useddiags;
    {
    my (@found);
    foreach my $diag (keys %diagpod) {
        push my @new, grep /\Q$diag\E/, keys %stmts;
        if (@new) {
        push @found, @new;
        $useddiags{$diag} = \@new;
        }
    }
    my $exacts = keys %useddiags;
    print "found $exacts calls matched by diag entry\n";

    print "\ndiags-which-matched: ", Dumper(\%useddiags)
        if $opt_s;
    }

    print "calls unmatched by diag: ", Dumper([sort keys %stmts])
    if $opt_u;

    print "calls unmatched by diag: ", Dumper(\%stmts)
    if $opt_U;

    print "statements unmatched by diags: ", scalar keys %stmts, "\n";

    print "OK now do a SWAG\n";

    diagify(%stmts);
    print "done:  ", Dumper(\%diaginfo) if $opt_d;

    write_diag();
    print "done\n";
}

sub parsediagpod {

    open(my $fh, "./pod/perldiag.pod")
    or die "bad open on perldiag\n";

    local $/ = undef;
    my $buf = <$fh>;    # slurp it
    
    # pull all existing items and text into hash
    ($header, %diagpod) = split (/^(?:=item (.*?)\n)/sm, $buf);

    my @lines = split (/\n/, $buf);
    my ($lnum,$items);
    foreach my $line (@lines) {
    if ($line =~ m/^=item (.*)$/) {
        $diags{$1} = $lnum;
        $items++;
    }
    $lnum++;
    }
    my $ct_diag = keys %diags;
    print "found $ct_diag diagnostic msgs defined in perldiag.pod\n";

    # check for repeats (low value, may trash)
    if ($ct_diag == $items) {
    print "theyre all different\n";
    } else {
    print "there'a some overlap\n";
    }

    foreach my $item (keys %diagpod) {
    my $k = item_sort_key($item);
    $diagpodidx{$k} = $item;
    }
}

sub lookin {
    # File::Find callback
    return unless -f $_;
    return unless /(.*)\.(c|xs)$/;
    return if $2 eq 'c' and -f "$1.xs";

    open(my $fh, $_)
    or die "bad open on $_\n";

    my $file = $_;
    my ($buf, @warner_lines, $atline);
    while (<$fh>) {
        # remember the line number where Perl_warner & Perl_croak are 
+used.
        push @warner_lines, $. if /Perl_(warn|croak)/;
        $buf .= $_;
    }
    close $fh;
    unless ($buf) {
    warn "file $file is apparently empty\n";
    return;
    }
    while ($buf =~ m/(Perl_(croak|warn(?:\w*)))/smg) {
    
    my $func = $1;
    $atline = shift @warner_lines;
    $ct_croak++ if $2 eq 'croak';

    my ($argstr, $remainder) = extract_bracketed($buf,'()');

    # clean up the match
    $argstr =~ s/^aTHX_\s*//;
    $argstr =~ s/\s+/ /smg;

    if ($opt_c) {
        print "in $file, at line $atline:\n";
        print "\t got $func $argstr\n" if $argstr;
    }
    push @{$stmts{"$func$argstr"}}, "$file: $atline";
    }
    return;
}

sub item_sort_key {
    # create a key with which to sort diagnostics messages issued by p
+erl
    # see the perldiag.pod intro/header for definition.

    my($item) = @_;
    my $old = $item;

    # remove POD formatting
    $item =~ s/[A-Z]<(.*?)>/$1/g;
    
    # remove printf-style escapes
    # note: be careful not to remove things like %hash
    $item =~ s/%([scgp]|lx|\#o)//g;
    
    # remove all non-letter characters
    $item =~ tr/A-Za-z//cd;

    $item = lc $item;
    # print "old: $old\nnew: $item\n";
    return $item;
}

sub diagify {
    # massage a call into item and category
    my (%stmts) = @_;
    my $cat;

    foreach my $call (keys %stmts) {
    my $new = $call;

    if ($new =~ s/($format_re)/$formats{$1}/g) {
        # print "\nold: $call\nnew: $new\n";
    }

    my $warncat;
    $new =~ s/\)$//;
    $new =~ s/^Perl_croak\(\s*.THX_ //        and $warncat = 'F';
    $new =~ s/^Perl_warn(er)?\(\s*.THX_ //        and $warncat = 'W';
    $new =~ s{packWARN\w*\((.*?)\),\s*}{
        print STDERR "whats $new\n" unless $1;
        $warncat .= ' '.lc($1);
        $warncat =~ s/warn_//g;
        #print STDERR "handling: $warncat\t in $new  "
        #if $warncat =~ /,/;
        $warncat;
    }e;

    my ($string,@strings);
    while ($string = extract_delimited($new, '"','\s*')) {
        $string =~ s/^\"(.*)\"$/$1/;
        push @strings, $string;
    }
    $new = join('',@strings);
    $new =~ s/\\\"/\"/g;

    #print "\nold: $call\nnew: $new\n";

    my $sortkey = item_sort_key($new);
    $diagcall{$new} = delete $stmts{$call};
    $diagcallidx{$sortkey} = $new;
    $diagcallfull{$sortkey} = $call;

    $warncat{$new} = $warncat;
    $diaginfo{$sortkey} = {
        diagmsg => $new,
        dwarningcat => $warncat,
        invocation => $call,
        whence => $diagcall{$new},
    };
    }
}

sub write_diag {

    # output all the data
    print $header;
    # 
    # $diagpodidx{$k} = $item;

    my %idx = (%diagpodidx, %diagcallidx);
    foreach my $k (sort keys %idx) {

    my $indiag;

    if ($diagpodidx{$k}) {
        print "=item $diagpodidx{$k}\n";
        print "$diagpod{$diagpodidx{$k}}";
        $indiag = 1;
    }
    if (!$indiag and $diagcallidx{$k})
    {
        print "=item $diagcallidx{$k}\n";
        print "\n($warncat{$diagcallidx{$k}})\n\n";

        print "info: ", Dumper($diaginfo{$k}), "\n"
        if $opt_i;
    }
    elsif ($indiag and $diagcallidx{$k}) {
        # check for warn-cat match

        print "correction?: $warncat{$diagcallidx{$k}}\n\n"
        unless ($diagpod{$diagpodidx{$k}}
            =~ /\($warncat{$diagcallidx{$k}}\)/);
    }
    }
    print "\n=back\n\n=cut\n";
}

__END__

=head1  Latent Problems

It appears that a the reason (contributing at least) that diags dont
match to croaks is because the printf substitutions that are there.

The =items are loaded into (my) %diagnostics::transfmt, and values are
initialized to a corresponding regex.  %transfmt is used in
diagnostics::transmo() by splainthis() to match against the actual
error and produce the message.

By using diagnostics, I think I can splainthis() on the croaks, thus
eliminating the ones that are matched by a diagnostics entry.
[download]

In reply to Re^2: perldiag upgrade by kidongrok
in thread perldiag upgrade by kidongrok

Posts are HTML formatted. Put <p> </p> tags around your paragraphs. Put <code> </code> tags around your code and data!

Titles consisting of a single word are discouraged, and in most cases are disallowed outright.

Read Where should I post X? if you're not absolutely sure you're posting in the right place.

Please read these before you post! —

Posts may use any of the Perl Monks Approved HTML tags:

a, abbr, b, big, blockquote, br, caption, center, col, colgroup, dd, del, details, div, dl, dt, em, font, h1, h2, h3, h4, h5, h6, hr, i, ins, li, ol, p, pre, readmore, small, span, spoiler, strike, strong, sub, summary, sup, table, tbody, td, tfoot, th, thead, tr, tt, u, ul, wbr

You may need to use entities for some characters, as follows. (Exception: Within code tags, you can put the characters literally.)

	For:		Use:
	&		`&`
	<		`<`
	>		`>`
	[		`[`
	]		`]`

Link using PerlMonks shortcuts! What shortcuts can I use for linking?

See Writeup Formatting Tips and other pages linked from there for more info.