Beefy Boxes and Bandwidth Generously Provided by pair Networks
We don't bite newbies here... much
 
PerlMonks  

comment on

( [id://3333]=superdoc: print w/replies, xml ) Need Help??

Description: Purpose is to remove inane (one-line) commit log messages of FreeBSD Ports Git repository, and highlights some others. It is not currently suitable to filter long form of commit messages.

#!/usr/local/bin/perl use v5.30; use strict; use warnings; use feature qw/ signatures /; no warnings qw/ experimental::signatures /; my $VERSION = q/20210710.01/; use Getopt::Long; use Pod::Usage; use Term::ANSIColor qw/ color /; my $help; GetOptions( 'h|help' => \$help ) or pod2usage( '-verbose' => 1 , '-exitval' => 1, '-message' => q/Only -h or --help option is available +./ ); $help and pod2usage( '-verbose' => 2 ); # Show usage() if neither a pipe is connected, nor CLI arguments given +. ( scalar @ARGV || -p \*STDIN ) or pod2usage( '-verbose' => 99, '-exitval' => 1, '-sections' => [ qw/Name Synopsis/ ] ); =pod =head1 Name filter-ports-commit-log.pl - Remove inane one-line commit log messages + in FreeBSD Ports repository. =head1 Synopsis % ( cd /usr/ports && git log --no-merges --pretty=oneline -n 100 ) \ | filter-ports-commit-log.pl | less ... or ... % collect='git-log' ; \ ( cd /usr/ports && git log --no-merges --pretty=oneline -n 100 ) \ >| $collect \ && filter-ports-commit-log.pl $collect >| $collect.filtered =head1 Description Purpose is to remove inane one-line commit log messages in FreeBSD Por +ts repository, and highlights some others. It is not currently suitable t +o filter long form of commit messages. To see usage, run without any arguments or standard input provided via + pipe. Complete list of regexen to highlight port names of interest, and part +ial list to skip the commit lines are extracted from C<DATA>. Currently there i +s no way to provide those lists via files. =head2 Options =over =item B<--help> Show complete help message. =back =cut # To skip low SNR commit lines. my $skip_inane = 1; # To highlight possibly significant changes. my $highlight_changes = 1; ( @ARGV ? sub{ filter_log_file( @ARGV ) } : sub{ filter( \*STDIN ) } )->(); exit; # Prints some lines with highlights; others with none; skips some alto +gether. # # Arguments ... # @file: file list. sub filter_log_file( @file ) { for my $path ( @file ) { my $fd; if ( ! open $fd, '<', $path ) { warn qq/Cannot open '$path': $!\n/; next; } filter( $fd ); close( $fd ) or die qq/Could not close '$path': $!\n/; } } # Prints some lines with highlights; others with none; skips some alto +gether. # # Global Variables ... # $skip_inane : flag to skip lines. # $highlight_changes: flag to highlight changes. # # Arguments ... # $fd: file descriptor. sub filter( $fd ) { state $skipper = $skip_inane ? \&may_skip : sub { 0 }; state $highlighter = ! $highlight_changes ? sub { $_[0] } : sub { highlight_change( highlight_tracked( $ +_[0] ) ) } ; while ( defined ( my $line = <$fd> ) ) { $skipper->( $line ) or print $highlighter->( $line ); } return } # Returns a truth value if to skip printing the line. # # Part of regex list to skip a line is extracted from C<DATA>; start o +f the list # is marked by C<qr/^::skip::/>. # # Arguments ... # $line: string to match. sub may_skip( $line ) { state $version = qr/[a-z0-9,._-]+/iaa; state $list_start = qr/^ ::skip:: /xi; state @skip = ( # Version updates. qr/ up(?:dat|grad)e.*to [ \t]+ $version /ix , qr/$version -> $version/ # Applies to all the ports. , quotemeta( q[*/*] ) , extract_list( $list_start, 0, 0 ) ); for my $re ( @skip ) { $line =~ $re and return 1; } return 0; } # Returns a string possibly with highlight (reversed bold white on bla +ck # background). # # List of regex list to identify port names of interest, to me is extr +acted from # C<DATA>; start of the list is marked with C<qr/^::interest::/>. # # Arguments ... # $line: string to search to find things of interest. sub highlight_tracked( $line ) { $line or return $line; state $term_track = color( q/reverse bold white on_black/ ); state $list_start = qr/^ ::interest:: /ix; state @track = extract_list( $list_start, 1, 1 ); return apply( $line, $term_track, \@track ); } # Returns a highlighted string. # # Arguments ... # $line : string to highlight, in part or in whole. # $highlight: terminal escape sequence, 'highlight'. Terminal reset + sequence # is added after the matched string. # $change : array reference of regexen with capture(s); only the +first is # used. sub apply( $line, $highlight, $change ) { ( $change && $highlight ) or return $line; state $term_reset = color( q/reset/ ); for my $re ( @{ $change } ) { $line =~ s/$re/${highlight}$1${term_reset}/; } return $line; } # Returns a regex list extracted from C<DATA>, between C<qr/$sec_start +/> and # (/^::/ or EoF). # # One regex per line is expected. # # Arguments ... # $sec_start : regex to identify start of the list (on its own li +ne). # $set_boundary: flag to wrap a regex with boundary assertion, C<\b +>. # $capture : flag to create a regex to capture the matched stri +ng. sub extract_list( $sec_start, $set_boundary = undef, $capture = 1 ) { # In order to extract multiple lists by coming back to this positio +n. state $DATA_start = tell DATA; my @found; seek DATA, $DATA_start, 0; while( defined ( my $line = <DATA> ) ) { $line =~ m{$sec_start} ... ( $line =~ /^::[a-z0-9]+::/i or eof( +DATA ) ) or next; $line =~ /^\s*(?::|#|$)/ and next; push @found, $line =~ s/\s+$//r; } my $flag = $set_boundary ? '\b' : '' ; # XXX: Rather UGLY! my $maker = $capture ? sub { qr/($flag$_[0]$flag)/i } : sub { qr/$flag$_[0]$flag/i} ; my %seen; return map { $maker->( $_ ) } grep { !$seen{ $_ }++ } @found ; } # Returns a string possibly with highlights. # # Terminal highlight is applied only once. See C<&setup_highlight> for + the # highlight details. # # Arguments ... # $line: string to search to find things of interest. sub highlight_change( $line ) { $line or return; # As of perl 5.32.1, cannot assign C<state> variables in a list. state $order; state %map; ( $order, %map ) = setup_highlight(); my $orig = $line; for my $hilite ( @{ $order } ) { $line = apply( $line, $hilite, $map{ $hilite } ); # Only apply a single highlight. $orig eq $line or last; } return $line; } # Returns a 2-element list ... # - array reference of terminal escape sequence ('highlight') strin +gs as the # order of highlights; # - a hash of above highlight string as key and regex array referen +ce to # match thing of interest as value. # # Highlight order & proposed usage ... # - alert: revered red text on white background; # - good : revered green text on black background; # - fair : bold yellow text on black background. sub setup_highlight() { state $term_good = color( q/reverse green on_black/ ); state $term_fair = color( q/bold yellow on_black/ ); state $term_alert = color( q/reverse red on_white/ ); # List of regex is not put in C<DATA> as fully formed regex per lin +e is # expected; no allowance is made for regex interpolations. # # XXX: A better way would be a proper parser. state $redo = q/re(?:add|surrect|store|vert)/; state $undo = q/un(?:break|bork|delet)/; state $fix_change = qr/(?:fix|$redo|$undo|add miss)[a-z]*/i; state $normal_change = q/(?:add|new|update|change|remove)[a-z]*/; state $alert_change = q/(?:disable|ignore|broke|bork)[a-z]*/; state $port_category = qr/[a-z0-9-]+/aa; state $port_name = qr/[A-Za-z0-9-]+/aa; state $port_origin = qr{$port_category/$port_name}; # Only highlight matched change type. state @good = ( qr/\b(${fix_change}.+$)/i , qr/\b(options? $normal_change)/i ); state @fair = ( qr/\b($normal_change(?: (?:port|option|$port_origin +))?)/i , qr/\b(enable|lighten[a-z]*)/i ); # Highlight the whole line. state @alert = ( qr/\b(${alert_change}.+$)/i , qr/\b((?:add|update).+?depend[a-z]*.*$)/i ); state %map = ( $term_good => \@good , $term_fair => \@fair , $term_alert => \@alert ); state @order = ( $term_alert, $term_good, $term_fair ); return ( \@order, %map ); } =pod =head1 Future Improvement =over 2 =item * Currently list of regexen of interesting port names is hard coded in C +<DATA> section; list of regexen when to skip has been hard coded in the C<&ma +y_skip>. So collect them from external file(s) at some point. =item * Be able to skip long form of messages. Currently only the lines which +match in a long form of a commit message is skipped, possibly rendering such a me +ssage rather useless. =back =head1 Bugs Types of "inane" and "meaningful" changes (for some definitions of the +m) and related verbiage are constantly changing. A proper way to deal with mi +nimal changes to the program would be to use a proper parser. =head1 See Also C<git help log>, namely I<--since>; I<--grep>, I<--regexp-ignore-case> +, I<--all-match>, I<--perl-regexp>; & I<--no-merges> options. =head1 Author parv =head1 License Copyright 2021 parv Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are + met: 1. Redistributions of source code must retain the above copyright noti +ce, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright n +otice, this list of conditions and the following disclaimer in the documentat +ion and/ or other materials provided with the distribution. 3. Neither the name of the copyright holder nor the names of its contr +ibutors may be used to endorse or promote products derived from this software +without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "A +S IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUEN +TIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOOD +S OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOW +EVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIA +BILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF +THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. =cut __DATA__ # - "::blah::" marks the start of a regex section, one per line. It is + read # until another "::" section is seen or until end of the file. # # - Empty lines & lines starting with comment character, "#", are skip +ped. # Port names -- regex or fixed string -- of interest are matched on wo +rd # boundary. ::interest:: # Of most interest; zsh vim aspell w3m mutt slrn xorg fvmw3(?:-dev[a-z]*)? xterm(?:[-a-z0-9]+)? firefox xpdf gv xv perl5 p5-Try-Tiny p5-DateTime(?:-[a-zA-Z0-9-]+)? ack p5-Term-ReadLine-Gnu rsync curl wget git mercurial wmmixer xclock xrdb xset xkill virtualbox-ose-additions(?:-legacy)? mplayer tigervnc xclip xclipboard # medium interest; p5-Term-ANSIColor dma slrn xmodmap xkeycaps xsetroot xbatt xosview xload the_silver_searcher # low interest. portmaster pkg colordiff gnugrep wdiff pcre pcre2 tmux htop twm tvtwm libxml2 python(?:-?3)? lua(?:5[4-9])? erlang(?:-?24)? jq glow raku cron # Lines to skip. ::skip:: # Could not care less. cosmetic tidy up comment (add|set|update|remove)[ \t]+(?:NO_ARCH|LICENSE|URL|www|website|webpag +e)

In reply to filter-ports-commit-log.pl - Filter, highlight FreeBSD Ports one-line commit log messages by parv

Title:
Use:  <p> text here (a paragraph) </p>
and:  <code> code here </code>
to format your post; it's "PerlMonks-approved HTML":



  • Are you posting in the right place? Check out Where do I post X? to know for sure.
  • Posts may use any of the Perl Monks Approved HTML tags. Currently these include the following:
    <code> <a> <b> <big> <blockquote> <br /> <dd> <dl> <dt> <em> <font> <h1> <h2> <h3> <h4> <h5> <h6> <hr /> <i> <li> <nbsp> <ol> <p> <small> <strike> <strong> <sub> <sup> <table> <td> <th> <tr> <tt> <u> <ul>
  • Snippets of code should be wrapped in <code> tags not <pre> tags. In fact, <pre> tags should generally be avoided. If they must be used, extreme care should be taken to ensure that their contents do not have long lines (<70 chars), in order to prevent horizontal scrolling (and possible janitor intervention).
  • Want more info? How to link or How to display code and escape characters are good places to start.
Log In?
Username:
Password:

What's my password?
Create A New User
Domain Nodelet?
Chatterbox?
and the web crawler heard nothing...

How do I use this?Last hourOther CB clients
Other Users?
Others sharing their wisdom with the Monastery: (3)
As of 2024-04-19 01:41 GMT
Sections?
Information?
Find Nodes?
Leftovers?
    Voting Booth?

    No recent polls found