#!/usr/local/bin/perl
use v5.30;
use strict; use warnings;
use feature qw/ signatures /;
no warnings qw/ experimental::signatures /;
my $VERSION = q/20210710.01/;
use Getopt::Long;
use Pod::Usage;
use Term::ANSIColor qw/ color /;
my $help;
GetOptions( 'h|help' => \$help )
or pod2usage( '-verbose' => 1 , '-exitval' => 1,
'-message' => q/Only -h or --help option is available
+./
);
$help and pod2usage( '-verbose' => 2 );
# Show usage() if neither a pipe is connected, nor CLI arguments given
+.
( scalar @ARGV || -p \*STDIN )
or pod2usage( '-verbose' => 99, '-exitval' => 1,
'-sections' => [ qw/Name Synopsis/ ]
);
=pod
=head1 Name
filter-ports-commit-log.pl - Remove inane one-line commit log messages
+ in
FreeBSD Ports repository.
=head1 Synopsis
% ( cd /usr/ports && git log --no-merges --pretty=oneline -n 100 ) \
| filter-ports-commit-log.pl | less
... or ...
% collect='git-log' ; \
( cd /usr/ports && git log --no-merges --pretty=oneline -n 100 ) \
>| $collect \
&& filter-ports-commit-log.pl $collect >| $collect.filtered
=head1 Description
Purpose is to remove inane one-line commit log messages in FreeBSD Por
+ts
repository, and highlights some others. It is not currently suitable t
+o filter
long form of commit messages.
To see usage, run without any arguments or standard input provided via
+ pipe.
Complete list of regexen to highlight port names of interest, and part
+ial list
to skip the commit lines are extracted from C<DATA>. Currently there i
+s no way
to provide those lists via files.
=head2 Options
=over
=item B<--help>
Show complete help message.
=back
=cut
# To skip low SNR commit lines.
my $skip_inane = 1;
# To highlight possibly significant changes.
my $highlight_changes = 1;
( @ARGV
? sub{ filter_log_file( @ARGV ) }
: sub{ filter( \*STDIN ) }
)->();
exit;
# Prints some lines with highlights; others with none; skips some alto
+gether.
#
# Arguments ...
# @file: file list.
sub filter_log_file( @file )
{
for my $path ( @file )
{
my $fd;
if ( ! open $fd, '<', $path )
{
warn qq/Cannot open '$path': $!\n/;
next;
}
filter( $fd );
close( $fd ) or die qq/Could not close '$path': $!\n/;
}
}
# Prints some lines with highlights; others with none; skips some alto
+gether.
#
# Global Variables ...
# $skip_inane : flag to skip lines.
# $highlight_changes: flag to highlight changes.
#
# Arguments ...
# $fd: file descriptor.
sub filter( $fd )
{
state $skipper = $skip_inane ? \&may_skip : sub { 0 };
state $highlighter = ! $highlight_changes
? sub { $_[0] }
: sub { highlight_change( highlight_tracked( $
+_[0] ) ) }
;
while ( defined ( my $line = <$fd> ) )
{
$skipper->( $line ) or print $highlighter->( $line );
}
return
}
# Returns a truth value if to skip printing the line.
#
# Part of regex list to skip a line is extracted from C<DATA>; start o
+f the list
# is marked by C<qr/^::skip::/>.
#
# Arguments ...
# $line: string to match.
sub may_skip( $line )
{
state $version = qr/[a-z0-9,._-]+/iaa;
state $list_start = qr/^ ::skip:: /xi;
state @skip = ( # Version updates.
qr/ up(?:dat|grad)e.*to [ \t]+ $version /ix
, qr/$version -> $version/
# Applies to all the ports.
, quotemeta( q[*/*] )
, extract_list( $list_start, 0, 0 )
);
for my $re ( @skip )
{
$line =~ $re and return 1;
}
return 0;
}
# Returns a string possibly with highlight (reversed bold white on bla
+ck
# background).
#
# List of regex list to identify port names of interest, to me is extr
+acted from
# C<DATA>; start of the list is marked with C<qr/^::interest::/>.
#
# Arguments ...
# $line: string to search to find things of interest.
sub highlight_tracked( $line )
{
$line or return $line;
state $term_track = color( q/reverse bold white on_black/ );
state $list_start = qr/^ ::interest:: /ix;
state @track = extract_list( $list_start, 1, 1 );
return apply( $line, $term_track, \@track );
}
# Returns a highlighted string.
#
# Arguments ...
# $line : string to highlight, in part or in whole.
# $highlight: terminal escape sequence, 'highlight'. Terminal reset
+ sequence
# is added after the matched string.
# $change : array reference of regexen with capture(s); only the
+first is
# used.
sub apply( $line, $highlight, $change )
{
( $change && $highlight ) or return $line;
state $term_reset = color( q/reset/ );
for my $re ( @{ $change } )
{
$line =~ s/$re/${highlight}$1${term_reset}/;
}
return $line;
}
# Returns a regex list extracted from C<DATA>, between C<qr/$sec_start
+/> and
# (/^::/ or EoF).
#
# One regex per line is expected.
#
# Arguments ...
# $sec_start : regex to identify start of the list (on its own li
+ne).
# $set_boundary: flag to wrap a regex with boundary assertion, C<\b
+>.
# $capture : flag to create a regex to capture the matched stri
+ng.
sub extract_list( $sec_start, $set_boundary = undef, $capture = 1 )
{
# In order to extract multiple lists by coming back to this positio
+n.
state $DATA_start = tell DATA;
my @found;
seek DATA, $DATA_start, 0;
while( defined ( my $line = <DATA> ) )
{
$line =~ m{$sec_start} ... ( $line =~ /^::[a-z0-9]+::/i or eof(
+DATA ) )
or next;
$line =~ /^\s*(?::|#|$)/ and next;
push @found, $line =~ s/\s+$//r;
}
my $flag = $set_boundary ? '\b' : '' ;
# XXX: Rather UGLY!
my $maker = $capture
? sub { qr/($flag$_[0]$flag)/i }
: sub { qr/$flag$_[0]$flag/i}
;
my %seen;
return
map { $maker->( $_ ) }
grep { !$seen{ $_ }++ }
@found
;
}
# Returns a string possibly with highlights.
#
# Terminal highlight is applied only once. See C<&setup_highlight> for
+ the
# highlight details.
#
# Arguments ...
# $line: string to search to find things of interest.
sub highlight_change( $line )
{
$line or return;
# As of perl 5.32.1, cannot assign C<state> variables in a list.
state $order;
state %map;
( $order, %map ) = setup_highlight();
my $orig = $line;
for my $hilite ( @{ $order } )
{
$line = apply( $line, $hilite, $map{ $hilite } );
# Only apply a single highlight.
$orig eq $line or last;
}
return $line;
}
# Returns a 2-element list ...
# - array reference of terminal escape sequence ('highlight') strin
+gs as the
# order of highlights;
# - a hash of above highlight string as key and regex array referen
+ce to
# match thing of interest as value.
#
# Highlight order & proposed usage ...
# - alert: revered red text on white background;
# - good : revered green text on black background;
# - fair : bold yellow text on black background.
sub setup_highlight()
{
state $term_good = color( q/reverse green on_black/ );
state $term_fair = color( q/bold yellow on_black/ );
state $term_alert = color( q/reverse red on_white/ );
# List of regex is not put in C<DATA> as fully formed regex per lin
+e is
# expected; no allowance is made for regex interpolations.
#
# XXX: A better way would be a proper parser.
state $redo = q/re(?:add|surrect|store|vert)/;
state $undo = q/un(?:break|bork|delet)/;
state $fix_change = qr/(?:fix|$redo|$undo|add miss)[a-z]*/i;
state $normal_change = q/(?:add|new|update|change|remove)[a-z]*/;
state $alert_change = q/(?:disable|ignore|broke|bork)[a-z]*/;
state $port_category = qr/[a-z0-9-]+/aa;
state $port_name = qr/[A-Za-z0-9-]+/aa;
state $port_origin = qr{$port_category/$port_name};
# Only highlight matched change type.
state @good = ( qr/\b(${fix_change}.+$)/i
, qr/\b(options? $normal_change)/i
);
state @fair = ( qr/\b($normal_change(?: (?:port|option|$port_origin
+))?)/i
, qr/\b(enable|lighten[a-z]*)/i
);
# Highlight the whole line.
state @alert = ( qr/\b(${alert_change}.+$)/i
, qr/\b((?:add|update).+?depend[a-z]*.*$)/i
);
state %map = ( $term_good => \@good
, $term_fair => \@fair
, $term_alert => \@alert
);
state @order = ( $term_alert, $term_good, $term_fair );
return ( \@order, %map );
}
=pod
=head1 Future Improvement
=over 2
=item *
Currently list of regexen of interesting port names is hard coded in C
+<DATA>
section; list of regexen when to skip has been hard coded in the C<&ma
+y_skip>.
So collect them from external file(s) at some point.
=item *
Be able to skip long form of messages. Currently only the lines which
+match in a
long form of a commit message is skipped, possibly rendering such a me
+ssage
rather useless.
=back
=head1 Bugs
Types of "inane" and "meaningful" changes (for some definitions of the
+m) and
related verbiage are constantly changing. A proper way to deal with mi
+nimal
changes to the program would be to use a proper parser.
=head1 See Also
C<git help log>, namely I<--since>; I<--grep>, I<--regexp-ignore-case>
+,
I<--all-match>, I<--perl-regexp>; & I<--no-merges> options.
=head1 Author
parv
=head1 License
Copyright 2021 parv
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
+ met:
1. Redistributions of source code must retain the above copyright noti
+ce, this
list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright n
+otice,
this list of conditions and the following disclaimer in the documentat
+ion and/
or other materials provided with the distribution.
3. Neither the name of the copyright holder nor the names of its contr
+ibutors
may be used to endorse or promote products derived from this software
+without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "A
+S IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUEN
+TIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOOD
+S OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOW
+EVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIA
+BILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
+THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
=cut
__DATA__
# - "::blah::" marks the start of a regex section, one per line. It is
+ read
# until another "::" section is seen or until end of the file.
#
# - Empty lines & lines starting with comment character, "#", are skip
+ped.
# Port names -- regex or fixed string -- of interest are matched on wo
+rd
# boundary.
::interest::
# Of most interest;
zsh
vim
aspell
w3m
mutt
slrn
xorg
fvmw3(?:-dev[a-z]*)?
xterm(?:[-a-z0-9]+)?
firefox
xpdf
gv
xv
perl5
p5-Try-Tiny
p5-DateTime(?:-[a-zA-Z0-9-]+)?
ack
p5-Term-ReadLine-Gnu
rsync
curl
wget
git
mercurial
wmmixer
xclock
xrdb
xset
xkill
virtualbox-ose-additions(?:-legacy)?
mplayer
tigervnc
xclip
xclipboard
# medium interest;
p5-Term-ANSIColor
dma
slrn
xmodmap
xkeycaps
xsetroot
xbatt
xosview
xload
the_silver_searcher
# low interest.
portmaster
pkg
colordiff
gnugrep
wdiff
pcre
pcre2
tmux
htop
twm
tvtwm
libxml2
python(?:-?3)?
lua(?:5[4-9])?
erlang(?:-?24)?
jq
glow
raku
cron
# Lines to skip.
::skip::
# Could not care less.
cosmetic
tidy up comment
(add|set|update|remove)[ \t]+(?:NO_ARCH|LICENSE|URL|www|website|webpag
+e)