Beefy Boxes and Bandwidth Generously Provided by pair Networks
Clear questions and runnable code
get the best and fastest answer
 
PerlMonks  

cutf - cut by field name

by diotalevi (Canon)
on Sep 14, 2006 at 22:23 UTC ( [id://573019]=sourcecode: print w/replies, xml ) Need Help??
Category: Utility Scripts
Author/Contact Info
Description: Print selected parts of lines from each FILE to standard output. Selects parts by field name unlike /usr/bin/cut which uses column numbers.
#!/usr/bin/perl
use strict;
use warnings;
use Getopt::Long;
use autouse 'Pod::Usage' => 'pod2usage';

# Command-line options:
#   -d, --delimiter  field separator used for both input and output
#                    (default: TAB)
#   -f, --fields     comma-separated list of field specs (default: empty)
#   -v               invert the selection made with -f
#   --help, --man    hand off to pod2usage at verbosity 1 and 2 respectively
my $delim     = "\t";
my $spec_list = q{};
my $reverse;
GetOptions(
    'd|delimiter=s' => \$delim,
    'f|fields=s'    => \$spec_list,
    'v'             => \$reverse,
    help => sub { pod2usage( -verbose => 1 ) },
    man  => sub { pod2usage( -verbose => 2 ) },
    )
    or pod2usage( -verbose => 0 );

# \Q quotes the delimiter so that it is matched literally, even when it
# contains regex metacharacters or whitespace (which /x would otherwise skip).
my $delim_rx = qr/\Q$delim/xm;

# The first input line is the header. Field names are upper-cased so that
# name-based specs can be matched case-insensitively.
my $header = <>;

# Empty input yields undef; substitute an empty header so that split does
# not emit an "uninitialized value" warning. The field list is empty either
# way.
$header = q{} if !defined $header;

# chomp runs after the split so that the newline keeps a trailing empty
# header field from being discarded by split.
my @field_names = map {uc} split /$delim_rx/xm, $header;
chomp @field_names;

# Translate the comma-separated -f list into zero-based column indexes,
# kept in the order the specs were given. Blank specs are ignored; every
# other spec is upper-cased so it can be compared with the upper-cased
# header names. A numeric spec that is out of range falls through to the
# name lookup, so a column that is literally named with digits still works.
my @selected_fields;
SPEC: for my $spec (
    map { $_ =~ /\S/xm ? uc $_ : () }
    split /,/xm, $spec_list
    )
{
    ## no critic ProhibitCascadingIfElse
    if ( $spec =~ /\A(\d+)\z/xm and ( 1 <= $1 and $1 <= @field_names ) ) {

        # N: a single one-based column number.
        push @selected_fields, $1 - 1;
        next SPEC;
    }
    elsif ( $spec =~ /\A(\d+)-(\d+)\z/xm
        and ( 1 <= $1 and $1 <= @field_names )
        and ( 1 <= $2 and $2 <= @field_names )
        and $1 <= $2 )
    {

        # N-M: columns N through M, inclusive.
        push @selected_fields, $1 - 1 .. $2 - 1;
        next SPEC;
    }
    elsif ( $spec =~ /\A-(\d+)\z/xm
        and ( 1 <= $1 and $1 <= @field_names ) )
    {

        # -M: the first column through column M, inclusive.
        push @selected_fields, 0 .. $1 - 1;
        next SPEC;
    }
    elsif ( $spec =~ /\A(\d+)-\z/xm
        and ( 1 <= $1 and $1 <= @field_names ) )
    {

        # N-: column N through the last column named in the header.
        push @selected_fields, $1 - 1 .. $#field_names;
        next SPEC;
    }
    else {

        # Field name: select *every* column whose header matches, so
        # duplicated header names yield all of their columns.
        my @matching
            = grep { $spec eq $field_names[$_] } 0 .. $#field_names;
        if (@matching) {
            push @selected_fields, @matching;
            next SPEC;
        }
    }

    # Reached only when no form above accepted the spec.
    die qq<Invalid spec "$spec".\n>;
}

# -v: replace the selection with its complement, i.e. every header column
# that was NOT selected, in ascending column order.
if ($reverse) {
    my %is_selected = map { $_ => 1 } @selected_fields;
    @selected_fields = grep { !$is_selected{$_} } 0 .. $#field_names;
}

# Emit the selected header names first, then the same columns of every
# remaining input line, joined with the input delimiter.
my $header_out = join $delim, @field_names[@selected_fields];
print "$header_out\n";

while ( defined( my $record = <> ) ) {

    # Split before chomping, exactly as for the header line.
    my @cells = split /$delim_rx/xm, $record;
    chomp @cells;

    # Lines shorter than the header produce undef cells in the slice;
    # those are printed as empty strings without a warning.
    ## no critic NoWarnings
    no warnings 'uninitialized';
    my $record_out = join $delim, @cells[@selected_fields];
    print "$record_out\n";
}

__END__

=head1 NAME

cutf - remove sections from each line of files

=head1 SYNOPSIS

  cutf [OPTION]... [FILE]...

  cutf --man for more options

=head1 DESCRIPTION

Print selected parts of lines from each FILE to standard output.

=head1 OPTIONS

Mandatory arguments to long options are mandatory for short options
too.

=over

=item -b, --bytes=LIST

TODO: output only these bytes

=item --output-delimiter=STRING

TODO: use C<STRING> as the output delimiter; the default is to use the
input delimiter

=item --help

display this help and exit

=item --version

TODO: output version information and exit

=back

Use one, and only one of -b, -c or -f. Each LIST is made up of one
range, or many ranges separated by commas. Each range is one of:

  N      Nth byte, character or field, counted from 1
  
  N-     from Nth byte, character or field, to end of line
  
  N-M    from Nth to Mth (included) byte, character or field
  
  -M     from first to Mth (included) byte, character or field

=over

=item -c, --characters=LIST

TODO: output only these characters

=item -d, --delimiter=DELIM

use DELIM instead of TAB for field delimiter

=item -f, --fields=LIST

output only these fields; also print any line that contains no
delimiter character, unless the -s option is specified

=item -n

TODO: with -b: don't split multibyte characters

=item -s, --only-delimited

TODO: do not print lines not containing delimiters

=back

With no FILE, or when FILE is -, read standard input.

=head1 DIAGNOSTICS

=over

=item Invalid spec "%s".

Your spec named a column number or field name that is not in the header
line, or it wasn't one of the recognized forms:

 COLUMN
 FROM-TO
 -TO
 FROM-

=back

=cut

Log In?
Username:
Password:

What's my password?
Create A New User
Domain Nodelet?
Node Status?
node history
Node Type: sourcecode [id://573019]
help
Chatterbox?
and the web crawler heard nothing...

How do I use this?Last hourOther CB clients
Other Users?
Others browsing the Monastery: (3)
As of 2024-03-29 06:56 GMT
Sections?
Information?
Find Nodes?
Leftovers?
    Voting Booth?

    No recent polls found