#!/usr/bin/perl -w # # csved - apply a Perl expression to all records of a CSV file # # Copyright (c) 2003-2022 Gerben Vos. All rights reserved. # This program is free software; you can redistribute it and/or # modify it under the same terms as Perl 5.8.0. # # Run without arguments for usage information. # # Requires the Text::CSV_XS module by Jochen Wiedmann and Alan Citterman, # available from http://www.cpan.org/ . use v5.10; use strict 'vars', 'refs'; # This is a weird usage of tying because the hash is tied only once, # but the underlying data changes for every processed line. package Tie::FieldNames; sub TIEHASH { my $self = shift; my $i = 0; my $tied = { HEADERS => { map { ($_, $i++) } @_ }, }; die "Duplicate field names: " . join(", ", map { "'$_'" } _dups(@_)) unless scalar keys %{$tied->{HEADERS}} == scalar @_; return bless $tied, $self; } sub _dups { my %count; ++$count{$_} for @_; return grep { $count{$_} > 1 } keys %count; } sub setcurrent { $_[0]->{CURRENT} = $_[1]; } sub FETCH { my ($self, $key) = @_; die "$key: No such field name" unless exists $self->{HEADERS}->{$key}; return $self->{CURRENT}->[$self->{HEADERS}->{$key}]; } sub STORE { my ($self, $key, $value) = @_; die "$key: No such field name" unless exists $self->{HEADERS}->{$key}; $self->{CURRENT}->[$self->{HEADERS}->{$key}] = $value; } sub DELETE { my ($self, $key) = @_; die "$key: No such field name" unless exists $self->{HEADERS}->{$key}; delete $self->{CURRENT}->[$self->{HEADERS}->{$key}]; } sub EXISTS { my ($self, $key) = @_; die "$key: No such field name" unless exists $self->{HEADERS}->{$key}; return exists $self->{CURRENT}->[$self->{HEADERS}->{$key}]; } sub FIRSTKEY { my ($self) = @_; my $dummy = keys %{$self->{HEADERS}}; each %{$self->{HEADERS}}; } sub NEXTKEY { return each %{$_[0]->{HEADERS}}; } sub SCALAR { return scalar grep { defined } @{$_[0]->{CURRENT}}; } package main; use Text::CSV_XS; use IO::Handle; my $_silent = 0; my $_use_headers = 0; my $_begin = ""; my $_end = ""; my $_sep = ','; my $_quote = '"'; my $_escape = '"'; my $_progname; ($_progname = $0) =~ s#.*/##; my $_usage = < 1) { ... }" to not apply some code to the first line. Variable names starting with _ are reserved, don't use them in expr. Uses the Text::CSV_XS module by Jochen Wiedmann and Alan Citterman, with thanks. USAGE while (@ARGV > 0 and $ARGV[0] =~ m{^-}) { my $opt = shift; $_silent = 1, next if $opt eq '-n'; $_use_headers = 1, next if $opt eq '-h'; $_begin = shift, next if $opt eq '-b'; $_end = shift, next if $opt eq '-e'; $_sep = shift, next if $opt eq '-F'; $_quote = shift, next if $opt eq '-Q'; $_escape = shift, next if $opt eq '-E'; die $_usage; } die $_usage unless @ARGV > 0; my $_expr = shift; my $_csv = Text::CSV_XS->new({ sep_char => $_sep, quote_char => $_quote, escape_char => $_escape, binary => 1, eol => "\n" }); unshift(@ARGV, '-') unless @ARGV; { no strict; eval $_begin; die $@ if $@; } my %F; my $_tied; while ($ARGV = shift) { open(_IN, $ARGV); my $_fields; while ($_fields = $_csv->getline(\*main::_IN) and @$_fields) { my(@F) = @$_fields; if ($_use_headers) { if ($. == 1) { $_tied = tie %F, 'Tie::FieldNames', @F; } $_tied->setcurrent(\@F); } # This means you can use next, last, and redo in expr # without excessive noise. Also turn off strictness. no warnings "exiting"; no strict; eval $_expr; die $@ if $@; $_csv->print(STDOUT, [ grep { defined } @F ]) unless $_silent; } } if ($_use_headers) { undef $_tied; untie %F; } { no strict; eval $_end; die $@ if $@; }