#!/usr/bin/perl

=head1 NAME

uhead -- unicode-aware version of unix "head"

=head1 SYNOPSIS

 uhead -c N [file ...]

   show first N unicode chars from file(s)

=head1 DESCRIPTION

This does what the standard "head -c N" command (GNU version) would do
(i.e. show the first N characters from one or more files), with just
the following differences:

=over 4

=item *

The "-c N" option is required (not optional)

=item *

N refers to a number of UTF-8 encoded unicode characters rather than bytes

=item *

"Negative" values for N are not supported (you cannot elect to view
all but the last N characters)

=back

If no files are provided on the command line, it will read from STDIN
instead.  (But if it notices that STDIN is actually the user's tty,
not a pipe or redirection from a file, it will exit with a suitable
error message.)

=head1 AUTHOR

David Graff <graff(at)ldc.upenn.edu>

=cut

use strict;
use warnings;

my $Usage = "Usage: $0 -c N [file ...]\n";

# The only accepted invocation is "-c N" followed by zero or more files.
die $Usage
    unless ( @ARGV > 1 and $ARGV[0] eq '-c' and $ARGV[1] =~ /^\d+$/ );

shift;    # discard the '-c' flag itself
my $show_chrs = shift;

# STDIN is the data source only when no files were named.  Deciding this
# from @ARGV (rather than from whether STDIN is a tty) keeps file arguments
# working when the script runs with a non-interactive STDIN, e.g. under
# cron or inside a pipeline.
my $from_stdin = !@ARGV;

# Reading from an interactive terminal would just hang waiting for input.
if ( $from_stdin and -t STDIN ) {
    die "You need to provide some data (pipe or file(s))\n$Usage";
}

binmode STDOUT, ":utf8";

if ($from_stdin) {
    binmode STDIN, ":utf8";
    print_head( \*STDIN, $show_chrs, 'STDIN' );
}
else {
    my $nfiles = @ARGV;

    for my $file (@ARGV) {
        my $fh;
        unless ( open( $fh, "<:utf8", $file ) ) {
            warn "open failed on $file\n";
            next;
        }

        # Like head(1), label each chunk when several files are shown.
        print "\n==> $file <==\n" if ( $nfiles > 1 );
        print_head( $fh, $show_chrs, $file );
        close $fh;
    }
}

# Read up to $limit characters from $fh (which must already carry a UTF-8
# input layer, so that read() counts characters rather than bytes) and
# print them to STDOUT followed by a newline.  $label names the source in
# the warning emitted when the read itself fails.
sub print_head {
    my ( $fh, $limit, $label ) = @_;

    my $head = '';
    defined read( $fh, $head, $limit )
        or warn "read failed on $label: $!\n";

    print $head, "\n";
    return;
}
In reply to uhead: "head -c" for utf8 data by graff
| For: | Use: |
| & | &amp; |
| < | &lt; |
| > | &gt; |
| [ | &#91; |
| ] | &#93; |