#!/usr/bin/perl -w

#----------------------------------------------------------------------------
# Copyright (C) 2001-2003 Ed Halley
#----------------------------------------------------------------------------

=head1 NAME

unhead - remove the lines before a match from the input stream

untail - remove the lines following a match from the input stream

=cut

# Both 'unhead' and 'untail' are identical; they determine the task from
# the $0 perl variable.  The scripts can be linked or symlinked or simply
# copied, and will still work as long as they have the correct filenames.

=head1 SYNOPSIS

    unhead  '--BEGIN MESSAGE--'  *.txt

    untail  '--END MESSAGE--'  *.txt

=head1 DESCRIPTION

The historically common C<head>(1) and C<tail>(1) commands are for
keeping the head or tail of stream input, usually by a count of lines.
This pair of scripts differ in two respects: these scripts work on a
single regex (regular expression) to find a matching "cut here" point in
the text, and these scripts edit text files in place.

By default, the first argument should be a regular expression that should
match at least once in each subsequent file.  If no additional arguments
are given, or if the filename is a hyphen (-), then the standard input
and standard output streams are assumed.  For filenames, each file is
processed in turn, creating a backup file with a tilde (~) appended to
the original name.

The C<unhead> variant will remove the header above the given match, and
write out the tail (including the first matching line).  The C<untail>
variant will remove the trailing below the given match, and only write
out the head (not including the first matching line).

=cut

#----------------------------------------------------------------------------

use warnings;
use strict;

my $want = ($0 =~ /untail/)? 'head' : 'tail';
my $suffix = '~';

my $pattern = shift(@ARGV);
die "First argument should be a regular expression string" if not $pattern;
$pattern = qr/$pattern/;
@ARGV = ('-') if not @ARGV;

exit(main(@ARGV));

#----------------------------------------------------------------------------

sub main
{
    while (@_)
    {
	my $inp = *STDIN;
	my $outp = *STDOUT;
	my $file = shift;
	if ($file ne '-')
	{
	    die if not -w $file;
	    $inp = *FIN; $inp = *FIN; # hush hush warning warning
	    $outp = *FOUT; $outp = *FOUT;
	    unlink($file.$suffix) if -f $file.$suffix;
	    rename($file, $file.$suffix);
	    open($inp, $file.$suffix)
		or die "Cannot open $file";
	    open($outp, '>'.$file)
		or die "Cannot open $file";
	}

	my $have = 'head';
	while (<$inp>)
	{
	    $have = 'tail' if m{$pattern};
	    print $outp $_ if $want eq $have;
	}

	if ($file ne '-')
	{
	    close($outp);
	    close($inp);
	}
    }
}

__END__

#----------------------------------------------------------------------------

=head1 IDEAS

    # chop off email routing header info (first empty line)
    cat email.msg | unhead '^$'  

    # just students from "Danziger, Jane" to "Funicello, Thomas"
    query-students | sort | unhead '^D' | untail '^G'

    # keep the center section of a YACC grammar
    unhead '^%%$' *.y ; untail '^%%$' *.y

=head1 BUGS

The C<unhead> variety will discard everything and leave an empty file if
no lines match the given pattern.  This is not a bug, but worth a
warning.  In the case of actual files, you still have the tilde backup.

The script for both C<unhead> and C<untail> are identical, and one can be
a link to the other.  The script determines which part to keep based on
the script's name.

=head1 LICENSE

Copyright (C) 2001-2003  Ed Halley  <ed@halley.cc>

This program is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.  For details on the Perl Artistic
License, read the F<http://www.perl.com/language/misc/Artistic.html>
page.

=cut