#!/opt/perl5/bin/perl -w
##
## chunkify - split a text file into several smaller files.
##
## The input is split either into 'num' (more-or-less) equal-size chunks
## (-n num) or into chunks holding at most 'num' lines each (-l num).
## An optional header and/or footer file can be copied into every chunk.
##
## Usage:
##    chunkify <-n num | -l num> [-v] [-o <output name>]
##             [-f <footer>] [-h <header>] <input file>
##
## Exit status: 0 on success, 1 on a usage error; I/O failures die().
##
use strict;
use warnings;
use Getopt::Std;

my %Options;
my $opts_ok = getopts('vo:h:f:n:l:', \%Options);

##
## getopts() failed: say which switch is missing its argument (Getopt::Std
## stores undef for such a switch), show the usage text and stop.
## Continuing would run the split with no chunk size at all.
##
unless ($opts_ok) {
    my @needs_arg = (
        [ n => 'a numeric argument' ],
        [ l => 'a numeric argument' ],
        [ o => 'a filename' ],
        [ h => 'a filename' ],
        [ f => 'a filename' ],
    );
    for my $spec (@needs_arg) {
        my ($flag, $what) = @$spec;
        if (exists $Options{$flag} && !defined $Options{$flag}) {
            print STDERR "\nERROR: -$flag requires $what if used.\n";
        }
    }
    ShowUsage(\*STDERR);
    exit(1);
}

my $verbose = $Options{v};

fail("Cannot use -l and -n simultaneously.")
    if defined $Options{n} && defined $Options{l};
fail("Either -n or -l must be specified.")
    unless defined $Options{n} || defined $Options{l};

## A zero or non-numeric size would make the split loop meaningless,
## so reject it up front.
for my $flag (grep { defined $Options{$_} } qw(n l)) {
    fail("-$flag requires a positive whole number.")
        unless $Options{$flag} =~ /\A\d+\z/ && $Options{$flag} > 0;
}

my $chunks = defined $Options{n} ? $Options{n} + 0 : 0;
my $lines  = defined $Options{l} ? $Options{l} + 0 : 0;

##
## The input file is the last argument left over once getopts() has
## consumed the switches and their values.
##
unless (@ARGV) {
    print STDERR "\nERROR: No input file specified.\n";
    ShowUsage(\*STDERR);
    exit(1);
}
my $infile = $ARGV[-1];

##
## By default the output name is based on the input file name.
## This can be overridden by the -o option.
##
my $froot = $infile;
if (defined $Options{o} && length $Options{o}) {
    $froot = $Options{o};
}

## Text before/after the first '%' surrounds the sequence number; with no
## '%' in the name the sequence number is simply appended.
my ($outfront, $outback) = split /%/, $froot, 2;
$outfront = '' unless defined $outfront;
$outback  = '' unless defined $outback;

##
## If header and/or footer files were specified, get their contents.
##
my @headlines = defined $Options{h} ? read_lines($Options{h}, 'header') : ();
my @footlines = defined $Options{f} ? read_lines($Options{f}, 'footer') : ();

##
## Either we were given the "lines" option, or we were given the "chunks"
## option.  If we got chunks, we need to figure out how many lines that is
## going to be, and the only way is to read through the file, counting lines.
##
my @chunk_sizes;
if ($chunks != 0) {
    my $total = count_lines($infile);
    @chunk_sizes = chunk_sizes($total, $chunks);
    $lines = int($total / $chunks);
    print "Input file has $total lines; will put at least $lines lines in each output file.\n"
        if $verbose;
}

open my $in, '<', $infile or die "Could not open file $infile: $!";

my $seq = 0;
until (eof $in) {
    $seq++;
    my $fname = $outfront . sprintf("%03d", $seq) . $outback;

    ## Per-chunk size in -n mode, the fixed size in -l mode.  The final
    ## fallback of 1 guarantees that every pass consumes input, even if the
    ## file grew between the counting pass and this one.
    my $size = $chunk_sizes[$seq - 1] || $lines || 1;

    print "Creating $fname with $size lines\n" if $verbose;
    write_chunk($in, $fname, $size, \@headlines, \@footlines);
}
close $in;

exit(0);

##
## fail($message)
##   Print "ERROR: $message" on STDERR and exit with status 1.
##
sub fail {
    my ($message) = @_;
    print STDERR "\nERROR: $message\n";
    exit(1);
}

##
## read_lines($path, $what)
##   Return all lines of $path as a list.  $what ("header" or "footer") is
##   only used in the error message.  Dies if the file cannot be opened.
##
sub read_lines {
    my ($path, $what) = @_;
    open my $fh, '<', $path or die "Could not open $what file $path: $!";
    my @content = <$fh>;
    close $fh;
    return @content;
}

##
## count_lines($path)
##   Return the number of lines in $path.  Dies if the file cannot be
##   opened or closed.
##
sub count_lines {
    my ($path) = @_;
    open my $fh, '<', $path or die "Could not open file $path: $!";
    my $total = 0;
    $total++ while <$fh>;
    close $fh or die "ERROR: Could not close $path: $!";
    return $total;
}

##
## chunk_sizes($total, $chunks)
##   Return a list of $chunks sizes that add up to $total.  Did it divide
##   out evenly?  (eg: 141 lines into 13 "equal" files ain't evenly.)
##   If not, the first ($total % $chunks) chunks get one extra line.
##
sub chunk_sizes {
    my ($total, $chunks) = @_;
    my $base  = int($total / $chunks);
    my $extra = $total - $base * $chunks;
    return map { $base + ($_ <= $extra ? 1 : 0) } 1 .. $chunks;
}

##
## write_chunk($in, $fname, $max_lines, \@header, \@footer)
##   Create $fname containing the header lines, up to $max_lines lines read
##   from the open handle $in, and the footer lines.  Returns the number of
##   input lines copied.  Dies on any open/write/close failure.
##
sub write_chunk {
    my ($in, $fname, $max_lines, $header, $footer) = @_;

    open my $out, '>', $fname
        or die "ERROR Could not open output file $fname: $!";

    ## 'or' (not '||') so that the result of print itself is tested and the
    ## arrays stay in list context.
    if (@$header) {
        print {$out} @$header or die "ERROR writing header to $fname: $!";
    }

    my $written = 0;
    while ($written < $max_lines) {
        my $line = <$in>;
        last unless defined $line;
        print {$out} $line or die "ERROR writing to output file $fname: $!";
        $written++;
    }

    if (@$footer) {
        print {$out} @$footer or die "ERROR writing footer to $fname: $!";
    }

    ## Buffered write errors only surface here, so the check matters.
    close $out or die "ERROR closing $fname: $!";
    return $written;
}

##
## ShowUsage([$fh])
##   Print the usage text to $fh (STDOUT when omitted).
##
sub ShowUsage {
    my ($fh) = @_;
    $fh = \*STDOUT unless defined $fh;
    print {$fh} <<'EOD';

Usage: chunkify.pl <-n num | -l num> [-o <output filename>] [-v]
       [-h <header text file>] [-f <footer text file>] <input file name>

Required parameters (specify only one):
  -n : split input file name into 'num' (more-or-less) equal-size files.
  -l : split input file into files with 'num' lines.

  In most cases, not all output files will have the same number of lines.
  If -l is used, each file will have that many lines, except for the last
  file, which may have fewer.
  If -n is used, each file will have X or X+1 lines, where X is calculated
  based on number of output files required and size of the input file.

Optional parameters:
  -v : verbose output
  -o : output file name. Use % sign to indicate position of sequence number.
       If % not specified, sequence number will be appended.
       Examples:
         file%.txt: will generate file001.txt, file002.txt, file003.txt ...
         file.txt:  will generate file.txt001, file.txt002, file.txt003 ...
         %file:     will generate 001file, 002file, 003file ...
       If -o is not specified, output filename will be built from input
       file name, with sequence number appended.
  -h : file containing text to be prepended to each created output file.
  -f : file containing text to be appended to each created output file.

EOD
    return;
}

In reply to File Chunkifier by husker

Title:
Use:  <p> text here (a paragraph) </p>
and:  <code> code here </code>
to format your post; it's "PerlMonks-approved HTML":



  • Posts are HTML formatted. Put <p> </p> tags around your paragraphs. Put <code> </code> tags around your code and data!
  • Titles consisting of a single word are discouraged, and in most cases are disallowed outright.
  • Read Where should I post X? if you're not absolutely sure you're posting in the right place.
  • Please read these before you post! —
  • Posts may use any of the Perl Monks Approved HTML tags:
    a, abbr, b, big, blockquote, br, caption, center, col, colgroup, dd, del, details, div, dl, dt, em, font, h1, h2, h3, h4, h5, h6, hr, i, ins, li, ol, p, pre, readmore, small, span, spoiler, strike, strong, sub, summary, sup, table, tbody, td, tfoot, th, thead, tr, tt, u, ul, wbr
  • You may need to use entities for some characters, as follows. (Exception: Within code tags, you can put the characters literally.)
            For:     Use:
    & &amp;
    < &lt;
    > &gt;
    [ &#91;
    ] &#93;
  • Link using PerlMonks shortcuts! What shortcuts can I use for linking?
  • See Writeup Formatting Tips and other pages linked from there for more info.