#!/usr/bin/perl
#
# Print the lines of a file in random order without loading the file
# into memory.
#
# Usage: <script> FILE
#
# Pass 1 reads FILE in fixed-size chunks and records the byte offset at
# which every line starts, packed into a single string (one fixed-width
# integer per line, far smaller than an array of scalars).
# Pass 2 repeatedly draws a random line number, skips it if a bit vector
# says it was already printed, and otherwise seeks to the line and copies
# it to STDOUT.
use strict;
use warnings;
use POSIX qw( ceil );
use List::Util qw( shuffle );

my $file = $ARGV[ 0 ];
defined $file or die "Usage: $0 FILE\n";

open my $fh, '<', $file or die "Couldn't open $file for reading: $!\n";

# Offsets handed to seek() are byte offsets, so no I/O layer may translate
# line endings or decode characters on either side.
binmode $fh;
binmode STDOUT;

# 'J' is perl's native unsigned integer, so offsets do not wrap at 4 GiB
# on perls built with 64-bit integers (a fixed 'L' always would).
my $offset_format = 'J';
my $offset_size   = length pack $offset_format, 0;
my $chunk_size    = 128 * 1024;

# --- Pass 1: index the start offset of every line -------------------------

my $basepos = 0;                          # file offset of the current chunk
my $offs    = pack $offset_format, 0;     # the first line starts at offset 0

while ( 1 ) {
    my $advance = read $fh, my $chunk, $chunk_size;
    defined $advance or die "Couldn't read from $file: $!\n";
    last if $advance == 0;                # end of file

    # After a successful //g match, pos() is the offset just past the
    # newline, i.e. where the following line starts.
    while ( $chunk =~ /\n/g ) {
        $offs .= pack $offset_format, $basepos + pos( $chunk );
    }
    $basepos += $advance;
}

# Line N spans offsets [ entry N, entry N+1 ), so the index needs one final
# entry equal to the file size. When the file ends with a newline (or is
# empty) the scan above already produced it; otherwise add it so the
# unterminated last line gets an end offset too.
my $last_offset = unpack $offset_format, substr( $offs, -$offset_size );
$offs .= pack $offset_format, $basepos if $last_offset != $basepos;

# One entry per line plus the closing end-of-file entry.
my $total_lines = length( $offs ) / $offset_size - 1;

# --- Pass 2: emit every line exactly once, in random order ----------------

# One bit per line: set once the line has been printed.
my $been_here = "\0" x ceil( $total_lines / 8 );

my $printed = 0;
while ( $printed < $total_lines ) {
    my $line = int rand $total_lines;
    next if vec( $been_here, $line, 1 );  # already printed, draw again
    vec( $been_here, $line, 1 ) = 1;

    # Start of this line and start of the next one (or end of file).
    my ( $start, $end ) = unpack "${offset_format}2",
        substr( $offs, $offset_size * $line, 2 * $offset_size );
    my $length = $end - $start;

    seek $fh, $start, 0
        or die "Couldn't seek to offset $start in $file: $!\n";

    my $text;
    my $got = read $fh, $text, $length;
    defined $got or die "Couldn't read from $file: $!\n";
    $got == $length
        or die "Short read at offset $start in $file (file changed?)\n";

    # Only a final line without a trailing newline can lack a terminator;
    # supply one so it is not fused with whatever is printed next.
    $text .= "\n" if substr( $text, -1 ) ne "\n";

    print $text or die "Couldn't write to STDOUT: $!\n";
    ++$printed;
}