#!/usr/bin/perl
#
# Print the lines of the file named by $ARGV[0] to STDOUT in random order.
#
# Variant 1: the line-start offsets are kept in one packed string of
# 32-bit unsigned integers (pack/unpack 'L') and the "already printed"
# flags in a bit vector, so no per-line Perl scalar is ever allocated.
#
# NOTE(review): the '####' line further down appears to separate two
# stand-alone variants of the same script (the second one repeats the
# shebang line); presumably each is meant to be saved as its own file --
# confirm.
use strict;
use warnings;
use POSIX qw( ceil );
use List::Util qw( shuffle );

my $chunk_size = 128 * 1024;    # bytes fetched per read() while indexing
my $max_offset = 0xFFFF_FFFF;   # largest value a packed 'L' can hold

my $basepos = 0;                # file offset of the start of the current chunk

open my $fh, '<', $ARGV[ 0 ]
    or die "Couldn't open $ARGV[ 0 ] for reading: $!\n";

# Entry 0 is the start of the first line.
my $offs = pack 'L', 0;

while( 1 ) {
    my $advance = read $fh, my $chunk, $chunk_size;
    defined $advance
        or die "Couldn't read from $ARGV[ 0 ]: $!\n";
    last unless $advance;

    # After a successful m/\n/g match pos() is the offset just past the
    # newline, i.e. the start of the following line within this chunk.
    while( $chunk =~ /\n/g ) {
        $offs .= pack 'L', $basepos + pos( $chunk );
    }

    $basepos += $advance;
    die "$ARGV[ 0 ] is too large for 32-bit line offsets\n"
        if $basepos > $max_offset;
}

# We will be looking for "start of following line", so the final entry
# must be the end-of-file offset.  When the file ends with a newline (or
# is empty) the last recorded entry already is that offset; otherwise add
# it, so that no special case is needed for the last line.
my $last_start = unpack 'L', substr $offs, -4;
$offs .= pack 'L', $basepos if $last_start != $basepos;

# Every entry except the trailing end-of-file one starts a real line.
my $total_lines = length( $offs ) / 4 - 1;

my $been_here = "\0" x ceil( $total_lines / 8 );   # one bit per line
my $i = 0;                                         # lines printed so far
while( $i < $total_lines ) {
    # Draw random line numbers, skipping the ones already printed.
    my $line = int rand $total_lines;
    next if vec( $been_here, $line, 1 );

    # Entries $line and $line + 1 delimit the line's bytes.
    my( $start, $end ) = unpack 'L2', substr $offs, 4 * $line, 8;

    seek $fh, $start, 0
        or die "Couldn't seek in $ARGV[ 0 ]: $!\n";
    defined( read $fh, my $text, $end - $start )
        or die "Couldn't read from $ARGV[ 0 ]: $!\n";

    # Only a final line without a terminator can lack the newline; add
    # one so that it does not fuse with whatever is printed next.
    $text .= "\n" unless $text =~ /\n\z/;
    print $text;

    ++$i;
    vec( $been_here, $line, 1 ) = 1;
}

close $fh;

####

#!/usr/bin/perl
#
# Print the lines of the file named by $ARGV[0] to STDOUT in random order.
#
# Variant 2: same algorithm as above, but the 32-bit line-start offsets
# are stored and fetched with vec() instead of pack/unpack.
use strict;
use warnings;
use POSIX qw( ceil );
use List::Util qw( shuffle );

my $chunk_size = 128 * 1024;    # bytes fetched per read() while indexing
my $max_offset = 0xFFFF_FFFF;   # largest value a 32-bit vec() element can hold

my $basepos = 0;                # file offset of the start of the current chunk

open my $fh, '<', $ARGV[ 0 ]
    or die "Couldn't open $ARGV[ 0 ] for reading: $!\n";

# Element 0 is the start of the first line.
my $offs = '';
vec( $offs, 0, 32 ) = 0;
my $entries = 1;                # number of offsets stored in $offs

while( 1 ) {
    my $advance = read $fh, my $chunk, $chunk_size;
    defined $advance
        or die "Couldn't read from $ARGV[ 0 ]: $!\n";
    last unless $advance;

    # After a successful m/\n/g match pos() is the offset just past the
    # newline, i.e. the start of the following line within this chunk.
    while( $chunk =~ /\n/g ) {
        vec( $offs, $entries++, 32 ) = $basepos + pos( $chunk );
    }

    $basepos += $advance;
    die "$ARGV[ 0 ] is too large for 32-bit line offsets\n"
        if $basepos > $max_offset;
}

# We will be looking for "start of following line", so the final element
# must be the end-of-file offset.  When the file ends with a newline (or
# is empty) the last recorded element already is that offset; otherwise
# add it, so that no special case is needed for the last line.
if( vec( $offs, $entries - 1, 32 ) != $basepos ) {
    vec( $offs, $entries++, 32 ) = $basepos;
}

# Every element except the trailing end-of-file one starts a real line.
my $total_lines = $entries - 1;

my $been_here = "\0" x ceil( $total_lines / 8 );   # one bit per line
my $i = 0;                                         # lines printed so far
while( $i < $total_lines ) {
    # Draw random line numbers, skipping the ones already printed.
    my $line = int rand $total_lines;
    next if vec( $been_here, $line, 1 );

    # Elements $line and $line + 1 delimit the line's bytes.
    my $start = vec( $offs, $line,     32 );
    my $end   = vec( $offs, $line + 1, 32 );

    seek $fh, $start, 0
        or die "Couldn't seek in $ARGV[ 0 ]: $!\n";
    defined( read $fh, my $text, $end - $start )
        or die "Couldn't read from $ARGV[ 0 ]: $!\n";

    # Only a final line without a terminator can lack the newline; add
    # one so that it does not fuse with whatever is printed next.
    $text .= "\n" unless $text =~ /\n\z/;
    print $text;

    ++$i;
    vec( $been_here, $line, 1 ) = 1;
}

close $fh;