I put together a benchmark for most of the suggested solutions (or adaptations of them to get consistent results) and ran tests against an inline dataset of 50 lines with Test::More then with a 50,000 line file produced by this one-liner.
Here's the script.
use strict;
use warnings;
use Benchmark qw{ cmpthese };
use Test::More qw{ no_plan };
open my $inFH, q{<}, \ <<__EOD__ or die $!;
TATCGC.TGCCC.ATTAAGCCATACTTCAAGGATCCCCCCG.GA.GGGCA
GGAGTCGTC.ACACATCTTCACTAC.CATATCTTGCTACGGCCACTGACA
CGA.A.CAATTTTCGAATGGAGGGCGAATGCCGTTGTGCGCTGCGTGACG
TCT.AGCCT.CGAA.A.GCAGGCGTGGGGCGTACCACGGCTTGGCCTAT.
ATGCACTT.AA.CCCTCGTAT.CTCTACTCT.ACAACCTTGGGCAG.T.T
TAGGGC.CCGCG.TAAACTGCAGATAGTACTCCAAGAATCGCTCCGACCC
.CGTCTAAACAATTAGCGGAGGTCGTC.CTGCAA.CAATGATCTTAACAC
AG.AAGAAGT.CAAAGT.GTAGTGGCTGGGTACTTGATCA.TCAATTTCA
ACC.TTCCAG.AAATGAGGTCTC.AAAAG.AGTT.CCTG.GGCTGTGTAG
ACAT.A.TGTTAACACCACCTATAATAGAAGC.TTATATTCACC.TTAAC
GAATTGT.TTCAATGCCACATA.GCTGGGCCAC.GCCTTAGGCTATTT.A
AGCTTTACAGATGT.ACCAAA.CCAG.GACTAGATGGGGGGG.ATATGCC
AGAAAGCCC.TCCCTGCGAAT.TGGGGGAGTACT.ATAGACTA.GGCCAG
.CACTCCGG.CTAACACTACTTCCTGTAACAAA.CTGAAGAGA.CGTTTG
CAGTCCTGAGCGTGCTAC..GTTCT.CTGTCG.TTAGATGGGCGC..GTA
AGCAAGAAAACACGTACAGAAAAAGCCGACGGC.GGGGTTC.GTCAACC.
T.ACTTATCGACATTAGCATCTGG.TGCCGCCTTT.GAACCCATTACTCG
TAAACGA.CACCGCTTAGGGCTGACCTCCGAA.TTATCAGAGTACCGGGC
ACGGATATCCTA.ATAGA.TACCTATGTAGGAAG.TCGAACTC.TAAACT
ACCCCCGTCC.TATCCTC.CGA.ATTGCCCCCGGCCGTACTCCA.AAACC
T..TC.AGCCGGG.TTG.TGTGGATCAGAAGTGATTGAGACTGG.GCCTA
TCAACCTTGAAGTTGAAACTCCGAGAGT.CGCGGTCAACC.AGCCTGCGC
GATCAGTGAGGAGCTGT.C.AAAGC.TC.AG.GCC.G.GA.GTGAGATT.
G.C.AGTTTGCCACTT.CATCCAGAGTTCATCCAGCAGG.ATTGAAGTTA
GCGCTGCA.CTGT.CATTTTTTATTCCCACCCGGTCTCCCCAACCCCGAT
TGTATAGCCAGG.CGGAG.TTTCTT.TAGATCTAGTAAGACATT.CCCGA
AC.CCCCT.G.TAGCTAAATCGACGGG..GTCAATATACACGGAT.CTTT
AAGC.GGGCTGATGCTTATCTCCTAGCCCGC.CCTCGATGAT.ATTATTG
TCACCC.ACCGC..T.CGGAACGAAAACT..AT.CCTATTAGAACATCCT
.GCGTGAACC.TG.G.TACACCG.GATGGTCCCGAC.GGCTACCGAT.CA
.CGCTGCCCCCATCTCCAGCTATC.AAA.AGGGACGCGATATGCGGAAGC
CGTTCAGACAATCCTTTTGGGGTGTAAATGTCTCC.ACCTC.GAGA.CTG
AAGACATAGGAG.CCAGAAT.A.CGTCATACAGAGGCACTC.TAT..TCT
AGGCCTGCCTACTT.TTGGCTAA.C.AGACTTGG.AAG.ATGTAGAAC.G
GTCAACCCGTGCTAACTGGGGTGAGGAATCTTCCGAGCC.TGCTCGTCGC
TCG.TGGCAGCTC.AACTGGTGCGCGGCAG.CCTCCTGCCAAGTATTCAG
CGGGGGCTAC..GTT.TCATCGAACACGGCACACTAACAAACTCCTGTGT
TTCCGGGGTCGACCCTTTGGCCCAAGAGTGA.AGGGCTTCG.ACTGCG..
AA.CGTGCGGGTAGCCTAGACACGTAGC.TTGTGCGC.CG.CGCA.CAAG
AT.TATGTCA.ACTTCCGCCGGG.CTTCTGTGTACATT.AA.GAGAATAA
CGGCAAGGATTGCTCGACGTAGGAGTCCGTGGAGCTCGTGC.GACC.ACC
.GTTAA.CACGCCTTAACTTTTCGGAACAGAGTAAC.AATCCCGG.TA.C
CAGGTAATGTGTCACCAGGTTCGGGCCCT.CACCGTCCCAGCTAC.TGTT
TAGCCCCTCTTTT.GAT.GGCCC.AGCGACATCAA.TGATC..CTGTAGG
CGATCATATTTCATTGTTCCGC.TG.AGCGGT.A.TG.GCAAT.CAGCCG
ATCGATGTTCTGATATG..GTGTGAATAC.AGAAACCGGCTTTGTCGGGG
CCTTT.AGGAA.AC.TAGGT.TT.CTCAATGAAC.GACATCAAC.T.AGC
AAGGAGGTACACAGCGTTCAGCGGATCC.CT.AAGC.TAGCATCTGCTGA
GCAGATA.A.TCTGCTTCAACTGTGAAAGGGTTG.CTAATCAG.GCGTAG
GGTACGATC.GCAAC.AGTCCACAAGTACACGGTGGAATT.CC..C.TTG
__EOD__
my $fileStart = tell $inFH;
my $offset = 9; # Column 10 if numbering from 1
my $testOK = q{C.T.AGCTGTTG..GAGACCCAGGCAGTCCCAGTTGCCGATCTTTCAC..};
my %methods = (
rsubstr => sub { # BrowserUk's method
seek $inFH, 0, 0;
my $buffer;
my $rsWanted = \ substr $buffer, $offset, 1;
my $retStr;
while ( $buffer = <$inFH> )
{
$retStr .= ${ $rsWanted };
}
return \ $retStr;
},
brutish => sub { # Adapted from Anonymonk's brute force method
seek $inFH, 0, 0;
my $retStr;
while ( <$inFH> )
{
my @split = split m{};
$retStr .= $split[ $offset ];
}
return \ $retStr;
},
seek => sub { # Suggested by vr
seek $inFH, 0, 0;
my $retStr;
my $char;
my $len = length( <$inFH> ) - 1;
seek $inFH, $offset, 0;
while ( read $inFH, $char, 1 )
{
$retStr .= $char;
seek $inFH, $len, 1
}
return \ $retStr;
},
substr => sub { # Suggested by pryrt and Laurent_R
seek $inFH, 0, 0;
my $retStr;
while ( <$inFH> )
{
$retStr .= substr $_, $offset, 1;
}
return \ $retStr;
},
regex => sub { # Another of pryrt's solutions
seek $inFH, 0, 0;
my $retStr;
while ( <$inFH> )
{
$retStr .= $1 if m{^.{$offset}(.)};
}
return \ $retStr;
},
unpack => sub { # Suggested but not implemented by LanX & pryrt
seek $inFH, 0, 0;
my $retStr;
my $fmt = qq{x${offset}a};
while ( <$inFH> )
{
$retStr .= unpack $fmt, $_;
}
return \ $retStr;
},
unpackM => sub { # Multi-line unpack suggested by LanX
seek $inFH, 0, 0;
my $buffer = <$inFH>;
my $lineLen = length $buffer;
my $nLines = 500;
my $chunkSize = $lineLen * $nLines;
seek $inFH, 0, 0;
my $retStr;
my $fmt = qq{(x${offset}ax@{ [ $lineLen - $offset - 1 ] })*};
while ( my $bytesRead = read $inFH, $buffer, $chunkSize )
{
$retStr .= join q{}, unpack $fmt, $buffer;
}
return \ $retStr;
},
split => sub { # Simple split from thanos1983
seek $inFH, 0, 0;
my $retStr;
my $fmt = qq{x${offset}a};
while ( <$inFH> )
{
$retStr .= ( split m{} )[ $offset ];
}
return \ $retStr;
},
pushAoA => sub { # In memory solution from Discipulus
seek $inFH, 0, 0;
my @aoa;
while ( <$inFH> )
{
chomp;
push @aoa, [ split m{} ];
}
my $retStr = join q{}, map { $aoa[ $_ ]->[ $offset ] } 0 .. $#a
+oa;
return \ $retStr;
},
ANDmask => sub { # Multi-line AND mask by johngg
seek $inFH, 0, 0;
my $buffer = <$inFH>;
my $lineLen = length $buffer;
my $nLines = 500;
my $chunkSize = $lineLen * $nLines;
seek $inFH, 0, 0;
my $retStr;
my $mask
= qq{\x00} x ${offset}
. qq{\xff}
. qq{\x00} x ( $lineLen - $offset - 1 );
$mask x= $nLines;
while ( my $bytesRead = read $inFH, $buffer, $chunkSize )
{
( my $anded = $buffer & $mask ) =~ tr{\x00}{}d;
$retStr .= $anded;
}
return \ $retStr;
},
);
foreach my $method ( sort keys %methods )
{
ok( ${ $methods{ $method }->() } eq $testOK, $method );
}
close $inFH or die $!;
my $filename = q{spw1202693.txt};
open $inFH, q{<}, $filename
or die qq{open: < $filename: $!\n};
cmpthese(
-3,
{
map
{
my $codeStr
= q[sub { my $col = $methods{ ]
. $_
. q[ }->(); }];
$_ => eval $codeStr;
}
keys %methods
}
);
close $inFH
or die qq{close: < $filename: $!\n};
And the results.