In reply to: replace first and last occurrences of N
Update: this is an answer to the wrong question! It solves the problem of quoting only the first and last n/N runs. The OP wants to replace the first and last N/n in each run with ^.
The following (rather fussy because of special cases) code remembers where the last N/n run started then rewrites the last part of the output file with the markers inserted.
use strict;
use warnings;

# Marks the first and the last run of N/n characters in a block of sequence
# text by surrounding them with '^'. Runs may span several lines. The input
# is streamed line by line into an output buffer; carets for the first run
# are inserted on the fly, while the position of the most recent run is
# remembered so that the tail of the output can be rewritten at the end with
# carets around the last run.

my $data = <<'DATA';
acacccacacacaccacacccacacaccacacccacacccacacaccaca
nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn
nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn
cccacaccacacccacacaccacacaccacacccacacccacacacacca
cacccacacaccacacccacacacaccctaaccctaacccctaaccccta
accctaacccnnnnnnnnnnnnnnnnnnnnnnnnnnnccctaaccctaac
ccctaaccctaaccctaaccgtaaccctaaccctttaccctaacccgaac
ccctaacnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn
nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnggggg
gaccctgaccgtgaccctgaccctaacccgaacccgaacccgaaccccga
accccgaaccccgaaccccaaccccaaccccaaccccaaccctaacccct
caccctcaccctcgacccccgacccccgacccccgacccccaccccgaac
ggnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn
nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn
nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn
nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn
nnnnnnnnnnnnnnnnnnnnnaccctaaccctaaaaccctaaccctagcc
ctagccctagccctagccctaacccctaacccctaaccctaagccgaagc
DATA

my $result;

# Both handles are in-memory files: reads come from $data, writes go to $result.
open my $inFile, '<', \$data
    or die "Unable to open input file: $!\n";
open my $outFile, '>', \$result
    or die "Unable to create output file: $!\n";

my $currStartN;          # input offset where the run currently being scanned began
my $lastStartN;          # input offset of the start of the most recent completed run
my $lastEndN;            # input offset just past the most recent completed run
my $prevLineEnd = 0;     # output offset of the newline ending the previously written line

while (<$inFile>) {
    my $runEnd;

    if (defined $currStartN) {
        # Looking for an end N/n
        next unless /([^nN\n])/;    # Continue if full line of N/n
        $runEnd = $-[0];

        # Insert ^ for first run
        if ($runEnd == 0) {
            # Need to insert ^ at end of previous line: overwrite its newline
            # with "^\n" in the output buffer.
            # NOTE(review): unlike the other first-run insertions, this branch
            # is not guarded by "unless defined $lastStartN" - confirm intent.
            seek $outFile, $prevLineEnd, 0;
            print {$outFile} "^\n";
        }
        else {
            substr $_, $runEnd, 0, '^' unless defined $lastStartN;
        }
    }
    else {
        # Looking for a start N/n
        next unless /([nN]+)/;
        $runEnd     = $+[0];
        $currStartN = tell($inFile) - length($_) + $-[0];
        s/([nN])/^$1/ unless defined $lastStartN;    # Insert ^ for first run
        next unless $runEnd < length($_) - 2;        # Continue if full line of N/n
    }

    # A run has just ended on this line: remember where it was.
    $lastStartN = $currStartN;
    $lastEndN   = tell($inFile) - length($_) + $runEnd;
    $currStartN = undef;
}
continue {
    # Always copy the (possibly modified) line to the output and track where
    # its trailing newline sits in the output buffer.
    $prevLineEnd = tell $outFile;
    print {$outFile} $_;
    chomp;
    $prevLineEnd += length;
}

# Insert ^ around last run of N/n
# The "+ 2" presumably accounts for the two carets already inserted around
# the first run, which shift later output offsets relative to the input.
seek $outFile, $lastStartN + 2, 0;
print {$outFile} '^';
seek $inFile, $lastStartN, 0;
# NOTE(review): the "- 1" stops one character short of the run's end, so the
# closing ^ is written before the final n of the last run (this matches the
# posted sample output).
read $inFile, my $nRun, ($lastEndN - $lastStartN - 1);
print {$outFile} $nRun, '^';
print {$outFile} $_ while <$inFile>;

close $inFile;
close $outFile;

print $result;
Prints:
acacccacacacaccacacccacacaccacacccacacccacacaccaca ^nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn^ cccacaccacacccacacaccacacaccacacccacacccacacacacca cacccacacaccacacccacacacaccctaaccctaacccctaaccccta accctaacccnnnnnnnnnnnnnnnnnnnnnnnnnnnccctaaccctaac ccctaaccctaaccctaaccgtaaccctaaccctttaccctaacccgaac ccctaacnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnggggg gaccctgaccgtgaccctgaccctaacccgaacccgaacccgaaccccga accccgaaccccgaaccccaaccccaaccccaaccccaaccctaacccct caccctcaccctcgacccccgacccccgacccccgacccccaccccgaac gg^nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn nnnnnnnnnnnnnnnnnnnn^naccctaaccctaaaaccctaaccctagcc ctagccctagccctagccctaacccctaacccctaaccctaagccgaagc
|
|---|