in reply to Simple regular expression that isn't simple
# or, here are some liners to data munge fasta records that may be closer to what you are looking for:

# Choose any range of FASTA records.
# Called as: perl script_name.pl --start first_record --end last_record file_name
# Records are numbered from 1 in the order they are read.
# Both --start and --end are optional:
#   no --start => start at the first record, stop after --end
#   no --end   => start at --start, read to the end of the input
#   neither    => print every record
use warnings;
use strict;
use Getopt::Long;

my ($start, $end);
GetOptions(
    "start=i" => \$start,
    "end=i"   => \$end,
) or die "Usage: $0 [--start N] [--end M] [file ...]\n";

# Read one FASTA record per iteration: '>' opens every record header.
local $/ = '>';

my $record_number = 0;
while (my $record = <>) {
    # Drop the trailing '>' -- it belongs to the header of the next record.
    chomp $record;

    # The chunk in front of the first '>' of a file is empty; it is not a
    # record and must not be counted.
    next if $record !~ /\S/;

    $record_number++;
    next if defined $start && $record_number < $start;
    last if defined $end   && $record_number > $end;    # nothing left to print

    # Restore the '>' that was consumed as the record separator.
    print ">$record";
}
# Choose first N FASTA records (replace N with the number of records wanted).
# Empty chunks (the text before the first ">") are skipped and not counted.
perl -ne 'BEGIN {$/=">";$o=0}{chomp;$o<N?(/^$/?next:print">$_"):last;$o++}' EXAMPLE.fa
# Choose the Nth FASTA record (replace N with the record number).
# When the file starts with ">", element 0 is the empty chunk before the
# first ">", so N is effectively 1-based. The ">" removed by chomp is put back.
perl -0x3E -e 'chomp(@lines = <>);print ">$lines[N]"' EXAMPLE.fa
# Choose any Range of FASTA records (replace STrange and ENDrange with record
# numbers; 1-based when the file starts with ">", because element 0 is the
# empty chunk before the first ">").
# Labelled form: one "record N is >..." entry per selected record.
perl -0x3E -e 'chomp(@l=<>);for(STrange..ENDrange){print "record $_ is >$l[$_]\n"}' EXAMPLE.fa
# Plain form: the selected records, each with its ">" marker restored.
perl -0x3E -e 'chomp(@l=<>);print map {">$_"} @l[STrange..ENDrange]' EXAMPLE.fa
# Show the number of FASTA records in a given FASTA file.
# grep /\s/ keeps only chunks containing whitespace, which drops the bare ">"
# chunk read before the first record.
perl -0x3E -e '@l=grep/\s/,<>;print "$ARGV contains ".scalar(@l)." records\n"' EXAMPLE.fa
|
|---|