#!/usr/bin/perl use strict; use warnings; print 'Please enter protein sequence filename: '; chomp( my $prot_filename = ); open my $PROTFILE, '<', $prot_filename or die "Cannot open '$prot_filename' because: $!"; my $report_name = $prot_filename.'_alanine_report'; open my $out_file, '>', $report_name or die "Cannot open '$report_name' because: $!"; my $rx_fasta_header = qr{ \A > }xms; # header pattern - very naive my $rx_sequence = qr{ \A [ACDEFGHIKLMNPQRSTVWY_]+ \z }xms; # please check my $sequence; # sequence accumulator - initially undefined RECORD: while (my $record = <$PROTFILE>) { chomp $record; # get rid of newline, if any # fasta header - process any sequence accumulated so far if ($record =~ $rx_fasta_header) { # do not process if no sequence accumulated so far (i.e., first header) next RECORD if not defined $sequence; my $seq_len = length $sequence; my $seq_n_A = $sequence =~ tr/A//;; do_something_with($sequence, $seq_len, $seq_n_A); undef $sequence; # prepare to process next sequence next RECORD; } # part of sequence - accumulate if ($record =~ $rx_sequence) { $sequence .= $record; # accumulate sub-sequences (records) next RECORD; } die "bad record: '$record'"; # don't know what this is } # show all results print $out_file; close $out_file;