#!/usr/bin/perl
# cleanup.pl
#
# Reads a FASTA-style protein sequence file in paragraph mode (records are
# separated by one or more blank lines) and copies every record whose
# sequence text has already been seen into the file
# 'duplicates_entries_in_<input filename>'. A count of the records written
# is reported on STDOUT when the input is exhausted.
#
# An earlier revision of this script emitted warnings such as:
#
#   Argument "sdAb_1193_LgLlama" isn't numeric in printf
#       at ../duplicate.pl line 35, <$PROTFILE> chunk 165.
#   Redundant argument in printf
#       at ../duplicate.pl line 35, <$PROTFILE> chunk 1164.
#
# Cause: the printf format contained a single %d conversion but was given
# three arguments (record name, sequence, output filename). The name was
# therefore coerced to a number and the other two arguments were unused.
# The duplicate record is now written to the output file, and a real
# counter feeds the %d conversion.

use strict;
use warnings;

print 'Enter protein sequence filename: ';
my $prot_filename = <STDIN>;
die "No filename given\n" if !defined $prot_filename;
chomp $prot_filename;

open my $PROTFILE, '<', $prot_filename
    or die "Cannot open '$prot_filename' because: $!";

my $out_filename = 'duplicates_entries_in_' . $prot_filename;
open my $OUTFILE, '>', $out_filename
    or die "Cannot open '$out_filename' because: $!";

# Paragraph mode: each read returns one blank-line-delimited record.
# Set only after the filename has been read, and localized so the change
# to the global input record separator does not outlive this script body.
local $/ = q{};

my %fasta_seen;              # sequence text => number of times encountered
my $duplicate_count = 0;     # records written to $OUTFILE

FASTA_RECORD:
while ( my $para = <$PROTFILE> ) {

    # Remove the fasta header line, remembering its text as the record name.
    # The name is scoped per record so that a paragraph without a header
    # never inherits the name of the previous record.
    my $name = 'unnamed';
    if ( $para =~ s/^>(.*)//m ) {
        $name = $1;
    }

    # Remove comment line(s)
    $para =~ s/^\s*#.*//mg;

    # Trim leading and trailing white space. Removing the header and comment
    # lines leaves their newlines behind, which would otherwise become part
    # of the hash key used for duplicate detection.
    $para =~ s/\A\s+//;
    $para =~ s/\s+\z//;

    # A paragraph holding only a header and/or comments has no sequence to
    # compare; skip it rather than treating all such paragraphs as duplicates.
    next FASTA_RECORD if $para eq q{};

    # First sighting of this sequence: remember it and move on.
    next FASTA_RECORD if !$fasta_seen{$para}++;

    # Seen before: save the record, followed by a blank line so the output
    # file can itself be read back in paragraph mode.
    print {$OUTFILE} ">$name\n$para\n\n"
        or die "Cannot write to '$out_filename' because: $!";
    $duplicate_count++;
}

close $PROTFILE;

# Buffered write errors only surface at close, so check it.
close $OUTFILE
    or die "Cannot close '$out_filename' because: $!";

printf "%d duplicate records written to '%s'\n", $duplicate_count, $out_filename;
print "\n";