#! perl -slw
use strict;

## Sorts atom names (e.g. N, CA, CB, CD1, 2HB) by a numeric weight
## built from three fields of the name: the main atom, an optional
## "distance" letter, and an optional trailing digit.

## Lookup table to map main atoms to a numerical value
my %mainAtoms = (
    ''   => 0,
    N    => 1,
    CA   => 2,
    C    => 3,
    O    => 4,
    H    => 5,
    '2H' => 6,
    '3H' => 7,
    '4H' => 8,
    '5H' => 9,
);

## Lookup table for "distance" multiplier
## Using '' => 1 ensures that unadorned main atom weights
## remain in the range 1 to 9.
## The weight is mainAtom * distance, so using 10.n for the distance
## maps the weights to 10.0 .. 10.5 for N, 20.0 .. 21.0 for CA,
## 30.0 .. 31.5 for C etc. The ranges of successive main atoms never
## overlap, and every adorned atom sorts after all unadorned ones.
my %distances = (
    '' => 1,
    B  => 10.0,
    G  => 10.1,
    D  => 10.2,
    E  => 10.3,
    Z  => 10.4,
    H  => 10.5,
);

## atom_weight( $atomName )
## Returns the numeric sort weight for one atom name.
## Dies if the name cannot be split into its fields, or if the main
## atom that was extracted has no entry in %mainAtoms (the pattern
## accepts any digit in front of H, but only 2H .. 5H are in the table;
## without this check such names would get an undefined weight and
## silently sort in front of everything else).
sub atom_weight {
    my( $name ) = @_;

    ## Extract the 3 fields from the catenated atomName.
    ## A list assignment in scalar context yields the number of values
    ## on the right-hand side, which is 0 when the match fails.
    ## NOTE: the pattern is deliberately left unanchored, as in the
    ## original; it picks up the first recognisable atom in the string
    ## and ignores any characters that follow the matched fields.
    my( $main, $distance, $index ) = $name =~ m[
        ( N | CA | O | C | (?: \d?H ) ) ( [BGDEZH] )? ( \d )?
    ]x or die "Failed to separate '$name'";

    exists $mainAtoms{ $main }
        or die "Unknown main atom '$main' in '$name'";

    return $mainAtoms{ $main }                ## 1 .. 9
         * $distances{ $distance || '' }      ## 1 or 10.x
         + ( $index || 0 ) / 100;             ## 0 or 0.0n
}

## sort_atoms( @atomNames )
## Returns the names ordered by ascending atom_weight().
## This is a 'standard' Schwartzian Transform.
## You have to read the steps backwards (bottom to top) to understand
## the process.
sub sort_atoms {
    return
        ## 3. Map the original value back from the anonymous array.
        map  { $_->[ 1 ] }
        ## 2. Sort the anonymous arrays according to the numerical
        ##    weight held in element 0.
        sort { $a->[ 0 ] <=> $b->[ 0 ] }
        ## 1. Build the anon. arrays. The calculated weight is in
        ##    ->[ 0 ], the atomName is in ->[ 1 ].
        map  { [ atom_weight( $_ ), $_ ] }
        @_;                                   ## The unordered data.
}

## Some test data.
my @unordered = qw[
    2HB 3HB C CA CB CG CD1 CD2 CE1 CZ CE2 HE2 HE1 HH HD1 HD2 N O OH
];

my @sorted = sort_atoms( @unordered );

## Display the results
print join ' | ', @sorted;

__END__
P:\test>295668
N | CA | C | O | CB | CG | CD1 | CD2 | CE1 | CE2 | CZ | OH | HD1 | HD2 | HE1 | HE2 | HH | 2HB | 3HB