use strict;
use warnings;

# Extract the reaction ID and the direction arrow from every record in the
# __DATA__ section below and print them as "ID - ARROW", one per line.
#
# Expected output for the bundled data:
#
#   R00005 - =>
#   R00005 - =>
#   R00005 - <=>
#   R00006 - =>
#   R00008 - =>
#   R00008 - =>
#   R00010 - =>
#   R00013 - =>
#   R00013 - <=>

# Parse one record of the form "ID: MAP: LEFT ARROW RIGHT".
#
# Call in list context. Returns ($id, $arrow) on success and an empty list
# when the line does not have the expected shape.
#
# How the pattern works:
#
#   /^([^:]+).+\s+([<=>]+)\s+/
#    ^ ^     ^     ^
#    | |     |     |
#    | |     |     + - Capture characters from this class enclosed by 1+ spaces
#    | |     + - Keep going, matching any character except \n
#    | + - Capture characters that are not :
#    + - Start at the beginning
sub parse_reaction {
    my ($line) = @_;

    # A list assignment in scalar context yields the number of captures, so a
    # failed match (zero captures) takes the early return.
    my ( $id, $arrow ) = $line =~ /^([^:]+).+\s+([<=>]+)\s+/
        or return;

    return ( $id, $arrow );
}

while ( my $protein = <DATA> ) {

    # Skip blank or malformed records rather than printing undefined captures.
    my ( $id, $arrow ) = parse_reaction($protein)
        or next;

    print "$id - $arrow\n";
}

__DATA__
R00005: 00330: C01010 => C00011
R00005: 00791: C01010 => C00011
R00005: 01100: C01010 <=> C00011
R00006: 00770: C00022 => C00900
R00008: 00362: C06033 => C00022
R00008: 00660: C00022 => C06033
R00010: 00500: C01083 => C00031
R00013: 00630: C00048 => C01146
R00013: 01100: C00048 <=> C01146