#!/usr/bin/perl
use strict;
use warnings;

# Reads whitespace-separated records from the DATA section at the end of
# this file and prints, for each record, the numeric part of the chromosome
# token (field 0), the second field (field 1) and the fourth field (field 3),
# separated by single spaces.
#
# Lines beginning with "MULTI" (optionally preceded by whitespace) and blank
# lines are skipped.  Records that lack one of the required fields, or whose
# first field contains no digits, are reported on STDERR and skipped.

LINE:
while ( my $line = <DATA> ) {
    chomp $line;

    next LINE if $line =~ /^\s*MULTI/;    # skip MULTI lines
    next LINE if $line =~ /^\s*$/;        # skip blank lines

    # split ' ' is the "awk-style" split: it ignores leading whitespace and
    # separates on any run of whitespace.  The tokens in the data contain no
    # embedded spaces, so this is all that is needed.  Field 2 (the dash) is
    # deliberately dropped by the slice.
    my ( $chr_str, $position, $value ) = ( split ' ', $line )[ 0, 1, 3 ];

    # Pull just the digits out of the chromosome token, e.g. "chr16" -> 16.
    my ($chr_num) = defined $chr_str ? $chr_str =~ /(\d+)/ : ();

    unless ( defined $chr_num && defined $position && defined $value ) {
        warn "Skipping malformed line $.: $line\n";
        next LINE;
    }

    print "$chr_num $position $value\n";
}

=begin comment

Prints:

16 28797601 0.777877
32 887755 0.11

=end comment

=cut

__DATA__
chr16 28797601 - 0.777877
MULTI some stuff
chr32 887755 - 0.11