#!/usr/bin/perl
use strict;
use warnings;

# Demonstrates two ways of tokenizing whitespace-separated records whose
# second field (the strain name) may itself contain spaces, for example
# "A/District of Columbia/INS17/2009".
#
# Each record in the DATA section has four fields:
#   >cds:ID   strain-name   collection-date   segment
#
# Pass 1 keeps the embedded spaces; pass 2 squeezes them out.

# Remember where the DATA section starts so it can be read a second time.
my $begin = tell(DATA);

# Pass 1: split only on whitespace that directly follows a digit or a "/".
# (?<=[\d\/]) is a positive look-behind assertion: the character before the
# whitespace must be a digit or a slash, but it is not consumed by the split.
# Spaces inside the strain name follow a letter, so they are left alone.
# chomp is required here: the trailing newline follows the letters "HA", so
# the look-behind never matches it and split would otherwise leave it
# attached to the last token.
while ( my $line = <DATA> ) {
    chomp $line;
    next unless $line =~ /\S/;    # ignore blank lines in the data section

    my @tokens = split /(?<=[\d\/])\s+/, $line;
    print join( "\n", @tokens ), "\n";
}

=pod

Pass 1 prints each record like this:

    >cds:ADD23250
    A/District of Columbia/INS17/2009
    2009/10/26
    HA

=cut

# Rewind DATA to the saved offset (whence 0 means "from start of file").
seek( DATA, $begin, 0 ) or die "Cannot rewind DATA: $!";

# Pass 2: delete any whitespace that directly follows a letter, which glues
# multi-word strain names together (and also removes the newline after
# "HA"), then split on the remaining whitespace.
while ( my $line = <DATA> ) {
    $line =~ s/(?<=[a-zA-Z])\s+//g;    # remove spaces if preceded by a letter

    my @tokens = split ' ', $line;
    next unless @tokens;               # ignore blank lines in the data section

    print join( "\n", @tokens ), "\n";
}

=pod

Pass 2 prints each record like this:

    >cds:ADD23250
    A/DistrictofColumbia/INS17/2009
    2009/10/26
    HA

=cut

__DATA__
>cds:ADD75048 A/Brussels/INS71/2009 2009/10/30 HA
>cds:ADF58353 A/Germany-MV/HGW4/2009 2009/12/ HA
>cds:ADF58351 A/Germany-MV/HGW6/2009 2009/12/ HA
>cds:ADU76781 A/England/94780010/2009 2009/10/22 HA
>cds:AEA30293 A/Netherlands/2223b/2009 2009/11/18 HA
>cds:ADD23250 A/District of Columbia/INS17/2009 2009/10/26 HA
>cds:ADX98640 A/San Diego/INS13/2009 2009/10/19 HA
>cds:ADD74978 A/San Diego/INS54/2009 2009/10/12 HA
>cds:ADF27925 A/Texas/JMS407/2010 2010/01/11 HA
>cds:ADM95824 A/Finland/661/2009 2009/10/26 HA
>cds:ADD97035 A/Wisconsin/629-D00036/2009 2009/09/15 HA