use strict;
use warnings;

# breakup( $filename, $text )
#
# Splits a bibliography/citation string into a flat list of classified
# tokens.
#
# Parameters:
#   $filename - accepted for call compatibility; not used by the tokenizer.
#   $text     - the citation text to split. undef is treated as empty.
#
# Returns:
#   An array reference of hash references, one per token, in input order.
#   Each hash holds the named captures that took part in the match: the
#   token class (year, title, name, ppage, version2, comma, ...) mapped to
#   the matched text, plus key/val for an apair. Pure-whitespace tokens
#   are dropped. A sample result is kept after __END__.
sub breakup {
    my ( undef, $text ) = @_;    # first argument (filename) is unused

    return [] unless defined $text;

    # Alternatives are tried in order, so the more specific token shapes
    # must stay ahead of the catch-all ones at the bottom.
    my $token_re = qr{
          # "key: value;" pair, e.g. "Rome: IPGRI;"
          (?<apair>
              (?<key> [^:;,\.\s]+ )
              \s* : \s*
              (?<val> [^:;,\.\s]+ )
              \s* ;
          )

          # Capitalised word, colon, run of capitals, with no spaces.
          # NOTE(review): the capture names of this alternative were lost
          # from the file; apair/key/val are reused because the shape is
          # the same. Perl allows duplicate names. Confirm.
          # The word part read "w+" (a literal w); restored to \w+.
        | (?<apair>
              (?<key> \p{Uppercase_Letter}\w+ )
              :
              (?<val> \p{Uppercase_Letter}+ )
          )

          # Page range, e.g. "pp. 43-56."
        | (?<ppage> pp\.\s*\d+-\d+\. )

          # Three dotted numbers with an optional ":n" suffix.
          # NOTE(review): name lost from the file; "version3" is chosen by
          # analogy with version2. Confirm.
        | (?<version3> \d+\.\d+\.\d+ (?::\d+)? )

          # Two dotted numbers and a ":n" suffix, e.g. "0.1:225"
        | (?<version2> \d+\.\d+:\d+ )

          # Parenthesised number, e.g. "(2000)"
        | (?<year> \( \d+ \) )

          # Disabled earlier variant:
          ##| (?<name> (?: [^\s,\.]+, )+ )

          # One or more capitalised, comma-terminated chunks, e.g. "E.E.,"
        | (?<name> (?: \p{Uppercase_Letter}[.\w]+, )+ )

          # Disabled earlier variant:
          ## | (?<title> (?:[^\s;.,\(\)]+\s*)+[.;,] )

          # Free text up to the next separator. The 3+ repetition count can
          # be met by splitting a single word, so in practice it only
          # rejects runs shorter than three characters.
        | (?<title> (?:[^\s;.,\(\)]+\s*){3,} [.;,?!]? )

          # Catch-alls
        | (?<comma> ,)
        | (?<notcommanotspace> [^\s,]+ )
        | (?<space> \s+ )
        | (?<other> . )
    }msx;

    my @parts;
    while ( $text =~ /$token_re/g ) {
        my %token = %+;    # copy: the capture hash is reset by the next match
        next if $token{space};
        push @parts, \%token;
    }

    return \@parts;
}

1;

__END__
[
  { year => "(1990)" },
  {
    title => "Methods of the Association of Official Analytical Chemists.",
  },
  { notcommanotspace => "15." },
  { name => "Ed," },
  { comma => "," },
  {
    title => "Association of Official Analytical Chemists Washington;",
  },
  { name => "Dumet," },
  { name => "D.," },
  { name => "Benson," },
  { name => "E.E.," },
  {
    title => "The use of physical and biochemical studies to elucidate and reduce cryopreservation-induced damage in hydrated/desiccated plant germplasm ",
  },
  { year => "(2000)" },
  {
    title => "Cryopreservation of Tropical Plant Germplasm: Current Research Progress and Application,",
  },
  { ppage => "pp. 43-56." },
  { comma => "," },
  { notcommanotspace => "F." },
  { title => "Engelmann and H." },
  { title => "Takagi " },
  { notcommanotspace => "(eds.)" },
  { apair => "Tsukuba: JIRCAS;", key => "Tsukuba", val => "JIRCAS" },
  { apair => "Rome: IPGRI;", key => "Rome", val => "IPGRI" },
  { name => "Ferreira," },
  { name => "D.F.," },
  {
    title => "An\x{2B69}ses estat\x{ED34}icas por meio do SISVAR para Windows vers\x{4BE0}4.",
  },
  { version2 => "0.1:225" },
  { year => "(2000)" },
  {
    title => "Reuni\x{4BE0}Anual da Regi\x{4BE0}Brasileira da Sociedade Internacional de Biometria,",
  },
  { comma => "," },
  { name => "S\x{4BE0}Carlos," },
  { title => "SP: UFSCAR" },
]