# breakup( $filename, $text )
#
# Split a free-form bibliographic reference string into a list of labelled
# fragments.
#
# Parameters:
#   $filename - accepted for call compatibility; not used by this routine.
#   $text     - the string to scan.
#
# Returns a reference to an array of hash references, one per token, in the
# order the tokens occur in $text.  Each hash is a copy of %+ for that match:
# the name of the alternative that matched (apair, ppair, ppage, version3,
# version2, year, name, title, comma, notcommanotspace or other) mapped to
# the matched text.  apair and ppair tokens additionally carry "key" and
# "val".  Whitespace-only tokens are dropped.  An undefined $text yields a
# reference to an empty array.
#
# NOTE(review): in the title alternative the inter-word whitespace is
# optional, so its three repetitions can be satisfied by pieces of a single
# word; a lone word such as "Takagi " is therefore reported as a title.
# Left as is because existing output depends on it.
sub breakup {
    my ( $filename, $text ) = @_;

    # Nothing to scan; also avoids "uninitialized value" warnings below.
    return [] unless defined $text;

    # Alternatives are tried left to right at each position, so the earlier,
    # more specific ones take precedence over the catch-alls at the end.
    my $re = qr{
        (?<apair>                       # key : val ;  (spaces optional)
            (?<key>[^:;,\.\s]+)
            \s*
            :
            \s*
            (?<val>[^:;,\.\s]+)
            \s*
            ;
        )
      | (?<ppair>                       # Key:VAL  (no spaces, no terminator)
            (?<key> \p{Uppercase_Letter}\w+ )
            :
            (?<val> \p{Uppercase_Letter}+ )
        )
      | (?<ppage> pp\.\s*\d+-\d+\. )
      | (?<version3> \d+\.\d+\.\d+ (?::\d+)? )
      | (?<version2> \d+\.\d+:\d+ )
      | (?<year> \( \d+ \) )
      ##| (?<name> (?: [^\s,\.]+, )+ )
      | (?<name> (?: \p{Uppercase_Letter}[.\w]+, )+ )
      ## | (?<title> (?:[^\s;.,\(\)]+\s*)+[.;,] )
      | (?<title> (?:[^\s;.,\(\)]+\s*){3,} [.;,?!]? )
      | (?<comma>,)
      | (?<notcommanotspace>[^\s,]+)
      | (?<space>\s+)
      | (?<other>.)
    }msx;

    my @parts;
    while ( $text =~ m{$re}g ) {
        # %+ only lists the named groups that took part in this match, so the
        # copy identifies which alternative fired.
        my $token = { %+ };

        # Runs of whitespace only separate tokens; they are not reported.
        next if exists $token->{space};

        push @parts, $token;
    }

    return \@parts;
}
__END__
[
{ year => "(1990)" },
{
title => "Methods of the Association of Official Analytical Chemis
+ts.",
},
{ notcommanotspace => "15." },
{ name => "Ed," },
{ comma => "," },
{
title => "Association of Official Analytical Chemists Washington;"
+,
},
{ name => "Dumet," },
{ name => "D.," },
{ name => "Benson," },
{ name => "E.E.," },
{
title => "The use of physical and biochemical studies to elucidate
+ and reduce cryopreservation-induced damage in hydrated/desiccated pl
+ant germplasm ",
},
{ year => "(2000)" },
{
title => "Cryopreservation of Tropical Plant Germplasm: Current Re
+search Progress and Application,",
},
{ ppage => "pp. 43-56." },
{ comma => "," },
{ notcommanotspace => "F." },
{ title => "Engelmann and H." },
{ title => "Takagi " },
{ notcommanotspace => "(eds.)" },
{ apair => "Tsukuba: JIRCAS;", key => "Tsukuba", val => "JIRCAS" },
{ apair => "Rome: IPGRI;", key => "Rome", val => "IPGRI" },
{ name => "Ferreira," },
{ name => "D.F.," },
{
title => "An\x{2B69}ses estat\x{ED34}icas por meio do SISVAR para
+Windows vers\x{4BE0}4.",
},
{ version2 => "0.1:225" },
{ year => "(2000)" },
{
title => "Reuni\x{4BE0}Anual da Regi\x{4BE0}Brasileira da Sociedad
+e Internacional de Biometria,",
},
{ comma => "," },
{ name => "S\x{4BE0}Carlos," },
{ title => "SP: UFSCAR" },
]
|