in reply to Can this be parsed ?
#!/usr/bin/perl
use strict;
use warnings;

# Split records of the form "NAME [PREFIX] [UNKNOWN]" into their parts.
#
#   NAME    - the leading words of the record
#   PREFIX  - an optional trailing name prefix from @PREFIXES ("VAN DER", ...)
#   UNKNOWN - an optional trailing chunk that starts with a non-letter
#             ("<A240>", "230", "(4X115PJ)", ...)
#
# Every record read from the DATA section is printed as
# "NAME == PREFIX == UNKNOWN"; records that cannot be split are reported
# as "No idea for ...".
#
# NOTE(review): the sample record containing "ŤAť" looks like a mis-encoded
# "«A»" (ISO-8859-2 vs ISO-8859-1) -- confirm against the original data.
# It is kept byte-for-byte here; parsing is unaffected either way.

# Known name prefixes.
my @PREFIXES = ( 'VAN DER', 'VAN DE', 'DEN', 'DE', 'VAN' );

# Alternation of all prefixes, longest first so that "VAN DER" is tried
# before "VAN DE" / "VAN". quotemeta keeps future entries with regex
# metacharacters from being interpreted as pattern syntax.
my $PREP_RE = do {
    my $alternation = join '|',
        map  { quotemeta }
        sort { length $b <=> length $a } @PREFIXES;
    qr/$alternation/;
};

# normalize_line($line): trim leading/trailing whitespace (including the
# newline) and collapse every internal whitespace run to a single space,
# so the literal-space patterns in parse_line also work for tab-separated
# input.
sub normalize_line {
    my ($line) = @_;
    $line =~ s/\A\s+//;
    $line =~ s/\s+\z//;
    $line =~ s/\s+/ /g;
    return $line;
}

# parse_line($line): split one record.
#
# Returns the list (name, prefix, unknown); prefix and unknown are '' when
# absent, never undef. Returns the empty list when the record cannot be
# split (empty line, or a line that does not start with a letter and has
# no "NAME UNKNOWN" shape).
sub parse_line {
    my ($raw) = @_;
    my $line = normalize_line($raw);

    # NAME PREFIX [UNKNOWN]
    # Anchored at both ends: the prefix must be followed by the end of the
    # record or by the non-letter UNKNOWN part. Without the anchors a
    # prefix also matched the start of a longer word ("DEN" in "DENNIS")
    # and the remainder of the record was silently dropped.
    if ( $line =~ m{\A (.+?) [ ] ($PREP_RE) (?: [ ]? ([^A-Za-z].*) )? \z}x ) {
        return ( $1, $2, defined $3 ? $3 : '' );
    }

    # NAME UNKNOWN
    # Greedy name: UNKNOWN starts at the last space that is followed by a
    # non-letter.
    if ( $line =~ m{\A (.+) [ ] ([^A-Za-z].*) \z}x ) {
        return ( $1, '', $2 );
    }

    # NAME only. Previously "FOO BAR" was truncated to "FOO" with an
    # undefined UNKNOWN (warning under -w).
    if ( $line =~ m{\A [A-Za-z]}x ) {
        return ( $line, '', '' );
    }

    return;
}

# main(): parse every record of the DATA section and print the result.
sub main {
    while ( my $line = <DATA> ) {
        my ( $name, $prep, $unknown ) = parse_line($line);
        if ( defined $name ) {
            print "$name == $prep == $unknown\n";
        }
        else {
            print 'No idea for ', normalize_line($line), "\n";
        }
    }
    return;
}

# Run only when executed as a script, so the file can also be loaded
# (require/do) to call parse_line directly.
main() unless caller;

1;
__DATA__
WINTER DE <A240>
ZANDEN VAN DER ŤAť
JENSEN 230
WOODHEAD <D>
BRINK 130,-
HEYDIER DEN <240>
SMITSER (4X115PJ)
LINDEN VAN DER
MOTEL GOLDEN LEEUW <A225>
.02
ps - the nearest word I could find was cognomen, but I bet that's not right either :)
--
seek(JOB,$$LA,0);
|
|---|