in reply to Enforcing growth of regex
#!/usr/bin/perl
#
# parse publications strings
#
# Splits a numbered citation line shaped like
#   N. Authors: Title. (Type) Journal Volume: Start-End, Year.
# into named fields, then prints the journal of one sample citation.
#
use warnings;
use strict;
use Data::Dumper;

# Hash keys under which parse_pub() stores each captured field.
my $TITLE      = 'title';
my $YEAR       = 'year';
my $START_PAGE = 'start_page';
my $END_PAGE   = 'end_page';
my $JOURNAL    = 'journal';
my $TYPE       = 'type';
my $AUTHORS    = 'authors';
my $VOLUME     = 'volume';

# parse_pub($string)
#
# Parse one citation string into its component fields.
#
# Parameters:
#   $string - the citation text to match against the pattern below.
#
# Returns:
#   On a successful match, a flat key/value list (assignable to a hash) with
#   the keys authors, title, type, journal, volume, start_page, end_page and
#   year.  The type value is undef when the optional "(Type)" part is absent.
#   Trailing whitespace is stripped from the journal value only.
#   On a failed match, an empty list (undef in scalar context), so that
#   "my %h = parse_pub(...)" leaves %h empty instead of receiving the
#   one-element list (undef).
#
# The ($) prototype is kept so that existing call sites parse as before.
sub parse_pub ($) {
    my ($string) = @_;

    my %ret;

    # The list assignment is evaluated in scalar context by "or", giving the
    # number of captures returned: zero when the match fails.
    @ret{ $AUTHORS, $TITLE, $TYPE, $JOURNAL,
          $VOLUME, $START_PAGE, $END_PAGE, $YEAR }
        = $string =~ m/^\d+\.\s+             #citation number
            ([^:]+):\s+                      #authors
            (.+?[.?!])\s+                    #title (as short as possible)
            (\(\w+.?\)\s+)?                  #type (optional)
            ((?:\w+[.?!]?\s+){1,10}?)        #journal
            ([\w()]+):\s+                    #volume
            (\d+)-(\d+),\s+                  #start page, end page
            (\d+)\.?$                        #year
        /x
        or return;

    # NOTE(review): the type capture keeps its trailing whitespace (the \s+
    # sits inside the group) and its ".?" is an unescaped dot, so it accepts
    # any single character there - confirm whether "\.?" was intended.

    # Each journal word is matched together with the whitespace after it, so
    # the capture always ends in whitespace; trim it.
    $ret{$JOURNAL} =~ s/\s+$//;

    return %ret;
}

# Sample citation.  The literal is built from pieces only to keep the source
# lines short; the resulting string is a single line.
my $line = "110. Wunder, E.; Burghardt, U.; Lang, B.; Hamilton, L.: "
    . "Fanconi's anemia: anomaly of enzyme passage through the nuclear "
    . "membrane? Anomalous intracellular distribution of topoisomerase "
    . "activity in placental extracts in a case of Fanconi's anemia. "
    . "Hum. Genet. 58: 149-155, 1981.";

print "$line\n";

my %pub = parse_pub($line);

#print Dumper(\%pub);

# parse_pub() returns an empty list on failure, so an empty hash means the
# citation did not match the pattern.
if (%pub) {
    print "J:$pub{$JOURNAL}\n\n";
}
else {
    warn "could not parse citation\n";
}
|
|---|
| Replies are listed 'Best First'. | |
|---|---|
|
Re^2: Enforcing growth of regex
by Hena (Friar) on Nov 24, 2005 at 07:53 UTC |