#!/usr/bin/perl
#
# Parse publication (citation) strings of the form
#
#   <n>. <authors>: <title>[.?!] [(<type>) ]<journal> <volume>: <first>-<last>, <year>.
#
# and print the journal extracted from a sample citation.
#
use warnings;
use strict;
use Data::Dumper;

# Keys of the hash returned by parse_pub().
my $TITLE      = 'title';
my $YEAR       = 'year';
my $START_PAGE = 'start_page';
my $END_PAGE   = 'end_page';
my $JOURNAL    = 'journal';
my $TYPE       = 'type';
my $AUTHORS    = 'authors';
my $VOLUME     = 'volume';

# Heuristic used to tell title sentences from the journal name: a journal
# name is expected to have fewer than this many space-separated words.
my $MAX_JOURNAL_WORDS = 10;

# parse_pub($string)
#
# Split one numbered citation string into its components.
#
# A title may consist of several sentences, so every sentence end
# ([.?!] followed by a space) is tried as the end of the title, shortest
# title first.  The first split whose remainder parses as
# "<journal> <volume>: <first>-<last>, <year>." with a journal name of fewer
# than $MAX_JOURNAL_WORDS words is returned.
#
# Returns a hash (as a flat key/value list) with these keys:
#   authors, title                        - whenever the citation is recognised
#   type                                  - only when a "(Type) " marker is
#                                           present; the value is the marker
#                                           exactly as matched, including the
#                                           parentheses and the trailing space
#   journal, volume, start_page,
#   end_page, year                        - when the remainder could be parsed
#
# When no split yields a short enough journal name, the parse of the first
# acceptable split is returned (it may then lack the journal fields, or carry
# an over-long journal name).  An empty list is returned when the string does
# not look like a citation at all.
sub parse_pub ($) {
    my ($string) = @_;

    # Header: "<number>. <authors>: <rest of the citation>".
    if ($string !~ m/^\d+\. ([^:]+): (.+)$/) {
        return ();
    }
    my ($authors, $body) = ($1, $2);

    my %fallback = ();

    # Walk over every "sentence end + space" in the body; each one is a
    # possible boundary between the title and what follows it.  The
    # lookbehind keeps the title at least one character long before its
    # closing punctuation mark.
    CANDIDATE:
    while ($body =~ m/(?<=.)[.?!] /g) {
        my $rest_start = pos($body);
        my $title      = substr($body, 0, $rest_start - 1);
        my $rest       = substr($body, $rest_start);

        # The remainder must be an optional "(Type) " marker followed by
        # something that starts like a capitalised word.
        # NOTE(review): the '.' before '?' in the type marker is unescaped,
        # so any single character is accepted before ')' - confirm whether
        # '\.' was intended.
        if ($rest !~ m/^(\(\w+.?\) )?(?=[A-Z]\w+[. ])([A-Z].+)$/) {
            next CANDIDATE;
        }
        my ($type, $citation) = ($1, $2);

        my %candidate = (
            $AUTHORS => $authors,
            $TITLE   => $title,
        );
        $candidate{$TYPE} = $type if defined $type;

        if ($citation =~ m/^([^:]+) ([\w()]+): (\d+)-(\d+), (\d+)\./) {
            $candidate{$JOURNAL}    = $1;
            $candidate{$VOLUME}     = $2;
            $candidate{$START_PAGE} = $3;
            $candidate{$END_PAGE}   = $4;
            $candidate{$YEAR}       = $5;

            # A short journal name means the title/journal boundary is
            # plausible; otherwise part of the title is probably still in
            # front of the journal, so try the next sentence end.
            my @journal_words = split(/ /, $candidate{$JOURNAL});
            if (scalar(@journal_words) < $MAX_JOURNAL_WORDS) {
                return %candidate;
            }
        }

        # Remember the first acceptable split in case nothing better turns up.
        %fallback = %candidate unless %fallback;
    }

    return %fallback;
}

my $line = "110. Wunder, E.; Burghardt, U.; Lang, B.; Hamilton, L.: Fanconi's anemia: anomaly of enzyme passage through the nuclear membrane? Anomalous intracellular distribution of topoisomerase activity in placental extracts in a case of Fanconi's anemia. Hum. Genet. 58: 149-155, 1981.";
print "$line\n";

my %pub = parse_pub($line);
#print Dumper(\%pub);
print "J:$pub{$JOURNAL}\n\n";

exit;