#!/perl/bin/perl
#
# newISBN.pl -- ISBN to BibTex via webscraping...
#
# Reads EAN-13 barcodes from the __DATA__ section, converts each one to an
# ISBN-10, looks it up through the loc.gov Z39.50 web gateway and prints one
# BibTeX @book entry per barcode on STDOUT.

use strict;
use warnings;
use diagnostics;
use LWP::Simple;

# Alternative approach, kept for reference only (NOT executed): talk to the
# Z39.50 server directly through the VBZOOMC COM object instead of scraping
# the web gateway.  It needs `use Win32::OLE;` and declared variables before
# it can run, so it is left commented out here.
#
#   $oZ3950 = Win32::OLE->new('VBZOOMC.ZoomFactory');
#   $oConnection = $oZ3950->CreateZoomConnection( 'z3950.loc.gov', 7090 )
#       or die "CreateZoomConnection failed!\n";
#   $oConnection->SetOption( 'databaseName', 'Voyager' );
#   $oConnection->SetOption( 'preferredRecordSyntax', 'USmarc' );
#
####

# Run the lookup only when executed as a script, so the subs below can be
# loaded (e.g. by a test) without triggering any network access.
main() unless caller;

# main()
# Converts every barcode in __DATA__ and prints its BibTeX entry.
sub main {
    my %bib = (
        author    => '?',
        title     => '?',
        publisher => '?',
        address   => '?',
        edition   => '?',
        year      => '?',
        ISBN      => '?',
    );

    # One barcode per line; blank lines are ignored.
    my @isbns = map { ean2isbn($_) } grep { /\S/ } map { alltrim($_) } <DATA>;

    for my $isbn (@isbns) {
        my $url = 'http://lcweb.loc.gov/cgi-bin/zclient'
            . '?host=z3950.loc.gov&port=7090&attrset=BIB1&rtype=USMARC'
            . '&DisplayRecordSyntax=HTML&ESN=F&startrec=1&maxrecords=10'
            . '&dbname=Voyager&srchtype=1,7,2,3,3,1,4,1,5,1,6,1'
            . "&term_term_1=$isbn";

        # get() yields undef on failure; scrapeLOC then leaves every field '?'.
        my $webpage = get($url);
        scrapeLOC( $webpage, \%bib );
        print _format_entry( \%bib );
    }
    return;
}

# _format_entry(\%bib)
# Returns the @book entry text for the given field hash.  Fields are
# comma-separated, as BibTeX requires; the citation key is left empty.
sub _format_entry {
    my ($bib) = @_;
    my @order = qw(author title edition publisher address year ISBN);
    return "\@book{,\n"
        . join( ",\n", map { sprintf ' %s={%s}', $_, $bib->{$_} } @order )
        . "\n}\n\n";
}

# parsespan($begin, $end, $s)
# Returns the text of $s lying between the patterns $begin and $end, with
# newlines turned into spaces and whitespace runs collapsed to one space.
# Both delimiters are interpolated as regular expressions.  Returns the
# empty string when the delimiters are not found (or $s is undefined).
sub parsespan {
    my ( $begin, $end, $s ) = @_;

    # The tempered dot keeps the span from running past a second $begin.
    return ''
        unless defined $s && $s =~ /$begin((?:(?!$begin).)*)$end/ms;

    my $span = $1;
    $span =~ s/\n/ /g;
    $span =~ s/\s\s+/ /g;
    return $span;
}

# scrapeLOC($webpage, \%bib)
# Resets every value of %$bib to '?', then fills in author, title, edition,
# ISBN, publisher, address and year from the "Label: value" lines found in
# $webpage.  A field that cannot be extracted stays '?'.
sub scrapeLOC {
    my ( $webpage, $bib ) = @_;

    $bib->{$_} = '?' for keys %$bib;
    return unless $webpage;

    # Every line prefix up to its first colon is treated as a field label.
    my @labels = $webpage =~ /^(.*?:)/mg;
    return unless @labels;

    # A field's value runs until the next label; the last one runs to </PRE>.
    # Labels are page text, not patterns, so they are quoted before being
    # handed to parsespan as the terminating regex.
    my %end_of;
    for my $i ( 0 .. $#labels - 1 ) {
        $end_of{ $labels[$i] } = quotemeta $labels[ $i + 1 ];
    }
    $end_of{ $labels[-1] } = '<\/PRE>';

    # Trimmed value of one labelled field, or undef when absent or empty.
    my $value_of = sub {
        my ($label) = @_;
        return unless exists $end_of{$label};
        my $text = alltrim( parsespan( $label, $end_of{$label}, $webpage ) );
        return unless length $text;
        return $text;
    };

    my $author = $value_of->('Author:');
    $bib->{'author'} = $author if defined $author;

    # "Title / statement of responsibility": when the part after the slash
    # is present it replaces the Author: field, commas becoming " and".
    my $title_line = $value_of->('Title:');
    if ( defined $title_line ) {
        my ( $title, $by ) = map { alltrim($_) } split m{/}, $title_line;
        $bib->{'title'} = $title if defined $title && length $title;
        if ( defined $by && length $by ) {
            $by =~ s/,/ and/g;
            $by =~ s/\.$//;
            $bib->{'author'} = $by;
        }
    }

    # Only the first word of the edition statement is kept.
    my $edition = $value_of->('Edition:');
    if ( defined $edition && $edition =~ /(\S+)/ ) {
        $bib->{'edition'} = $1;
    }

    # First 13-digit number, or 10-character ISBN whose last character may
    # be the check character X.
    my $isbn = $value_of->('ISBN:');
    if ( defined $isbn && $isbn =~ /\b(\d{13}|\d{9}[\dX])\b/ ) {
        $bib->{'ISBN'} = $1;
    }

    # Expected shape: "Place : Publisher, ... yyyy".
    my $published = $value_of->('Published:');
    if ( defined $published
        && $published =~ /(.*?)\s:\s(.*?),.*?(\d{4}).*$/ )
    {
        my ( $address, $publisher, $year ) = ( $1, $2, $3 );
        $bib->{'publisher'} = $publisher         if $publisher;
        $bib->{'address'}   = alltrim($address)  if $address;
        $bib->{'year'}      = $year              if $year;
    }
    return;
}

# alltrim($s)
# Returns $s without leading and trailing whitespace.
sub alltrim {
    my $s = shift;
    $s =~ s/^\s+//;
    $s =~ s/\s+$//;
    return $s;
}

# ean2isbn($ean)
# Converts an EAN-13 barcode to an ISBN-10: drops the first three characters,
# keeps the next nine and appends a freshly computed ISBN-10 check character.
# The input is not validated; it is assumed to be a 13-character EAN.
sub ean2isbn {
    my $isbn = substr( shift, 3, 10 );
    return substr( $isbn, 0, 9 ) . checkDigit($isbn);
}

# checkISBN($isbn)
# Returns a two-element list.  For a 10-character input: (1 or 0 depending
# on whether the last character is the correct check character, the correct
# check character).  Otherwise: (0, '-') when too short, (0, '+') when too
# long.
sub checkISBN {
    my $isbn = shift;
    my $n    = length($isbn);

    if ( $n != 10 ) {
        return ( 0, ( $n < 10 ? '-' : '+' ) );
    }

    my $cd = checkDigit($isbn);
    return ( ( $cd eq substr( $isbn, -1, 1 ) ? 1 : 0 ), $cd );
}

# checkDigit($isbn)
# Returns the ISBN-10 check character ('0'..'9' or 'X') for the given string.
# Every character except the last is weighted 10, 9, 8, ... and summed; the
# last character (the existing check character) is ignored.
sub checkDigit {
    my @digits = split( //, uc(shift) );
    my $sum    = 0;
    my $weight = 10;

    for my $i ( 0 .. $#digits - 1 ) {
        $sum += $digits[$i] * $weight--;
    }

    # Indexed by $sum % 11, i.e. (11 - remainder) % 11 with 10 written as X.
    my @check_char = qw(0 X 9 8 7 6 5 4 3 2 1);
    return $check_char[ $sum % 11 ];
}

__DATA__
9780451458711
9780201185379
9780201489460
9780764545696
9780138482763