Skip to content

Commit

Permalink
Abfrage SWISS+OBVSG über alma-sru.php vereinheitlicht
Browse files Browse the repository at this point in the history
Die PPN-Abfrage jetzt auch insgesamt wieder gefixt für
die SRU-ALMA-Schnittstellen.
  • Loading branch information
zuphilip committed Jan 5, 2025
1 parent ce98764 commit e6f21dd
Show file tree
Hide file tree
Showing 6 changed files with 80 additions and 338 deletions.
85 changes: 55 additions & 30 deletions isbn/alma-sru.php
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
* Copyright (C) 2024 Universitätsbibliothek Mannheim
*
* Author:
* Philipp Zumstein <philipp.zumstein@bib.uni-mannheim.de>
* Philipp Zumstein <philipp.zumstein@bib.uni-mannheim.de>
*
* This is free software licensed under the terms of the GNU GPL,
* version 3, or (at your option) any later version.
Expand All @@ -28,13 +28,6 @@
include 'conf.php';
include 'lib.php';

$suchString = '';
$suchStringSWB = '';

if (isset($_GET['ppn'])) {
$ppn = trim($_GET['ppn']);
$suchString = 'dc.id=' . $ppn;
}
if (isset($_GET['bibliothek'])) {
$file = file_get_contents('./srulibraries.json');
$json = json_decode($file, true);
Expand All @@ -46,24 +39,35 @@

$urlBase = $urlBase . '?version=1.2&operation=searchRetrieve&recordSchema=marcxml&query=';

$filteredSuchString = 'alma.mms_tagSuppressed=false';
if (isset($_GET['isbn'])) {
$n = trim($_GET['isbn']);
$nArray = preg_split("/\s*(or|,|;)\s*/i", $n, -1, PREG_SPLIT_NO_EMPTY);
$suchString = 'alma.all=' . implode('+OR+alma.all=', $nArray);
$suchStringSWB = implode(' or ', $nArray);
if (!isset($_GET['ppn']) and !isset($_GET['isbn'])) {
echo "Weder isbn noch ppn Parameter für eine Suche angegeben.\n";
exit;
}

if (strlen($suchString)) {
$filteredSuchString .= '+AND+(' . $suchString . ')';
$suchString = '';

if (isset($_GET['ppn'])) {
$n = trim($_GET['ppn']);
$searchObject = "ppn";
}
if (isset($_GET['isbn'])) {
$n = trim($_GET['isbn']);
$searchObject = "isbn";
}
$nArray = preg_split("/\s*(or|,|;)\s*/i", $n, -1, PREG_SPLIT_NO_EMPTY);
$suchString = 'alma.all=' . implode('+OR+alma.all=', $nArray);
$filteredSuchString = 'alma.mms_tagSuppressed=false' . '+AND+(' . $suchString . ')';

# work around ExLibris server configuration issue
# and increase timeout (i.e. waiting time)
$contextOptions = [
'ssl' => [
'verify_peer' => true,
'ciphers' => 'DEFAULT@SECLEVEL=1',
],
'http' => [
'timeout' => 10,
],
];
$context = stream_context_create($contextOptions);
$result = file_get_contents($urlBase . $filteredSuchString, false, $context);
Expand Down Expand Up @@ -93,20 +97,29 @@

foreach ($records as $record) {
// Filter out any other results which contain the ISBN but not in the 020 or 776 field
// or the PPN in the 001 or 035 field(s).
$pattern = [
"isbn" => './/datafield[@tag="020" or @tag="776"]/subfield',
"ppn" => './/controlfield[@tag="001"]|.//datafield[@tag="035"]/subfield'
];
$foundMatch = false;
$foundIsbns = $xpath->query('.//datafield[@tag="020" or @tag="776"]/subfield', $record);
foreach ($foundIsbns as $foundNode) {
$nodes = $xpath->query($pattern[$searchObject], $record);
foreach ($nodes as $foundNode) {
$foundValue = $foundNode->nodeValue;
foreach ($nArray as $queryValue) {
$testString = preg_replace('/[^0-9xX]/', '', $queryValue);
if (strlen($testString) == 13) {
// Delete the 978-prefix and the check value at the end for ISBN13
$testString = substr($testString, 3, -1);
} elseif (strlen($testString) == 10) {
// Delete check value at the end for ISBN10
$testString = substr($testString, 0, -1);
$testString = $queryValue;
if ($searchObject == "isbn") {
$testString = preg_replace('/[^0-9xX]/', '', $testString);
$foundValue = preg_replace('/[^0-9xX]/', '', $foundValue);
if (strlen($testString) == 13) {
// Delete the 978-prefix and the check value at the end for ISBN13
$testString = substr($testString, 3, -1);
} elseif (strlen($testString) == 10) {
// Delete check value at the end for ISBN10
$testString = substr($testString, 0, -1);
}
}
if (strpos(preg_replace('/[^0-9xX]/', '', $foundValue), $testString) !== false) {
if (strpos($foundValue, $testString) !== false) {
$foundMatch = true;
}
}
Expand All @@ -122,6 +135,18 @@
$map = STANDARD_MARC_MAP;
$map['bestand'] = '//datafield[@tag="AVA"]/subfield[@code="b"]';
$map['sammlung'] = '//datafield[@tag="AVE"]/subfield[@code="m"]';
// TODO Prüfen ob man die SW nicht allgemeingültig so wie folgt behandeln könnte
// (Feld 689 wird von HBZ und SWISS genutzt und Feld 650 von SWISS;
// Unterfeld 2 hat nur SWISS mit "gnd" gefüllt; aber alle nutzen Unterfeld
// 0 zur Verlinkung mit der GND beginnend mit "(DE-588)". Aber unklar wie dies
// etwa bei Formschlagwörtern ohne Verlinkung aussieht.)
$map['sw'] = array(
'mainPart' => '//datafield[starts-with(@tag,"6") and (starts-with(subfield[@code="0"],"(DE-588)") or subfield[@code="2"]="gnd")]',
'value' => './subfield[@code="a"]',
'subvalues' => './subfield[@code="b" or @code="t"]',
'additional' => './subfield[@code="g" or @code="z"]',
'key' => './subfield[@code="0" and contains(text(), "(DE-588)")]'
);

if (!isset($_GET['format'])) {
header('Content-type: text/xml');
Expand All @@ -142,7 +167,7 @@
header('Content-type: application/json');
echo json_encode($outputMap, JSON_PRETTY_PRINT);
} elseif ($_GET['format'] == 'holdings') {
echo "<html>\n<head>\n <title>Bestand Alma-SRU zu ISBN-Suche</title>\n <meta http-equiv='content-type' content='text/html; charset=UTF-8' />\n <style type='text/css'>body { font-family: Arial, Verdana, sans-serif; }</style>\n</head>\n<body>\n";
echo "<html>\n<head>\n <title>Bestand Alma-SRU zu ISBN-Suche</title>\n <meta http-equiv='content-type' content='text/html; charset=UTF-8' />\n <style type='text/css'>body { font-family: Arial, Verdana, sans-serif; }</style>\n</head>\n<body>\n";
$outputXml = simplexml_load_string($outputString);
$avaNodes = $outputXml->xpath('//datafield[@tag="AVA"]');
$aveNodes = $outputXml->xpath('//datafield[@tag="AVE"]');
Expand Down Expand Up @@ -244,11 +269,11 @@
}
echo '</div>';
} elseif ($size > 100) {
//if the isbn is not found, then the $outputString is a minimal xml document
//if the isbn/ppn is not found, then the $outputString is a minimal xml document
//of size 48, for larger size something might be found...
$urlMAN = 'man-sru.php?isbn=' . $suchStringSWB;
$sruUrl = str_replace('format=holdings', '', $_SERVER['REQUEST_URI']);
echo '<div>Bestand Alma-SRU: eventuell da (' . $size . ")</div>\n";
echo '<table><tr><td><a href="' . $urlMAN . '" taget="_blank">See SRU Result</a></td></tr></table>';
echo '<table><tr><td><a href="' . $sruUrl . '" taget="_blank">See SRU Result</a></td></tr></table>';
} else {
echo 'Es wurde nichts gefunden';
}
Expand Down
33 changes: 1 addition & 32 deletions isbn/hbz.php
Original file line number Diff line number Diff line change
@@ -1,35 +1,4 @@
<?php
/*
* Source: https://github.com/UB-Mannheim/malibu/
*
* Copyright (C) 2013 Universitätsbibliothek Mannheim
*
* Author:
* Philipp Zumstein <philipp.zumstein@bib.uni-mannheim.de>
*
* This is free software licensed under the terms of the GNU GPL,
* version 3, or (at your option) any later version.
* See <http://www.gnu.org/licenses/> for more details.
*
* Aufruf aus Webbrowser:
* hbz?isbn=ISBN
* ISBN ist eine 10- bzw. 13-stellige ISBN mit/ohne Bindestriche/Leerzeichen
* ISBN kann ebenfalls eine Komma-separierte Liste von ISBNs sein
* hbz?ppn=PPN
* PPN ist eine ID-Nummer des HBZ
* hbz?isbn=ISBN&format=json
* hbz?ppn=PPN&format=json
* Ausgabe erfolgt als JSON
*
* Sucht übergebene ISBN bzw. PPN im HBZ-Katalog
* und gibt maximal 10 Ergebnisse als MABXML zurück
* bzw. als JSON.
*
* SRU-Schnittstelle vom HBZ beschrieben unter:
* https://service-wiki.hbz-nrw.de/display/VDBE/Zugriff+auf+die+hbz-Verbunddatenbank+via+SRU
*
*/


// Redirect this legacy entry point to alma-sru.php, keeping the path prefix and
// the query string of the incoming request and adding the library code DE-HBZ.
$url = str_replace('/hbz.php', '/alma-sru.php', $_SERVER['REQUEST_URI']);
// Start the query string with '?' when the request carried none; blindly
// appending '&' would glue the parameter onto the path.
$url .= (strpos($url, '?') === false ? '?' : '&') . 'bibliothek=DE-HBZ';
header('Location: ' . $url);
// Stop here so nothing else is executed or sent after the redirect header.
exit;
143 changes: 3 additions & 140 deletions isbn/obvsg.php
Original file line number Diff line number Diff line change
@@ -1,141 +1,4 @@
<?php
/*
* Source: https://github.com/UB-Mannheim/malibu/
*
* Copyright (C) 2022 Universitätsbibliothek Mannheim
*
* Author:
* Philipp Zumstein <philipp.zumstein@bib.uni-mannheim.de>
*
* This is free software licensed under the terms of the GNU GPL,
* version 3, or (at your option) any later version.
* See <http://www.gnu.org/licenses/> for more details.
*
* Aufruf aus Webbrowser:
* obvsg.php?isbn=ISBN
* ISBN ist eine 10- bzw. 13-stellige ISBN mit/ohne Bindestriche/Leerzeichen
* ISBN kann ebenfalls eine Komma-separierte Liste von ISBNs sein
* obvsg.php?isbn=ISBN&format=json
*
* Sucht übergebene ISBN bzw. PPN in der SRU-Schnittstelle von obvsg.at (OBV-LIT)
* und gibt maximal 10 Ergebnisse als MARCXML oder JSON zurück.
*/

include 'conf.php';
include 'lib.php';

/*
 * SRU access, documented at
 * https://www.obvsg.at/services/verbundsystem/sru
 *
 * Flow of this script:
 *   1. Build an SRU query from the "ppn" or "isbn" GET parameter.
 *   2. Fetch at most 10 records from the OBV-LIT endpoint.
 *   3. For ISBN searches keep only records that carry the ISBN in field 020 or 776.
 *   4. Output the records as XML (default) or as mapped JSON (format=json).
 */
$urlBase = 'https://services.obvsg.at/sru/OBV-LIT?operation=searchRetrieve&query=';

$searchISBN = false;
$searchPPN = false;
$nArray = [];

// A PPN takes precedence over an ISBN when both parameters are given.
if (isset($_GET['ppn'])) {
    $n = trim($_GET['ppn']);
    $searchPPN = true;
} elseif (isset($_GET['isbn'])) {
    $n = trim($_GET['isbn']);
    $searchISBN = true;
}
if ($searchPPN || $searchISBN) {
    // Several values may be separated by "or", "," or ";".
    $nArray = preg_split("/\s*(or|,|;)\s*/i", $n, -1, PREG_SPLIT_NO_EMPTY);
}

// Without any search value the query below would be malformed, so stop early
// instead of sending a request with an empty search clause.
if (empty($nArray)) {
    header('HTTP/1.1 400 Bad Request');
    echo "Weder isbn noch ppn Parameter für eine Suche angegeben.\n";
    exit;
}

// The values come straight from the request, so percent-encode them before
// they become part of the URL.
$suchString = 'alma.all=' . implode('+OR+alma.all=', array_map('rawurlencode', $nArray));

$filteredSuchString = 'alma.mms_tagSuppressed=false+AND+(' . $suchString . ')&maximumRecords=10';

$contextOptions = [
    'http' => [
        // Double quotes are required here: in single quotes \r\n would be sent
        // as four literal characters instead of a CRLF line ending.
        'header' => "Connection: close\r\n",
        'timeout' => 10,
    ],
];
$context = stream_context_create($contextOptions);
$result = file_get_contents($urlBase . $filteredSuchString, false, $context);

if ($result === false) {
    header('HTTP/1.1 400 Bad Request');
    echo "Verbindung zu SRU-Schnittstelle fehlgeschlagen\n";
    var_dump($urlBase . $filteredSuchString);
    exit;
}

// Delete namespaces such that we don't need to specify them
// in every xpath query.
$result = str_replace(' xmlns:xs="http://www.w3.org/2001/XMLSchema"', '', $result);
$result = str_replace(' xmlns="http://www.loc.gov/MARC21/slim"', '', $result);

$doc = new DOMDocument();
$doc->preserveWhiteSpace = false;
// The response is deliberately parsed with the HTML parser; the xpath below is
// written for the lower-cased element names that results from this.
@$doc->loadHTML($result);
$xpath = new DOMXPath($doc);

// Note: element names are all lower case here, not camelCase.
$records = $xpath->query("//records/record/recorddata/record");

$outputString = "<?xml version=\"1.0\"?>\n";
$outputString .= "<collection>\n";
$outputArray = [];

foreach ($records as $record) {
    // Filter out any other results which contain the ISBN but not in the 020 or 776 field
    $foundMatch = false;
    if ($searchISBN) {
        $foundIsbns = $xpath->query('.//datafield[@tag="020" or @tag="776"]/subfield', $record);
        foreach ($foundIsbns as $foundNode) {
            // Reduce the stored value to ISBN characters only, so that hyphens
            // or blanks in the record do not prevent a match.
            $foundValue = preg_replace('/[^0-9xX]/', '', $foundNode->nodeValue);
            foreach ($nArray as $queryValue) {
                $testString = preg_replace('/[^0-9xX]/', '', $queryValue);
                if (strlen($testString) === 13) {
                    // Delete the 978-prefix and the check value at the end for ISBN13
                    $testString = substr($testString, 3, -1);
                } elseif (strlen($testString) === 10) {
                    // Delete check value at the end for ISBN10
                    $testString = substr($testString, 0, -1);
                }
                if ($testString === '') {
                    // A value without any ISBN characters must not match every record.
                    continue;
                }
                if (strpos($foundValue, $testString) !== false) {
                    $foundMatch = true;
                }
            }
        }
    }
    // PPN searches are passed through unfiltered.
    if (!$searchISBN || $foundMatch) {
        $recordXml = $doc->saveXML($record);
        $outputString .= $recordXml;
        $outputArray[] = $recordXml;
    }
}
$outputString .= "</collection>";

// STANDARD_MARC_MAP and performMapping() are not defined in this script;
// presumably they come from the included conf.php / lib.php.
$map = STANDARD_MARC_MAP;
$map['sw'] = [
    'mainPart' => '//datafield[starts-with(@tag,"689")]',
    'value' => './subfield[@code="a"]',
    'subvalues' => './subfield[@code="b" or @code="t"]',
    'additional' => './subfield[@code="g" or @code="z"]',
    'key' => './subfield[@code="0" and contains(text(), "(DE-588)")]'
];

if (!isset($_GET['format'])) {
    header('Content-type: text/xml');
    echo $outputString;
} elseif ($_GET['format'] === 'json') {
    // Mapping over the whole collection ...
    $outputMap = performMapping($map, simplexml_load_string($outputString));

    // ... plus one mapping per individual record.
    $outputIndividualMap = [];
    foreach ($outputArray as $singleRecordXml) {
        $outputIndividualMap[] = performMapping($map, simplexml_load_string($singleRecordXml));
    }
    $outputMap["einzelaufnahmen"] = $outputIndividualMap;

    header('Content-type: application/json');
    echo json_encode($outputMap, JSON_PRETTY_PRINT);
}
// Redirect this legacy entry point to alma-sru.php, keeping the path prefix and
// the query string of the incoming request and adding the library code AT-OBVSG.
$url = str_replace('/obvsg.php', '/alma-sru.php', $_SERVER['REQUEST_URI']);
// Start the query string with '?' when the request carried none; blindly
// appending '&' would glue the parameter onto the path.
$url .= (strpos($url, '?') === false ? '?' : '&') . 'bibliothek=AT-OBVSG';
header('Location: ' . $url);
// Stop here so nothing else is executed or sent after the redirect header.
exit;
15 changes: 15 additions & 0 deletions isbn/srulibraries.json
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,21 @@
},
"DE-HBZ" : {
"name": "HBZ (keine Bestandsdaten)",
"holdings": false,
"doku": "https://service-wiki.hbz-nrw.de/display/VDBE/Zugriff+auf+die+hbz-Verbunddatenbank+via+SRU",
"sru": "https://eu04.alma.exlibrisgroup.com/view/sru/49HBZ_NETWORK"
},
"CH-SWISS" : {
"name": "SWISS (keine Bestandsdaten)",
"holdings": false,
"doku": "https://slsp.ch/de/metadata",
"sru": "https://swisscovery.slsp.ch/view/sru/41SLSP_NETWORK"
},
"AT-OBVSG" : {
"name": "OBVSG (keine Bestandsdaten)",
"holdings": false,
"doku": "https://www.obvsg.at/services/verbundsystem/sru",
"sru": "https://services.obvsg.at/sru/OBV-LIT"
}

}
Loading

0 comments on commit e6f21dd

Please sign in to comment.