From 2a89177b130453e2d76263683fa93a58df066840 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tobias=20B=C3=BClte?= Date: Wed, 27 Nov 2024 11:56:20 +0100 Subject: [PATCH 1/3] Enrich records with rvk from callNumber --- src/main/resources/alma/fix/item.fix | 18 ++++++++++++++++++ .../resources/alma-fix/990014830510206441.json | 7 +++++++ .../resources/alma-fix/990016782920206441.json | 7 +++++++ .../resources/alma-fix/990058434730206441.json | 7 +++++++ .../resources/alma-fix/990075429930206441.json | 8 ++++++++ .../resources/alma-fix/990109712970206441.json | 7 +++++++ .../resources/alma-fix/990122511970206441.json | 8 ++++++++ .../resources/alma-fix/990210237770206441.json | 15 +++++++++++++++ .../resources/alma-fix/99370771475306441.json | 7 +++++++ 9 files changed, 84 insertions(+) diff --git a/src/main/resources/alma/fix/item.fix b/src/main/resources/alma/fix/item.fix index 8f39a3fb3..1e91c19cd 100644 --- a/src/main/resources/alma/fix/item.fix +++ b/src/main/resources/alma/fix/item.fix @@ -125,3 +125,21 @@ do list(path: "hasItem[]","var":"$i") add_field("$i.inCollection[].1.label","Bibliotheken ohne zentrale Katalogisierung (IZ Bridge)") end end + +set_array("rvk[]") +do list(path:"hasItem[]","var":"$i") + if any_match("$i.callNumber","[A-Z][A-Z] \\d* .*") + copy_field("$i.callNumber","rvk[].$append") + end +end + +replace_all("rvk[].*","([A-Z][A-Z] \\d*) .*","$1") + +do list(path:"rvk[]","var":"$i") + copy_field("$i","subject[].$append.notation") + set_array("subject[].$last.type[]","Concept") + add_field("subject[].$last.source.label","RVK (Regensburger Verbundklassifikation)") + add_field("subject[].$last.source.id","https://d-nb.info/gnd/4449787-8") +end + +uniq("subject[]") diff --git a/src/test/resources/alma-fix/990014830510206441.json b/src/test/resources/alma-fix/990014830510206441.json index df82560a9..3ad5b2575 100644 --- a/src/test/resources/alma-fix/990014830510206441.json +++ b/src/test/resources/alma-fix/990014830510206441.json @@ -100,6 +100,13 @@ "dateOfDeath" : "1977", "altLabel" : [ "Chaplin, Charles S.", "Spencer Chaplin, Charles", "Chaplin, Charly", "Tsaplin, Tsarl Spenser", "Sa-plin, Sác-li", "Cāplin, Cārlī", "Čaplin, Č'arlz", "Chaplin, Charles Spencer", "Chaplin, Charles", "Čaplin, Čarl'z Spenser", "Spencer-Chaplin, Charles", "Чаплин, Чарльз", "Zhuobielin", "Sarlō", "Charlot", "Sac-lô" ] } ] + }, { + "notation" : "AP 51081", + "type" : [ "Concept" ], + "source" : { + "label" : "RVK (Regensburger Verbundklassifikation)", + "id" : "https://d-nb.info/gnd/4449787-8" + } } ], "subjectslabels" : [ "Chaplin, Charlie" ], "hasItem" : [ { diff --git a/src/test/resources/alma-fix/990016782920206441.json b/src/test/resources/alma-fix/990016782920206441.json index ed49a99bb..dac4463bd 100644 --- a/src/test/resources/alma-fix/990016782920206441.json +++ b/src/test/resources/alma-fix/990016782920206441.json @@ -89,6 +89,13 @@ "id" : "https://www.wikidata.org/wiki/Q47524318" }, "label" : "Klavier / Lehrmittel" + }, { + "notation" : "NC 10", + "type" : [ "Concept" ], + "source" : { + "label" : "RVK (Regensburger Verbundklassifikation)", + "id" : "https://d-nb.info/gnd/4449787-8" + } } ], "subjectslabels" : [ "Klavier / Lehrmittel" ], "hasItem" : [ { diff --git a/src/test/resources/alma-fix/990058434730206441.json b/src/test/resources/alma-fix/990058434730206441.json index ffddba455..3ccdbf5bd 100644 --- a/src/test/resources/alma-fix/990058434730206441.json +++ b/src/test/resources/alma-fix/990058434730206441.json @@ -155,6 +155,13 @@ "dateOfDeath" : "0039", "altLabel" : [ "Herodes Antipatros", "Herodes, Antipas" ] } ] + }, { + "notation" : "BC 8740", + "type" : [ "Concept" ], + "source" : { + "label" : "RVK (Regensburger Verbundklassifikation)", + "id" : "https://d-nb.info/gnd/4449787-8" + } } ], "subjectslabels" : [ "Herodes ", "Makkabäer", "Herrschaftssystem", "Neues Testament / Zeithintergrund", "Herodes Antipas, Galiläa, Tetrarch" ], "hasItem" : [ { diff --git a/src/test/resources/alma-fix/990075429930206441.json b/src/test/resources/alma-fix/990075429930206441.json index a21fd18f9..e9187bc22 100644 --- a/src/test/resources/alma-fix/990075429930206441.json +++ b/src/test/resources/alma-fix/990075429930206441.json @@ -87,6 +87,14 @@ "label" : "Deutsch" } ], "extent" : "64 S.", + "subject" : [ { + "notation" : "BO 4075", + "type" : [ "Concept" ], + "source" : { + "label" : "RVK (Regensburger Verbundklassifikation)", + "id" : "https://d-nb.info/gnd/4449787-8" + } + } ], "hasItem" : [ { "label" : "lobid Bestandsressource", "type" : [ "Item", "PhysicalObject" ], diff --git a/src/test/resources/alma-fix/990109712970206441.json b/src/test/resources/alma-fix/990109712970206441.json index 68e217a14..b1faac40f 100644 --- a/src/test/resources/alma-fix/990109712970206441.json +++ b/src/test/resources/alma-fix/990109712970206441.json @@ -213,6 +213,13 @@ "gndIdentifier" : "2085343-9", "altLabel" : [ "Beethoven-Haus", "Bonner Beethoven-Haus", "Bonner Beethovenhaus" ] } ] + }, { + "notation" : "BM 800", + "type" : [ "Concept" ], + "source" : { + "label" : "RVK (Regensburger Verbundklassifikation)", + "id" : "https://d-nb.info/gnd/4449787-8" + } } ], "spatial" : [ { "id" : "https://nwbib.de/spatial#Q586", diff --git a/src/test/resources/alma-fix/990122511970206441.json b/src/test/resources/alma-fix/990122511970206441.json index 273db2b83..8517b40b4 100644 --- a/src/test/resources/alma-fix/990122511970206441.json +++ b/src/test/resources/alma-fix/990122511970206441.json @@ -83,6 +83,14 @@ "type" : [ "Work" ] }, "extent" : "2 CD : DDD + Beih.", + "subject" : [ { + "notation" : "CD 2410", + "type" : [ "Concept" ], + "source" : { + "label" : "RVK (Regensburger Verbundklassifikation)", + "id" : "https://d-nb.info/gnd/4449787-8" + } + } ], "hasItem" : [ { "label" : "lobid Bestandsressource", "type" : [ "Item", "PhysicalObject" ], diff --git a/src/test/resources/alma-fix/990210237770206441.json b/src/test/resources/alma-fix/990210237770206441.json index 7ae021db8..149d305e6 100644 --- a/src/test/resources/alma-fix/990210237770206441.json +++ b/src/test/resources/alma-fix/990210237770206441.json @@ -91,6 +91,21 @@ } ], "extent" : "1 Partitur (12 Seiten)", "note" : [ "Hier auch später erschienene, unveränderte Nachdrucke" ], + "subject" : [ { + "notation" : "NV 102", + "type" : [ "Concept" ], + "source" : { + "label" : "RVK (Regensburger Verbundklassifikation)", + "id" : "https://d-nb.info/gnd/4449787-8" + } + }, { + "notation" : "NB 2023", + "type" : [ "Concept" ], + "source" : { + "label" : "RVK (Regensburger Verbundklassifikation)", + "id" : "https://d-nb.info/gnd/4449787-8" + } + } ], "hasItem" : [ { "label" : "lobid Bestandsressource", "type" : [ "Item", "PhysicalObject" ], diff --git a/src/test/resources/alma-fix/99370771475306441.json b/src/test/resources/alma-fix/99370771475306441.json index 16ba4004b..09edd862e 100644 --- a/src/test/resources/alma-fix/99370771475306441.json +++ b/src/test/resources/alma-fix/99370771475306441.json @@ -245,6 +245,13 @@ "id" : "https://d-nb.info/gnd/4181939-1", "gndIdentifier" : "4181939-1" } ] + }, { + "notation" : "MS 6020", + "type" : [ "Concept" ], + "source" : { + "label" : "RVK (Regensburger Verbundklassifikation)", + "id" : "https://d-nb.info/gnd/4449787-8" + } } ], "subjectslabels" : [ "Altern", "Berufliche Stellung", "Bildung", "Einkommensungleichheit", "Gesundheit", "Gesundheitspolitik", "Krankheit", "Soziale Benachteiligung", "Soziale Gerechtigkeit", "Soziale Ungleichheit, Armut, Rassismus", "Gesundheitssoziologie, Medizinsoziologie", "Wirtschaftssoziologie, Arbeitssoziologie, Organisationssoziologie", "Soziale Ungleichheit", "Teilhabe", "Gesundheitswesen", "Sozialer Friede" ], "hasItem" : [ { From 8ceb20db9e84196b0c951c89cdadce71775ddd5c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tobias=20B=C3=BClte?= Date: Thu, 28 Nov 2024 11:01:50 +0100 Subject: [PATCH 2/3] Specify length of regex --- src/main/resources/alma/fix/item.fix | 10 +++++----- src/test/resources/alma-fix/990016782920206441.json | 7 ------- 2 files changed, 5 insertions(+), 12 deletions(-) diff --git a/src/main/resources/alma/fix/item.fix b/src/main/resources/alma/fix/item.fix index 1e91c19cd..034c6d6f8 100644 --- a/src/main/resources/alma/fix/item.fix +++ b/src/main/resources/alma/fix/item.fix @@ -126,16 +126,16 @@ do list(path: "hasItem[]","var":"$i") end end -set_array("rvk[]") +set_array("@callNumberRvk[]") do list(path:"hasItem[]","var":"$i") - if any_match("$i.callNumber","[A-Z][A-Z] \\d* .*") - copy_field("$i.callNumber","rvk[].$append") + if any_match("$i.callNumber","[A-Z][A-Z] \\d{3,6} .*") + copy_field("$i.callNumber","@callNumberRvk[].$append") end end -replace_all("rvk[].*","([A-Z][A-Z] \\d*) .*","$1") +replace_all("@callNumberRvk[].*","([A-Z][A-Z] \\d{3,6}) .*","$1") -do list(path:"rvk[]","var":"$i") +do list(path:"@callNumberRvk[]","var":"$i") copy_field("$i","subject[].$append.notation") set_array("subject[].$last.type[]","Concept") add_field("subject[].$last.source.label","RVK (Regensburger Verbundklassifikation)") diff --git a/src/test/resources/alma-fix/990016782920206441.json b/src/test/resources/alma-fix/990016782920206441.json index dac4463bd..ed49a99bb 100644 --- a/src/test/resources/alma-fix/990016782920206441.json +++ b/src/test/resources/alma-fix/990016782920206441.json @@ -89,13 +89,6 @@ "id" : "https://www.wikidata.org/wiki/Q47524318" }, "label" : "Klavier / Lehrmittel" - }, { - "notation" : "NC 10", - "type" : [ "Concept" ], - "source" : { - "label" : "RVK (Regensburger Verbundklassifikation)", - "id" : "https://d-nb.info/gnd/4449787-8" - } } ], "subjectslabels" : [ "Klavier / Lehrmittel" ], "hasItem" : [ { From f5653e59ab28730aeedb22937a63ee941f0bbb45 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tobias=20B=C3=BClte?= Date: Thu, 28 Nov 2024 12:45:59 +0100 Subject: [PATCH 3/3] Adjust the filter regex to fetch only valid RVK Signature https://rvk.uni-regensburg.de/images/stories/fruit/sigreg/regeln_fr_die_signaturvergabe_version_2012_02_17.pdf https://www.ub.uni-muenchen.de/suchen/buchaufstellung-rvk/index.html https://www.ub.uni-frankfurt.de/fachinformationen/Info-RVK.pdf https://rvk.uni-regensburg.de/images/stories/RVKO_Informationen/RVK_Nutzung_an_der_ULB_Muenster_akt_2023.pdf --- src/main/resources/alma/fix/item.fix | 2 +- .../resources/alma-fix/990109712970206441.json | 7 ------- .../resources/alma-fix/990122511970206441.json | 8 -------- .../resources/alma-fix/990210237770206441.json | 15 --------------- .../resources/alma-fix/99370771475306441.json | 7 ------- 5 files changed, 1 insertion(+), 38 deletions(-) diff --git a/src/main/resources/alma/fix/item.fix b/src/main/resources/alma/fix/item.fix index 034c6d6f8..2b2273d72 100644 --- a/src/main/resources/alma/fix/item.fix +++ b/src/main/resources/alma/fix/item.fix @@ -128,7 +128,7 @@ end set_array("@callNumberRvk[]") do list(path:"hasItem[]","var":"$i") - if any_match("$i.callNumber","[A-Z][A-Z] \\d{3,6} .*") + if any_match("$i.callNumber","([A-Z]{2} \\d{3,6})(\\.\\d*)?( [A-Z]\\d+)+([\\.\\d\\-\\(\\)+]*)?") copy_field("$i.callNumber","@callNumberRvk[].$append") end end diff --git a/src/test/resources/alma-fix/990109712970206441.json b/src/test/resources/alma-fix/990109712970206441.json index b1faac40f..68e217a14 100644 --- a/src/test/resources/alma-fix/990109712970206441.json +++ b/src/test/resources/alma-fix/990109712970206441.json @@ -213,13 +213,6 @@ "gndIdentifier" : "2085343-9", "altLabel" : [ "Beethoven-Haus", "Bonner Beethoven-Haus", "Bonner Beethovenhaus" ] } ] - }, { - "notation" : "BM 800", - "type" : [ "Concept" ], - "source" : { - "label" : "RVK (Regensburger Verbundklassifikation)", - "id" : "https://d-nb.info/gnd/4449787-8" - } } ], "spatial" : [ { "id" : "https://nwbib.de/spatial#Q586", diff --git a/src/test/resources/alma-fix/990122511970206441.json b/src/test/resources/alma-fix/990122511970206441.json index 8517b40b4..273db2b83 100644 --- a/src/test/resources/alma-fix/990122511970206441.json +++ b/src/test/resources/alma-fix/990122511970206441.json @@ -83,14 +83,6 @@ "type" : [ "Work" ] }, "extent" : "2 CD : DDD + Beih.", - "subject" : [ { - "notation" : "CD 2410", - "type" : [ "Concept" ], - "source" : { - "label" : "RVK (Regensburger Verbundklassifikation)", - "id" : "https://d-nb.info/gnd/4449787-8" - } - } ], "hasItem" : [ { "label" : "lobid Bestandsressource", "type" : [ "Item", "PhysicalObject" ], diff --git a/src/test/resources/alma-fix/990210237770206441.json b/src/test/resources/alma-fix/990210237770206441.json index 149d305e6..7ae021db8 100644 --- a/src/test/resources/alma-fix/990210237770206441.json +++ b/src/test/resources/alma-fix/990210237770206441.json @@ -91,21 +91,6 @@ } ], "extent" : "1 Partitur (12 Seiten)", "note" : [ "Hier auch später erschienene, unveränderte Nachdrucke" ], - "subject" : [ { - "notation" : "NV 102", - "type" : [ "Concept" ], - "source" : { - "label" : "RVK (Regensburger Verbundklassifikation)", - "id" : "https://d-nb.info/gnd/4449787-8" - } - }, { - "notation" : "NB 2023", - "type" : [ "Concept" ], - "source" : { - "label" : "RVK (Regensburger Verbundklassifikation)", - "id" : "https://d-nb.info/gnd/4449787-8" - } - } ], "hasItem" : [ { "label" : "lobid Bestandsressource", "type" : [ "Item", "PhysicalObject" ], diff --git a/src/test/resources/alma-fix/99370771475306441.json b/src/test/resources/alma-fix/99370771475306441.json index 09edd862e..16ba4004b 100644 --- a/src/test/resources/alma-fix/99370771475306441.json +++ b/src/test/resources/alma-fix/99370771475306441.json @@ -245,13 +245,6 @@ "id" : "https://d-nb.info/gnd/4181939-1", "gndIdentifier" : "4181939-1" } ] - }, { - "notation" : "MS 6020", - "type" : [ "Concept" ], - "source" : { - "label" : "RVK (Regensburger Verbundklassifikation)", - "id" : "https://d-nb.info/gnd/4449787-8" - } } ], "subjectslabels" : [ "Altern", "Berufliche Stellung", "Bildung", "Einkommensungleichheit", "Gesundheit", "Gesundheitspolitik", "Krankheit", "Soziale Benachteiligung", "Soziale Gerechtigkeit", "Soziale Ungleichheit, Armut, Rassismus", "Gesundheitssoziologie, Medizinsoziologie", "Wirtschaftssoziologie, Arbeitssoziologie, Organisationssoziologie", "Soziale Ungleichheit", "Teilhabe", "Gesundheitswesen", "Sozialer Friede" ], "hasItem" : [ {