diff --git a/pom.xml b/pom.xml
index ad734f0f5..f9798479a 100644
--- a/pom.xml
+++ b/pom.xml
@@ -64,7 +64,12 @@ org.metafacture - metafacture-triples + metafacture-elasticsearch + 6.0.0 + + + org.metafacture + metafacture-csv 6.0.0
@@ -124,6 +129,21 @@ core 1.47.1 + + org.apache.logging.log4j + log4j-api + 2.9.1 + + + org.apache.logging.log4j + log4j-1.2-api + 2.9.1 + + + commons-validator + commons-validator + 1.5.1 + org.elasticsearch elasticsearch
@@ -263,6 +283,7 @@ + tmp/ web/public/javascripts/leaflet.js **/*.woff2 web/conf/context.jsonld
diff --git a/src/main/java/org/lobid/resources/run/CulturegraphXmlFilterHbzRvkToCsv.java b/src/main/java/org/lobid/resources/run/CulturegraphXmlFilterHbzRvkToCsv.java
new file mode 100644
index 000000000..f1741ff37
--- /dev/null
+++ b/src/main/java/org/lobid/resources/run/CulturegraphXmlFilterHbzRvkToCsv.java
@@ -0,0 +1,70 @@
+/* Copyright 2020 hbz, Pascal Christoph. Licensed under the EPL 2.0*/
+
+package org.lobid.resources.run;
+
+import java.io.File;
+import java.io.IOException;
+
+import org.metafacture.biblio.marc21.MarcXmlHandler;
+import org.metafacture.csv.CsvEncoder;
+import org.metafacture.json.JsonDecoder;
+import org.metafacture.json.JsonEncoder;
+import org.metafacture.io.FileOpener;
+import org.metafacture.io.ObjectWriter;
+import org.metafacture.xml.XmlDecoder;
+import org.metafacture.metafix.Metafix;
+
+/**
+ * Filters resources with hbz holdings from culturegraph's MARCXML and transforms them
+ * into a CSV file. Unwanted records are dropped by reject() in the Fix.
+ *
+ * @author Pascal Christoph (dr0i)
+ * @author Tobias Bülte (TobiasNx)
+ **/
+public final class CulturegraphXmlFilterHbzRvkToCsv {
+  private static final String FIX_FILE = "src/main/resources/rvk/cg-to-rvk-csv.fix";
+  private static String OUTPUT_FILE = "cg-concordance.csv";
+
+  /**
+   * Runs the workflow: MARCXML, Fix, JSON, CSV.
+   *
+   * @param args args[0] is the MARCXML input file (required); args[1] is the CSV output
+   *        file (optional, default: cg-concordance.csv)
+   * @throws IllegalArgumentException if no input file is given
+   * @throws IllegalStateException if setting up the workflow fails with an IOException
+   */
+  public static void main(String... args) {
+    if (args.length < 1) {
+      throw new IllegalArgumentException("Usage: CulturegraphXmlFilterHbzRvkToCsv"
+          + " <MARCXML input file> [<CSV output file>]");
+    }
+    final String xmlInputFile = new File(args[0]).getAbsolutePath();
+    if (args.length > 1) {
+      OUTPUT_FILE = args[1];
+    }
+
+    final FileOpener opener = new FileOpener();
+    // Presumably splits the JSON written by the Fix into one record per entry of
+    // "records", so that the CSV gets one row per hbz id - verify against JsonDecoder.
+    final JsonDecoder jsonDecoder = new JsonDecoder();
+    jsonDecoder.setRecordPath("records");
+    try {
+      opener.setReceiver(new XmlDecoder())
+          .setReceiver(new MarcXmlHandler())
+          .setReceiver(new Metafix(FIX_FILE))
+          .setReceiver(new JsonEncoder())
+          .setReceiver(jsonDecoder)
+          .setReceiver(new CsvEncoder())
+          .setReceiver(new ObjectWriter<>(OUTPUT_FILE));
+    } catch (final IOException e) {
+      // The pipeline is only partially wired now, so processing could not write output.
+      throw new IllegalStateException("Could not set up workflow with " + FIX_FILE, e);
+    }
+    opener.process(xmlInputFile);
+    try {
+      opener.closeStream();
+    } catch (final NullPointerException e) {
+      // ignore, see https://github.com/hbz/lobid-resources/issues/1030
+    }
+  }
+}
diff --git a/src/main/java/org/lobid/resources/run/CulturegraphXmlFilterHbzToJson.java b/src/main/java/org/lobid/resources/run/CulturegraphXmlFilterHbzToJson.java
new file mode 100644
index 000000000..0e4ba7e5f
--- /dev/null
+++ b/src/main/java/org/lobid/resources/run/CulturegraphXmlFilterHbzToJson.java
@@ -0,0 +1,68 @@
+/* Copyright 2020 hbz, Pascal Christoph. Licensed under the EPL 2.0*/
+
+package org.lobid.resources.run;
+
+import java.io.File;
+import java.io.IOException;
+
+import org.metafacture.biblio.marc21.MarcXmlHandler;
+import org.metafacture.elasticsearch.JsonToElasticsearchBulk;
+import org.metafacture.io.FileOpener;
+import org.metafacture.io.ObjectWriter;
+import org.metafacture.json.JsonEncoder;
+import org.metafacture.xml.XmlDecoder;
+import org.metafacture.metafix.Metafix;
+
+/**
+ * Filters resources with hbz holdings from culturegraph's MARCXML, transforms them into
+ * JSON and writes this as an elasticsearch bulk json file. Unwanted records are dropped
+ * by reject() in the Fix.
+ *
+ * @author Pascal Christoph (dr0i)
+ * @author Tobias Bülte (TobiasNx)
+ **/
+public final class CulturegraphXmlFilterHbzToJson {
+  private static final String ELASTICSEARCH_INDEX_NAME = "cg";
+  public static final String ELASTICSEARCH_INDEX_TYPE_NAME = "rvk";
+  private static final String FIX_FILE = "src/main/resources/rvk/cg-to-rvk-json.fix";
+  private static String JSON_FILE = "bulk.ndjson";
+
+  /**
+   * Runs the workflow: MARCXML, Fix, JSON, elasticsearch bulk format.
+   *
+   * @param args args[0] is the MARCXML input file (required); args[1] is the bulk JSON
+   *        output file (optional, default: bulk.ndjson)
+   * @throws IllegalArgumentException if no input file is given
+   * @throws IllegalStateException if setting up the workflow fails with an IOException
+   */
+  public static void main(String... args) {
+    if (args.length < 1) {
+      throw new IllegalArgumentException("Usage: CulturegraphXmlFilterHbzToJson"
+          + " <MARCXML input file> [<bulk JSON output file>]");
+    }
+    final String xmlInputFile = new File(args[0]).getAbsolutePath();
+    if (args.length > 1) {
+      JSON_FILE = args[1];
+    }
+
+    final FileOpener opener = new FileOpener();
+    try {
+      opener.setReceiver(new XmlDecoder())
+          .setReceiver(new MarcXmlHandler())
+          .setReceiver(new Metafix(FIX_FILE))
+          .setReceiver(new JsonEncoder())
+          .setReceiver(new JsonToElasticsearchBulk(ELASTICSEARCH_INDEX_TYPE_NAME,
+              ELASTICSEARCH_INDEX_NAME))
+          .setReceiver(new ObjectWriter<>(JSON_FILE));
+    } catch (final IOException e) {
+      // The pipeline is only partially wired now, so processing could not write output.
+      throw new IllegalStateException("Could not set up workflow with " + FIX_FILE, e);
+    }
+    opener.process(xmlInputFile);
+    try {
+      opener.closeStream();
+    } catch (final NullPointerException e) {
+      // ignore, see https://github.com/hbz/lobid-resources/issues/1030
+    }
+  }
+}
diff --git a/src/main/resources/rvk/cg-to-rvk-csv.fix b/src/main/resources/rvk/cg-to-rvk-csv.fix
new file mode 100644
index 000000000..345ebb85c
--- /dev/null
+++ b/src/main/resources/rvk/cg-to-rvk-csv.fix
@@ -0,0 +1,44 @@
+# Builds one entry in "records[]" per hbz id (035 $a starting with "(DE-605)"), each
+# carrying the RVK notations of the record as one comma separated string.
+
+set_array("records[]")
+set_array("@id[]")
+set_array("rvk[]")
+
+# Collect the RVK notations (084 whose $2 matches "rvk").
+do list(path: "084??", "var": "$i")
+  if any_match("$i.2", "rvk")
+    copy_field("$i.a","rvk[].$append")
+  end
+end
+
+uniq("rvk[]")
+join_field("rvk[]",",")
+
+# Collect the hbz ids.
+do list(path: "035??", "var": "$i")
+  if any_match("$i.a", "^\\(DE-605\\)(.*)")
+    copy_field("$i.a","@id[].$append")
+  end
+end
+
+do list(path: "@id[]", "var": "$i")
+  copy_field("$i","records[].$append.id")
+  copy_field("rvk[]","records[].$last.rvk[]")
+end
+# Strip the "(DE-605)" prefix from the ids.
+replace_all("records[].*.id","^\\(DE-605\\)(.*)","$1")
+
+vacuum()
+
+# Filter records without RVK
+unless exists("rvk[]")
+  reject()
+end
+
+# Filter records without hbz ids
+unless exists("@id[]")
+  reject()
+end
+
+retain("records[]")
diff --git a/src/main/resources/rvk/cg-to-rvk-json.fix b/src/main/resources/rvk/cg-to-rvk-json.fix
new file mode 100644
index 000000000..b070cd6f9
--- /dev/null
+++ b/src/main/resources/rvk/cg-to-rvk-json.fix
@@ -0,0 +1,34 @@
+# Keeps the RVK notations ("rvk[]") and the hbz ids ("id", joined with ", ") of a record.
+
+set_array("rvk[]")
+
+# Collect the RVK notations (084 whose $2 matches "rvk").
+do list(path: "084??", "var": "$i")
+  if any_match("$i.2", "rvk")
+    copy_field("$i.a","rvk[].$append")
+  end
+end
+uniq("rvk[]")
+
+# Collect the hbz ids (035 $a starting with "(DE-605)") and strip that prefix.
+set_array("id")
+do list(path: "035??", "var": "$i")
+  if any_match("$i.a", "^\\(DE-605\\)(.*)")
+    copy_field("$i.a","id.$append")
+  end
+end
+replace_all("id.*","^\\(DE-605\\)(.*)","$1")
+join_field("id",", ")
+
+retain("rvk[]","id")
+vacuum()
+
+# Filter records without RVK
+unless exists("rvk[]")
+  reject()
+end
+
+# Filter records without hbz ids
+unless exists("id")
+  reject()
+end
diff --git a/src/test/java/UnitTests.java b/src/test/java/UnitTests.java
index d0b905acf..7a792da4b 100644
--- a/src/test/java/UnitTests.java
+++ b/src/test/java/UnitTests.java
@@ -12,7 +12,9 @@ @RunWith(Suite.class) @Suite.SuiteClasses({ TestGenerateContext.class, - org.lobid.resources.AlmaMarc21XmlToLobidJsonMetafixTest.class}) + org.lobid.resources.AlmaMarc21XmlToLobidJsonMetafixTest.class, + org.lobid.resources.CulturegraphXmlFilterHbzRvkToCsvTest.class, + org.lobid.resources.CulturegraphXmlFilterHbzToJsonTest.class}) public final class UnitTests { /* Suite class, groups tests via annotation above */
diff --git a/src/test/java/org/lobid/resources/CulturegraphXmlFilterHbzRvkToCsvTest.java b/src/test/java/org/lobid/resources/CulturegraphXmlFilterHbzRvkToCsvTest.java
new file mode 100644
index 000000000..442ac89ae
--- /dev/null
+++ b/src/test/java/org/lobid/resources/CulturegraphXmlFilterHbzRvkToCsvTest.java
@@ -0,0 +1,45 @@
+/* Copyright 2020 hbz, Pascal Christoph.
Licensed under the EPL 2.0*/
+
+package org.lobid.resources;
+
+import static org.junit.Assert.assertTrue;
+
+import java.io.File;
+
+import org.junit.Test;
+import org.lobid.resources.run.CulturegraphXmlFilterHbzRvkToCsv;
+
+/**
+ * Test of filtering resources with hbz holdings from culturegraph MARCXML,
+ * transforming into a CSV file.
+ *
+ * @author Pascal Christoph(dr0i)
+ **/
+public final class CulturegraphXmlFilterHbzRvkToCsvTest {
+
+  private static final String PATH_TO_TEST = "src/test/resources/";
+  public static final String OUTPUT_FILE =
+      PATH_TO_TEST + "cg/output.csv";
+
+  private static final String XML_INPUT_FILE = "cg/aggregate_20240507_example.marcxml";
+
+  /**
+   * Runs the transformation on the example data and checks that this run wrote a
+   * non-empty CSV file.
+   */
+  @SuppressWarnings("static-method")
+  @Test
+  public void testExtractLookupTableFromCgAsHbzRvk() {
+    final File output = new File(OUTPUT_FILE);
+    // Remove a stale result first, so only output of this run can satisfy the assertions.
+    if (output.exists()) {
+      assertTrue("Could not delete stale " + OUTPUT_FILE, output.delete());
+    }
+
+    CulturegraphXmlFilterHbzRvkToCsv.main(PATH_TO_TEST + XML_INPUT_FILE,
+        OUTPUT_FILE);
+
+    assertTrue("Output file was not written: " + OUTPUT_FILE, output.isFile());
+    assertTrue("Output file is empty: " + OUTPUT_FILE, output.length() > 0);
+  }
+}
diff --git a/src/test/java/org/lobid/resources/CulturegraphXmlFilterHbzToJsonTest.java b/src/test/java/org/lobid/resources/CulturegraphXmlFilterHbzToJsonTest.java
new file mode 100644
index 000000000..9824a0ace
--- /dev/null
+++ b/src/test/java/org/lobid/resources/CulturegraphXmlFilterHbzToJsonTest.java
@@ -0,0 +1,155 @@
+/* Copyright 2020 hbz, Pascal Christoph.
Licensed under the EPL 2.0*/
+
+package org.lobid.resources;
+
+import static org.junit.Assert.assertEquals;
+
+import java.io.File;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Paths;
+import java.util.Arrays;
+import java.util.Collection;
+
+import org.apache.http.HttpEntity;
+import org.apache.http.client.methods.CloseableHttpResponse;
+import org.apache.http.client.methods.HttpPost;
+import org.apache.http.entity.FileEntity;
+import org.apache.http.impl.client.CloseableHttpClient;
+import org.apache.http.impl.client.HttpClientBuilder;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.elasticsearch.client.Client;
+import org.elasticsearch.common.network.NetworkModule;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.node.InternalSettingsPreparer;
+import org.elasticsearch.node.Node;
+import org.elasticsearch.node.NodeValidationException;
+import org.elasticsearch.plugins.Plugin;
+import org.elasticsearch.transport.Netty4Plugin;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.lobid.resources.run.CulturegraphXmlFilterHbzToJson;
+
+/**
+ * Test of filtering resources with hbz holdings from culturegraph marcxml,
+ * transforming into JSON, writing as an elasticsearch bulk json file and
+ * ingesting it into an elasticsearch test node via HTTP.
+ *
+ * @author Pascal Christoph(dr0i)
+ **/
+@SuppressWarnings("javadoc")
+public final class CulturegraphXmlFilterHbzToJsonTest {
+
+  private static final Logger LOG =
+      LoggerFactory.getLogger(CulturegraphXmlFilterHbzToJsonTest.class);
+
+  private static final String PATH_TO_TEST = "src/test/resources/";
+  public static final String JSON_OUTPUT_FILE =
+      PATH_TO_TEST + "cg/output-es-bulk.ndjson";
+
+  private static final String XML_INPUT_FILE = "cg/aggregate_20240507_example.marcxml";
+  private static PluginConfigurableNode node;
+  private static Client client;
+  private static final int ELASTICSEARCH_HTTP_PORT = 19200;
+
+  private static final String ELASTICSEARCH_BULK_URI =
+      "http://localhost:" + ELASTICSEARCH_HTTP_PORT + "/_bulk";
+  private static final String ELASTICSEARCH_TEST_NODE_NAME = "testNodeCgRvk";
+
+  private static final Collection<Class<? extends Plugin>> plugins =
+      Arrays.asList(Netty4Plugin.class);
+
+  /* Node subclass whose constructor passes the given plugins on to Node. */
+  private static class PluginConfigurableNode extends Node {
+    public PluginConfigurableNode(final Settings settings,
+        final Collection<Class<? extends Plugin>> classpathPlugins) {
+      super(InternalSettingsPreparer.prepareEnvironment(settings, null),
+          classpathPlugins);
+    }
+  }
+
+  /** Starts the test node, deletes all its indices and creates the bulk file to ingest. */
+  @BeforeClass
+  public static void setup() {
+    try {
+      Files.deleteIfExists(Paths.get(JSON_OUTPUT_FILE));
+    } catch (final IOException e) {
+      LOG.warn("Could not delete " + JSON_OUTPUT_FILE, e);
+    }
+
+    node = new PluginConfigurableNode(Settings.builder()
+        .put(Node.NODE_NAME_SETTING.getKey(), ELASTICSEARCH_TEST_NODE_NAME)
+        .put(NetworkModule.TRANSPORT_TYPE_KEY, NetworkModule.LOCAL_TRANSPORT)
+        .put("http.enabled", "true").put("path.home", "tmp")
+        .put("transport.type", "netty4").put("network.host", "_local_")
+        .put("transport.tcp.port", ELASTICSEARCH_HTTP_PORT + 1)
+        .put("http.port", ELASTICSEARCH_HTTP_PORT)
+        .put("discovery.type", "single-node").build(), plugins);
+    try {
+      node.start();
+    } catch (final NodeValidationException e) {
+      // Without a running node no test can succeed, so fail here and keep the cause.
+      throw new IllegalStateException("Could not start elasticsearch test node", e);
+    }
+    client = node.client();
+    client.admin().indices().prepareDelete("_all").execute().actionGet();
+    client.admin().cluster().prepareHealth().setWaitForYellowStatus()
+        .execute().actionGet();
+    LOG.info("Start extraction, transformation and creation of json bulk ... ");
+    etl();
+    LOG.info("Done extraction, transformation and creation of json bulk");
+  }
+
+  /* Extract and transform */
+  private static void etl() {
+    CulturegraphXmlFilterHbzToJson.main(PATH_TO_TEST + XML_INPUT_FILE,
+        JSON_OUTPUT_FILE);
+  }
+
+  /** Posts the bulk file; fails if that is not possible or not answered with 200. */
+  @SuppressWarnings("static-method")
+  @Test
+  public void testIngestJsonBulkIntoElasticsearch() {
+    try {
+      ingest();
+    } catch (final IOException e) {
+      // Only logging this would let the test pass although nothing was ingested.
+      throw new AssertionError("Could not ingest " + JSON_OUTPUT_FILE, e);
+    }
+  }
+
+  // TODO: create and use metafacture module "http-writer"
+  private static void ingest() throws IOException {
+    final File jsonFile = new File(JSON_OUTPUT_FILE);
+    final HttpEntity entity = new FileEntity(jsonFile);
+    final HttpPost post = new HttpPost(ELASTICSEARCH_BULK_URI);
+    post.setEntity(entity);
+    post.addHeader("Content-Type", "application/x-ndjson");
+    final HttpClientBuilder clientBuilder = HttpClientBuilder.create();
+    try (CloseableHttpClient httpclient = clientBuilder.build();
+        CloseableHttpResponse response = httpclient.execute(post)) {
+      assertEquals(200, response.getStatusLine().getStatusCode());
+    }
+  }
+
+  /*
+   * Tears down the elasticsearch test instance, as far as setup() got to create it.
+   */
+  @AfterClass
+  public static void down() {
+    if (client != null) {
+      client.admin().indices().prepareDelete("_all").execute().actionGet();
+    }
+    if (node != null) {
+      try {
+        node.close();
+      } catch (final IOException e) {
+        LOG.error("Could not close elasticsearch test node", e);
+      }
+    }
+  }
+}
diff --git a/src/test/resources/cg/aggregate_20240507_example.marcxml b/src/test/resources/cg/aggregate_20240507_example.marcxml
new file mode 100644
index 000000000..1ccc1c174
--- /dev/null
+++ b/src/test/resources/cg/aggregate_20240507_example.marcxml
@@ -0,0 +1,1918 @@ + + + + 00000nam a2200000 a 4500 + CG_562_2024-05-07T23:29:17.403Z + DE-101 + + (AT-OBV)990000009120203331 + 8\p + + + (DE-101)941881032 + 7\p + + + (DE-602)b3kat_BV009790132 + 12\p + + + (DE-602)gbv_163902283 + 10\p + + + (DE-602)gbv_273136453 + 3\p + + + (DE-602)kobvindex_MMZa0062983 + 1\p + + + (DE-602)kobvindex_ZLB00048198 + 4\p + + + (DE-603)031154247 + 6\p + + + (DE-604)BV009790132 + 11\p + + + (DE-605)990067531130206441 + 5\p + + + (DE-627)163902283 + 9\p + + + (DE-627)273136453 + 2\p + + + 324.2403 + P471v + 20 + 11\p + 12\p + + + 324.2403 + 9\p + 10\p + + + 14 + dnb + 9\p + 10\p + + + 324.2403 + 20/oclc + 6\p + + + MG 11380 + (DE-625)122817:12126 + (DE-603)410454222 + rvk + 6\p + + + MF 3390 + (DE-625)122710: + rvk + 11\p + 12\p + + + MG 11380 + (DE-625)122817:12126 + rvk + 11\p + 12\p + + + MC 7200 + rvk + (DE-625)rvk/122417: + 9\p + 10\p + + + 89.10 + bkl + 8\p + 9\p + 10\p + + + 89.21 + bkl + 8\p + 9\p + 10\p + + + c 75.3 + ifzs + 11\p + 12\p + + + c 88 + ifzs + 11\p + 12\p + + + MF 3390 + rvk + 8\p + + + MG 11380 + rvk + 8\p + + + 16 + 14 + sdnb + 2\p + 3\p + 9\p + 10\p + + + 16 + 14 + 320 + DE-101 + sdnb + 7\p + + + 16 + 14 + DE-101 + sdnb + 6\p + + + POL 145 + sfb + 9\p + 10\p + + + POL 247 + sfb + 9\p + 10\p + + + POL 290 + sfb + 9\p + 10\p + + + POL 320 + sfb + 9\p + 10\p + + + 8.2b + DE-101 + sswd + 7\p + + + POL 450f + stub + 11\p + 12\p + + + Pfahl-Traughber, Armin + 1963- + Verf.
+ (DE-588)113340567 + (DE-627)561346119 + (DE-576)170020207 + aut + 9\p + 10\p + + + Pfahl-Traughber, Armin + 1963- + (DE-588)113340567 + aut + 8\p + + + Pfahl-Traughber, Armin + aut + 1\p + + + Conservatism + Europe + 8\p + + + Political + Europe + 8\p + + + Populism + Europe + 8\p + + + Radicalism + Europe + 8\p + + + Conservatism + Europe + 11\p + 12\p + + + Political parties + Europe + 11\p + 12\p + + + Populism + Europe + 11\p + 12\p + + + Radicalism + Europe + 11\p + 12\p + + + Radikalismus + 11\p + 12\p + + + (DE-588)7580659-9 + https://d-nb.info/gnd/7580659-9 + (DE-101)985576723 + Rechtspopulismus + rswk-swf + gnd + 7\p + + + Rechtspartei + (DE-588)4121567-9 + (DE-603)085393754 + gnd + 6\p + + + Rechtspopulismus + (DE-609)00915322 + (DE-588)7580659-9 + gnd + 4\p + + + Persönlichkeit, Politik + fes + 11\p + 12\p + + + Rechtspopulismus + fes + 11\p + 12\p + + + Rechtsradikale Partei + fes + 11\p + 12\p + + + Rechtsradikalismus + fes + 11\p + 12\p + + + Extremisme + gtt + 11\p + 12\p + + + Populisme + gtt + 11\p + 12\p + + + Rechts (politiek) + gtt + 11\p + 12\p + + + Rechtsradikalismus + (DE-588)4048829-9 + gnd + rswk-swf + 11\p + 12\p + + + Rechtspartei + (DE-588)4121567-9 + gnd + rswk-swf + 11\p + 12\p + + + Populismus + (DE-588)4129521-3 + gnd + rswk-swf + 11\p + 12\p + + + Europa + 11\p + 12\p + + + Europa + (DE-588)4015701-5 + gnd + rswk-swf + 11\p + 12\p + + + Westeuropa + fes + 11\p + 12\p + + + Conservatism + Europe + 9\p + 10\p + + + Political + Europe + 9\p + 10\p + + + Populism + Europe + 9\p + 10\p + + + Radicalism + Europe + 9\p + 10\p + + + Aufsatzsammlung + (DE-588)4143413-4 + (DE-627)105605727 + (DE-576)209726091 + gnd-content + 9\p + 10\p + 12\p + + + Europa + 5\p + + + Fremdenfeindlichkeit + 5\p + + + Neofaschismus + 5\p + + + Populismus + 5\p + + + Rechtsextremismus + 5\p + + + (DE-627) + 9\p + 10\p + + + AT-OBV + VBKSOM UBS + 8\p + + + DE-101 + DE-101 + 7\p + + + DE-604 + 11\p + 12\p + + + (DE-588)7580659-9 + 
https://d-nb.info/gnd/7580659-9 + (DE-101)985576723 + s + Rechtspopulismus + 7\p + + + g + (DE-588)4015701-5 + (DE-627)104289007 + (DE-576)208913092 + Europa + gnd + 9\p + 10\p + + + Europa + g + (DE-588)4015701-5 + 8\p + 11\p + 12\p + + + Rechtspartei + s + (DE-588)4121567-9 + 5\p + + + (DE-588)7580659-9 + s + Rechtspopulismus + DE-101 + 4\p + + + s + (DE-588)4048829-9 + (DE-627)104325372 + (DE-576)209079428 + Rechtsradikalismus + gnd + 9\p + 10\p + + + Rechtsradikalismus + (DE-588)4048829-9 + s + 11\p + 12\p + + + Rechtspartei + s + (DE-588)4121567-9 + 8\p + + + AT-OBV + VBKSOM + 8\p + + + DE-604 + 11\p + 12\p + + + Europa + g + (DE-588)4015701-5 + 8\p + + + Rechtspartei + (DE-588)4121567-9 + s + 11\p + 12\p + + + Rechtspopulismus + s + (DE-588)7580659-9 + 8\p + + + DE-604 + 11\p + 12\p + + + Europa + (DE-588)4015701-5 + g + 11\p + 12\p + + + Rechtsradikalismus + (DE-588)4048829-9 + s + 11\p + 12\p + + + Populismus + (DE-588)4129521-3 + s + 11\p + 12\p + + + DE-604 + 11\p + 12\p + + + Europa + (DE-588)4015701-5 + g + 11\p + 12\p + + + Rechtspartei + (DE-588)4121567-9 + s + 11\p + 12\p + + + (DE-588)113340567 + https://d-nb.info/gnd/113340567 + (DE-101)113340567 + Pfahl-Traughber, Armin + 1963- + Mitwirkender + ctb + gnd + 7\p + + + Pfahl-Traughber, Armin + 1963- + (DE-588)113340567 + (DE-627)561346119 + (DE-576)170020207 + oth + 2\p + 3\p + + + Pfahl-Traughber, Armin + 1963- + (DE-603)147739500 + (DE-588)113340567 + gnd + 6\p + + + (DE-588)113340567 + (DE-609)12271979 + Pfahl-Traughber, Armin + oth + 4\p + + + Pfahl-Traughber, Armin + 1963- + (DE-588)113340567 + oth + Sonstige + 5\p + 11\p + 12\p + + + + 00000nam a2200000 a 4500 + CG_563_2024-05-07T23:29:17.403Z + DE-101 + + (AT-OBV)990000009130203331 + 12\p + + + (DE-101)1201182301 + 5\p + + + (DE-101)880967455 + 25\p + + + (DE-101)930949900 + 29\p + + + (DE-602)b3kat_BV001783779 + 21\p + + + (DE-602)b3kat_BV025876512 + 14\p + + + (DE-602)b3kat_BV025929467 + 22\p + + + (DE-602)b3kat_BV026498335 + 15\p + + + 
(DE-602)gbv_273136526 + 28\p + + + (DE-602)kobvindex_JGB0006744 + 2\p + + + (DE-602)kobvindex_JMB00025300 + 4\p + + + (DE-602)kobvindex_MMZsil00229 + 1\p + + + (DE-602)kobvindex_WAN3799 + 3\p + + + (DE-602)kobvindex_ZBW05010611 + 8\p + + + (DE-602)kobvindex_ZLB04018337 + 13\p + + + (DE-603)008469253 + 10\p + + + (DE-604)BV001783779 + 20\p + + + (DE-604)BV004125935 + 17\p + + + (DE-605)990019247190206441 + 9\p + + + (DE-605)990063057720206441 + 6\p + + + (DE-605)990063668050206441 + 7\p + + + (DE-627)024695009 + 24\p + + + (DE-627)025616374 + 11\p + + + (DE-627)273136526 + 23\p + + + B + OCLC + 20 + 11\p + 24\p + + + 940.53/18/0922 + 22\p + 23\p + 28\p + + + B + 22\p + 23\p + 28\p + + + NQ 2360 + (DE-625)128255: + (DE-603)410912263 + rvk + 10\p + + + NY 4760 + (DE-625)132358: + (DE-603)410953458 + rvk + 10\p + + + BD 7100 + (DE-625)10250: + rvk + 20\p + 21\p + + + NQ 2360 + (DE-625)128255: + rvk + 14\p + 17\p + 20\p + 21\p + 22\p + + + NY 4760 + (DE-625)132358: + rvk + 17\p + 20\p + 21\p + + + NQ 2360 + rvk + (DE-625)rvk/128255: + 23\p + 28\p + + + NY 4760 + rvk + (DE-625)rvk/132358: + 23\p + 28\p + + + 11.20 + bkl + 12\p + + + 71.62 + bkl + 12\p + + + j 10.3 + ifzs + 20\p + 21\p + + + n 10.3 + ifzs + 20\p + 21\p + + + 330 + rpb + 6\p + + + BD 7100 + rvk + 12\p + + + NQ 2360 + rvk + 12\p + + + NY 4760 + rvk + 12\p + + + 63 + 64 + sdnb + 11\p + 23\p + 24\p + 28\p + + + 63 + 64 + 900 + 920 + 930 + 940 + 943 + 950 + 960 + 970 + 980 + 990 + DE-101 + sdnb + 25\p + 29\p + + + 63 + 64 + DE-101 + sdnb + 10\p + + + AHF II/4._3.3._5/__/_4 + sdnb + 20\p + 21\p + + + GESCH 432 (18 NIE) + sfb + 23\p + 24\p + 28\p + + + GESCH 432 (59) + sfb + 23\p + 24\p + 28\p + + + 8.1 + DE-101 + sswd + 25\p + 29\p + + + H.7.1 + 275 + 3\p + + + H.7.6 + 280 + 3\p + + + Jakob, Volker + (DE-609)04003298 + 13\p + + + Jakob, Volker + (DE-B1539)00012221 + aut + 4\p + + + Jakob, Volker + (DE-XXX)05004463 + 8\p + + + Jakob, Volker + aut + 1\p + 11\p + 12\p + + + Jakob, Volker + Verfasser + aut + 2\p + 
5\p + 9\p + 14\p + 15\p + 20\p + 21\p + 22\p + + + Jakob, Volker + Verfasser*in + aut + 3\p + + + Anne Frank war nicht allein + niederländ. + 7\p + + + Niederlande + (DE-609)00000445 + (DE-588)4042203-3 + gnd + 13\p + + + Geschichte 1933-1945 + 11\p + 23\p + 24\p + 28\p + + + Geschichte 1933-1945 + (DE-609)00410185 + local + 13\p + + + Geschichte 1933-1945 + gnd + rswk-swf + 14\p + 15\p + 17\p + 20\p + 21\p + 22\p + 25\p + 29\p + + + Geschichte 1933-1945 + gnd + 10\p + + + Niederlande + 3\p + + + Holocaust/Judenvernichtung + 23\p + 24\p + 28\p + + + Netherlands + 23\p + 24\p + 28\p + + + Niederlande + 23\p + 24\p + 28\p + + + biography + 23\p + 24\p + 28\p + + + fehlt + 23\p + 28\p + + + persecution of Jews/Holocaust + 23\p + 24\p + 28\p + + + (DE-588)4015959-0 + https://d-nb.info/gnd/4015959-0 + (DE-101)040159590 + Exil + rswk-swf + gnd + 25\p + 29\p + + + (DE-588)4028808-0 + https://d-nb.info/gnd/4028808-0 + (DE-101)040288080 + Juden + rswk-swf + gnd + 25\p + 29\p + + + (DE-588)4070334-4 + https://d-nb.info/gnd/4070334-4 + (DE-101)040703347 + Deutsche + rswk-swf + gnd + 25\p + 29\p + + + Exil + (DE-588)4015959-0 + (DE-603)085037370 + gnd + 10\p + + + Exil + (DE-609)00100305 + (DE-588)4015959-0 + gnd + 13\p + + + Flüchtling + (DE-XXX)00006475 + (DE-588)4017604-6 + gnd + 8\p + + + Juden + (DE-588)4028808-0 + (DE-603)085075922 + gnd + 10\p + + + Juden + (DE-609)00050195 + (DE-588)4028808-0 + gnd + 13\p + + + Nationalsozialismus + (DE-XXX)00014010 + (DE-588)4041316-0 + gnd + 8\p + + + Deutsche + (DE-588)4070334-4 + (DE-603)085197807 + gnd + 10\p + + + Deutsche + (DE-609)00003224 + (DE-588)4070334-4 + gnd + 13\p + + + Judentum + fes + 20\p + 21\p + + + Judenverfolgung + fes + 20\p + 21\p + + + (DE-588)4015959-0 + (DE-627)106334611 + (DE-576)208914366 + Exil + gnd + 11\p + 24\p + + + (DE-588)4028808-0 + (DE-627)10430670X + (DE-576)208979565 + Juden + gnd + 11\p + 24\p + + + (DE-588)4070334-4 + (DE-627)104798858 + (DE-576)20918020X + Deutsche + gnd + 11\p + 24\p + + + 
Exil + (DE-588)4015959-0 + gnd + rswk-swf + 14\p + 15\p + 17\p + 20\p + 21\p + 22\p + + + Juden + (DE-588)4028808-0 + gnd + rswk-swf + 14\p + 15\p + 17\p + 20\p + 21\p + 22\p + + + Judenverfolgung + (DE-588)4028814-6 + gnd + rswk-swf + 14\p + 15\p + 17\p + 20\p + 21\p + 22\p + + + Deutsche + (DE-588)4070334-4 + gnd + rswk-swf + 14\p + 15\p + 17\p + 20\p + 21\p + 22\p + + + (DE-588)4042203-3 + https://d-nb.info/gnd/4042203-3 + (DE-101)040422038 + Niederlande + rswk-swf + gnd + 25\p + 29\p + + + (DE-588)4042203-3 + (DE-627)104400765 + (DE-576)209049030 + Niederlande + gnd + 11\p + 24\p + + + Juden + (DE-XXX)00027724 + (DE-588)4028808-0 + gnd + 8\p + + + Niederlande + (DE-588)4042203-3 + (DE-603)085116521 + gnd + 10\p + + + Niederlande + (DE-B1539)00000687 + (DE-588)4042203-3 + gnd + 4\p + + + Niederlande + (DE-XXX)00014208 + (DE-588)4042203-3 + gnd + 8\p + + + Niederlande + (DE-588)4042203-3 + gnd + rswk-swf + 14\p + 15\p + 17\p + 20\p + 21\p + 22\p + + + Niederlande + fes + 20\p + 21\p + + + (AFSB-G)Niederlande + 5\p + 25\p + + + (DE-588)4133254-4 + https://d-nb.info/gnd/4133254-4 + (DE-101)041332547 + Erlebnisbericht + gnd-content + 25\p + 29\p + + + (DE-588)1071854844 + https://d-nb.info/gnd/1071854844 + (DE-101)1071854844 + Fiktionale Darstellung + gnd-content + 5\p + + + Biografie + (DE-588)4006804-3 + (DE-627)104213493 + (DE-576)208867147 + gnd-content + 22\p + 23\p + 24\p + 28\p + + + Erlebnisbericht + (DE-588)4133254-4 + (DE-627)105681490 + (DE-576)209642378 + gnd-content + 11\p + 22\p + 23\p + 24\p + 28\p + + + (DE-588)4006804-3 + Biografie + gnd-content + 22\p + + + Erlebnisbericht + (DE-588)4133254-4 + gnd-content + 6\p + 7\p + 9\p + 14\p + 15\p + 17\p + 20\p + 21\p + 22\p + + + Erlebnisbericht + (DE-609)00002634 + local + 13\p + 22\p + 28\p + + + Biographischer Beitrag + (DE-XXX)00002722 + local + 8\p + 22\p + 28\p + + + Erlebnisbericht + rswk-swf + gnd + 25\p + 29\p + + + Erlebnisbericht + gnd + 10\p + + + Erlebnisbericht + 13\p + 22\p + 28\p + + + 
(DE-627) + 24\p + + + AT-OBV + UBI + 12\p + + + DE-101 + 23\p + 28\p + + + DE-101 + DE-101 + 25\p + 29\p + + + DE-604 + 14\p + 15\p + 18\p + 17\p + 20\p + 21\p + 22\p + + + (DE-588)4042203-3 + https://d-nb.info/gnd/4042203-3 + (DE-101)040422038 + g + Niederlande + 25\p + 29\p + + + g + (DE-588)4042203-3 + (DE-627)104400765 + (DE-576)209049030 + Niederlande + gnd + 23\p + 24\p + 28\p + + + Niederlande + g + (DE-588)4042203-3 + 6\p + 7\p + 9\p + 12\p + 13\p + 14\p + 15\p + 17\p + 20\p + 21\p + 22\p + + + (DE-588)4015959-0 + https://d-nb.info/gnd/4015959-0 + (DE-101)040159590 + s + Exil + 25\p + 29\p + + + s + (DE-588)4006020-2 + (DE-627)104162597 + (DE-576)208864016 + Besetzung + gnd + 24\p + + + s + (DE-588)4015959-0 + (DE-627)106334611 + (DE-576)208914366 + Exil + gnd + 23\p + 28\p + + + Exil + s + (DE-588)4015959-0 + 6\p + 7\p + 9\p + 13\p + 14\p + 15\p + 17\p + 20\p + 21\p + 22\p + + + Juden + s + (DE-588)4028808-0 + 12\p + + + (DE-588)4070334-4 + https://d-nb.info/gnd/4070334-4 + (DE-101)040703347 + s + Deutsche + 25\p + 29\p + + + s + (DE-588)4028814-6 + (DE-627)104427108 + (DE-576)208979603 + Judenverfolgung + gnd + 24\p + + + s + (DE-588)4070334-4 + (DE-627)104798858 + (DE-576)20918020X + Deutsche + gnd + 23\p + 28\p + + + Juden + s + (DE-588)4028808-0 + 9\p + 14\p + 20\p + 21\p + 22\p + + + Deutsche + s + (DE-588)4070334-4 + 6\p + 7\p + 13\p + 15\p + 17\p + + + Geschichte 1933-1945 + z + 12\p + + + (DE-588)4028808-0 + https://d-nb.info/gnd/4028808-0 + (DE-101)040288080 + s + Juden + 25\p + 29\p + + + s + (DE-588)4028808-0 + (DE-627)10430670X + (DE-576)208979565 + Juden + gnd + 23\p + 28\p + + + Juden + s + (DE-588)4028808-0 + 6\p + 7\p + 13\p + 15\p + 17\p + + + Deutsche + s + (DE-588)4070334-4 + 9\p + 14\p + 20\p + 21\p + 22\p + + + z + gnd + Geschichte 1940-1944 + 24\p + + + z + gnd + Geschichte 1933-1945 + 23\p + 28\p + + + s + Geschichte 1933-1945 + 13\p + + + Geschichte 1933-1945 + z + 6\p + 7\p + 9\p + 14\p + 15\p + 17\p + 20\p + 21\p + 22\p + 25\p + 
29\p + + + s + Erlebnisbericht + DE-101 + 13\p + + + f + Erlebnisbericht + 25\p + 29\p + + + (DE-627) + 23\p + 28\p + + + AT-OBV + UBI + 12\p + + + DE-604 + 14\p + 16\p + 15\p + 19\p + 17\p + 20\p + 21\p + 22\p + + + s + (DE-588)4006020-2 + (DE-627)104162597 + (DE-576)208864016 + Besetzung + gnd + 23\p + 28\p + + + Niederlande + g + (DE-588)4042203-3 + 9\p + 12\p + 14\p + 15\p + 17\p + 20\p + 21\p + 22\p + + + s + (DE-588)4028814-6 + (DE-627)104427108 + (DE-576)208979603 + Judenverfolgung + gnd + 23\p + 28\p + + + Judenverfolgung + s + (DE-588)4028814-6 + 9\p + 12\p + 14\p + 15\p + 17\p + 20\p + 21\p + 22\p + + + z + gnd + Geschichte 1940-1944 + 23\p + 28\p + + + Geschichte 1933-1945 + z + 12\p + + + AT-OBV + UBI + 12\p + + + Niederlande + g + (DE-588)4042203-3 + 12\p + + + Antisemitismus + s + (DE-588)4002333-3 + 12\p + + + Geschichte 1933-1945 + z + 12\p + + + Frank, Anne + 1929-1945 + (DE-588)118534734 + (DE-627)079334962 + (DE-576)160991358 + oth + 24\p + + + Voort, Annet van der + 1950- + (DE-588)123961742 + (DE-627)085550329 + (DE-576)215107667 + oth + 24\p + + + (DE-588)123961742 + (DE-609)00088971 + Voort, Annet van der + aut + 13\p + + + Voort, Annet <<van der>> + 1950- + (DE-588)123961742 + aut + Verfasser + 9\p + + + Voort, Annet van der + 1950- + Verfasser + (DE-588)123961742 + aut + 14\p + 15\p + + + Jakob, Volker + Hrsg. + edt + (DE-603)145133761 + 10\p + + + (DE-B1539)00012222 + Voort, Annet van der + aut + 4\p + + + (DE-XXX)05004464 + Voort, Annet van der + aut + 8\p + + + Voort, Annet <<van der>> + aut + 12\p + + + Voort, Annet van der + aut + 11\p + + + Voort, Annet van der + Verfasser + aut + 22\p + + + Voort, Annet van der + Verfasser*in + aut + 3\p + + + Voort,Annet van der + Verfasser + aut + 5\p + + + van der Voort, Annet + aut + 1\p + + + Jakob, Volker + edt + Herausgeber + 6\p + 7\p + 27\p + 25\p + 30\p + 29\p + + + Jakob, Volker + Hrsg. + edt + 23\p + 28\p + + + Van der Voort, Annet + Hrsg. 
+ edt + 2\p + + + Jakob, Volker + oth + 24\p + + + Jakob, Volker + Sonstige + oth + 17\p + + + Voort, Annet van der + oth + 23\p + 28\p + + + Brouwer, Carlien + Übersetzer + trl + 5\p + 31\p + 29\p + + + Böttner, Annegret + Übersetzer + trl + 5\p + + + Anne Frank war nicht allein + niederländ. + 29\p + + + 16\p + cgwrk + 20201028 + DE-101 + https://d-nb.info/provenance/plan#cgwrk + 18\p + 19\p + + + 27\p + dnb + 20200613 + DE-101 + https://d-nb.info/provenance/plan#dnb + 30\p + 31\p + + + 26\p + ka003lc + 20191012 + DE-101 + https://d-nb.info/provenance/plan#ka003lc + + + diff --git a/src/test/resources/cg/output-es-bulk.ndjson b/src/test/resources/cg/output-es-bulk.ndjson new file mode 100644 index 000000000..82ad1c37d --- /dev/null +++ b/src/test/resources/cg/output-es-bulk.ndjson @@ -0,0 +1,4 @@ +{"index":{"_index":"cg","_type":"rvk"}} +{"rvk":["MG 11380","MF 3390","MG 11380","MC 7200","MF 3390","MG 11380"],"id":"990067531130206441"} +{"index":{"_index":"cg","_type":"rvk"}} +{"rvk":["NQ 2360","NY 4760","BD 7100","NQ 2360","NY 4760","NQ 2360","NY 4760","BD 7100","NQ 2360","NY 4760"],"id":"990019247190206441, 990063057720206441, 990063668050206441"} diff --git a/src/test/resources/cg/output.csv b/src/test/resources/cg/output.csv new file mode 100644 index 000000000..0503461d4 --- /dev/null +++ b/src/test/resources/cg/output.csv @@ -0,0 +1,4 @@ +"990067531130206441","MG 11380,MF 3390,MC 7200" +"990019247190206441","NQ 2360,NY 4760,BD 7100" +"990063057720206441","NQ 2360,NY 4760,BD 7100" +"990063668050206441","NQ 2360,NY 4760,BD 7100"