diff --git a/CHANGES.rst b/CHANGES.rst index d422eba9..d6979832 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -2,6 +2,31 @@ Changelog ========= +.. _changes-1_5_0: + +1.5.0 (2024-10-11) +~~~~~~~~~~~~~~~~~~ + +New features +------------ + ++ `#160`_, `#161`_, `#163`_: Add class attributes to + :class:`icat.ingest.IngestReader` to make some prescribed values in + the transformation to ICAT data file format configurable. + +Bug fixes and minor changes +--------------------------- + ++ `#162`_: Minor updates in the tool chain ++ `#164`_: Fix `dumpinvestigation.py` example script + +.. _#160: https://github.com/icatproject/python-icat/issues/160 +.. _#161: https://github.com/icatproject/python-icat/pull/161 +.. _#162: https://github.com/icatproject/python-icat/pull/162 +.. _#163: https://github.com/icatproject/python-icat/pull/163 +.. _#164: https://github.com/icatproject/python-icat/pull/164 + + .. _changes-1_4_0: 1.4.0 (2024-08-30) diff --git a/doc/examples/dumpinvestigation.py b/doc/examples/dumpinvestigation.py index d415d0c9..6dcbe574 100644 --- a/doc/examples/dumpinvestigation.py +++ b/doc/examples/dumpinvestigation.py @@ -17,130 +17,192 @@ logging.basicConfig(level=logging.INFO) -formats = icat.dumpfile.Backends.keys() -config = icat.config.Config() -config.add_variable('file', ("-o", "--outputfile"), - dict(help="output file name or '-' for stdout"), - default='-') -config.add_variable('format', ("-f", "--format"), - dict(help="output file format", choices=formats), - default='YAML') -config.add_variable('investigation', ("investigation",), - dict(help="name and optionally visit id " - "(separated by a colon) of the investigation")) -client, conf = config.getconfig() - -if client.apiversion < '4.4': - raise RuntimeError("Sorry, ICAT version %s is too old, need 4.4.0 or newer." 
- % client.apiversion) -client.login(conf.auth, conf.credentials) - - # ------------------------------------------------------------ # helper # ------------------------------------------------------------ -def getinvestigation(invid): +def get_investigation_id(client, invid): """Search the investigation id from name and optionally visitid.""" + query = Query(client, "Investigation", attributes=["id"]) l = invid.split(':') - if len(l) == 1: - # No colon, invid == name - searchexp = "Investigation.id [name='%s']" % tuple(l) - elif len(l) == 2: + query.addConditions({"name": "= '%s'" % l[0]}) + if len(l) == 2: # one colon, invid == name:visitId - searchexp = "Investigation.id [name='%s' AND visitId='%s']" % tuple(l) + query.addConditions({"visitId": "= '%s'" % l[1]}) else: # too many colons raise RuntimeError("Invalid investigation identifier '%s'" % invid) - return (client.assertedSearch(searchexp)[0]) + return client.assertedSearch(query)[0] -def mergesearch(sexps): +def mergesearch(client, queries): """Do many searches and merge the results in one list excluding dups.""" objs = set() - for se in sexps: + for se in queries: objs.update(client.search(se)) return list(objs) +# The following helper functions control what ICAT objects are written +# in each of the dumpfile chunks. There are three options for the +# items in each list: either queries expressed as Query objects, or +# queries expressed as string expressions, or lists of objects. In +# the first two cases, the search results will be written, in the last +# case, the objects are written as provided. + +def get_auth_types(client, invid): + """Users and groups related to the investigation. + """ + # We need the users related to our investigation via + # InvestigationUser, the users member of one of the groups related + # via InvestigationGroup, and the instrument scientists from the + # instruments related to the investigations. These are + # independent searches, but the results are likely to overlap. 
So + # we need to search and merge results first. + usersearch = [ + Query(client, "User", conditions={ + "investigationUsers." + "investigation.id": "= %d" % invid, + }), + Query(client, "User", conditions={ + "userGroups.grouping.investigationGroups." + "investigation.id": "= %d" % invid, + }), + Query(client, "User", conditions={ + "instrumentScientists.instrument.investigationInstruments." + "investigation.id": "= %d" % invid, + }), + ] + return [ + mergesearch(client, usersearch), + Query(client, "Grouping", conditions={ + "investigationGroups.investigation.id": "= %d" % invid, + }, includes=["userGroups.user"], aggregate="DISTINCT", order=True), + ] + +def get_static_types(client, invid): + """Static stuff that exists independently of the investigation in ICAT. + """ + # Similar situation for ParameterType as for User: need to merge + # ParameterType used for InvestigationParameter, SampleParameter, + # DatasetParameter, and DatafileParameter. + ptsearch = [ + Query(client, "ParameterType", conditions={ + "investigationParameters." + "investigation.id": "= %d" % invid, + }, includes=["facility", "permissibleStringValues"]), + Query(client, "ParameterType", conditions={ + "sampleParameters.sample." + "investigation.id": "= %d" % invid, + }, includes=["facility", "permissibleStringValues"]), + Query(client, "ParameterType", conditions={ + "datasetParameters.dataset." + "investigation.id": "= %d" % invid, + }, includes=["facility", "permissibleStringValues"]), + Query(client, "ParameterType", conditions={ + "datafileParameters.datafile.dataset." 
+ "investigation.id": "= %d" % invid, + }, includes=["facility", "permissibleStringValues"]), + ] + return [ + Query(client, "Facility", + conditions={ + "investigations.id": "= %d" % invid, + }, + order=True), + Query(client, "Instrument", + conditions={ + "investigationInstruments.investigation.id": "= %d" % invid, + }, + includes=["facility", "instrumentScientists.user"], + order=True), + mergesearch(client, ptsearch), + Query(client, "InvestigationType", + conditions={ + "investigations.id": "= %d" % invid, + }, + includes=["facility"], + order=True), + Query(client, "SampleType", + conditions={ + "samples.investigation.id": "= %d" % invid, + }, + includes=["facility"], + aggregate="DISTINCT", + order=True), + Query(client, "DatasetType", + conditions={ + "datasets.investigation.id": "= %d" % invid, + }, + includes=["facility"], + aggregate="DISTINCT", + order=True), + Query(client, "DatafileFormat", + conditions={ + "datafiles.dataset.investigation.id": "= %d" % invid, + }, + includes=["facility"], + aggregate="DISTINCT", + order=True), + ] + +def get_investigation_types(client, invid): + """The investigation and all the stuff that belongs to it. + """ + # The set of objects to be included in the Investigation. 
+ inv_includes = { + "facility", "type.facility", "investigationInstruments", + "investigationInstruments.instrument.facility", "shifts", + "keywords", "publications", "investigationUsers", + "investigationUsers.user", "investigationGroups", + "investigationGroups.grouping", "parameters", + "parameters.type.facility" + } + return [ + Query(client, "Investigation", + conditions={"id":"in (%d)" % invid}, + includes=inv_includes), + Query(client, "Sample", + conditions={"investigation.id":"= %d" % invid}, + includes={"investigation", "type.facility", + "parameters", "parameters.type.facility"}, + order=True), + Query(client, "Dataset", + conditions={"investigation.id":"= %d" % invid}, + includes={"investigation", "type.facility", "sample", + "parameters", "parameters.type.facility"}, + order=True), + Query(client, "Datafile", + conditions={"dataset.investigation.id":"= %d" % invid}, + includes={"dataset", "datafileFormat.facility", + "parameters", "parameters.type.facility"}, + order=True) + ] # ------------------------------------------------------------ # Do it # ------------------------------------------------------------ -invid = getinvestigation(conf.investigation) - +formats = icat.dumpfile.Backends.keys() +config = icat.config.Config() +config.add_variable('file', ("-o", "--outputfile"), + dict(help="output file name or '-' for stdout"), + default='-') +config.add_variable('format', ("-f", "--format"), + dict(help="output file format", choices=formats), + default='YAML') +config.add_variable('investigation', ("investigation",), + dict(help="name and optionally visit id " + "(separated by a colon) of the investigation")) +client, conf = config.getconfig() -# We need the users related to our investigation via -# InvestigationUser, the users member of one of the groups related via -# InvestigationGroup, and the instrument scientists from the -# instruments related to the investigations. These are independent -# searches, but the results are likely to overlap. 
So we need to -# search and merge results first. Similar situation for ParameterType. -usersearch = [("User <-> InvestigationUser <-> Investigation [id=%d]"), - ("User <-> UserGroup <-> Grouping <-> InvestigationGroup " - "<-> Investigation [id=%d]"), - ("User <-> InstrumentScientist <-> Instrument " - "<-> InvestigationInstrument <-> Investigation [id=%d]")] -ptsearch = [("ParameterType INCLUDE Facility, PermissibleStringValue " - "<-> InvestigationParameter <-> Investigation [id=%d]"), - ("ParameterType INCLUDE Facility, PermissibleStringValue " - "<-> SampleParameter <-> Sample <-> Investigation [id=%d]"), - ("ParameterType INCLUDE Facility, PermissibleStringValue " - "<-> DatasetParameter <-> Dataset <-> Investigation [id=%d]"), - ("ParameterType INCLUDE Facility, PermissibleStringValue " - "<-> DatafileParameter <-> Datafile <-> Dataset " - "<-> Investigation [id=%d]"), ] +if client.apiversion < '4.4': + raise RuntimeError("Sorry, ICAT version %s is too old, need 4.4.0 or newer." + % client.apiversion) +client.login(conf.auth, conf.credentials) -# The set of objects to be included in the Investigation. -inv_includes = { "facility", "type.facility", "investigationInstruments", - "investigationInstruments.instrument.facility", "shifts", - "keywords", "publications", "investigationUsers", - "investigationUsers.user", "investigationGroups", - "investigationGroups.grouping", "parameters", - "parameters.type.facility" } -# The following lists control what ICAT objects are written in each of -# the dumpfile chunks. There are three options for the items in each -# list: either queries expressed as Query objects, or queries -# expressed as string expressions, or lists of objects. In the first -# two cases, the seacrh results will be written, in the last case, the -# objects are written as provided. We assume that there is only one -# relevant facility, e.g. that all objects related to the -# investigation are related to the same facility. 
We may thus ommit -# the facility from the ORDER BY clauses. -authtypes = [mergesearch([s % invid for s in usersearch]), - ("Grouping ORDER BY name INCLUDE UserGroup, User " - "<-> InvestigationGroup <-> Investigation [id=%d]" % invid)] -statictypes = [("Facility ORDER BY name"), - ("Instrument ORDER BY name " - "INCLUDE Facility, InstrumentScientist, User " - "<-> InvestigationInstrument <-> Investigation [id=%d]" - % invid), - (mergesearch([s % invid for s in ptsearch])), - ("InvestigationType ORDER BY name INCLUDE Facility " - "<-> Investigation [id=%d]" % invid), - ("SampleType ORDER BY name, molecularFormula INCLUDE Facility " - "<-> Sample <-> Investigation [id=%d]" % invid), - ("DatasetType ORDER BY name INCLUDE Facility " - "<-> Dataset <-> Investigation [id=%d]" % invid), - ("DatafileFormat ORDER BY name, version INCLUDE Facility " - "<-> Datafile <-> Dataset <-> Investigation [id=%d]" % invid)] -investtypes = [Query(client, "Investigation", - conditions={"id":"in (%d)" % invid}, - includes=inv_includes), - Query(client, "Sample", order=["name"], - conditions={"investigation.id":"= %d" % invid}, - includes={"investigation", "type.facility", - "parameters", "parameters.type.facility"}), - Query(client, "Dataset", order=["name"], - conditions={"investigation.id":"= %d" % invid}, - includes={"investigation", "type.facility", "sample", - "parameters", "parameters.type.facility"}), - Query(client, "Datafile", order=["dataset.name", "name"], - conditions={"dataset.investigation.id":"= %d" % invid}, - includes={"dataset", "datafileFormat.facility", - "parameters", "parameters.type.facility"})] +invid = get_investigation_id(client, conf.investigation) with open_dumpfile(client, conf.file, conf.format, 'w') as dumpfile: - dumpfile.writedata(authtypes) - dumpfile.writedata(statictypes) - dumpfile.writedata(investtypes) + dumpfile.writedata(get_auth_types(client, invid)) + dumpfile.writedata(get_static_types(client, invid)) + 
dumpfile.writedata(get_investigation_types(client, invid)) diff --git a/doc/examples/ingest.py b/doc/examples/ingest.py index 83a2333d..82880807 100644 --- a/doc/examples/ingest.py +++ b/doc/examples/ingest.py @@ -24,7 +24,7 @@ The script takes the name of an investigation as argument. The investigation MUST exist in ICAT beforehand and all datasets in the input directory MUST belong to this investigation. The script will -create tha datasets in ICAT, e.g. they MUST NOT exist in ICAT +create the datasets in ICAT, i.e. they MUST NOT exist in ICAT beforehand. The metadata input file may contain attributes and related objects (datasetInstrument, datasetTechnique, datasetParameter) for the datasets provided in the input directory. diff --git a/doc/src/conf.py b/doc/src/conf.py index bd8fdc6f..317e88ed 100644 --- a/doc/src/conf.py +++ b/doc/src/conf.py @@ -6,6 +6,7 @@ # full list see the documentation: # http://www.sphinx-doc.org/en/master/config +import os from pathlib import Path import sys @@ -135,6 +136,15 @@ def make_meta_rst(last_release): # # html_theme_options = {} +# Define the canonical URL if you are using a custom domain on Read the Docs +html_baseurl = os.environ.get("READTHEDOCS_CANONICAL_URL", "") + +# Tell Jinja2 templates the build is running on Read the Docs +if os.environ.get("READTHEDOCS", "") == "True": + if "html_context" not in globals(): + html_context = {} + html_context["READTHEDOCS"] = True + # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". diff --git a/doc/src/ingest.rst b/doc/src/ingest.rst index a12eca44..8a33b08e 100644 --- a/doc/src/ingest.rst +++ b/doc/src/ingest.rst @@ -23,6 +23,14 @@ format of the input files may be customized to some extent by providing custom versions of XSD and XSLT files, see :ref:`ingest-customize` below.
+Some attributes and relations of the ``Dataset`` objects are +prescribed during the transformation into ICAT data file format, +namely the ``complete`` attribute and the name of the ``DatasetType`` +to relate them to. The prescribed values are set in class attributes +:attr:`~icat.ingest.IngestReader.Dataset_complete` and +:attr:`~icat.ingest.IngestReader.DatasetType_name` respectively. They +may be customized by overriding these class attributes. + The ``Dataset`` objects in the input will not be created by :class:`~icat.ingest.IngestReader`, because it is assumed that a separate workflow in the caller will copy the content of datafiles to diff --git a/etc/ingest.xslt b/etc/ingest.xslt index ad14d715..d13487fe 100644 --- a/etc/ingest.xslt +++ b/etc/ingest.xslt @@ -23,14 +23,22 @@ - false + + + - + + + + + diff --git a/setup.py b/setup.py index adf1bbf8..82a24711 100755 --- a/setup.py +++ b/setup.py @@ -11,6 +11,7 @@ from setuptools import setup import setuptools.command.build_py import distutils.command.sdist +import distutils.dist from distutils import log from pathlib import Path import string @@ -33,6 +34,15 @@ docstring = __doc__ +# Enforcing of PEP 625 has been added in setuptools 69.3.0. We don't +# want this, we want to keep control on the name of the sdist +# ourselves. Disable it. +def _fixed_get_fullname(self): + return "%s-%s" % (self.get_name(), self.get_version()) + +distutils.dist.DistributionMetadata.get_fullname = _fixed_get_fullname + + class meta(setuptools.Command): description = "generate meta files" diff --git a/src/icat/ingest.py b/src/icat/ingest.py index 55a10b5a..9504afc0 100644 --- a/src/icat/ingest.py +++ b/src/icat/ingest.py @@ -94,6 +94,21 @@ class IngestReader(XMLDumpFileReader): .. versionadded:: 1.3.0 """ + Dataset_complete = "false" + """Value to prescribe in the `complete` attribute of datasets. + + .. note:: + The value for this class attribute is subject to change in + version 2.0. 
You might want to override it in order to pin it + to a value that is suitable for you. + + .. versionadded:: 1.5.0 + """ + DatasetType_name = "raw" + """Name of the `DatasetType` to relate datasets to. + + .. versionadded:: 1.5.0 + """ def __init__(self, client, metadata, investigation): self.investigation = investigation @@ -188,6 +203,20 @@ def get_environment(self, client): Subclasses may override this method to control the attributes set in the environment. + .. note:: + If you override this method, it is advisable to call the + inherited method from the parent class and augment the + result. This avoids inadvertently dropping environment + settings added in future versions. E.g. do something + like the following in your subclass: + + .. code-block:: python + + def get_environment(self, client): + env = super().get_environment(client) + env['mykey'] = 'value' + return env + :param client: the client object being used by this IngestReader. :type client: :class:`icat.client.Client` @@ -196,7 +225,12 @@ def get_environment(self, client): .. versionadded:: 1.3.0 """ - return dict(icat_version=str(client.apiversion)) + env = dict( + icat_version=str(client.apiversion), + dataset_complete=self.Dataset_complete, + datasettype_name=self.DatasetType_name, + ) + return env def add_environment(self, client, ingest_data): """Inject environment information into input data. diff --git a/tests/data/ingest-env.xslt b/tests/data/ingest-env.xslt index 8e0eb4e7..26b858d5 100644 --- a/tests/data/ingest-env.xslt +++ b/tests/data/ingest-env.xslt @@ -18,7 +18,9 @@ - ingest-env.xslt + + + diff --git a/tests/test_06_ingest.py b/tests/test_06_ingest.py index baa0c73c..480a877a 100644 --- a/tests/test_06_ingest.py +++ b/tests/test_06_ingest.py @@ -17,6 +17,13 @@ logger = logging.getLogger(__name__) +# There seems to be a bug in older icat.server versions when searching +# for a single boolean attribute in a query.
+skip_single_boolean = icat_version < "4.11.0" + +def print_xml(root): + print('\n', etree.tostring(root, pretty_print=True).decode(), sep='') + def get_test_investigation(client): query = Query(client, "Investigation", conditions={ "name": "= '12100409-ST'", @@ -66,8 +73,24 @@ def schemadir(monkeypatch): monkeypatch.setattr(IngestReader, "SchemaDir", testdatadir) +class EnvironmentIngestReader(IngestReader): + """Modified version of IngestReader + - Allow custom environment settings to be included. + - Capture the ingest data after injection of the environment in an + attribute. + """ + _add_env = dict() + def get_environment(self, client): + env = super().get_environment(client) + env.update(self._add_env) + return env + def add_environment(self, client, ingest_data): + super().add_environment(client, ingest_data) + self._ingest_data = ingest_data + + class MyIngestReader(IngestReader): - """Testting a customized IngestReader + """Testing a customized IngestReader """ XSD_Map = { ('icatingest', '1.0'): "ingest-10.xsd", @@ -92,6 +115,11 @@ class MyIngestReader(IngestReader): metadata = gettestdata("metadata-4.4-inl.xml"), checks = { "testingest_inl_1": [ + ("SELECT ds.complete FROM Dataset ds WHERE ds.id = %d", + False), + (("SELECT t.name FROM DatasetType t JOIN t.datasets AS ds " + "WHERE ds.id = %d"), + "raw"), ("SELECT ds.description FROM Dataset ds WHERE ds.id = %d", "Dy01Cp02 at 2.7 K"), ("SELECT ds.startDate FROM Dataset ds WHERE ds.id = %d", @@ -108,6 +136,11 @@ class MyIngestReader(IngestReader): 2.74103), ], "testingest_inl_2": [ + ("SELECT ds.complete FROM Dataset ds WHERE ds.id = %d", + False), + (("SELECT t.name FROM DatasetType t JOIN t.datasets AS ds " + "WHERE ds.id = %d"), + "raw"), ("SELECT ds.description FROM Dataset ds WHERE ds.id = %d", "Dy01Cp02 at 5.1 K"), ("SELECT ds.startDate FROM Dataset ds WHERE ds.id = %d", @@ -131,6 +164,11 @@ class MyIngestReader(IngestReader): metadata = gettestdata("metadata-5.0-inl.xml"), checks = { 
"testingest_inl5_1": [ + ("SELECT ds.complete FROM Dataset ds WHERE ds.id = %d", + False), + (("SELECT t.name FROM DatasetType t JOIN t.datasets AS ds " + "WHERE ds.id = %d"), + "raw"), ("SELECT ds.description FROM Dataset ds WHERE ds.id = %d", "Dy01Cp02 at 2.7 K"), ("SELECT ds.startDate FROM Dataset ds WHERE ds.id = %d", @@ -155,6 +193,11 @@ class MyIngestReader(IngestReader): 2.74103), ], "testingest_inl5_2": [ + ("SELECT ds.complete FROM Dataset ds WHERE ds.id = %d", + False), + (("SELECT t.name FROM DatasetType t JOIN t.datasets AS ds " + "WHERE ds.id = %d"), + "raw"), ("SELECT ds.description FROM Dataset ds WHERE ds.id = %d", "Dy01Cp02 at 5.1 K"), ("SELECT ds.startDate FROM Dataset ds WHERE ds.id = %d", @@ -189,6 +232,11 @@ class MyIngestReader(IngestReader): metadata = gettestdata("metadata-4.4-sep.xml"), checks = { "testingest_sep_1": [ + ("SELECT ds.complete FROM Dataset ds WHERE ds.id = %d", + False), + (("SELECT t.name FROM DatasetType t JOIN t.datasets AS ds " + "WHERE ds.id = %d"), + "raw"), ("SELECT ds.description FROM Dataset ds WHERE ds.id = %d", "Dy01Cp02 at 2.7 K"), ("SELECT ds.startDate FROM Dataset ds WHERE ds.id = %d", @@ -205,6 +253,11 @@ class MyIngestReader(IngestReader): 2.74103), ], "testingest_sep_2": [ + ("SELECT ds.complete FROM Dataset ds WHERE ds.id = %d", + False), + (("SELECT t.name FROM DatasetType t JOIN t.datasets AS ds " + "WHERE ds.id = %d"), + "raw"), ("SELECT ds.description FROM Dataset ds WHERE ds.id = %d", "Dy01Cp02 at 5.1 K"), ("SELECT ds.startDate FROM Dataset ds WHERE ds.id = %d", @@ -228,6 +281,11 @@ class MyIngestReader(IngestReader): metadata = gettestdata("metadata-5.0-sep.xml"), checks = { "testingest_sep5_1": [ + ("SELECT ds.complete FROM Dataset ds WHERE ds.id = %d", + False), + (("SELECT t.name FROM DatasetType t JOIN t.datasets AS ds " + "WHERE ds.id = %d"), + "raw"), ("SELECT ds.description FROM Dataset ds WHERE ds.id = %d", "Dy01Cp02 at 2.7 K"), ("SELECT ds.startDate FROM Dataset ds WHERE ds.id = %d", @@ -252,6 
+310,11 @@ class MyIngestReader(IngestReader): 2.74103), ], "testingest_sep5_2": [ + ("SELECT ds.complete FROM Dataset ds WHERE ds.id = %d", + False), + (("SELECT t.name FROM DatasetType t JOIN t.datasets AS ds " + "WHERE ds.id = %d"), + "raw"), ("SELECT ds.description FROM Dataset ds WHERE ds.id = %d", "Dy01Cp02 at 5.1 K"), ("SELECT ds.startDate FROM Dataset ds WHERE ds.id = %d", @@ -287,6 +350,11 @@ class MyIngestReader(IngestReader): metadata = gettestdata("metadata-sample.xml"), checks = { "testingest_sample_1": [ + ("SELECT ds.complete FROM Dataset ds WHERE ds.id = %d", + False), + (("SELECT t.name FROM DatasetType t JOIN t.datasets AS ds " + "WHERE ds.id = %d"), + "raw"), ("SELECT ds.description FROM Dataset ds WHERE ds.id = %d", "ab3465 at 2.7 K"), ("SELECT ds.startDate FROM Dataset ds WHERE ds.id = %d", @@ -301,6 +369,11 @@ class MyIngestReader(IngestReader): "ab3465"), ], "testingest_sample_2": [ + ("SELECT ds.complete FROM Dataset ds WHERE ds.id = %d", + False), + (("SELECT t.name FROM DatasetType t JOIN t.datasets AS ds " + "WHERE ds.id = %d"), + "raw"), ("SELECT ds.description FROM Dataset ds WHERE ds.id = %d", "ab3465 at 5.1 K"), ("SELECT ds.startDate FROM Dataset ds WHERE ds.id = %d", @@ -315,6 +388,11 @@ class MyIngestReader(IngestReader): "ab3465"), ], "testingest_sample_3": [ + ("SELECT ds.complete FROM Dataset ds WHERE ds.id = %d", + False), + (("SELECT t.name FROM DatasetType t JOIN t.datasets AS ds " + "WHERE ds.id = %d"), + "raw"), ("SELECT ds.description FROM Dataset ds WHERE ds.id = %d", "ab3466 at 2.7 K"), ("SELECT ds.startDate FROM Dataset ds WHERE ds.id = %d", @@ -329,6 +407,11 @@ class MyIngestReader(IngestReader): "ab3466"), ], "testingest_sample_4": [ + ("SELECT ds.complete FROM Dataset ds WHERE ds.id = %d", + False), + (("SELECT t.name FROM DatasetType t JOIN t.datasets AS ds " + "WHERE ds.id = %d"), + "raw"), ("SELECT ds.description FROM Dataset ds WHERE ds.id = %d", "reference"), ("SELECT ds.startDate FROM Dataset ds WHERE ds.id = 
%d", @@ -354,7 +437,9 @@ def test_ingest_schema(client, investigation, schemadir, case): datasets = [] for name in case.data: datasets.append(client.new("Dataset", name=name)) - reader = IngestReader(client, case.metadata, investigation) + reader = EnvironmentIngestReader(client, case.metadata, investigation) + print_xml(reader._ingest_data) + print_xml(reader.infile) with get_icatdata_schema().open("rb") as f: schema = etree.XMLSchema(etree.parse(f)) schema.assertValid(reader.infile) @@ -378,6 +463,8 @@ def test_ingest(client, investigation, samples, schemadir, case): }) ds = client.assertedSearch(query)[0] for query, res in case.checks[name]: + if skip_single_boolean and isinstance(res, bool): + continue assert client.assertedSearch(query % ds.id)[0] == res io_metadata = NamedBytesIO(""" @@ -439,6 +526,8 @@ def test_ingest_fileobj(client, investigation, samples, schemadir, case): }) ds = client.assertedSearch(query)[0] for query, res in case.checks[name]: + if skip_single_boolean and isinstance(res, bool): + continue assert client.assertedSearch(query % ds.id)[0] == res @@ -630,6 +719,65 @@ def test_ingest_error_searcherr(client, investigation, schemadir, case): logger.info("Raised %s: %s", exc.type.__name__, exc.value) +classattr_metadata = NamedBytesIO(""" + + + 2024-10-11T10:51:26+02:00 + metadata-writer 0.27a + + + + testingest_classattr_1 + Auxiliary data + 2022-02-03T15:40:12+01:00 + 2022-02-03T17:04:22+01:00 + + + +""".encode("utf8"), "classattr_metadata") +classattr_cases = [ + Case( + data = ["testingest_classattr_1"], + metadata = classattr_metadata, + checks = { + "testingest_classattr_1": [ + ("SELECT ds.complete FROM Dataset ds WHERE ds.id = %d", + True), + (("SELECT t.name FROM DatasetType t JOIN t.datasets AS ds " + "WHERE ds.id = %d"), + "other"), + ], + }, + marks = (), + ), +] +@pytest.mark.parametrize("case", [ + pytest.param(c, id=c.metadata.name, marks=c.marks) for c in classattr_cases +]) +@pytest.mark.skipif(skip_single_boolean, reason="Bug 
in icat.server") +def test_ingest_classattr(monkeypatch, client, investigation, schemadir, case): + """Test overriding prescribed values set in IngestReader class attributes. + """ + monkeypatch.setattr(IngestReader, "Dataset_complete", "true") + monkeypatch.setattr(IngestReader, "DatasetType_name", "other") + datasets = [] + for name in case.data: + datasets.append(client.new("Dataset", name=name)) + reader = IngestReader(client, case.metadata, investigation) + reader.ingest(datasets, dry_run=True, update_ds=True) + for ds in datasets: + ds.create() + reader.ingest(datasets) + for name in case.checks.keys(): + query = Query(client, "Dataset", conditions={ + "name": "= '%s'" % name, + "investigation.id": "= %d" % investigation.id, + }) + ds = client.assertedSearch(query)[0] + for query, res in case.checks[name]: + assert client.assertedSearch(query % ds.id)[0] == res + + customcases = [ Case( data = ["testingest_custom_icatingest_1"], @@ -703,6 +851,8 @@ def test_custom_ingest(client, investigation, samples, schemadir, case): }) ds = client.assertedSearch(query)[0] for query, res in case.checks[name]: + if skip_single_boolean and isinstance(res, bool): + continue assert client.assertedSearch(query % ds.id)[0] == res @@ -720,22 +870,30 @@ def test_custom_ingest(client, investigation, samples, schemadir, case): def test_ingest_env(monkeypatch, client, investigation, schemadir, case): """Test using the _environment element. - Applying a custom XSLT that extracts an attribute from the - _environment element that is injected by IngestReader into the - input data and puts that values into the head element of the - transformed input. This is to test that adding the _environment - element works and it is in principle possible to make use of the - values in the XSLT. + Add a custom attribute to the _environment that is injected by + IngestReader into the input data. 
Apply a custom XSLT that + extracts attributes from the _environment element and puts the + values into the head element of the transformed input. This is to + test that adding the _environment element works and it is in + principle possible to make use of the values in the XSLT. """ - monkeypatch.setattr(IngestReader, + generator = "test_ingest_env (python-icat %s)" % icat.__version__ + monkeypatch.setattr(EnvironmentIngestReader, + "_add_env", dict(generator=generator)) + monkeypatch.setattr(EnvironmentIngestReader, "XSLT_Map", dict(icatingest="ingest-env.xslt")) datasets = [] for name in case.data: datasets.append(client.new("Dataset", name=name)) - reader = IngestReader(client, case.metadata, investigation) + reader = EnvironmentIngestReader(client, case.metadata, investigation) + print_xml(reader._ingest_data) + print_xml(reader.infile) with get_icatdata_schema().open("rb") as f: schema = etree.XMLSchema(etree.parse(f)) schema.assertValid(reader.infile) version_elem = reader.infile.xpath("/icatdata/head/apiversion") assert version_elem assert version_elem[0].text == str(client.apiversion) + generator_elem = reader.infile.xpath("/icatdata/head/generator") + assert generator_elem + assert generator_elem[0].text == generator