From 783715b8386604887c140872207f842a08982af8 Mon Sep 17 00:00:00 2001 From: jgaff Date: Thu, 16 May 2019 15:31:15 -0500 Subject: [PATCH] Add match_dois, search_by_dois, and change match_repositories to match_organizations Closes #39 --- .../5-Field-Specific_Helper_Functions.ipynb | 274 +++++++++++++----- mdf_forge/forge.py | 76 ++++- setup.py | 4 +- tests/test_forge.py | 43 ++- 4 files changed, 305 insertions(+), 92 deletions(-) diff --git a/docs/tutorials/5-Field-Specific_Helper_Functions.ipynb b/docs/tutorials/5-Field-Specific_Helper_Functions.ipynb index f501904..5b27731 100644 --- a/docs/tutorials/5-Field-Specific_Helper_Functions.ipynb +++ b/docs/tutorials/5-Field-Specific_Helper_Functions.ipynb @@ -50,7 +50,7 @@ { "data": { "text/plain": [ - "" + "" ] }, "execution_count": 3, @@ -129,7 +129,7 @@ { "data": { "text/plain": [ - "" + "" ] }, "execution_count": 5, @@ -201,7 +201,7 @@ { "data": { "text/plain": [ - "" + "" ] }, "execution_count": 7, @@ -223,26 +223,28 @@ { "data": { "text/plain": [ - "{'files': [{'data_type': 'ASCII text, with very long lines, with no line terminators',\n", - " 'filename': 'nist_xps_27469.json',\n", - " 'globus': 'globus://e38ee745-6d04-11e5-ba46-22000b92c6ec/MDF/mdf_connect/prod/data/nist_xps_db_v1/nist_xps_27469.json',\n", - " 'length': 1196,\n", + "{'cip': {'bv': '30535.3',\n", + " 'energy': '-0.37',\n", + " 'forcefield': 'Fe-P.eam.fs',\n", + " 'gv': '-1878841.7',\n", + " 'mpid': 'mp-130',\n", + " 'totenergy': '-17.804778'},\n", + " 'files': [{'data_type': 'ASCII text, with very long lines, with no line terminators',\n", + " 'filename': 'classical_interatomic_potentials.json',\n", + " 'globus': 'globus://e38ee745-6d04-11e5-ba46-22000b92c6ec/MDF/mdf_connect/prod/data/cip_v1/classical_interatomic_potentials.json',\n", + " 'length': 1841203,\n", " 'mime_type': 'text/plain',\n", - " 'sha512': 'da4cac24fce125d061d6726a43439024dcb6be4ddf6664a93ff30c36755d0383ad47da76a4b9002dc754a3b2784c664a21432f292021a6c19c7d71f08940df3e',\n", - " 
'url': 'https://e38ee745-6d04-11e5-ba46-22000b92c6ec.e.globus.org/MDF/mdf_connect/prod/data/nist_xps_db_v1/nist_xps_27469.json'}],\n", - " 'material': {'composition': 'InP', 'elements': ['In', 'P']},\n", - " 'mdf': {'ingest_date': '2018-11-06T16:57:59.847843Z',\n", - " 'mdf_id': '5be1c8512ef3883312755ed3',\n", - " 'parent_id': '5be1c8172ef388331274efdf',\n", + " 'sha512': '96635ee0c15d1d0187b18805653a02b1a6dfa5648db82153467045de18adcc08c753e2897d2b48a78a2167a442219e9aeff6b1103732c2158facac8fa4911b33',\n", + " 'url': 'https://e38ee745-6d04-11e5-ba46-22000b92c6ec.e.globus.org/MDF/mdf_connect/prod/data/cip_v1/classical_interatomic_potentials.json'}],\n", + " 'material': {'composition': 'P48', 'elements': ['P']},\n", + " 'mdf': {'ingest_date': '2018-10-29T17:47:57.468388Z',\n", + " 'mdf_id': '5bd747cd2ef3880b0f21369b',\n", + " 'parent_id': '5bd747cd2ef3880b0f2135d1',\n", " 'resource_type': 'record',\n", - " 'scroll_id': 28404,\n", - " 'source_id': 'nist_xps_db_v1',\n", - " 'source_name': 'nist_xps_db',\n", - " 'version': 1},\n", - " 'nist_xps_db': {'binding_energy_ev': '186.3',\n", - " 'energy_uncertainty_ev': '',\n", - " 'notes': '',\n", - " 'temperature_k': ''}}" + " 'scroll_id': 202,\n", + " 'source_id': 'cip_v1',\n", + " 'source_name': 'cip',\n", + " 'version': 1}}" ] }, "execution_count": 8, @@ -271,7 +273,7 @@ { "data": { "text/plain": [ - "" + "" ] }, "execution_count": 9, @@ -294,26 +296,28 @@ { "data": { "text/plain": [ - "[{'files': [{'data_type': 'ASCII text, with very long lines, with no line terminators',\n", - " 'filename': 'nist_xps_27079.json',\n", - " 'globus': 'globus://e38ee745-6d04-11e5-ba46-22000b92c6ec/MDF/mdf_connect/prod/data/nist_xps_db_v1/nist_xps_27079.json',\n", - " 'length': 1083,\n", + "[{'cip': {'bv': '59842.7',\n", + " 'energy': '-3.29',\n", + " 'forcefield': 'Fe-P.eam.fs',\n", + " 'gv': '-28195.7',\n", + " 'mpid': 'mp-778',\n", + " 'totenergy': '-237.05557'},\n", + " 'files': [{'data_type': 'ASCII text, with very long lines, with no 
line terminators',\n", + " 'filename': 'classical_interatomic_potentials.json',\n", + " 'globus': 'globus://e38ee745-6d04-11e5-ba46-22000b92c6ec/MDF/mdf_connect/prod/data/cip_v1/classical_interatomic_potentials.json',\n", + " 'length': 1841203,\n", " 'mime_type': 'text/plain',\n", - " 'sha512': '2ffed70fbcc42c97119ced5905a51b651a86186586da5bc0b434f5904552d8482bfbdffead338aeab3e9db59bd3bcfbeb5a9e4e8d745736601c5d7beab84e0fe',\n", - " 'url': 'https://e38ee745-6d04-11e5-ba46-22000b92c6ec.e.globus.org/MDF/mdf_connect/prod/data/nist_xps_db_v1/nist_xps_27079.json'}],\n", - " 'material': {'composition': 'ZnI2', 'elements': ['I', 'Zn']},\n", - " 'mdf': {'ingest_date': '2018-11-06T16:57:59.847843Z',\n", - " 'mdf_id': '5be1c8512ef3883312755ed4',\n", - " 'parent_id': '5be1c8172ef388331274efdf',\n", + " 'sha512': '96635ee0c15d1d0187b18805653a02b1a6dfa5648db82153467045de18adcc08c753e2897d2b48a78a2167a442219e9aeff6b1103732c2158facac8fa4911b33',\n", + " 'url': 'https://e38ee745-6d04-11e5-ba46-22000b92c6ec.e.globus.org/MDF/mdf_connect/prod/data/cip_v1/classical_interatomic_potentials.json'}],\n", + " 'material': {'composition': 'P24 Fe48', 'elements': ['Fe', 'P']},\n", + " 'mdf': {'ingest_date': '2018-10-29T17:47:57.468388Z',\n", + " 'mdf_id': '5bd747cd2ef3880b0f2136a1',\n", + " 'parent_id': '5bd747cd2ef3880b0f2135d1',\n", " 'resource_type': 'record',\n", - " 'scroll_id': 28405,\n", - " 'source_id': 'nist_xps_db_v1',\n", - " 'source_name': 'nist_xps_db',\n", - " 'version': 1},\n", - " 'nist_xps_db': {'binding_energy_ev': '1022.9',\n", - " 'energy_uncertainty_ev': '',\n", - " 'notes': '',\n", - " 'temperature_k': ''}}]" + " 'scroll_id': 208,\n", + " 'source_id': 'cip_v1',\n", + " 'source_name': 'cip',\n", + " 'version': 1}}]" ] }, "execution_count": 10, @@ -345,7 +349,7 @@ { "data": { "text/plain": [ - "" + "" ] }, "execution_count": 11, @@ -440,7 +444,7 @@ { "data": { "text/plain": [ - "" + "" ] }, "execution_count": 13, @@ -535,7 +539,7 @@ { "data": { "text/plain": [ - "" + "" ] 
}, "execution_count": 15, @@ -551,7 +555,7 @@ "cell_type": "code", "execution_count": 16, "metadata": { - "scrolled": false + "scrolled": true }, "outputs": [ { @@ -598,8 +602,8 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### match_repositories\n", - "`match_repositories()` matches values against the `\"mdf.repositories\"` field. It is equivalent to chaining `match_field(\"mdf.repositories\", value)` for each value." + "### match_organizations\n", + "`match_organizations()` matches values against the `\"mdf.organizations\"` field. It is equivalent to chaining `match_field(\"mdf.organizations\", value)` for each value." ] }, { @@ -608,7 +612,8 @@ "metadata": {}, "outputs": [], "source": [ - "mdf.match_repositories([\"NIST\", \"DOE\"], match_all=False)" + "# Organizations are currently migrating from the old \"repositories\" field, so some may not be set up correctly just yet.\n", + "# mdf.match_organizations([\"NIST\", \"DOE\"], match_all=False)" ] }, { @@ -617,21 +622,79 @@ "metadata": { "scrolled": true }, + "outputs": [], + "source": [ + "# res = mdf.search(limit=10)\n", + "# res[0]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### match_dois\n", + "`match_dois()` searches for datasets with one of the given Digital Object Identifiers. It is equivalent to chaining `match_field(\"dc.identifier.identifier\", doi)` for each doi." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mdf.match_dois(\"https://dx.doi.org/10.13011/M3B36G\")" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": { + "scrolled": true + }, "outputs": [ { "data": { "text/plain": [ - "{'files': [{'data_type': 'TIFF image data, big-endian, direntries=13, height=0, bps=8, PhotometricIntepretation=BlackIsZero, description=ImageJ=1.50i, width=0',\n", - " 'filename': 'eds mapping 2 - pure ti, 250 nm sio2, 2h, 800c - ti map.tif',\n", - " 'globus': 'globus://e38ee745-6d04-11e5-ba46-22000b92c6ec/MDF/mdf_connect/prod/data/si_ti_oxidation_v1/Early_oxidation_behavior_of_Si-coated_titanium/Dataset/Titanium Oxidation/eds mapping 2 - pure ti, 250 nm sio2, 2h, 800c - ti map.tif',\n", - " 'length': 213607,\n", - " 'mime_type': 'image/tiff',\n", - " 'sha512': '8460d1b7f8543bd909245d0e3456e5e58207245e619172e8b0631f7d7da7a00e6af94d0f9fcaf47be8e09984ef96826e098172c2ac968fb25c237abd223f62f1',\n", - " 'url': 'https://e38ee745-6d04-11e5-ba46-22000b92c6ec.e.globus.org/MDF/mdf_connect/prod/data/si_ti_oxidation_v1/Early_oxidation_behavior_of_Si-coated_titanium/Dataset/Titanium Oxidation/eds mapping 2 - pure ti, 250 nm sio2, 2h, 800c - ti map.tif'}],\n", - " 'image': {'height': 393, 'megapixels': 0.213399, 'width': 543},\n", + "{'data': {'endpoint_path': 'globus://e38ee745-6d04-11e5-ba46-22000b92c6ec/MDF/mdf_connect/prod/data/si_ti_oxidation_v1/',\n", + " 'link': 'https://www.globus.org/app/transfer?origin_id=e38ee745-6d04-11e5-ba46-22000b92c6ec&origin_path=/MDF/mdf_connect/prod/data/si_ti_oxidation_v1/'},\n", + " 'dc': {'creators': [{'affiliations': ['University of Michigan'],\n", + " 'creatorName': 'Chou, Kathleen',\n", + " 'familyName': 'Chou',\n", + " 'givenName': 'Kathleen'},\n", + " {'affiliations': ['University of 
Michigan'],\n", + " 'creatorName': 'Chu, Peng-Wei',\n", + " 'familyName': 'Chu',\n", + " 'givenName': 'Peng-Wei'},\n", + " {'affiliations': ['University of Michigan'],\n", + " 'creatorName': 'Marquis, Emmanuelle',\n", + " 'familyName': 'Marquis',\n", + " 'givenName': 'Emmanuelle'}],\n", + " 'descriptions': [{'description': \"This dataset compiles images and data from oxidation studies of pure titanium coated with a pure Si coating at 800 degrees C that supplement submitted manuscript 'Early oxidation behavior of Si-coated titanium'. Please note, the meta-data provided may be updated over time. We reserve the right to update this data without notification. If you would like to be notified of changes, please email Kathleen Chou at kachou@umich.edu.\",\n", + " 'descriptionType': 'Other'}],\n", + " 'identifier': {'identifier': 'https://dx.doi.org/10.13011/M3B36G',\n", + " 'identifierType': 'DOI'},\n", + " 'publicationYear': '2018',\n", + " 'publisher': 'Materials Commons',\n", + " 'resourceType': {'resourceType': 'Dataset',\n", + " 'resourceTypeGeneral': 'Dataset'},\n", + " 'subjects': [{'subject': 'Titanium'},\n", + " {'subject': 'Oxidation'},\n", + " {'subject': 'Silicon coatings'},\n", + " {'subject': 'Transmission electron microscopy'}],\n", + " 'titles': [{'title': 'Early oxidation behavior of Si-coated titanium'}]},\n", " 'mdf': {'ingest_date': '2018-10-30T21:44:27.860096Z',\n", - " 'mdf_id': '5bd8d0bb2ef38836a62135d2',\n", - " 'parent_id': '5bd8d0bb2ef38836a62135d1',\n", + " 'mdf_id': '5bd8d0bb2ef38836a62135d1',\n", " 'repositories': ['MCPub',\n", " 'DOE',\n", " 'Department of Energy Software Innovation Center for Integrated Multi-Scale Modeling of Structural Metals',\n", @@ -639,20 +702,21 @@ " 'Materials Commons',\n", " 'Center for Predictive Integrated Structural Materials Science',\n", " 'PRISMS'],\n", - " 'resource_type': 'record',\n", - " 'scroll_id': 1,\n", + " 'resource_type': 'dataset',\n", + " 'scroll_id': 0,\n", " 'source_id': 
'si_ti_oxidation_v1',\n", " 'source_name': 'si_ti_oxidation',\n", - " 'version': 1}}" + " 'version': 1},\n", + " 'services': {'mdf_search': 'This dataset was ingested to MDF Search.'}}" ] }, - "execution_count": 18, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "res = mdf.search(limit=10)\n", + "res = mdf.search()\n", "res[0]" ] }, @@ -673,7 +737,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 21, "metadata": { "scrolled": true }, @@ -713,7 +777,7 @@ " 'volume_pa': {'units': 'angstrom^3/atom', 'value': 12.3364}}}" ] }, - "execution_count": 19, + "execution_count": 21, "metadata": {}, "output_type": "execute_result" } @@ -728,12 +792,12 @@ "metadata": {}, "source": [ "### search_by_titles\n", - "`search_by_titles()` executes a search for the provided elements in the provided sources and returns the results." + "`search_by_titles()` executes a search for the provided titles and returns the results." ] }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 22, "metadata": { "scrolled": true }, @@ -785,7 +849,7 @@ " 'services': {'mdf_search': 'This dataset was ingested to MDF Search.'}}" ] }, - "execution_count": 20, + "execution_count": 22, "metadata": {}, "output_type": "execute_result" } @@ -795,6 +859,78 @@ "res[0]" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### search_by_dois\n", + "`search_by_dois()` executes a search for the given Digital Object Identifiers and returns the results." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'data': {'endpoint_path': 'globus://e38ee745-6d04-11e5-ba46-22000b92c6ec/MDF/mdf_connect/prod/data/si_ti_oxidation_v1/',\n", + " 'link': 'https://www.globus.org/app/transfer?origin_id=e38ee745-6d04-11e5-ba46-22000b92c6ec&origin_path=/MDF/mdf_connect/prod/data/si_ti_oxidation_v1/'},\n", + " 'dc': {'creators': [{'affiliations': ['University of Michigan'],\n", + " 'creatorName': 'Chou, Kathleen',\n", + " 'familyName': 'Chou',\n", + " 'givenName': 'Kathleen'},\n", + " {'affiliations': ['University of Michigan'],\n", + " 'creatorName': 'Chu, Peng-Wei',\n", + " 'familyName': 'Chu',\n", + " 'givenName': 'Peng-Wei'},\n", + " {'affiliations': ['University of Michigan'],\n", + " 'creatorName': 'Marquis, Emmanuelle',\n", + " 'familyName': 'Marquis',\n", + " 'givenName': 'Emmanuelle'}],\n", + " 'descriptions': [{'description': \"This dataset compiles images and data from oxidation studies of pure titanium coated with a pure Si coating at 800 degrees C that supplement submitted manuscript 'Early oxidation behavior of Si-coated titanium'. Please note, the meta-data provided may be updated over time. We reserve the right to update this data without notification. 
If you would like to be notified of changes, please email Kathleen Chou at kachou@umich.edu.\",\n", + " 'descriptionType': 'Other'}],\n", + " 'identifier': {'identifier': 'https://dx.doi.org/10.13011/M3B36G',\n", + " 'identifierType': 'DOI'},\n", + " 'publicationYear': '2018',\n", + " 'publisher': 'Materials Commons',\n", + " 'resourceType': {'resourceType': 'Dataset',\n", + " 'resourceTypeGeneral': 'Dataset'},\n", + " 'subjects': [{'subject': 'Titanium'},\n", + " {'subject': 'Oxidation'},\n", + " {'subject': 'Silicon coatings'},\n", + " {'subject': 'Transmission electron microscopy'}],\n", + " 'titles': [{'title': 'Early oxidation behavior of Si-coated titanium'}]},\n", + " 'mdf': {'ingest_date': '2018-10-30T21:44:27.860096Z',\n", + " 'mdf_id': '5bd8d0bb2ef38836a62135d1',\n", + " 'repositories': ['MCPub',\n", + " 'DOE',\n", + " 'Department of Energy Software Innovation Center for Integrated Multi-Scale Modeling of Structural Metals',\n", + " 'U.S. Department of Energy',\n", + " 'Materials Commons',\n", + " 'Center for Predictive Integrated Structural Materials Science',\n", + " 'PRISMS'],\n", + " 'resource_type': 'dataset',\n", + " 'scroll_id': 0,\n", + " 'source_id': 'si_ti_oxidation_v1',\n", + " 'source_name': 'si_ti_oxidation',\n", + " 'version': 1},\n", + " 'services': {'mdf_search': 'This dataset was ingested to MDF Search.'}}" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "res = mdf.search_by_dois(\"https://dx.doi.org/10.13011/M3B36G\")\n", + "res[0]" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -805,7 +941,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 24, "metadata": { "scrolled": true }, @@ -835,7 +971,7 @@ " 'version': 1}}" ] }, - "execution_count": 21, + "execution_count": 24, "metadata": {}, "output_type": "execute_result" } diff --git a/mdf_forge/forge.py b/mdf_forge/forge.py index ff2e536..b7b9bee 100644 --- a/mdf_forge/forge.py +++ 
b/mdf_forge/forge.py @@ -115,7 +115,7 @@ def match_source_names(self, source_names): return self def match_ids(self, mdf_ids): - """Match all the IDs in the given ``mdf_id`` list. + """Match the IDs in the given ``mdf_id`` list. Arguments: mdf_ids (str or list of str): The IDs to match. @@ -253,13 +253,12 @@ def match_resource_types(self, types): self.match_field(field="mdf.resource_type", value=rt, required=False, new_group=False) return self - def match_repositories(self, repositories, match_all=True): - """Match the given repositories. - Repositories are MDF-identified collections of datasets from a group - or organization. + def match_organizations(self, organizations, match_all=True): + """Match the given Organizations. + Organizations are MDF-registered groups that can apply rules to datasets. Arguments: - repositories (str or list of str): The repositories to match. + organizations (str or list of str): The organizations to match. match_all (bool): If ``True``, will add with ``AND``. If ``False``, will use ``OR``. **Default:** ``True``. 
@@ -267,18 +266,42 @@ def match_repositories(self, repositories, match_all=True): Returns: Forge: Self """ - # If no repos, nothing to match - if not repositories: + # If no orgs, nothing to match + if not organizations: return self - if isinstance(repositories, str): - repositories = [repositories] - # First repo should be in new group and required - self.match_field(field="mdf.repositories", value=repositories[0], + if isinstance(organizations, str): + organizations = [organizations] + # First org should be in new group and required + self.match_field(field="mdf.organizations", value=organizations[0], required=True, new_group=True) # Other elements should stay in that group - for repo in repositories[1:]: - self.match_field(field="mdf.repositories", value=repo, required=match_all, + for org in organizations[1:]: + self.match_field(field="mdf.organizations", value=org, required=match_all, new_group=False) + return self + + def match_dois(self, dois): + """Match the given Digital Object Identifiers. + + Arguments: + dois (str or list of str): DOIs to match and return. + + Returns: + Forge: self + """ + if not dois: + return self + if isinstance(dois, str): + dois = [dois] + # TODO: DOIs usually contain problem characters, but are not sanitized here + # First doi should be in new group and required + self.match_field(field="dc.identifier.identifier", value=dois[0], + required=True, new_group=True) + # Other DOIs should stay in that group, and not be required + for doi in dois[1:]: + self.match_field(field="dc.identifier.identifier", value=doi, + required=False, new_group=False) + return self # *********************************************** # * Premade searches @@ -343,6 +366,31 @@ def search_by_titles(self, titles, index=None, limit=None, info=False): """ return self.match_titles(titles).search(limit=limit, info=info) + def search_by_dois(self, dois, index=None, limit=None, info=False): + """Execute a search for the given Digital Object Identifiers. 
+ ``search_by_dois([x])`` is equivalent to ``match_dois([x]).search()`` + + Note: + This method will use terms from the current query, and resets the current query. + + Arguments: + dois (str or list of str): The DOIs to find. + index (str): The Search index to search on. **Default:** The current index. + limit (int): The maximum number of results to return. + The max for this argument is the ``SEARCH_LIMIT`` imposed by Globus Search. + **Default:** ``SEARCH_LIMIT``. + info (bool): If ``False``, search will return a list of the results. + If ``True``, search will return a tuple containing the results list + and other information about the query. + **Default:** ``False``. + + Returns: + If ``info`` is ``False``, *list*: The search results. + If ``info`` is ``True``, *tuple*: The search results, + and a dictionary of query information. + """ + return self.match_dois(dois).search(limit=limit, info=info) + def aggregate_sources(self, source_names, index=None): """Aggregate all records with the given ``source_name`` values. There is no limit to the number of results returned. 
diff --git a/setup.py b/setup.py index 35e85cc..2f10818 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ setup( name='mdf_forge', - version='0.7.0', + version='0.7.1', packages=['mdf_forge'], description='Materials Data Facility python package', long_description=("Forge is the Materials Data Facility Python package" @@ -10,7 +10,7 @@ "Forge allows users to perform simple queries and " "facilitiates moving and synthesizing results."), install_requires=[ - "mdf-toolbox>=0.4.0", + "mdf-toolbox>=0.4.6", "globus-sdk>=1.7.0", "requests>=2.18.4", "tqdm>=4.19.4" diff --git a/tests/test_forge.py b/tests/test_forge.py index 69ab7d5..8dab396 100644 --- a/tests/test_forge.py +++ b/tests/test_forge.py @@ -295,23 +295,46 @@ def test_forge_match_resource_types(): assert f.match_resource_types("") == f -def test_forge_match_repositories(): +# TODO: Enable this test once Organizations are deployed and actually in-use on Prod index +@pytest.mark.xfail +def test_forge_match_organizations(): f = Forge(index="mdf") # One repo - f.match_repositories("DOE") + f.match_organizations("DOE") res1 = f.search() assert res1 != [] - check_val1 = check_field(res1, "mdf.repositories", "DOE") + check_val1 = check_field(res1, "mdf.organizations", "DOE") assert check_val1 == 1 # Multi-repo - f.match_repositories(["NIST", "DOE"], match_all=False) + f.match_organizations(["NIST", "DOE"], match_all=False) res2 = f.search() - assert check_field(res2, "mdf.repositories", "DOE") == 2 - assert check_field(res2, "mdf.repositories", "NIST") == 2 + assert check_field(res2, "mdf.organizations", "DOE") == 2 + assert check_field(res2, "mdf.organizations", "NIST") == 2 # No repos - assert f.match_repositories("") == f + assert f.match_organizations("") == f + + +def test_forge_match_dois(): + f = Forge(index="mdf") + # One doi + f.match_dois("https://dx.doi.org/10.13011/M3B36G") + res1 = f.search() + assert res1 != [] + assert check_field(res1, "dc.identifier.identifier", "https://dx.doi.org/10.13011/M3B36G") 
== 0 + + # Multiple dois + f.match_dois(["https://dx.doi.org/10.13011/M3B36G", "10.test/1"]) + res2 = f.search() + + # res1 is a subset of res2 + assert len(res2) > len(res1) + assert all([r1 in res2 for r1 in res1]) + assert check_field(res2, "dc.identifier.identifier", "10.test/1") == 2 + + # No doi + assert f.match_dois("") == f def test_forge_search_by_elements(): @@ -339,6 +362,12 @@ def test_forge_search_by_titles(): "NIST X-ray Photoelectron Spectroscopy Database") == 2 +def test_forge_search_by_dois(): + f = Forge(index="mdf") + res1 = f.search_by_dois("https://dx.doi.org/10.13011/M3B36G") + assert check_field(res1, "dc.identifier.identifier", "https://dx.doi.org/10.13011/M3B36G") == 0 + + def test_forge_aggregate_sources(): # Test limit f = Forge(index="mdf")