From 2045d4d11fb788687e9f1c3a1e61853fd97afa33 Mon Sep 17 00:00:00 2001 From: jgaff Date: Thu, 13 Dec 2018 16:40:43 -0600 Subject: [PATCH 1/5] Add match_repositories helper --- .../5 - Field-Specific Helper Functions.ipynb | 559 +++++++++++------- mdf_forge/forge.py | 28 + tests/test_forge.py | 21 +- 3 files changed, 407 insertions(+), 201 deletions(-) diff --git a/docs/tutorials/5 - Field-Specific Helper Functions.ipynb b/docs/tutorials/5 - Field-Specific Helper Functions.ipynb index 003694e..b309a30 100644 --- a/docs/tutorials/5 - Field-Specific Helper Functions.ipynb +++ b/docs/tutorials/5 - Field-Specific Helper Functions.ipynb @@ -36,12 +36,14 @@ { "cell_type": "code", "execution_count": 3, - "metadata": {}, + "metadata": { + "scrolled": true + }, "outputs": [ { "data": { "text/plain": [ - "" + "" ] }, "execution_count": 3, @@ -53,26 +55,77 @@ "mdf.match_source_names(\"oqmd\")" ] }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'crystal_structure': {'number_of_atoms': 4,\n", + " 'space_group_number': 225,\n", + " 'volume': 93.2374},\n", + " 'dft': {'converged': True,\n", + " 'cutoff_energy': 520.0,\n", + " 'exchange_correlation_functional': 'PBE'},\n", + " 'files': [{'data_type': 'ASCII text, with very long lines, with no line terminators',\n", + " 'filename': '332513.json',\n", + " 'globus': 'globus://e38ee745-6d04-11e5-ba46-22000b92c6ec/MDF/mdf_connect/prod/data/oqmd_v13/332513.json',\n", + " 'length': 10403,\n", + " 'mime_type': 'text/plain',\n", + " 'sha512': 'a4732793bff687ea39f43f741d6e3f380bb3608d38114420569282005efaee92e21799e6d6c3a4c63d7cf8b42d6388a81bc992d2a71709fd4d4f05cb3e6bb077',\n", + " 'url': 'https://e38ee745-6d04-11e5-ba46-22000b92c6ec.e.globus.org/MDF/mdf_connect/prod/data/oqmd_v13/332513.json'}],\n", + " 'material': {'composition': 'Mn1Pa1Pm2', 'elements': ['Mn', 'Pa', 'Pm']},\n", + " 'mdf': {'ingest_date': '2018-11-09T19:44:43.687681Z',\n", + " 
'mdf_id': '5be5e3af2ef388650efd6705',\n", + " 'parent_id': '5be5e3ab2ef388650efd6704',\n", + " 'resource_type': 'record',\n", + " 'scroll_id': 1,\n", + " 'source_id': 'oqmd_v13',\n", + " 'source_name': 'oqmd',\n", + " 'version': 13},\n", + " 'oqmd': {'band_gap': {'units': 'eV', 'value': 0.0},\n", + " 'configuration': 'standard',\n", + " 'delta_e': {'units': 'eV/atom', 'value': 0.729781857801725},\n", + " 'magnetic_moment': {'units': 'bohr/atom'},\n", + " 'stability': {'units': 'eV/atom', 'value': 0.729382215301725},\n", + " 'total_energy': {'units': 'eV/atom', 'value': -6.27350557},\n", + " 'volume_pa': {'units': 'angstrom^3/atom', 'value': 23.3093}}}" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "res = mdf.search(limit=10)\n", + "res[0]" + ] + }, { "cell_type": "markdown", "metadata": {}, "source": [ "### match_elements\n", - "`match_elements()` matches values against the `\"mdf.elements\"` field. It is equivalent to chaining `match_field(\"mdf.elements\", value)` for each value." + "`match_elements()` matches values against the `\"materials.elements\"` field. It is equivalent to chaining `match_field(\"materials.elements\", value)` for each value." 
] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "" + "" ] }, - "execution_count": 4, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -81,6 +134,50 @@ "mdf.match_elements([\"Al\", \"Cu\"])" ] }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'cip': {'bv': '95.5',\n", + " 'energy': '-3.48',\n", + " 'forcefield': 'AlCu.eam.alloy',\n", + " 'gv': '14.3',\n", + " 'mpid': 'mp-998',\n", + " 'totenergy': '-333.69096'},\n", + " 'files': [{'data_type': 'ASCII text, with very long lines, with no line terminators',\n", + " 'filename': 'classical_interatomic_potentials.json',\n", + " 'globus': 'globus://e38ee745-6d04-11e5-ba46-22000b92c6ec/MDF/mdf_connect/prod/data/cip_v1/classical_interatomic_potentials.json',\n", + " 'length': 1841203,\n", + " 'mime_type': 'text/plain',\n", + " 'sha512': '96635ee0c15d1d0187b18805653a02b1a6dfa5648db82153467045de18adcc08c753e2897d2b48a78a2167a442219e9aeff6b1103732c2158facac8fa4911b33',\n", + " 'url': 'https://e38ee745-6d04-11e5-ba46-22000b92c6ec.e.globus.org/MDF/mdf_connect/prod/data/cip_v1/classical_interatomic_potentials.json'}],\n", + " 'material': {'composition': 'Al64 Cu32', 'elements': ['Al', 'Cu']},\n", + " 'mdf': {'ingest_date': '2018-10-29T17:47:57.468388Z',\n", + " 'mdf_id': '5bd747d32ef3880b0f2142a6',\n", + " 'parent_id': '5bd747cd2ef3880b0f2135d1',\n", + " 'resource_type': 'record',\n", + " 'scroll_id': 3285,\n", + " 'source_id': 'cip_v1',\n", + " 'source_name': 'cip',\n", + " 'version': 1}}" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "res = mdf.search(limit=10)\n", + "res[0]" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -91,16 +188,16 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 7, "metadata": {}, "outputs": [ { "data": 
{ "text/plain": [ - "" + "" ] }, - "execution_count": 5, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -111,7 +208,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 8, "metadata": { "scrolled": true }, @@ -119,37 +216,29 @@ { "data": { "text/plain": [ - "{'crystal_structure': {'cross_reference': {'icsd': 42517},\n", - " 'number_of_atoms': 6,\n", - " 'space_group_number': 140,\n", - " 'volume': 88.5788},\n", - " 'dft': {'converged': True,\n", - " 'cutoff_energy': 520.0,\n", - " 'exchange_correlation_functional': 'PBE'},\n", - " 'files': [{'data_type': 'ASCII text, with very long lines, with no line terminators',\n", - " 'filename': '5724.json',\n", - " 'globus': 'globus://e38ee745-6d04-11e5-ba46-22000b92c6ec/MDF/mdf_connect/prod/data/oqmd_v3/metadata-files/5724.json',\n", - " 'length': 11547,\n", + "{'files': [{'data_type': 'ASCII text, with very long lines, with no line terminators',\n", + " 'filename': 'nist_xps_27469.json',\n", + " 'globus': 'globus://e38ee745-6d04-11e5-ba46-22000b92c6ec/MDF/mdf_connect/prod/data/nist_xps_db_v1/nist_xps_27469.json',\n", + " 'length': 1196,\n", " 'mime_type': 'text/plain',\n", - " 'sha512': 'f3e8743e64697ab5c6021b815ca4f780940f7ea4b50e2e31278216c3bd8bec16677d436f3b0e46e1e1cf9ea4a415899ed65dba1b4d4c7420d6d2ff4eca125990',\n", - " 'url': 'https://e38ee745-6d04-11e5-ba46-22000b92c6ec.e.globus.org/MDF/mdf_connect/prod/data/oqmd_v3/metadata-files/5724.json'}],\n", - " 'material': {'composition': 'Al2Cu1', 'elements': ['Cu', 'Al']},\n", - " 'mdf': {'ingest_date': '2018-04-11T18:00:55.808133Z',\n", - " 'mdf_id': '5ace4f2334a2265849f63ff7',\n", - " 'parent_id': '5ace4d5734a2265849f44fba',\n", + " 'sha512': 'da4cac24fce125d061d6726a43439024dcb6be4ddf6664a93ff30c36755d0383ad47da76a4b9002dc754a3b2784c664a21432f292021a6c19c7d71f08940df3e',\n", + " 'url': 'https://e38ee745-6d04-11e5-ba46-22000b92c6ec.e.globus.org/MDF/mdf_connect/prod/data/nist_xps_db_v1/nist_xps_27469.json'}],\n", + " 
'material': {'composition': 'InP', 'elements': ['In', 'P']},\n", + " 'mdf': {'ingest_date': '2018-11-06T16:57:59.847843Z',\n", + " 'mdf_id': '5be1c8512ef3883312755ed3',\n", + " 'parent_id': '5be1c8172ef388331274efdf',\n", " 'resource_type': 'record',\n", - " 'scroll_id': 127037,\n", - " 'source_name': 'oqmd_v3'},\n", - " 'oqmd_v3': {'band_gap': {'units': 'eV', 'value': 0.0},\n", - " 'configuration': 'static',\n", - " 'delta_e': {'units': 'eV/atom', 'value': -0.155698471666667},\n", - " 'magnetic_moment': {'units': 'bohr/atom'},\n", - " 'stability': {'units': 'eV/atom', 'value': 0.019296661666666},\n", - " 'total_energy': {'units': 'eV/atom', 'value': -3.891511245},\n", - " 'volume_pa': {'units': 'angstrom^3/atom', 'value': 14.7631}}}" + " 'scroll_id': 28404,\n", + " 'source_id': 'nist_xps_db_v1',\n", + " 'source_name': 'nist_xps_db',\n", + " 'version': 1},\n", + " 'nist_xps_db': {'binding_energy_ev': '186.3',\n", + " 'energy_uncertainty_ev': '',\n", + " 'notes': '',\n", + " 'temperature_k': ''}}" ] }, - "execution_count": 6, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -169,16 +258,16 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "" + "" ] }, - "execution_count": 7, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -190,7 +279,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 10, "metadata": { "scrolled": true }, @@ -198,34 +287,29 @@ { "data": { "text/plain": [ - "[{'crystal_structure': {'cross_reference': {'icsd': 40332},\n", - " 'number_of_atoms': 10,\n", - " 'space_group_number': 12,\n", - " 'volume': 134.855},\n", - " 'dft': {'converged': True,\n", - " 'cutoff_energy': 520.0,\n", - " 'exchange_correlation_functional': 'PBE'},\n", - " 'files': [{'data_type': 'ASCII text, with very long lines, with no line terminators',\n", - " 'filename': '16290.json',\n", - " 'globus': 
'globus://e38ee745-6d04-11e5-ba46-22000b92c6ec/MDF/mdf_connect/prod/data/oqmd_v3/metadata-files/16290.json',\n", - " 'length': 11470,\n", + "[{'files': [{'data_type': 'ASCII text, with very long lines, with no line terminators',\n", + " 'filename': 'nist_xps_27079.json',\n", + " 'globus': 'globus://e38ee745-6d04-11e5-ba46-22000b92c6ec/MDF/mdf_connect/prod/data/nist_xps_db_v1/nist_xps_27079.json',\n", + " 'length': 1083,\n", " 'mime_type': 'text/plain',\n", - " 'sha512': '953f706130e358a03be6bfcecdbeca503cfc6f5592756a06294cc691e42282e1c5cbb1553ce55f6ac5ce4ec758d00f51dac3bb8389e71d1a551379785dd3fc6c',\n", - " 'url': 'https://e38ee745-6d04-11e5-ba46-22000b92c6ec.e.globus.org/MDF/mdf_connect/prod/data/oqmd_v3/metadata-files/16290.json'}],\n", - " 'material': {'composition': 'Al1Cu1', 'elements': ['Cu', 'Al']},\n", - " 'mdf': {'ingest_date': '2018-04-11T18:00:55.808133Z',\n", - " 'mdf_id': '5ace50a534a2265849f7dc53',\n", - " 'parent_id': '5ace4d5734a2265849f44fba',\n", + " 'sha512': '2ffed70fbcc42c97119ced5905a51b651a86186586da5bc0b434f5904552d8482bfbdffead338aeab3e9db59bd3bcfbeb5a9e4e8d745736601c5d7beab84e0fe',\n", + " 'url': 'https://e38ee745-6d04-11e5-ba46-22000b92c6ec.e.globus.org/MDF/mdf_connect/prod/data/nist_xps_db_v1/nist_xps_27079.json'}],\n", + " 'material': {'composition': 'ZnI2', 'elements': ['I', 'Zn']},\n", + " 'mdf': {'ingest_date': '2018-11-06T16:57:59.847843Z',\n", + " 'mdf_id': '5be1c8512ef3883312755ed4',\n", + " 'parent_id': '5be1c8172ef388331274efdf',\n", " 'resource_type': 'record',\n", - " 'scroll_id': 232601,\n", - " 'source_name': 'oqmd_v3'},\n", - " 'oqmd_v3': {'band_gap': {'units': 'eV', 'value': 0.0},\n", - " 'magnetic_moment': {'units': 'bohr/atom'},\n", - " 'total_energy': {'units': 'eV/atom', 'value': -3.933290274},\n", - " 'volume_pa': {'units': 'angstrom^3/atom', 'value': 13.4855}}}]" + " 'scroll_id': 28405,\n", + " 'source_id': 'nist_xps_db_v1',\n", + " 'source_name': 'nist_xps_db',\n", + " 'version': 1},\n", + " 'nist_xps_db': 
{'binding_energy_ev': '1022.9',\n", + " 'energy_uncertainty_ev': '',\n", + " 'notes': '',\n", + " 'temperature_k': ''}}]" ] }, - "execution_count": 8, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -246,7 +330,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 11, "metadata": { "scrolled": true }, @@ -254,10 +338,10 @@ { "data": { "text/plain": [ - "" + "" ] }, - "execution_count": 9, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -268,7 +352,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 12, "metadata": { "scrolled": true }, @@ -276,12 +360,9 @@ { "data": { "text/plain": [ - "[{'dc': {'contributors': [{'affiliations': ['Northwestern University'],\n", - " 'contributorName': 'Wolverton, Chris',\n", - " 'contributorType': 'ContactPerson',\n", - " 'familyName': 'Wolverton',\n", - " 'givenName': 'Chris'}],\n", - " 'creators': [{'affiliations': ['Northwestern University'],\n", + "[{'data': {'endpoint_path': 'globus://e38ee745-6d04-11e5-ba46-22000b92c6ec/MDF/mdf_connect/prod/data/oqmd_v13/',\n", + " 'link': 'https://www.globus.org/app/transfer?origin_id=e38ee745-6d04-11e5-ba46-22000b92c6ec&origin_path=/MDF/mdf_connect/prod/data/oqmd_v13/'},\n", + " 'dc': {'creators': [{'affiliations': ['Northwestern University'],\n", " 'creatorName': 'Wolverton, Chris',\n", " 'familyName': 'Wolverton',\n", " 'givenName': 'Chris'},\n", @@ -297,29 +378,37 @@ " 'creatorName': 'Ward, Logan',\n", " 'familyName': 'Ward',\n", " 'givenName': 'Logan'}],\n", - " 'dates': [{'date': '2017-08-04T14:18:51.560728Z', 'dateType': 'Collected'}],\n", " 'descriptions': [{'description': 'The OQMD is a database of DFT-calculated thermodynamic and structural properties.',\n", " 'descriptionType': 'Other'}],\n", " 'publicationYear': '2013',\n", - " 'publisher': 'MDF (placeholder)',\n", + " 'publisher': 'Materials Data Facility',\n", " 'relatedIdentifiers': [{'relatedIdentifier': 
'http://dx.doi.org/10.1007/s11837-013-0755-4',\n", " 'relatedIdentifierType': 'DOI',\n", " 'relationType': 'IsPartOf'},\n", " {'relatedIdentifier': 'http://dx.doi.org/10.1038/npjcompumats.2015.10',\n", " 'relatedIdentifierType': 'DOI',\n", " 'relationType': 'IsPartOf'}],\n", - " 'resourceType': {'resourceType': 'JSON', 'resourceTypeGeneral': 'Dataset'},\n", - " 'subjects': [{'subject': 'dft'}],\n", + " 'resourceType': {'resourceType': 'Dataset',\n", + " 'resourceTypeGeneral': 'Dataset'},\n", " 'titles': [{'title': 'The Open Quantum Materials Database'}]},\n", - " 'mdf': {'ingest_date': '2018-04-11T18:00:55.808133Z',\n", - " 'mdf_id': '5ace4d5734a2265849f44fba',\n", + " 'mdf': {'ingest_date': '2018-11-09T19:44:43.687681Z',\n", + " 'mdf_id': '5be5e3ab2ef388650efd6704',\n", " 'resource_type': 'dataset',\n", " 'scroll_id': 0,\n", - " 'source_name': 'oqmd_v3',\n", - " 'version': 8}}]" + " 'source_id': 'oqmd_v13',\n", + " 'source_name': 'oqmd',\n", + " 'version': 13},\n", + " 'oqmd': {'__custom.band_gap_desc': 'Band gap energy (eV)',\n", + " '__custom.configuration_desc': 'OQMD configuration profile used in calculation',\n", + " '__custom.delta_e_desc': 'Formation enthalpy (eV/atom)',\n", + " '__custom.magnetic_moment_desc': 'Magnetic moment (Bohr/atom)',\n", + " '__custom.stability_desc': 'Distance from formation enthalpy convex hull. 
Unstable compounds have positive values (eV/atom)',\n", + " '__custom.total_energy_desc': 'Total enregy (eV/atom)',\n", + " '__custom.volume_pa_desc': 'Volume per atom (Angstom^3/atom)'},\n", + " 'services': {'mdf_search': 'This dataset was ingested to MDF Search.'}}]" ] }, - "execution_count": 10, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -338,16 +427,16 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "" + "" ] }, - "execution_count": 11, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } @@ -358,7 +447,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 14, "metadata": { "scrolled": true }, @@ -366,44 +455,55 @@ { "data": { "text/plain": [ - "{'dc': {'contributors': [{'affiliations': ['Imperial College London'],\n", - " 'contributorName': 'Corsini, Niccolo',\n", - " 'contributorType': 'ContactPerson',\n", - " 'familyName': 'Corsini',\n", - " 'givenName': 'Niccolo'}],\n", - " 'creators': [{'affiliations': ['Imperial College London'],\n", - " 'creatorName': 'Corsini, Niccolo',\n", - " 'familyName': 'Corsini',\n", - " 'givenName': 'Niccolo'}],\n", - " 'dates': [{'date': '2017-08-09T19:44:38.397091Z', 'dateType': 'Collected'}],\n", - " 'descriptions': [{'description': 'Over the last two decades, it has been demonstrated that size effects have significant consequences for the atomic arrangements and phase behavior of matter under extreme pressure. Furthermore, it has been shown that an understanding of how size affects critical pressure–temperature conditions provides vital guidance in the search for materials with novel properties. Here, we report on the remarkable behavior of small (under ∼5 nm) matrix-free Ge nanoparticles under hydrostatic compression that is drastically different from both larger nanoparticles and bulk Ge. 
We discover that the application of pressure drives surface-induced amorphization leading to Ge–Ge bond overcompression and eventually to a polyamorphic semiconductor-to-metal transformation.',\n", - " 'descriptionType': 'Other'}],\n", + "{'data': {'endpoint_path': 'globus://e38ee745-6d04-11e5-ba46-22000b92c6ec/MDF/mdf_connect/prod/data/mdr_item_571_v1/',\n", + " 'link': 'https://www.globus.org/app/transfer?origin_id=e38ee745-6d04-11e5-ba46-22000b92c6ec&origin_path=/MDF/mdf_connect/prod/data/mdr_item_571_v1/'},\n", + " 'dc': {'alternateIdentifiers': [{'alternateIdentifier': 'http://hdl.handle.net/11256/272',\n", + " 'alternateIdentifierType': 'Handle'},\n", + " {'alternateIdentifier': '571',\n", + " 'alternateIdentifierType': 'NIST DSpace ID'}],\n", + " 'creators': [{'affiliations': ['University of Maryland'],\n", + " 'creatorName': 'Joost, William J.',\n", + " 'familyName': 'Joost',\n", + " 'givenName': 'William J.'},\n", + " {'affiliations': ['University of Maryland'],\n", + " 'creatorName': 'Ankem, Sreeramamurthy',\n", + " 'familyName': 'Ankem',\n", + " 'givenName': 'Sreeramamurthy'},\n", + " {'affiliations': ['University of Maryland'],\n", + " 'creatorName': 'Kuklja, Maija M.',\n", + " 'familyName': 'Kuklja',\n", + " 'givenName': 'Maija M.'}],\n", " 'publicationYear': '2015',\n", - " 'publisher': 'MDF (placeholder)',\n", - " 'relatedIdentifiers': [{'relatedIdentifier': 'http://pubs.acs.org/doi/abs/10.1021/acs.nanolett.5b02627',\n", - " 'relatedIdentifierType': 'DOI',\n", - " 'relationType': 'IsPartOf'}],\n", - " 'resourceType': {'resourceType': 'JSON', 'resourceTypeGeneral': 'Dataset'},\n", - " 'rightsList': [{'rights': 'https://creativecommons.org/publicdomain/zero/1.0/',\n", - " 'rightsURI': 'https://creativecommons.org/publicdomain/zero/1.0/'}],\n", - " 'subjects': [{'subject': 'amorphization'},\n", - " {'subject': 'density functional theory calculations'},\n", - " {'subject': 'Ge nanoparticles'},\n", - " {'subject': 'high pressure'},\n", - " {'subject': 
'phase transformation'},\n", - " {'subject': 'Raman'},\n", - " {'subject': 'X-ray absorption'},\n", - " {'subject': 'zip'}],\n", - " 'titles': [{'title': 'Pressure-induced amorphisation and a new high density amorphous metallic phase in matrix-free Ge nanoparticles: simulation data'}]},\n", - " 'mdf': {'ingest_date': '2018-03-27T20:11:39.629371Z',\n", - " 'mdf_id': '5abaa57b34a2263dfa3d08ae',\n", + " 'publisher': 'NIST Materials Data Repository',\n", + " 'resourceType': {'resourceType': 'Dataset',\n", + " 'resourceTypeGeneral': 'Dataset'},\n", + " 'rightsList': [{'rights': 'Attribution 3.0 United States',\n", + " 'rightsURI': 'http://creativecommons.org/licenses/by/3.0/us/'}],\n", + " 'subjects': [{'subject': 'Titanium'},\n", + " {'subject': ' Oxygen'},\n", + " {'subject': ' Diffusion'},\n", + " {'subject': ' Twin'}],\n", + " 'titles': [{'title': 'Interaction Between Oxygen Interstitials and Deformation Twins in alpha-Titanium'}]},\n", + " 'mdf': {'ingest_date': '2018-11-15T18:46:11.094146Z',\n", + " 'mdf_id': '5bedbef32ef388392f953c09',\n", + " 'repositories': ['National Institute of Standards and Technology',\n", + " 'U.S. 
Department of Commerce',\n", + " 'DOC',\n", + " 'MDR',\n", + " 'NIST',\n", + " 'NIST Materials Data Repository',\n", + " 'NIST MDR'],\n", " 'resource_type': 'dataset',\n", " 'scroll_id': 0,\n", - " 'source_name': 'ge_nanoparticles_v1',\n", - " 'version': 1}}" + " 'source_id': 'mdr_item_571_v1',\n", + " 'source_name': 'mdr_item_571',\n", + " 'version': 1},\n", + " 'services': {'citrine': 'https://citrination.com/datasets/168832/',\n", + " 'mdf_search': 'This dataset was ingested to MDF Search.',\n", + " 'mrr': 'This dataset was registered with the MRR.'}}" ] }, - "execution_count": 12, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } @@ -422,16 +522,16 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "" + "" ] }, - "execution_count": 13, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } @@ -442,52 +542,104 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 16, "metadata": { - "scrolled": true + "scrolled": false }, "outputs": [ { "data": { "text/plain": [ - "{'dc': {'contributors': [{'affiliations': ['Imperial College London'],\n", - " 'contributorName': 'Corsini, Niccolo',\n", + "{'data': {'endpoint_path': 'globus://e38ee745-6d04-11e5-ba46-22000b92c6ec/MDF/mdf_connect/prod/data/khazana_vasp_v4/',\n", + " 'link': 'https://www.globus.org/app/transfer?origin_id=e38ee745-6d04-11e5-ba46-22000b92c6ec&origin_path=/MDF/mdf_connect/prod/data/khazana_vasp_v4/'},\n", + " 'dc': {'contributors': [{'affiliations': ['University of Connecticut'],\n", + " 'contributorName': 'Ramprasad, Rampi',\n", " 'contributorType': 'ContactPerson',\n", - " 'familyName': 'Corsini',\n", - " 'givenName': 'Niccolo'}],\n", - " 'creators': [{'affiliations': ['Imperial College London'],\n", - " 'creatorName': 'Corsini, Niccolo',\n", - " 'familyName': 'Corsini',\n", - " 'givenName': 'Niccolo'}],\n", - " 'dates': [{'date': 
'2017-08-09T19:44:38.397091Z', 'dateType': 'Collected'}],\n", - " 'descriptions': [{'description': 'Over the last two decades, it has been demonstrated that size effects have significant consequences for the atomic arrangements and phase behavior of matter under extreme pressure. Furthermore, it has been shown that an understanding of how size affects critical pressure–temperature conditions provides vital guidance in the search for materials with novel properties. Here, we report on the remarkable behavior of small (under ∼5 nm) matrix-free Ge nanoparticles under hydrostatic compression that is drastically different from both larger nanoparticles and bulk Ge. We discover that the application of pressure drives surface-induced amorphization leading to Ge–Ge bond overcompression and eventually to a polyamorphic semiconductor-to-metal transformation.',\n", + " 'familyName': 'Ramprasad',\n", + " 'givenName': 'Rampi'}],\n", + " 'creators': [{'affiliations': ['University of Connecticut'],\n", + " 'creatorName': 'Ramprasad, Rampi'}],\n", + " 'dates': [{'date': '2017-08-04T19:25:05.718973Z', 'dateType': 'Collected'}],\n", + " 'descriptions': [{'description': 'A computational materials knowledgebase',\n", " 'descriptionType': 'Other'}],\n", - " 'publicationYear': '2015',\n", + " 'publicationYear': '2016',\n", " 'publisher': 'MDF (placeholder)',\n", - " 'relatedIdentifiers': [{'relatedIdentifier': 'http://pubs.acs.org/doi/abs/10.1021/acs.nanolett.5b02627',\n", - " 'relatedIdentifierType': 'DOI',\n", - " 'relationType': 'IsPartOf'}],\n", " 'resourceType': {'resourceType': 'JSON', 'resourceTypeGeneral': 'Dataset'},\n", - " 'rightsList': [{'rights': 'https://creativecommons.org/publicdomain/zero/1.0/',\n", - " 'rightsURI': 'https://creativecommons.org/publicdomain/zero/1.0/'}],\n", - " 'subjects': [{'subject': 'amorphization'},\n", - " {'subject': 'density functional theory calculations'},\n", - " {'subject': 'Ge nanoparticles'},\n", - " {'subject': 'high pressure'},\n", - " 
{'subject': 'phase transformation'},\n", - " {'subject': 'Raman'},\n", - " {'subject': 'X-ray absorption'},\n", - " {'subject': 'zip'}],\n", - " 'titles': [{'title': 'Pressure-induced amorphisation and a new high density amorphous metallic phase in matrix-free Ge nanoparticles: simulation data'}]},\n", - " 'mdf': {'ingest_date': '2018-03-27T20:11:39.629371Z',\n", - " 'mdf_id': '5abaa57b34a2263dfa3d08ae',\n", + " 'subjects': [{'subject': 'DFT'}, {'subject': 'VASP'}],\n", + " 'titles': [{'title': 'Khazana (VASP)'}]},\n", + " 'mdf': {'ingest_date': '2018-11-05T21:42:40.557765Z',\n", + " 'mdf_id': '5be0b9502ef388136874efdf',\n", " 'resource_type': 'dataset',\n", " 'scroll_id': 0,\n", - " 'source_name': 'ge_nanoparticles_v1',\n", + " 'source_id': 'khazana_vasp_v4',\n", + " 'source_name': 'khazana_vasp',\n", + " 'version': 4},\n", + " 'services': {'mdf_search': 'This dataset was ingested to MDF Search.'}}" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "res = mdf.search(limit=10)\n", + "res[0]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### match_repositories\n", + "`match_repositories()` matches values against the `\"mdf.repositories\"` field. It is equivalent to chaining `match_field(\"mdf.repositories\", value)` for each value." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "mdf.match_repositories([\"NIST\", \"DOE\"], match_all=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'files': [{'data_type': 'TIFF image data, big-endian, direntries=13, height=0, bps=8, PhotometricIntepretation=BlackIsZero, description=ImageJ=1.50i, width=0',\n", + " 'filename': 'eds mapping 2 - pure ti, 250 nm sio2, 2h, 800c - ti map.tif',\n", + " 'globus': 'globus://e38ee745-6d04-11e5-ba46-22000b92c6ec/MDF/mdf_connect/prod/data/si_ti_oxidation_v1/Early_oxidation_behavior_of_Si-coated_titanium/Dataset/Titanium Oxidation/eds mapping 2 - pure ti, 250 nm sio2, 2h, 800c - ti map.tif',\n", + " 'length': 213607,\n", + " 'mime_type': 'image/tiff',\n", + " 'sha512': '8460d1b7f8543bd909245d0e3456e5e58207245e619172e8b0631f7d7da7a00e6af94d0f9fcaf47be8e09984ef96826e098172c2ac968fb25c237abd223f62f1',\n", + " 'url': 'https://e38ee745-6d04-11e5-ba46-22000b92c6ec.e.globus.org/MDF/mdf_connect/prod/data/si_ti_oxidation_v1/Early_oxidation_behavior_of_Si-coated_titanium/Dataset/Titanium Oxidation/eds mapping 2 - pure ti, 250 nm sio2, 2h, 800c - ti map.tif'}],\n", + " 'image': {'height': 393, 'megapixels': 0.213399, 'width': 543},\n", + " 'mdf': {'ingest_date': '2018-10-30T21:44:27.860096Z',\n", + " 'mdf_id': '5bd8d0bb2ef38836a62135d2',\n", + " 'parent_id': '5bd8d0bb2ef38836a62135d1',\n", + " 'repositories': ['MCPub',\n", + " 'DOE',\n", + " 'Department of Energy Software Innovation Center for Integrated Multi-Scale Modeling of Structural Metals',\n", + " 'U.S. 
Department of Energy',\n", + " 'Materials Commons',\n", + " 'Center for Predictive Integrated Structural Materials Science',\n", + " 'PRISMS'],\n", + " 'resource_type': 'record',\n", + " 'scroll_id': 1,\n", + " 'source_id': 'si_ti_oxidation_v1',\n", + " 'source_name': 'si_ti_oxidation',\n", " 'version': 1}}" ] }, - "execution_count": 14, + "execution_count": 18, "metadata": {}, "output_type": "execute_result" } @@ -514,7 +666,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 19, "metadata": { "scrolled": true }, @@ -522,37 +674,39 @@ { "data": { "text/plain": [ - "{'crystal_structure': {'cross_reference': {'icsd': 42517},\n", - " 'number_of_atoms': 6,\n", - " 'space_group_number': 140,\n", - " 'volume': 88.5788},\n", + "{'crystal_structure': {'cross_reference': {'icsd': 150823},\n", + " 'number_of_atoms': 4,\n", + " 'space_group_number': 225,\n", + " 'volume': 49.3454},\n", " 'dft': {'converged': True,\n", " 'cutoff_energy': 520.0,\n", " 'exchange_correlation_functional': 'PBE'},\n", " 'files': [{'data_type': 'ASCII text, with very long lines, with no line terminators',\n", - " 'filename': '5724.json',\n", - " 'globus': 'globus://e38ee745-6d04-11e5-ba46-22000b92c6ec/MDF/mdf_connect/prod/data/oqmd_v3/metadata-files/5724.json',\n", - " 'length': 11547,\n", + " 'filename': '1815.json',\n", + " 'globus': 'globus://e38ee745-6d04-11e5-ba46-22000b92c6ec/MDF/mdf_connect/prod/data/oqmd_v13/1815.json',\n", + " 'length': 11693,\n", " 'mime_type': 'text/plain',\n", - " 'sha512': 'f3e8743e64697ab5c6021b815ca4f780940f7ea4b50e2e31278216c3bd8bec16677d436f3b0e46e1e1cf9ea4a415899ed65dba1b4d4c7420d6d2ff4eca125990',\n", - " 'url': 'https://e38ee745-6d04-11e5-ba46-22000b92c6ec.e.globus.org/MDF/mdf_connect/prod/data/oqmd_v3/metadata-files/5724.json'}],\n", - " 'material': {'composition': 'Al2Cu1', 'elements': ['Cu', 'Al']},\n", - " 'mdf': {'ingest_date': '2018-04-11T18:00:55.808133Z',\n", - " 'mdf_id': '5ace4f2334a2265849f63ff7',\n", - " 'parent_id': 
'5ace4d5734a2265849f44fba',\n", + " 'sha512': '3f26300e0c9d4ce4a53ac5169b3cb8720927263f34d3654e3134fe52e3c0069c41c2db2d38f26cffd28d48073e266914d9a1fd517c344e022b11d4dfe94876a8',\n", + " 'url': 'https://e38ee745-6d04-11e5-ba46-22000b92c6ec.e.globus.org/MDF/mdf_connect/prod/data/oqmd_v13/1815.json'}],\n", + " 'material': {'composition': 'Al1Cu3', 'elements': ['Al', 'Cu']},\n", + " 'mdf': {'ingest_date': '2018-11-09T19:44:43.687681Z',\n", + " 'mdf_id': '5be5e4122ef388650efdf50d',\n", + " 'parent_id': '5be5e3ab2ef388650efd6704',\n", " 'resource_type': 'record',\n", - " 'scroll_id': 127037,\n", - " 'source_name': 'oqmd_v3'},\n", - " 'oqmd_v3': {'band_gap': {'units': 'eV', 'value': 0.0},\n", + " 'scroll_id': 36361,\n", + " 'source_id': 'oqmd_v13',\n", + " 'source_name': 'oqmd',\n", + " 'version': 13},\n", + " 'oqmd': {'band_gap': {'units': 'eV', 'value': 0.0},\n", " 'configuration': 'static',\n", - " 'delta_e': {'units': 'eV/atom', 'value': -0.155698471666667},\n", + " 'delta_e': {'units': 'eV/atom', 'value': -0.1675233825},\n", " 'magnetic_moment': {'units': 'bohr/atom'},\n", - " 'stability': {'units': 'eV/atom', 'value': 0.019296661666666},\n", - " 'total_energy': {'units': 'eV/atom', 'value': -3.891511245},\n", - " 'volume_pa': {'units': 'angstrom^3/atom', 'value': 14.7631}}}" + " 'stability': {'units': 'eV/atom', 'value': 0.02138741875},\n", + " 'total_energy': {'units': 'eV/atom', 'value': -3.8909277975},\n", + " 'volume_pa': {'units': 'angstrom^3/atom', 'value': 12.3364}}}" ] }, - "execution_count": 15, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } @@ -572,7 +726,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 20, "metadata": { "scrolled": true }, @@ -580,7 +734,9 @@ { "data": { "text/plain": [ - "{'dc': {'contributors': [{'affiliations': ['University of Wisconsin-Madison'],\n", + "{'data': {'endpoint_path': 
'globus://e38ee745-6d04-11e5-ba46-22000b92c6ec/MDF/mdf_connect/prod/data/ab_initio_solute_database_v1-2/',\n", + " 'link': 'https://www.globus.org/app/transfer?origin_id=e38ee745-6d04-11e5-ba46-22000b92c6ec&origin_path=/MDF/mdf_connect/prod/data/ab_initio_solute_database_v1-2/'},\n", + " 'dc': {'contributors': [{'affiliations': ['University of Wisconsin-Madison'],\n", " 'contributorName': 'Morgan, Dane',\n", " 'contributorType': 'ContactPerson',\n", " 'familyName': 'Morgan',\n", @@ -612,15 +768,17 @@ " {'subject': 'diffusion'},\n", " {'subject': 'dataset'}],\n", " 'titles': [{'title': 'High-throughput Ab-initio Dilute Solute Diffusion Database'}]},\n", - " 'mdf': {'ingest_date': '2018-03-30T00:32:34.081287Z',\n", - " 'mdf_id': '5abd85a234a22620d109ae2b',\n", + " 'mdf': {'ingest_date': '2018-11-24T08:12:11.852893Z',\n", + " 'mdf_id': '5bf907db2ef3885ee1191ae0',\n", " 'resource_type': 'dataset',\n", " 'scroll_id': 0,\n", - " 'source_name': 'ab_initio_solute_database_v3',\n", - " 'version': 3}}" + " 'source_id': 'ab_initio_solute_database_v1-2',\n", + " 'source_name': 'ab_initio_solute_database',\n", + " 'version': 1},\n", + " 'services': {'mdf_search': 'This dataset was ingested to MDF Search.'}}" ] }, - "execution_count": 16, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" } @@ -640,7 +798,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 21, "metadata": { "scrolled": true }, @@ -648,26 +806,29 @@ { "data": { "text/plain": [ - "{'crystal_structure': {'number_of_atoms': 2,\n", - " 'space_group_number': 227,\n", - " 'volume': 42.060419436652815},\n", + "{'crystal_structure': {'number_of_atoms': 131.0,\n", + " 'space_group_number': 1,\n", + " 'stoichiometry': 'A60B71',\n", + " 'volume': 8000.0},\n", " 'files': [{'data_type': 'ASCII text',\n", - " 'filename': 'ge.castep',\n", - " 'globus': 
'globus://e38ee745-6d04-11e5-ba46-22000b92c6ec/MDF/mdf_connect/prod/data/ge_nanoparticles_v1/CASTEP_DFPT/Bulk_diamond/ge.castep',\n", - " 'length': 358076,\n", + " 'filename': 'ge.cell',\n", + " 'globus': 'globus://e38ee745-6d04-11e5-ba46-22000b92c6ec/MDF/mdf_connect/prod/data/ge_nanoparticles_v1/CASTEP_DFPT/Ge71H60/50GPa/ge.cell',\n", + " 'length': 5686,\n", " 'mime_type': 'text/plain',\n", - " 'sha512': 'cff4dfe39b1ebe9de2ad3bdf99be4eecfcd3308f8c365567e2c601d1db2f0972c9f9524c01128adca251a81498f69c3992c8105560cb23f72e3ccb818cbef16f',\n", - " 'url': 'https://e38ee745-6d04-11e5-ba46-22000b92c6ec.e.globus.org/MDF/mdf_connect/prod/data/ge_nanoparticles_v1/CASTEP_DFPT/Bulk_diamond/ge.castep'}],\n", - " 'material': {'composition': 'Ge2', 'elements': ['Ge']},\n", - " 'mdf': {'ingest_date': '2018-03-27T20:11:39.629371Z',\n", - " 'mdf_id': '5abaa57b34a2263dfa3d08b0',\n", - " 'parent_id': '5abaa57b34a2263dfa3d08ae',\n", + " 'sha512': '979c4f3b9cc84424d174f5f3faa5bc9c977f526b7f29c52bc99f2e5c2eeb80e98accdecd37b905b84061310f3fd810c93b756e4aaa80f3bd6c30bdf3389b5a3f',\n", + " 'url': 'https://e38ee745-6d04-11e5-ba46-22000b92c6ec.e.globus.org/MDF/mdf_connect/prod/data/ge_nanoparticles_v1/CASTEP_DFPT/Ge71H60/50GPa/ge.cell'}],\n", + " 'material': {'composition': 'Ge71H60', 'elements': ['Ge', 'H']},\n", + " 'mdf': {'ingest_date': '2018-11-06T17:25:23.949096Z',\n", + " 'mdf_id': '5be1ce832ef388345774efe1',\n", + " 'parent_id': '5be1ce832ef388345774efdf',\n", " 'resource_type': 'record',\n", " 'scroll_id': 2,\n", - " 'source_name': 'ge_nanoparticles_v1'}}" + " 'source_id': 'ge_nanoparticles_v1',\n", + " 'source_name': 'ge_nanoparticles',\n", + " 'version': 1}}" ] }, - "execution_count": 17, + "execution_count": 21, "metadata": {}, "output_type": "execute_result" } diff --git a/mdf_forge/forge.py b/mdf_forge/forge.py index 9b021fe..444ba94 100644 --- a/mdf_forge/forge.py +++ b/mdf_forge/forge.py @@ -277,6 +277,7 @@ def reset_query(self): """ del self.__query self.__query = 
Query(self.__search_client) + return # *********************************************** # * Expanded functions @@ -568,6 +569,33 @@ def match_resource_types(self, types): self.match_field(field="mdf.resource_type", value=rt, required=False, new_group=False) return self + def match_repositories(self, repositories, match_all=True): + """Match the given repositories. + Repositories are MDF-identified collections of datasets from a group + or organization. + + Args: + repositories (str or list of str): The repositories to match. + match_all (bool): If **True**, will add with AND. + If **False**, will use OR. + Default **True**. + + Returns: + self (Forge): For chaining. + """ + # If no repos, nothing to match + if not repositories: + return self + if isinstance(repositories, str): + repositories = [repositories] + # First repo should be in new group and required + self.match_field(field="mdf.repositories", value=repositories[0], + required=True, new_group=True) + # Other elements should stay in that group + for repo in repositories[1:]: + self.match_field(field="mdf.repositories", value=repo, required=match_all, + new_group=False) + # *********************************************** # * Premade searches # *********************************************** diff --git a/tests/test_forge.py b/tests/test_forge.py index 2587da1..4dfcf93 100644 --- a/tests/test_forge.py +++ b/tests/test_forge.py @@ -624,13 +624,30 @@ def test_forge_match_resource_types(): f.match_resource_types(["collection", "dataset"]) res2 = f.search() assert check_field(res2, "mdf.resource_type", "record") == -1 - # TODO: Re-enable this assert after we get collections in MDF -# assert check_field(res2, "mdf.resource_type", "dataset") == 2 # Test zero types assert f.match_resource_types("") == f +def test_forge_match_repositories(): + f = forge.Forge(index="mdf") + # One repo + f.match_repositories("DOE") + res1 = f.search() + assert res1 != [] + check_val1 = check_field(res1, "mdf.repositories", "DOE") + 
assert check_val1 == 1 + + # Multi-repo + f.match_repositories(["NIST", "DOE"], match_all=False) + res2 = f.search() + assert check_field(res2, "mdf.repositories", "DOE") == 2 + assert check_field(res2, "mdf.repositories", "NIST") == 2 + + # No repos + assert f.match_repositories("") == f + + def test_forge_search(capsys): # Error on no query f = forge.Forge(index="mdf") From 2e09f718b3ce32151c196304026f46d8f0072b5a Mon Sep 17 00:00:00 2001 From: jgaff Date: Tue, 18 Dec 2018 15:23:06 -0600 Subject: [PATCH 2/5] Refactor Forge Sphinx pages, rework Forge docstrings for RTD documentation Jupyter notebook display not yet functional --- .gitignore | 4 +- README.md | 4 +- ...tions.ipynb => Example_Aggregations.ipynb} | 0 ... => Example_Statistics-MDF_Datasets.ipynb} | 0 docs/sphinx/source/conf.py | 47 +- docs/sphinx/source/example_list.rst | 11 + docs/sphinx/source/examples | 1 + docs/sphinx/source/forge_quickstart.rst | 56 -- docs/sphinx/source/index.rst | 116 +-- docs/sphinx/source/installation_guide.rst | 44 - docs/sphinx/source/mdf_forge.rst | 17 +- docs/sphinx/source/modules.rst | 15 - docs/sphinx/source/requirements_link.rst | 8 - docs/sphinx/source/tutorial_list.rst | 17 + docs/sphinx/source/tutorials | 1 + docs/sphinx/sphinx_requirements.rst | 4 - ...ntroduction.ipynb => 1-Introduction.ipynb} | 0 ...b => 2-Core_Query_Builder_Functions.ipynb} | 0 ... 
3-Expanded_Query_Builder_Functions.ipynb} | 0 ...ipynb => 4-General_Helper_Functions.ipynb} | 0 ...> 5-Field-Specific_Helper_Functions.ipynb} | 0 ...ipynb => 6-Data_Retrieval_Functions.ipynb} | 0 mdf_forge/forge.py | 807 ++++++++++-------- test_requirements.txt | 3 + 24 files changed, 524 insertions(+), 631 deletions(-) rename docs/examples/{Example Aggregations.ipynb => Example_Aggregations.ipynb} (100%) rename docs/examples/{Example Statistics - MDF Datasets.ipynb => Example_Statistics-MDF_Datasets.ipynb} (100%) create mode 100644 docs/sphinx/source/example_list.rst create mode 120000 docs/sphinx/source/examples delete mode 100644 docs/sphinx/source/forge_quickstart.rst delete mode 100644 docs/sphinx/source/installation_guide.rst delete mode 100644 docs/sphinx/source/modules.rst delete mode 100644 docs/sphinx/source/requirements_link.rst create mode 100644 docs/sphinx/source/tutorial_list.rst create mode 120000 docs/sphinx/source/tutorials delete mode 100644 docs/sphinx/sphinx_requirements.rst rename docs/tutorials/{1 - Introduction.ipynb => 1-Introduction.ipynb} (100%) rename docs/tutorials/{2 - Core Query Builder Functions.ipynb => 2-Core_Query_Builder_Functions.ipynb} (100%) rename docs/tutorials/{3 - Expanded Query Builder Functions.ipynb => 3-Expanded_Query_Builder_Functions.ipynb} (100%) rename docs/tutorials/{4 - General Helper Functions.ipynb => 4-General_Helper_Functions.ipynb} (100%) rename docs/tutorials/{5 - Field-Specific Helper Functions.ipynb => 5-Field-Specific_Helper_Functions.ipynb} (100%) rename docs/tutorials/{6 - Data Retrieval Functions.ipynb => 6-Data_Retrieval_Functions.ipynb} (100%) diff --git a/.gitignore b/.gitignore index b4ee686..f6a2bb6 100644 --- a/.gitignore +++ b/.gitignore @@ -10,8 +10,8 @@ Untitled*.ipynb *temp/* -*/build/* -*/dist/* +**/build/ +**/dist/ *.egg* travis.tar diff --git a/README.md b/README.md index 95d8824..f911a95 100644 --- a/README.md +++ b/README.md @@ -17,8 +17,8 @@ pip install -e . 
``` # Documentation and examples -Forge documentation can be found on [Read the Docs](http://mdf-forge.readthedocs.io/en/master/). -Tutorials and examples can be found in the `docs` directory. The Jupyter notebooks can be viewed on GitHub or run interactively with ![Jupyter](http://jupyter.org/install). +Forge documentation can be found on [Read the Docs](https://mdf-forge.readthedocs.io/en/master/) and [GitHub](https://github.com/materials-data-facility/forge/tree/master/docs/). +Tutorials and examples can be found in the `docs` directory. The Jupyter notebooks can be viewed on GitHub or run interactively with [Jupyter](http://jupyter.org/install). # Requirements * Forge requires Python 3.5 or greater. diff --git a/docs/examples/Example Aggregations.ipynb b/docs/examples/Example_Aggregations.ipynb similarity index 100% rename from docs/examples/Example Aggregations.ipynb rename to docs/examples/Example_Aggregations.ipynb diff --git a/docs/examples/Example Statistics - MDF Datasets.ipynb b/docs/examples/Example_Statistics-MDF_Datasets.ipynb similarity index 100% rename from docs/examples/Example Statistics - MDF Datasets.ipynb rename to docs/examples/Example_Statistics-MDF_Datasets.ipynb diff --git a/docs/sphinx/source/conf.py b/docs/sphinx/source/conf.py index 4234bf5..e8054b9 100644 --- a/docs/sphinx/source/conf.py +++ b/docs/sphinx/source/conf.py @@ -12,23 +12,19 @@ # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. 
# -import os -import sys -sys.path.insert(0, os.path.abspath('..')) -sys.path.insert(0, os.path.abspath('../../..')) -sys.path.insert(0, os.path.abspath('../../../mdf_forge')) -sys.path.insert(0, os.path.abspath('../../../tests')) +import sphinx_bootstrap_theme + # -- Project information ----------------------------------------------------- project = 'MDF Forge' -copyright = 'Apache License, Version 2.0' -author = 'Jonathon Gaff' +copyright = '2018, The University of Chicago' +author = 'The University of Chicago' # The short X.Y version -version = '0.5' +version = '' # The full version, including alpha/beta/rc tags -release = '0.5.1' +release = '' # -- General configuration --------------------------------------------------- @@ -42,11 +38,15 @@ # ones. extensions = [ 'sphinx.ext.autodoc', - 'sphinx.ext.intersphinx', - 'sphinx.ext.ifconfig', + 'sphinx.ext.coverage', + 'sphinx.ext.viewcode', +# 'sphinx.ext.intersphinx', +# 'sphinx.ext.ifconfig', 'sphinx.ext.napoleon', 'sphinx.ext.mathjax', - 'IPython.sphinxext.ipython_console_highlighting', +# 'IPython.sphinxext.ipython_console_highlighting', + 'm2r', + 'nbsphinx' ] # Add any paths that contain templates here, relative to this directory. @@ -68,7 +68,7 @@ # '.md': CommonMarkParser, # } -source_suffix = ['.rst', '.md'] +source_suffix = ['.rst', '.md', 'ipynb'] # The master toctree document. master_doc = 'index' @@ -83,7 +83,7 @@ # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. # This pattern also affects html_static_path and html_extra_path . -exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store', '**.ipynb_checkpoints'] +exclude_patterns = ['_build', '**.ipynb_checkpoints'] # The name of the Pygments (syntax highlighting) style to use. 
pygments_style = 'sphinx' @@ -93,7 +93,7 @@ # You may only specify the root package of the dependencies themselves and ommit the sub-modules: # See also: # http://www.sphinx-doc.org/en/stable/ext/autodoc.html#confval-autodoc_mock_importshttps://github.com/sphinx-doc/sphinx/issues/4182 -autodoc_mock_imports = ['mdf_toolbox', 'pytest', 'globus_sdk.exc'] +# autodoc_mock_imports = ['mdf_toolbox', 'pytest', 'globus_sdk.exc'] # This value selects what content will be inserted into the main body of an autoclass directive. # The possible values are: @@ -102,7 +102,7 @@ # to autoclass. # “both”: Both the class ’ and the init method’s docstring are concatenated and inserted. # “init”: Only the init method’s docstring is inserted. -autoclass_content = 'both' +# autoclass_content = 'class' # -- Options for HTML output ------------------------------------------------- @@ -111,18 +111,19 @@ # List of themes: basic, alabaster, classic, sphinxdoc, scrolls, agoago, nature, # pyramid, haiku, traditional, epub, bizstyle # -html_theme = 'sphinxdoc' +html_theme = 'bootstrap' # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. # # html_theme_options = {} +html_theme_path = sphinx_bootstrap_theme.get_html_theme_path() # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['_static'] +html_static_path = [] # Custom sidebar templates, must be a dictionary that maps document names # to template names. @@ -164,7 +165,7 @@ # (source start file, target name, title, # author, documentclass [howto, manual, or own class]). 
latex_documents = [ - (master_doc, 'mdf-forge.tex', u'mdf-forge Documentation', + (master_doc, 'mdf-forge.tex', 'mdf-forge Documentation', 'Jonathon Gaff', 'manual'), ] @@ -174,7 +175,7 @@ # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). man_pages = [ - (master_doc, 'mdf-forge', u'mdf-forge Documentation', + (master_doc, 'mdf-forge', 'mdf-forge Documentation', [author], 1) ] @@ -187,7 +188,7 @@ # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ - (master_doc, 'mdf-forge', u'mdf-forge Documentation', + (master_doc, 'mdf-forge', 'mdf-forge Documentation', author, 'mdf-forge', 'One line description of project.', 'Miscellaneous'), ] @@ -208,7 +209,7 @@ # see http://www.sphinx-doc.org/en/stable/ext/napoleon.html napoleon_google_docstring = True napoleon_numpy_docstring = True -napoleon_include_init_with_doc = False +napoleon_include_init_with_doc = True napoleon_include_private_with_doc = False napoleon_include_special_with_doc = True napoleon_use_admonition_for_examples = False diff --git a/docs/sphinx/source/example_list.rst b/docs/sphinx/source/example_list.rst new file mode 100644 index 0000000..a26207c --- /dev/null +++ b/docs/sphinx/source/example_list.rst @@ -0,0 +1,11 @@ +Forge Examples +============== + +.. 
toctree:: + :maxdepth: 1 + :titlesonly: + + +* `Example Aggregations `_ +* `Example Statistics - MDF Datasets `_ + diff --git a/docs/sphinx/source/examples b/docs/sphinx/source/examples new file mode 120000 index 0000000..d15735c --- /dev/null +++ b/docs/sphinx/source/examples @@ -0,0 +1 @@ +../../examples \ No newline at end of file diff --git a/docs/sphinx/source/forge_quickstart.rst b/docs/sphinx/source/forge_quickstart.rst deleted file mode 100644 index 1e10095..0000000 --- a/docs/sphinx/source/forge_quickstart.rst +++ /dev/null @@ -1,56 +0,0 @@ -Forge Quickstart Guide -====================== - -Install Forge -------------- - -The first step is to install Forge. Detailed instructions are available -in the :doc:`installation_guide`. The “pip” method is recommended. - -Import Forge ------------- - -Once Forge is installed, it is available for your Python scripts or -Jupyter notebooks. - -Import and start Forge like this: - -.. code-block:: python - - from mdf_forge.forge import Forge - # You don't have to use the name "mdf" but we do for consistency. - mdf = Forge() - -Log in ------- - -The first time you use Forge, you will have to log in through Globus -Auth. - -:: - - It looks like this is the first time you're accessing this client. - Please log in to Globus at this link: - https://auth.globus.org/v2/oauth2/.../ - - Copy and paste the authorization code here: ____________________ - Thanks! - -Forge caches your token, so after the first time you log in, you will -only have to redo the process when the token expires or you delete it. - -Search ------- - -Now you can access data in MDF. The simplest way to fetch data is to use -``search()``. - -.. code-block:: python - - mdf.search("DFT") - - ----- - -:doc:`MDF Forge Home ` - diff --git a/docs/sphinx/source/index.rst b/docs/sphinx/source/index.rst index f946132..57866c8 100644 --- a/docs/sphinx/source/index.rst +++ b/docs/sphinx/source/index.rst @@ -1,110 +1,14 @@ -.. 
highlight:: rst +Index +===== -MDF Forge -========= +.. toctree:: + :maxdepth: 1 + :titlesonly: -Forge is the Materials Data Facility Python package to interface and -leverage the MDF Data Discovery service. Forge allows users to perform -simple queries and facilitiates moving and synthesizing results. - -.. toctree:: - :titlesonly: - - forge_quickstart - -Installation -============ - -.. code-block:: bash - - pip install mdf_forge - -.. toctree:: - :titlesonly: - - installation_guide.rst - -For Developers --------------- - -.. image:: https://img.shields.io/pypi/v/mdf_forge.svg - :target: https://pypi.python.org/pypi/mdf-forge -.. image:: https://travis-ci.org/materials-data-facility/forge.svg?branch=master - :target: https://travis-ci.org/materials-data-facility/forge -.. image:: https://coveralls.io/repos/github/materials-data-facility/forge/badge.svg?branch=master - :target: https://coveralls.io/github/materials-data-facility/forge?branch=master - -.. code-block:: bash - - git clone https://github.com/materials-data-facility/forge.git - cd forge - pip install -e . - -.. toctree:: - :maxdepth: 2 - - requirements_link - mdf_forge - -Documentation and examples -========================== - -Documentation, including tutorials and examples, can be found in the -``docs`` directory. - -Tutorials ---------- - -#. Introduction (`Jupyter Notebook `_) -#. Core Query Builder Functions (`Jupyter Notebook `_) -#. Expanded Query Builder Functions (`Jupyter Notebook `_) -#. General Helper Functions (`Jupyter Notebook `_) -#. Field-Specific Helper Functions (`Jupyter Notebook `_) -#. Data Retrieval Functions (`Jupyter Notebook `_) - -Examples --------- - -#. Example Aggregations (`Jupyter Notebook `_) -#. 
Example Statistics - MDF Datasets (`Jupyter Notebook `_) - -Requirements -============ - -- Forge requires Python 2.7 or >=3.3 -- To access data in the MDF, you must have an account recognized by - Globus Auth (including Google, ORCiD, many academic institutions, or - a `free Globus ID`_). - -Contributions -============= - -If you find a bug or want a feature, feel free to open an issue here on -GitHub (and please tag it accordingly). If you want to contribute code -yourself, we’re more than happy to accept merge requests. - -Support -======= - -This work was performed under financial assistance award 70NANB14H012 -from U.S. Department of Commerce, National Institute of Standards and -Technology as part of the `Center for Hierarchical Material Design -(CHiMaD)`_. This work was also supported by the National Science -Foundation as part of the `Midwest Big Data Hub`_ under NSF Award -Number: 1636950 “BD Spokes: SPOKE: MIDWEST: Collaborative: Integrative -Materials Design (IMaD): Leverage, Innovate, and Disseminate”. - -.. _free Globus ID: https://www.globusid.org/create -.. _Center for Hierarchical Material Design (CHiMaD): http://chimad.northwestern.edu -.. _Midwest Big Data Hub: http://midwestbigdatahub.org - ----- - -Indices and tables -================== - -* :ref:`genindex` -* :ref:`modindex` -* :ref:`search` + MDF Forge Client + Tutorials + Examples +* `Sitemap `_ +.. mdinclude:: ../../../README.md diff --git a/docs/sphinx/source/installation_guide.rst b/docs/sphinx/source/installation_guide.rst deleted file mode 100644 index 8ef7766..0000000 --- a/docs/sphinx/source/installation_guide.rst +++ /dev/null @@ -1,44 +0,0 @@ -Forge Installation -================== - -Via PyPI --------- - -You will need to have Python 3 and pip installed for this method. - -Install Forge with ``pip``: - -.. code-block:: bash - - pip3 install -u mdf_forge - -Via GitHub ----------- - -This method is only recommended if you want to have the docs and -examples locally. 
- -You will need to have git, Python 3, and pip to install Forge via -GitHub. - -Start by cloning the GitHub repository: - -.. code-block:: bash - - git clone https://github.com/materials-data-facility/forge.git - -Then, enter the ``forge`` directory: - -.. code:: bash - - cd forge - -Finally, use ``pip`` to install Forge: - -.. code-block:: bash - - pip3 install -e . - ----- - -:doc:`MDF Forge Home ` \ No newline at end of file diff --git a/docs/sphinx/source/mdf_forge.rst b/docs/sphinx/source/mdf_forge.rst index 1f1427d..e2c3669 100644 --- a/docs/sphinx/source/mdf_forge.rst +++ b/docs/sphinx/source/mdf_forge.rst @@ -1,11 +1,10 @@ -mdf_forge package -================= +MDF Forge Client +================ +.. autoclass:: mdf_forge.Forge + :members: -.. toctree:: - :titlesonly: +*Advanced*: ``Query`` Class +=============================== +.. autoclass:: mdf_forge.forge.Query + :members: - modules - ----- - -:doc:`MDF Forge Home ` \ No newline at end of file diff --git a/docs/sphinx/source/modules.rst b/docs/sphinx/source/modules.rst deleted file mode 100644 index 10ef4a5..0000000 --- a/docs/sphinx/source/modules.rst +++ /dev/null @@ -1,15 +0,0 @@ - -forge.class.Forge ------------------ -.. autoclass:: forge.Forge - :members: - -forge.class.Query ------------------ - -.. autoclass:: forge.Query - :members: - ----- - -:doc:`MDF Forge Home ` \ No newline at end of file diff --git a/docs/sphinx/source/requirements_link.rst b/docs/sphinx/source/requirements_link.rst deleted file mode 100644 index aa7b3f1..0000000 --- a/docs/sphinx/source/requirements_link.rst +++ /dev/null @@ -1,8 +0,0 @@ -Package Requirements -==================== - -.. 
include:: ../../../requirements.txt - ----- - -:doc:`MDF Forge Home ` \ No newline at end of file diff --git a/docs/sphinx/source/tutorial_list.rst b/docs/sphinx/source/tutorial_list.rst new file mode 100644 index 0000000..8da5192 --- /dev/null +++ b/docs/sphinx/source/tutorial_list.rst @@ -0,0 +1,17 @@ +Forge Tutorials +=============== + +.. toctree:: + :maxdepth: 2 + :titlesonly: + + ABC + + +* `Part 1 - Introduction `_ +* `Part 2 - Core Query Builder Functions `_ +* `Part 3 - Expanded Query Builder Functions `_ +* `Part 4 - General Helper Functions `_ +* `Part 5 - Field-Specific Helper Functions `_ +* `Part 6 - Data Retrieval Functions `_ + diff --git a/docs/sphinx/source/tutorials b/docs/sphinx/source/tutorials new file mode 120000 index 0000000..8b8f5e2 --- /dev/null +++ b/docs/sphinx/source/tutorials @@ -0,0 +1 @@ +../../tutorials \ No newline at end of file diff --git a/docs/sphinx/sphinx_requirements.rst b/docs/sphinx/sphinx_requirements.rst deleted file mode 100644 index 5b72db4..0000000 --- a/docs/sphinx/sphinx_requirements.rst +++ /dev/null @@ -1,4 +0,0 @@ -Sphinx build and requirements ------------------------------ - -* sphinx \ No newline at end of file diff --git a/docs/tutorials/1 - Introduction.ipynb b/docs/tutorials/1-Introduction.ipynb similarity index 100% rename from docs/tutorials/1 - Introduction.ipynb rename to docs/tutorials/1-Introduction.ipynb diff --git a/docs/tutorials/2 - Core Query Builder Functions.ipynb b/docs/tutorials/2-Core_Query_Builder_Functions.ipynb similarity index 100% rename from docs/tutorials/2 - Core Query Builder Functions.ipynb rename to docs/tutorials/2-Core_Query_Builder_Functions.ipynb diff --git a/docs/tutorials/3 - Expanded Query Builder Functions.ipynb b/docs/tutorials/3-Expanded_Query_Builder_Functions.ipynb similarity index 100% rename from docs/tutorials/3 - Expanded Query Builder Functions.ipynb rename to docs/tutorials/3-Expanded_Query_Builder_Functions.ipynb diff --git a/docs/tutorials/4 - General Helper 
Functions.ipynb b/docs/tutorials/4-General_Helper_Functions.ipynb similarity index 100% rename from docs/tutorials/4 - General Helper Functions.ipynb rename to docs/tutorials/4-General_Helper_Functions.ipynb diff --git a/docs/tutorials/5 - Field-Specific Helper Functions.ipynb b/docs/tutorials/5-Field-Specific_Helper_Functions.ipynb similarity index 100% rename from docs/tutorials/5 - Field-Specific Helper Functions.ipynb rename to docs/tutorials/5-Field-Specific_Helper_Functions.ipynb diff --git a/docs/tutorials/6 - Data Retrieval Functions.ipynb b/docs/tutorials/6-Data_Retrieval_Functions.ipynb similarity index 100% rename from docs/tutorials/6 - Data Retrieval Functions.ipynb rename to docs/tutorials/6-Data_Retrieval_Functions.ipynb diff --git a/mdf_forge/forge.py b/mdf_forge/forge.py index 444ba94..2230567 100644 --- a/mdf_forge/forge.py +++ b/mdf_forge/forge.py @@ -17,14 +17,10 @@ class Forge: - """Fetch metadata from Globus Search and files from the Materials Data Facility. + """Forge fetches metadata and files from the Materials Data Facility. Forge is intended to be the best way to access MDF data for all users. An internal Query object is used to make queries. From the user's perspective, an instantiation of Forge will black-box searching. - - **Public Variables**: - * **local_ep** is the endpoint ID of the local Globus Connect Personal endpoint. - * **index** is the Globus Search index to be used. """ __default_index = "mdf" __auth_services = ["data_mdf", "transfer", "search", "petrel"] @@ -35,36 +31,42 @@ class Forge: def __init__(self, index=__default_index, local_ep=None, anonymous=False, clear_old_tokens=False, **kwargs): - """**Initialize the Forge instance.** + """Create an MDF Forge Client. - Args: - index (str): The Globus Search index to search on. Default "mdf". + Arguments: + index (str): The Search index to search on. **Default:** ``"mdf"``. local_ep (str): The endpoint ID of the local Globus Connect Personal endpoint. 
- If not provided, may be autodetected as possible. - anonymous (bool): If **True**, will not authenticate with Globus Auth. - If **False**, will require authentication. - clear_old_tokens (bool): If **True**, will force reauthentication - If **False**, will use existing tokens if possible. - - Keyword Args: - **Advanced users only.** - services (list of str): The services to authenticate for. - An empty list will disable authenticating with Toolbox. - _Advanced users only._ - clients (dict): Clients or authorizers to use instead of the defaults. + If needed but not provided, the local endpoint will be autodetected + if possible. + anonymous (bool): If ``True``, will not authenticate with Globus Auth. + If ``False``, will require authentication. + **Default:** ``False``. + + Caution: + Authentication is required for some Forge functionality, + including viewing private datasets and using Globus Transfer. + + clear_old_tokens (bool): If ``True``, will force reauthentication. + If ``False``, will use existing tokens if possible. + Has no effect if ``anonymous`` is ``True``. + **Default:** ``False``. + + Keyword Arguments: + services (list of str): *Advanced users only.* The services to authenticate with, + using Toolbox. An empty list will disable authenticating with Toolbox. + clients (dict): *Advanced users only.* Clients or authorizers to use instead + of the defaults. + Overwritable clients: - search (globus_sdk.SearchClient) - transfer (globus_sdk.TransferClient) - data_mdf (Authorizer for MDF NCSA endpoint) - petrel (Authorizer for MDF Petrel endpoint) + + * ``search`` (*globus_sdk.SearchClient*) + * ``transfer`` (*globus_sdk.TransferClient*) + * ``data_mdf`` (*GlobusAuthorizer* for MDF NCSA endpoint) + * ``petrel`` (*GlobusAuthorizer* for MDF Petrel endpoint) + The clients/authorizers must be properly authenticated. Forge will still attempt to authenticate with Toolbox in accordance with the services keyword argument. 
- _Advanced users only._ - - Note: - Authentication is required for some Forge functionality, - including using Globus Transfer. """ self.__anonymous = anonymous self.index = index @@ -93,40 +95,29 @@ def __init__(self, index=__default_index, local_ep=None, anonymous=False, self.__query = Query(self.__search_client) - @property - def search_client(self): - return self.__search_client - - @property - def transfer_client(self): - return self.__transfer_client - - @property - def mdf_authorizer(self): - return self.__data_mdf_authorizer - # *********************************************** # * Core functions # *********************************************** def match_field(self, field, value, required=True, new_group=False): - """Add a field:value term to the query. - Matches will have field == value. + """Add a ``field:value`` term to the query. + Matches will have the ``value`` in the ``field``. - Args: + Arguments: field (str): The field to check for the value. The field must be namespaced according to Elasticsearch rules using the dot syntax. - Ex. "mdf.source_name" is the "source_name" field of the "mdf" dictionary. + For example, ``"mdf.source_name"`` is the ``source_name`` field + of the ``mdf`` dictionary. value (str): The value to match. - required (bool): If **True**, will add term with AND. If **False**, will use OR. - Default **True**. - new_group (bool): If **True**, will separate term into new parenthetical group. - If **False**, will not. - Default **False**. + required (bool): If ``True``, will add term with ``AND``. + If ``False``, will use ``OR``. **Default:** ``True``. + new_group (bool): If ``True``, will separate the term into a new parenthetical group. + If ``False``, will not. + **Default:** ``False``. Returns: - self (Forge): For chaining. 
+ Forge: Self """ # No-op on missing arguments if not field and not value: @@ -141,22 +132,22 @@ def match_field(self, field, value, required=True, new_group=False): return self def exclude_field(self, field, value, new_group=False): - """Exclude a field:value term from the query. - Matches will NOT have field == value. + """Exclude a ``field:value`` term from the query. + Matches will NOT have the ``value`` in the ``field``. - Args: + Arguments: field (str): The field to check for the value. The field must be namespaced according to Elasticsearch rules using the dot syntax. - - Ex. "mdf.source_name" is the "source_name" field of the "mdf" dictionary. + For example, ``"mdf.source_name"`` is the ``source_name`` field + of the ``mdf`` dictionary. value (str): The value to exclude. - new_group (bool): If **True**, will separate term into new parenthetical group. - If **False**, will not. - Default **False**. + new_group (bool): If ``True``, will separate the term into a new parenthetical group. + If ``False``, will not. + **Default:** ``False``. Returns: - self (Forge): For chaining. + Forge: Self """ # No-op on missing arguments if not field and not value: @@ -170,33 +161,33 @@ def exclude_field(self, field, value, new_group=False): def search(self, q=None, index=None, advanced=False, limit=SEARCH_LIMIT, info=False, reset_query=True): - """Execute a search and return the results. + """Execute a search and return the results, up to the ``SEARCH_LIMIT``. - Args: - q (str): The query to execute. Defaults to the current query, if any. + Arguments: + q (str): The query to execute. **Default:** The current helper-formed query, if any. There must be some query to execute. - index (str): The Globus Search index to search on. Defaults to the current index. - advanced (bool): If **True**, will submit query in "advanced" mode - to enable field matches. - If **False**, only basic fulltext term matches will be supported. - Default **False**.
- This value will change to **True** automatically - if the query is built with helpers. + index (str): The Search index to search on. **Default:** The current index. + advanced (bool): If ``True``, will submit query in "advanced" mode + to enable field matches and other advanced features. + If ``False``, only basic fulltext term matches will be supported. + **Default:** ``False`` if no helpers have been used to build the query, or + ``True`` if helpers have been used. limit (int): The maximum number of results to return. - The max for this argument is the SEARCH_LIMIT imposed by Globus Search. - info (bool): If **False**, search will return a list of the results. - If **True**, search will return a tuple containing the results list + The max for this argument is the ``SEARCH_LIMIT`` imposed by Globus Search. + **Default:** ``SEARCH_LIMIT``. + info (bool): If ``False``, search will return a list of the results. + If ``True``, search will return a tuple containing the results list and other information about the query. - Default **False**. - reset_query (bool): If **True**, will destroy the query after execution + **Default:** ``False``. + reset_query (bool): If ``True``, will destroy the current query after execution and start a fresh one. - If **False**, keeps the current query alive. - Default **True**. + If ``False``, will keep the current query set. + **Default:** ``True``. Returns: - list (if info=False): The results. - Returns: - tuple (if info=True): The results, and a dictionary of query information. + If ``info`` is ``False``, *list*: The search results. + If ``info`` is ``True``, *tuple*: The search results, + and a dictionary of query information. """ if not index: index = self.index @@ -206,25 +197,26 @@ def search(self, q=None, index=None, advanced=False, limit=SEARCH_LIMIT, info=Fa return res def aggregate(self, q=None, index=None, scroll_size=SEARCH_LIMIT, reset_query=True): - """Perform an advanced query, and return all matching results. 
- Will automatically preform multiple queries in order to retrieve all results. + """Perform an advanced query, and return *all* matching results. + Will automatically perform multiple queries in order to retrieve all results. - Args: - q (str): The query to execute. Defaults to the current query, if any. - There must be some query to execute. - index (str): The Globus Search index to search on. Defaults to the current index. - scroll_size (int): Minimum number of records returned per query - reset_query (bool): - If **True**, will destroy the query after execution and start a fresh one. - If **False**, will keep the current query alive. - Default **True**. + Note: + All ``aggregate`` queries run in advanced mode, and ``info`` is not available. + Arguments: + q (str): The query to execute. **Default:** The current helper-formed query, if any. + There must be some query to execute. + index (str): The Search index to search on. **Default:** The current index. + scroll_size (int): Maximum number of records returned per query. Must be + between one and the ``SEARCH_LIMIT`` (inclusive). + **Default:** ``SEARCH_LIMIT``. + reset_query (bool): If ``True``, will destroy the current query after execution + and start a fresh one. + If ``False``, will keep the current query set. + **Default:** ``True``. Returns: - list of dict: All matching records - - Note: - All aggregate queries run in advanced mode. + list of dict: All matching records. """ if not index: index = self.index @@ -236,18 +228,22 @@ def aggregate(self, q=None, index=None, scroll_size=SEARCH_LIMIT, reset_query=Tr def show_fields(self, block=None, index=None): """Retrieve and return the mapping for the given metadata block. - Args: - block (str): The top-level field to fetch the mapping for. - Default **None**, which lists just the blocks. - index (str): The Globus Search index to map. Defaults to the current index. 
+ Arguments: + block (str): The top-level field to fetch the mapping for (for example, ``"mdf"``), + or the special values ``"all"`` for everything or `None` for just the + top-level fields. + **Default:** ``None``. + index (str): The Search index to map. **Default:** The current index. Returns: - dict: A set of field:datatype pairs. + dict: ``field:datatype`` pairs. """ if not index: index = self.index mapping = self.__query.mapping(index=index) - if not block: + if block == "all": + return mapping + elif not block: blocks = set() for key in mapping.keys(): blocks.add(key.split(".")[0]) @@ -265,15 +261,16 @@ def current_query(self): """Return the current query string. Returns: - str: The current query string. + str: The current query. """ return self.__query.clean_query() def reset_query(self): """Destroy the current query and create a fresh one. + This method should not be chained. Returns: - None: Does not return self because this method should not be chained. + None """ del self.__query self.__query = Query(self.__search_client) @@ -283,108 +280,157 @@ def reset_query(self): # * Expanded functions # *********************************************** - def match_range(self, field, start="*", stop="*", inclusive=True, + def exists(self, field, required=True, new_group=False): + """Require a field to exist in the results. + Matches will have some value in ``field``. + + Arguments: + field (str): The field to check. + The field must be namespaced according to Elasticsearch rules + using the dot syntax. + For example, ``"mdf.source_name"`` is the ``source_name`` field + of the ``mdf`` dictionary. + required (bool): If ``True``, will add term with ``AND``. + If ``False``, will use ``OR``. **Default:** ``True``. + new_group (bool): If ``True``, will separate the term into a new parenthetical group. + If ``False``, will not. + **Default:** ``False``. 
+ + Returns: + Forge: Self + """ + return self.match_field(field, "*", required=required, new_group=new_group) + + def not_exists(self, field, required=True, new_group=False): + """Require a field to not exist in the results. + Matches will not have ``field`` present. + + Arguments: + field (str): The field to check. + The field must be namespaced according to Elasticsearch rules + using the dot syntax. + For example, ``"mdf.source_name"`` is the ``source_name`` field + of the ``mdf`` dictionary. + required (bool): If ``True``, will add term with ``AND``. + If ``False``, will use ``OR``. **Default:** ``True``. + new_group (bool): If ``True``, will separate the term into a new parenthetical group. + If ``False``, will not. + **Default:** ``False``. + + Returns: + Forge: Self + """ + return self.exclude_field(field, "*", required=required, new_group=new_group) + + def match_range(self, field, start=None, stop=None, inclusive=True, required=True, new_group=False): - """Add a field:[some range] term to the query. - Matches will have field == value in range. + """Add a ``field:[some range]`` term to the query. + Matches will have a ``value`` in the range in the ``field``. - Args: + Arguments: field (str): The field to check for the value. - The field must be namespaced according to Elasticsearch rules using - the dot syntax. - Ex. "mdf.source_name" is the "source_name" field of the "mdf" dictionary. - start (str or int): The starting value. "*" is acceptable to make no lower bound. - stop (str or int): The ending value. "*" is acceptable to have no upper bound. - inclusive (bool): If **True**, the start and stop values will be included + The field must be namespaced according to Elasticsearch rules + using the dot syntax. + For example, ``"mdf.source_name"`` is the ``source_name`` field + of the ``mdf`` dictionary. + start (str or int): The starting value, or ``None`` for no lower bound. + **Default:** ``None``. 
+ stop (str or int): The ending value, or ``None`` for no upper bound. + **Default:** ``None``. + inclusive (bool): If ``True``, the ``start`` and ``stop`` values will be included in the search. - If **False**, the start and stop values will not be included + If ``False``, the start and stop values will not be included in the search. - required (bool): If **True**, will add term with AND. If **False**, will use OR. - Default **True**. - new_group (bool): If **True**, will separate term into new parenthetical group. - If **False**, will not. - Default **False**. + **Default:** ``True``. + required (bool): If ``True``, will add term with ``AND``. + If ``False``, will use ``OR``. **Default:** ``True``. + new_group (bool): If ``True``, will separate the term into a new parenthetical group. + If ``False``, will not. + **Default:** ``False``. Returns: - self (Forge): For chaining. + Forge: Self """ # Accept None as * if start is None: start = "*" if stop is None: stop = "*" - # No-op on *-* + # *-* is the same as field exists if start == "*" and stop == "*": - return self + return self.exists(field, required=required, new_group=new_group) if inclusive: value = "[" + str(start) + " TO " + str(stop) + "]" else: value = "{" + str(start) + " TO " + str(stop) + "}" - self.match_field(field, value, required=required, new_group=new_group) - return self + return self.match_field(field, value, required=required, new_group=new_group) def exclude_range(self, field, start="*", stop="*", inclusive=True, required=True, new_group=False): - """Exclude a field:[some range] term to the query. - Matches will have field != values in range. + """Exclude a ``field:[some range]`` term from the query. + Matches will not have any ``value`` in the range in the ``field``. - Args: + Arguments: field (str): The field to check for the value. - The field must be namespaced according to Elasticsearch rules using - the dot syntax. - Ex. 
"mdf.source_name" is the "source_name" field of the "mdf" dictionary. - start (str or int): The starting value. "*" is acceptable to make no lower bound. - stop (str or int): The ending value. "*" is acceptable to have no upper bound. - inclusive (bool): If **True**, the start and stop values will not be included - in the search. - If **False**, the start and stop values will be included in the search. - required (bool): Default **True**. - new_group (bool): If **True**, will separate term into new parenthetical group. - If **False**, will not. - Default **False**. + The field must be namespaced according to Elasticsearch rules + using the dot syntax. + For example, ``"mdf.source_name"`` is the ``source_name`` field + of the ``mdf`` dictionary. + start (str or int): The starting value, or ``None`` for no lower bound. + **Default:** ``None``. + stop (str or int): The ending value, or ``None`` for no upper bound. + **Default:** ``None``. + inclusive (bool): If ``True``, the ``start`` and ``stop`` values will be excluded + from the search. + If ``False``, the ``start`` and ``stop`` values will not be excluded + from the search. + **Default:** ``True``. + required (bool): If ``True``, will add term with ``AND``. + If ``False``, will use ``OR``. **Default:** ``True``. + new_group (bool): If ``True``, will separate the term into a new parenthetical group. + If ``False``, will not. + **Default:** ``False``. Returns: - self (Forge): For chaining. 
+ Forge: Self """ # Accept None as * if start is None: start = "*" if stop is None: stop = "*" - # No-op on *-* + # *-* is the same as field doesn't exist if start == "*" and stop == "*": - return self + return self.not_exists(field, required=required, new_group=new_group) if inclusive: value = "[" + str(start) + " TO " + str(stop) + "]" else: value = "{" + str(start) + " TO " + str(stop) + "}" - self.exclude_field(field, value, new_group=new_group) - return self + return self.exclude_field(field, value, new_group=new_group) # *********************************************** - # * Helper functions + # * Specific functions # *********************************************** def exclusive_match(self, field, value): - """Match exactly the given value, with no other data in the field. + """Match exactly the given value(s), with no other data in the field. - Args: + Arguments: field (str): The field to check for the value. The field must be namespaced according to Elasticsearch rules using the dot syntax. - - Ex. "mdf.source_name" is the "source_name" field of the "mdf" - dictionary. - value (str or list of str): The value to match exactly. + For example, ``"mdf.source_name"`` is the ``source_name`` field + of the ``mdf`` dictionary. + value (str or list of str): The value(s) to match exactly. 
Returns: - self (Forge): For chaining + Forge: Self """ if isinstance(value, str): value = [value] - value.sort() + # Hacky way to get ES to do exclusive search # Essentially have a big range search that matches NOT anything # Except for the actual values @@ -392,6 +438,7 @@ def exclusive_match(self, field, value): # (NOT {* TO foo} AND [foo TO foo] AND NOT {foo to bar} AND [bar TO bar] # AND NOT {bar TO baz} AND [baz TO baz] AND NOT {baz TO *}) # Except it must be sorted to not overlap + value.sort() # Start with removing everything before first value self.exclude_range(field, "*", value[0], inclusive=False, new_group=True) @@ -409,11 +456,11 @@ def exclusive_match(self, field, value): def match_source_names(self, source_names): """Add sources to match to the query. - Args: - source_names (str or list of str): The source_names to match. + Arguments: + source_names (str or list of str): The ``source_name`` values to match. Returns: - self (Forge): For chaining. + Forge: Self """ # If no source_names are supplied, nothing to match if not source_names: @@ -429,13 +476,13 @@ def match_source_names(self, source_names): return self def match_ids(self, mdf_ids): - """Match all the IDs in the given mdf_id list. + """Match all the IDs in the given ``mdf_id`` list. - Args: + Arguments: mdf_ids (str or list of str): The IDs to match. Returns: - self (Forge): For chaining. + Forge: Self """ # If no IDs are supplied, nothing to match if not mdf_ids: @@ -452,14 +499,14 @@ def match_ids(self, mdf_ids): def match_elements(self, elements, match_all=True): """Add elemental abbreviations to the query. - Args: - elements (str or list of str): The elements to match. - match_all (bool): If **True**, will add with AND. - If **False**, will use OR. - Default **True**. + Arguments: + elements (str or list of str): The elements to match. For example, `"Fe"` for iron. + match_all (bool): If ``True``, will add with ``AND``. + If ``False``, will use ``OR``. + Default ``True``. 
Returns: - self (Forge): For chaining. + Forge: Self """ # If no elements are supplied, nothing to match if not elements: @@ -478,11 +525,11 @@ def match_elements(self, elements, match_all=True): def match_titles(self, titles): """Add titles to the query. - Args: + Arguments: titles (str or list of str): The titles to match. Returns: - self (Forge): For chaining. + Forge: Self """ if not titles: return self @@ -497,16 +544,16 @@ def match_titles(self, titles): def match_years(self, years=None, start=None, stop=None, inclusive=True): """Add years and limits to the query. - Args: - years (int or string, or list of int or strings): The years to match. + Arguments: + years (int or string, or list of int or strings): The years to match. Note that this argument overrides the start, stop, and inclusive arguments. - start (int or string): The lower range of years to match. - stop (int or string): The upper range of years to match. - inclusive (bool): If **True**, the start and stop values will be included in the search. - If **False**, they will be excluded. - Default **True**. + start (int or string): The lower range of years to match. + stop (int or string): The upper range of years to match. + inclusive (bool): If ``True``, the start and stop values will be included + in the search. If ``False``, they will be excluded. + **Default:** ``True``. Returns: - self (Forge): For chaining. + Forge: Self """ # If nothing supplied, nothing to match if years is None and start is None and stop is None: @@ -551,11 +598,11 @@ def match_years(self, years=None, start=None, stop=None, inclusive=True): def match_resource_types(self, types): """Match the given resource types. - Args: - types (str or list of str): The resource_types to match. + Arguments: + types (str or list of str): The ``resource_type`` values to match. Returns: - self (Forge): For chaining. 
+ Forge: Self """ # If no types, nothing to match if not types: @@ -574,14 +621,14 @@ def match_repositories(self, repositories, match_all=True): Repositories are MDF-identified collections of datasets from a group or organization. - Args: + Arguments: repositories (str or list of str): The repositories to match. - match_all (bool): If **True**, will add with AND. - If **False**, will use OR. - Default **True**. + match_all (bool): If ``True``, will add with ``AND``. + If ``False``, will use ``OR``. + **Default:** ``True``. Returns: - self (Forge): For chaining. + Forge: Self """ # If no repos, nothing to match if not repositories: @@ -603,31 +650,32 @@ def match_repositories(self, repositories, match_all=True): def search_by_elements(self, elements, source_names=[], index=None, limit=None, match_all=True, info=False): """Execute a search for the given elements in the given sources. - search_by_elements([x], [y]) is equivalent to - match_elements([x]).match_source_names([y]).search() - Note that this method does use terms from the current query. - - Args: - elements (list of str): The elements to match. Default **[]**. - source_names (list of str): The sources to match. Default **[]**. - index (str): The Globus Search index to search on. Defaults to the current index. + ``search_by_elements([x], [y])`` is equivalent to + ``match_elements([x]).match_source_names([y]).search()``. + + Note: + This method will use terms from the current query, and resets the current query. + + Arguments: + elements (list of str): The elements to match. For example, `"Fe"` for iron. + source_names (list of str): The ``source_name``s to match. + **Default:** ``[]``. + index (str): The Search index to search on. **Default:** The current index. limit (int): The maximum number of results to return. - The max for this argument is the SEARCH_LIMIT imposed by Globus Search. - match_all (bool): If **True**, will add elements with AND. - If **False**, will use OR. - Default **True**. 
- info (bool): If **False**, search will return a list of the results. - If **True**, search will return a tuple containing the results list, + The max for this argument is the ``SEARCH_LIMIT`` imposed by Globus Search. + **Default:** ``SEARCH_LIMIT``. + match_all (bool): If ``True``, will add elements with ``AND``. + If ``False``, will use ``OR``. + **Default:** ``True``. + info (bool): If ``False``, search will return a list of the results. + If ``True``, search will return a tuple containing the results list and other information about the query. - Default **False**. + **Default:** ``False``. Returns: - list (if info=False): The results. - Returns: - tuple (if info=True): The results, and a dictionary of query information. - - Note: - This method does use terms from the current query. + If ``info`` is ``False``, *list*: The search results. + If ``info`` is ``True``, *tuple*: The search results, + and a dictionary of query information. """ return (self.match_elements(elements, match_all=match_all) .match_source_names(source_names) @@ -635,36 +683,48 @@ def search_by_elements(self, elements, source_names=[], index=None, limit=None, def search_by_titles(self, titles, index=None, limit=None, info=False): """Execute a search for the given titles. - search_by_titles([x]) is equivalent to match_titles([x]).search() + ``search_by_titles([x])`` is equivalent to ``match_titles([x]).search()`` - Args: - titles (list of str): The titles to match. Default []. - index (str): The Globus Search index to search on. Defaults to the current index. + Note: + This method will use terms from the current query, and resets the current query. + + Arguments: + titles (list of str): The titles to match. + index (str): The Search index to search on. **Default:** The current index. limit (int): The maximum number of results to return. - The max for this argument is the SEARCH_LIMIT imposed by Globus Search. - info (bool): If **False**, search will return a list of the results. 
- If **True**, search will return a tuple containing the results list, + The max for this argument is the ``SEARCH_LIMIT`` imposed by Globus Search. + **Default:** ``SEARCH_LIMIT``. + info (bool): If ``False``, search will return a list of the results. + If ``True``, search will return a tuple containing the results list and other information about the query. - Default **False**. + **Default:** ``False``. Returns: - list (if info=False): The results. - Returns: - tuple (if info=True): The results, and a dictionary of query information. + If ``info`` is ``False``, *list*: The search results. + If ``info`` is ``True``, *tuple*: The search results, + and a dictionary of query information. """ return self.match_titles(titles).search(index=index, limit=limit, info=info) def aggregate_sources(self, source_names, index=None): - """Aggregate all records from a given source. + """Aggregate all records with the given ``source_name`` values. There is no limit to the number of results returned. Please beware of aggregating very large datasets. - Args: - source_names (str or list of str): The source to aggregate. - index (str): The Globus Search index to search on. Defaults to the current index. + Caution: + It is recommended that you check how many entries will be returned from your chosen + datasets by running ``match_source_names(source_names).search(limit=0, info=True)`` + before using ``aggregate_sources()``. + + Note: + This method will use terms from the current query, and resets the current query. + + Arguments: + source_names (str or list of str): The ``source_name`` values to aggregate. + index (str): The Search index to search on. **Default:** The current index. Returns: - list of dict: All of the records from the source. + list of dict: All of the entries from the ``source_name`` matches. 
""" return self.match_source_names(source_names).aggregate(index=index) @@ -672,23 +732,23 @@ def fetch_datasets_from_results(self, entries=None, query=None, reset_query=True """Retrieve the dataset entries for given records. Note that this method may use the current query. - Args: + Note: + This method will use terms from the current query, and resets the current query. + + Arguments: entries (dict, list of dict, or tuple of dict): The records to parse - to find the datasets. - entries can be a single entry, a list of entries, or a tuple with - a list of entries. - The latter two options support both return values - of the search() method. - If entries is **None**, the current query is executed and those - results are used instead. - query (str): If entries is **None**: - Search using this query instead of the current query. - Default **None**, which uses the current query. - reset_query (bool): If entries is **None** and query is **None**: - If **True**, will reset the current query after searching. - If **False**, will leave the current query in memory. - Default **True**. - Else: Does nothing. + to find the datasets. This argument can be a single entry, + a list of entries, or a tuple with a list of entries. + The latter two options support both return values of the ``search()`` method. + If entries is ``None``, the current query is executed and those + results are used instead. **Default:** ``None``. + query (str): If not ``None``, search for entries using this query + instead of the current query. Has no effect if ``entries`` is not ``None``. + **Default:** ``None``. + reset_query (bool): Has no effect unless ``entries`` and ``query`` are both ``None``. + If ``True``, will reset the current query after searching for entries. + If ``False``, will not reset the current query. + **Default:** ``True``. Returns: list: The dataset entries. @@ -719,10 +779,10 @@ def get_dataset_version(self, source_name): """Get the version of a certain dataset. 
Arguments: - source_name (string): Name of the dataset + source_name (string): The ``source_name`` of the dataset. Returns: - int: Version of the dataset in question + int: Version of the dataset in question. """ hits = self.search("mdf.source_name:{} AND" @@ -744,27 +804,29 @@ def get_dataset_version(self, source_name): def http_download(self, results, dest=".", preserve_dir=False, verbose=True): """Download data files from the provided results using HTTPS. - For more than HTTP_NUM_LIMIT (defined above) files, you should use globus_download(), + For a large number of files, you should use ``globus_download()`` instead, which uses Globus Transfer. - Args: + Arguments: results (dict): The records from which files should be fetched. This should be the return value of a search method. dest (str): The destination path for the data files on the local machine. - Default current directory. - preserve_dir (bool): If **True**, the directory structure for the data files will be + **Default:** The current directory. + preserve_dir (bool): If ``True``, the directory structure for the data files will be recreated at the destination. - If **False**, only the data files themselves will be saved. - Default **False**. - verbose (bool): If **True**, status and progress messages will be printed. - If **False**, only error messages will be printed. - Default **True**. + If ``False``, only the data files themselves will be saved. + **Default:** ``False``. + verbose (bool): If ``True``, status and progress messages will be printed. + If ``False``, only error messages will be printed. + **Default:** ``True``. Returns: - dict: success (bool): **True** if the operation succeeded. - **False** if it failed (implies message). - Returns: - message (str): The error message. Not present when success is **True**. + *dict*: The status information for the download: + + * **success** (*bool*): ``True`` if the download succeeded. ``False`` + if it failed. 
+ * **message** (*str*): The error message, if the download failed. + """ if self.__anonymous: print("Error: Anonymous HTTP download not yet supported.") @@ -866,36 +928,38 @@ def globus_download(self, results, dest=".", dest_ep=None, preserve_dir=False, """Download data files from the provided results using Globus Transfer. This method requires Globus Connect to be installed on the destination endpoint. - Args: + Arguments: results (dict): The records from which files should be fetched. This should be the return value of a search method. dest (str): The destination path for the data files on the local machine. - Default current directory. - dest_ep (str): The destination endpoint ID. - Default local GCP. - preserve_dir (bool): If **True**, the directory structure for the data files will be + **Default:** The current directory. + dest_ep (str): The destination endpoint ID. **Default:** The autodetected local GCP. + preserve_dir (bool): If ``True``, the directory structure for the data files will be recreated at the destination. The path to the new files - will be relative to the `dest` path - If **False**, only the data files themselves will be saved. - Default **False**. + will be relative to the ``dest`` path + If ``False``, only the data files themselves will be saved. + **Default:** ``False``. inactivity_time (int): Number of seconds the Transfer is allowed to go without progress before being cancelled. - Default **self.__inactivity_time**. - download_datasets (bool): If True, will download the full dataset for any dataset + **Default:** ``self.__inactivity_time``. + download_datasets (bool): If ``True``, will download the full dataset for any dataset entries given. - If False, will skip dataset entries with a notification. - Default False. - Caution: Datasets can be large. Additionally, if you do not - filter out records from a dataset you provide, you may end - up with duplicate files. Use with care. 
- verbose (bool): If **True**, status and progress messages will be printed, + If ``False``, will skip dataset entries with a notification. + **Default:** ``False``. + + Caution: + Datasets can be large. Additionally, if you do not + filter out records from a dataset you provide, you may end + up with duplicate files. Use with care. + + verbose (bool): If ``True``, status and progress messages will be printed, and errors will prompt for continuation confirmation. - If **False**, only error messages will be printed, + If ``False``, only error messages will be printed, and the Transfer will always continue. - Default **True**. + **Default:** ``True``. Returns: - list of str: task IDs of the Globus transfers + list of str: The task IDs of the Globus transfers. """ if self.__anonymous: print("Error: Anonymous Globus Transfer not supported.") @@ -1035,15 +1099,15 @@ def globus_download(self, results, dest=".", dest_ep=None, preserve_dir=False, def http_stream(self, results, verbose=True): """Yield data files from the provided results using HTTPS, through a generator. - For more than HTTP_NUM_LIMIT (defined above) files, you should use globus_download(), + For a large number of files, you should use ``globus_download()`` instead, which uses Globus Transfer. - Args: + Arguments: results (dict): The records from which files should be fetched. This should be the return value of a search method. - verbose (bool): If **True**, status and progress messages will be printed. - If **False**, only error messages will be printed. - Default **True**. + verbose (bool): If ``True``, status and progress messages will be printed. + If ``False``, only error messages will be printed. + **Default:** ``True``. Yields: str: Text of each data file. @@ -1104,28 +1168,32 @@ def http_stream(self, results, verbose=True): class Query: - """The Query class is meant for internal Forge use. 
Users should not instantiate - a Query object directly, as Forge already manages a Query, - but advanced users may do so at their own risk. - Using Query directly is an unsupported behavior - and may have unexpected results or unlisted changes in the future. - - Queries may end up wrapped in parentheses, which has no direct effect on the search. - Adding terms must be chained with .and() or .or(). - Terms will not have spaces in between otherwise, and it is desirable to be explicit about - which terms are required. + """ + Danger: + The ``Query`` class is meant for internal ``Forge`` use. General users should not + instantiate a ``Query`` object directly, but advanced users may do so at their own risk. + + Using a ``Query`` directly is an officially unsupported behavior + and may be subject to breaking, unlisted changes in the future. + + Notes: + Query strings may end up wrapped in parentheses, which has no direct effect on the search. + Adding terms must be chained with ``and()`` or ``or()``. + Terms will not have spaces in between otherwise, and it is desirable to be explicit about + which terms are required. """ def __init__(self, search_client, q=None, limit=None, advanced=False): - """**Initialize the Query instance**. + """Create a Query object. - Args: - search_client (SearchClient): The Globus Search client to use for searching. - q (str): The query string to start with. Default nothing. - limit (int): The maximum number of results to return. Default **None**. - advanced (bool): If **True**, will submit query in "advanced" mode to + Arguments: + search_client (globus_sdk.SearchClient): The Globus Search client to use for searching. + q (str): The query string to start with. **Default:** Not set. + limit (int): The maximum number of results to return. **Default:** Not set. + advanced (bool): If ``True``, will submit query in "advanced" mode to enable field matches. - If **False**, only basic fulltext term matches will be supported. - Default **False**. 
+ If ``False``, only basic fulltext term matches will be supported (unless + ``advanced`` is set after instantiation). + **Default:** ``False``. """ self.__search_client = search_client self.query = q or "(" @@ -1136,8 +1204,11 @@ def __init__(self, search_client, q=None, limit=None, advanced=False): self.initialized = not self.query == "(" def __clean_query_string(self, q): - """Clean up a query string. + """Clean up a query string for searching. This method does not access self, so that a search will not change state. + + Returns: + str: The clean query string. """ q = q.replace("()", "").strip() if q.endswith("("): @@ -1157,7 +1228,7 @@ def __clean_query_string(self, q): return q.strip() def clean_query(self): - """Returns the current query, cleaned for user consumption, + """Returns the current query, cleaned for user consumption. Returns: str: The clean current query. @@ -1167,27 +1238,29 @@ def clean_query(self): def term(self, term): """Add a term to the query. - Args: + Arguments: term (str): The term to add. Returns: - self (Query): For chaining. + Query: Self """ self.query += term self.initialized = True return self def field(self, field, value): - """Add a field:value term to the query. - Matches will have field == value. - This method sets advanced=True. + """Add a ``field:value`` term to the query. + Matches will have the ``value`` in the ``field``. - Args: - field (str): The field to look in for the value. + Note: + This method triggers advanced mode. + + Arguments: + field (str): The field to check for the value, in Elasticsearch dot syntax. value (str): The value to match. Returns: - self (Query): For chaining. + Query: Self """ # Cannot add field:value if one is blank if field and value: @@ -1198,20 +1271,22 @@ def field(self, field, value): return self def operator(self, op, close_group=False): - """Add operator between terms. + """Add an operator between terms. There must be a term added before using this method. 
+ All operators have helpers, so this method is usually not necessary to directly invoke. - Args: - op (str): The operator to add. Must be in the OP_LIST defined below. - close_group (bool): If **True**, will end the current parenthetical + Arguments: + op (str): The operator to add. Must be in the OP_LIST. + close_group (bool): If ``True``, will end the current parenthetical group and start a new one. - If **False**, will continue current group. + If ``False``, will continue current group. - Example: "(foo AND bar)" is one group. - "(foo) and (bar)" is two groups. + Example:: + "(foo AND bar)" is one group. + "(foo) AND (bar)" is two groups. Returns: - self (Query): For chaining. + Query: Self """ # List of allowed operators OP_LIST = ["AND", "OR", "NOT"] @@ -1230,18 +1305,18 @@ def and_join(self, close_group=False): """Combine terms with AND. There must be a term added before using this method. - Args: - close_group (bool): If **True**, will end the current group and start a new one. - If **False**, will continue current group. - - Example: If the current query is "(term1" + Arguments: + close_group (bool): If ``True``, will end the current group and start a new one. + If ``False``, will continue current group. - .and(close_group=True) => "(term1) AND (" + Example:: - .and(close_group=False) => "(term1 AND " + If the current query is "(term1" + .and(close_group=True) => "(term1) AND (" + .and(close_group=False) => "(term1 AND " Returns: - self (Query): For chaining. + Query: Self """ if not self.initialized: print("Error: You must add a term before adding an operator.", @@ -1254,18 +1329,18 @@ def or_join(self, close_group=False): """Combine terms with OR. There must be a term added before using this method. - Args: - close_group (bool): If **True**, will end the current group and start a new one. - If **False**, will continue current group. 
- - Example: If the current query is "(term1" - - .or(close_group=True) => "(term1) OR(" + Arguments: + close_group (bool): If ``True``, will end the current group and start a new one. + If ``False``, will continue current group. - .or(close_group=False) => "(term1 OR " + Example: + + If the current query is "(term1" + .or(close_group=True) => "(term1) OR(" + .or(close_group=False) => "(term1 OR " Returns: - self (Query): For chaining. + Query: Self """ if not self.initialized: print("Error: You must add a term before adding an operator.", @@ -1275,38 +1350,41 @@ def or_join(self, close_group=False): return self def negate(self): - """Negates the next term with NOT.""" + """Negates the next added term with NOT. + + Returns: + Query: Self + """ self.operator("NOT") return self def search(self, q=None, index=None, advanced=None, limit=None, info=False, retries=3): - """Execute a search and return the results. + """Execute a search and return the results, up to the ``SEARCH_LIMIT``. - Args: - q (str): The query to execute. Defaults to the current query, if any. + Arguments: + q (str): The query to execute. **Default:** The current helper-formed query, if any. There must be some query to execute. - index (str): The Globus Search index to search on. Required. - advanced (bool): If **True**, will submit query in "advanced" mode to enable - field matches. - If **False**, only basic fulltext term matches will be supported. - Default **False**. - This value will change to True automatically if - the query is built with helpers. + index (str): The Search index to search on. **Required**. + advanced (bool): If ``True``, will submit query in "advanced" mode + to enable field matches and other advanced features. + If ``False``, only basic fulltext term matches will be supported. + **Default:** ``False`` if no helpers have been used to build the query, or + ``True`` if helpers have been used. limit (int): The maximum number of results to return. 
- The max for this argument is the SEARCH_LIMIT imposed by Globus Search. - The default for advanced-mode queries is SEARCH_LIMIT. - The default for non-advanced queries is NONADVANCED_LIMIT. - info (bool): If **False**, search will return a list of the results. - If **True**, search will return a tuple containing the results list - and other information about the query. - Default **False**. + The max for this argument is the ``SEARCH_LIMIT`` imposed by Globus Search. + **Default:** ``SEARCH_LIMIT`` for advanced-mode queries, + ``NONADVANCED_LIMIT`` for limited-mode queries. + info (bool): If ``False``, search will return a list of the results. + If ``True``, search will return a tuple containing the results list + and other information about the query. + **Default:** ``False``. retries (int): The number of times to retry a Search query if it fails. - Default 3. + **Default:** 3. Returns: - list (if info=False): The results. - Returns: - tuple (if info=True): The results, and a dictionary of query information. + If ``info`` is ``False``, *list*: The search results. + If ``info`` is ``True``, *tuple*: The search results, + and a dictionary of query information. """ if q is None: @@ -1364,21 +1442,24 @@ def search(self, q=None, index=None, advanced=None, limit=None, info=False, retr return res def aggregate(self, q=None, index=None, retries=1, scroll_size=SEARCH_LIMIT): - """Gather all results that match a specific query + """Perform an advanced query, and return *all* matching results. + Will automatically perform multiple queries in order to retrieve all results. - Args: - q (str): The query to execute. Defaults to the current query, if any. + Note: + All ``aggregate`` queries run in advanced mode, and ``info`` is not available. + + Arguments: + q (str): The query to execute. The current helper-formed query, if any. There must be some query to execute. - index (str): The Globus Search index to search on. Required. + index (str): The Search index to search on. 
Required. retries (int): The number of times to retry a Search query if it fails. - Default 1. - scroll_size (int): Maximum number of records requested per request. + **Default:** 1. + scroll_size (int): Maximum number of records returned per query. Must be + between one and the ``SEARCH_LIMIT`` (inclusive). + **Default:** ``SEARCH_LIMIT``. Returns: - list of dict: All matching records. - - Note: - All aggregate queries run in advanced mode. + list of dict: All matching entries. """ if q is None: q = self.query @@ -1388,6 +1469,8 @@ def aggregate(self, q=None, index=None, retries=1, scroll_size=SEARCH_LIMIT): if index is None: print("Error: No index specified") return [] + if scroll_size <= 0: + scroll_size = 1 q = self.__clean_query_string(q) @@ -1436,9 +1519,9 @@ def aggregate(self, q=None, index=None, retries=1, scroll_size=SEARCH_LIMIT): return output def mapping(self, index): - """Fetch the mapping for the specified index. + """Fetch the entire mapping for the specified index. - Args: + Arguments: index (str): The index to map. 
Returns: diff --git a/test_requirements.txt b/test_requirements.txt index 0094c62..357f87a 100644 --- a/test_requirements.txt +++ b/test_requirements.txt @@ -1,7 +1,10 @@ coveralls>=1.2.0 flake8>=3.5.0 jupyter>=1.0.0 +m2r>=0.2.1 +nbsphinx>=0.4.1 nbval>=0.9.0 pandas>=0.22.0 pytest>=3.4.1 pytest-cov>=2.5.1 +sphinx_bootstrap_theme>=0.6.5 From f608ef7e92553b262491c50e405ec3bbedb8f3c8 Mon Sep 17 00:00:00 2001 From: jgaff Date: Wed, 19 Dec 2018 10:01:30 -0600 Subject: [PATCH 3/5] Rework/fix Jupyter notebook tutorials in Sphinx --- docs/examples/Example_Aggregations.ipynb | 13 ++++++++++--- .../Example_Statistics-MDF_Datasets.ipynb | 5 +++-- docs/sphinx/source/conf.py | 5 +---- docs/sphinx/source/example_list.rst | 7 ++----- docs/sphinx/source/index.rst | 5 +++-- docs/sphinx/source/tutorial_list.rst | 17 ++++++----------- docs/tutorials/1-Introduction.ipynb | 11 +++++++++-- .../2-Core_Query_Builder_Functions.ipynb | 9 ++++++++- .../3-Expanded_Query_Builder_Functions.ipynb | 9 ++++++++- docs/tutorials/4-General_Helper_Functions.ipynb | 13 ++++++++++--- .../5-Field-Specific_Helper_Functions.ipynb | 11 +++++++++-- docs/tutorials/6-Data_Retrieval_Functions.ipynb | 9 ++++++++- mdf_forge/forge.py | 6 ++---- 13 files changed, 79 insertions(+), 41 deletions(-) diff --git a/docs/examples/Example_Aggregations.ipynb b/docs/examples/Example_Aggregations.ipynb index 814b4b0..ca1aee7 100644 --- a/docs/examples/Example_Aggregations.ipynb +++ b/docs/examples/Example_Aggregations.ipynb @@ -4,7 +4,14 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Aggregating data with MDF" + "# Example Aggregations" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Aggregating data with MDF" ] }, { @@ -37,7 +44,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## aggregate_source - NIST XPS DB\n", + "### aggregate_source - NIST XPS DB\n", "Example: We want to collect all records from the NIST XPS Database and analyze the binding energies. 
This database has almost 30,000 records, so we have to use `aggregate()`." ] }, @@ -152,7 +159,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## aggregate - Multiple Datasets\n", + "### aggregate - Multiple Datasets\n", "Example: We want to analyze how often elements are studied with Gallium (Ga), and what the most frequent elemental pairing is. There are more than 10,000 records containing Gallium data." ] }, diff --git a/docs/examples/Example_Statistics-MDF_Datasets.ipynb b/docs/examples/Example_Statistics-MDF_Datasets.ipynb index af5bd42..e6e92f7 100644 --- a/docs/examples/Example_Statistics-MDF_Datasets.ipynb +++ b/docs/examples/Example_Statistics-MDF_Datasets.ipynb @@ -4,10 +4,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Example Statistics\n", + "# Example Statistics - MDF Datasets\n", "Example: We want to know how many datasets are in MDF and which datasets have the most records.\n", "\n", - "## Note: This example is not kept up-to-date with the latest statistics.\n", + "**Note: This example is not kept up-to-date with the latest statistics.**\n", + "\n", "If you want the current MDF statistics, you must run this code yourself." ] }, diff --git a/docs/sphinx/source/conf.py b/docs/sphinx/source/conf.py index e8054b9..87fc39d 100644 --- a/docs/sphinx/source/conf.py +++ b/docs/sphinx/source/conf.py @@ -40,11 +40,8 @@ 'sphinx.ext.autodoc', 'sphinx.ext.coverage', 'sphinx.ext.viewcode', -# 'sphinx.ext.intersphinx', -# 'sphinx.ext.ifconfig', 'sphinx.ext.napoleon', 'sphinx.ext.mathjax', -# 'IPython.sphinxext.ipython_console_highlighting', 'm2r', 'nbsphinx' ] @@ -68,7 +65,7 @@ # '.md': CommonMarkParser, # } -source_suffix = ['.rst', '.md', 'ipynb'] +source_suffix = ['.rst', '.md', '.ipynb'] # The master toctree document. 
master_doc = 'index' diff --git a/docs/sphinx/source/example_list.rst b/docs/sphinx/source/example_list.rst index a26207c..d4a6b9a 100644 --- a/docs/sphinx/source/example_list.rst +++ b/docs/sphinx/source/example_list.rst @@ -2,10 +2,7 @@ Forge Examples ============== .. toctree:: - :maxdepth: 1 - :titlesonly: - -* `Example Aggregations `_ -* `Example Statistics - MDF Datasets `_ + examples/Example_Aggregations + examples/Example_Statistics-MDF_Datasets diff --git a/docs/sphinx/source/index.rst b/docs/sphinx/source/index.rst index 57866c8..496887e 100644 --- a/docs/sphinx/source/index.rst +++ b/docs/sphinx/source/index.rst @@ -1,14 +1,15 @@ + + Index ===== .. toctree:: - :maxdepth: 1 + :maxdepth: 2 :titlesonly: MDF Forge Client Tutorials Examples -* `Sitemap `_ .. mdinclude:: ../../../README.md diff --git a/docs/sphinx/source/tutorial_list.rst b/docs/sphinx/source/tutorial_list.rst index 8da5192..eafcd13 100644 --- a/docs/sphinx/source/tutorial_list.rst +++ b/docs/sphinx/source/tutorial_list.rst @@ -2,16 +2,11 @@ Forge Tutorials =============== .. 
toctree:: - :maxdepth: 2 - :titlesonly: - ABC - - -* `Part 1 - Introduction `_ -* `Part 2 - Core Query Builder Functions `_ -* `Part 3 - Expanded Query Builder Functions `_ -* `Part 4 - General Helper Functions `_ -* `Part 5 - Field-Specific Helper Functions `_ -* `Part 6 - Data Retrieval Functions `_ + tutorials/1-Introduction + tutorials/2-Core_Query_Builder_Functions + tutorials/3-Expanded_Query_Builder_Functions + tutorials/4-General_Helper_Functions + tutorials/5-Field-Specific_Helper_Functions + tutorials/6-Data_Retrieval_Functions diff --git a/docs/tutorials/1-Introduction.ipynb b/docs/tutorials/1-Introduction.ipynb index b71ddc0..d8cb941 100644 --- a/docs/tutorials/1-Introduction.ipynb +++ b/docs/tutorials/1-Introduction.ipynb @@ -1,5 +1,12 @@ { "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Part 1 - Introduction" + ] + }, { "cell_type": "code", "execution_count": 1, @@ -13,7 +20,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Authentication\n", + "## Authentication\n", "Authentication is handled automatically. 
Just follow the prompt once and let Forge take care of the rest.\n" ] }, @@ -31,7 +38,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Basic Queries" + "## Basic Queries" ] }, { diff --git a/docs/tutorials/2-Core_Query_Builder_Functions.ipynb b/docs/tutorials/2-Core_Query_Builder_Functions.ipynb index 3ac17d9..42c1e35 100644 --- a/docs/tutorials/2-Core_Query_Builder_Functions.ipynb +++ b/docs/tutorials/2-Core_Query_Builder_Functions.ipynb @@ -1,5 +1,12 @@ { "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Part 2 - Core Query Builder Functions" + ] + }, { "cell_type": "code", "execution_count": 1, @@ -22,7 +29,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Query builders" + "## Query builders" ] }, { diff --git a/docs/tutorials/3-Expanded_Query_Builder_Functions.ipynb b/docs/tutorials/3-Expanded_Query_Builder_Functions.ipynb index a122642..40b9c9c 100644 --- a/docs/tutorials/3-Expanded_Query_Builder_Functions.ipynb +++ b/docs/tutorials/3-Expanded_Query_Builder_Functions.ipynb @@ -1,5 +1,12 @@ { "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Part 3 - Expanded Query Builder Functions" + ] + }, { "cell_type": "code", "execution_count": 1, @@ -22,7 +29,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# More Query Builders" + "## More Query Builders" ] }, { diff --git a/docs/tutorials/4-General_Helper_Functions.ipynb b/docs/tutorials/4-General_Helper_Functions.ipynb index 41f0c21..a998a55 100644 --- a/docs/tutorials/4-General_Helper_Functions.ipynb +++ b/docs/tutorials/4-General_Helper_Functions.ipynb @@ -1,5 +1,12 @@ { "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Part 4 - General Helper Functions" + ] + }, { "cell_type": "code", "execution_count": 1, @@ -22,7 +29,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Generally Useful Help" + "## Generally Useful Help" ] }, { @@ -330,7 +337,7 @@ "cell_type": "markdown", 
"metadata": {}, "source": [ - "# Fetching Datasets" + "## Fetching Datasets" ] }, { @@ -486,7 +493,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Aggregations" + "## Aggregations" ] }, { diff --git a/docs/tutorials/5-Field-Specific_Helper_Functions.ipynb b/docs/tutorials/5-Field-Specific_Helper_Functions.ipynb index b309a30..eabe83d 100644 --- a/docs/tutorials/5-Field-Specific_Helper_Functions.ipynb +++ b/docs/tutorials/5-Field-Specific_Helper_Functions.ipynb @@ -1,5 +1,12 @@ { "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Part 5 - Field-Specific Helper Functions" + ] + }, { "cell_type": "code", "execution_count": 1, @@ -22,7 +29,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Field-Specific Query Builders" + "## Field-Specific Query Builders" ] }, { @@ -653,7 +660,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Premade Searches" + "## Premade Searches" ] }, { diff --git a/docs/tutorials/6-Data_Retrieval_Functions.ipynb b/docs/tutorials/6-Data_Retrieval_Functions.ipynb index 671460f..0b66071 100644 --- a/docs/tutorials/6-Data_Retrieval_Functions.ipynb +++ b/docs/tutorials/6-Data_Retrieval_Functions.ipynb @@ -1,5 +1,12 @@ { "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Part 6 - Data Retrieval Functions" + ] + }, { "cell_type": "code", "execution_count": 1, @@ -22,7 +29,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Data Retrieval" + "## Data Retrieval" ] }, { diff --git a/mdf_forge/forge.py b/mdf_forge/forge.py index 2230567..65c194c 100644 --- a/mdf_forge/forge.py +++ b/mdf_forge/forge.py @@ -822,11 +822,9 @@ def http_download(self, results, dest=".", preserve_dir=False, verbose=True): Returns: *dict*: The status information for the download: - * **success** (*bool*): ``True`` if the download succeeded. ``False`` - if it failed. + if it failed. * **message** (*str*): The error message, if the download failed. 
- """ if self.__anonymous: print("Error: Anonymous HTTP download not yet supported.") @@ -1334,7 +1332,7 @@ def or_join(self, close_group=False): If ``False``, will continue current group. Example: - + If the current query is "(term1" .or(close_group=True) => "(term1) OR(" .or(close_group=False) => "(term1 OR " From 33fe7687ae8ef58491c799b0317cfc11b6db6db0 Mon Sep 17 00:00:00 2001 From: jgaff Date: Thu, 20 Dec 2018 10:49:22 -0600 Subject: [PATCH 4/5] match_(not)_exists helpers docs and tests, minor fixes --- .../3-Expanded_Query_Builder_Functions.ipynb | 994 ++++++++++++------ mdf_forge/forge.py | 17 +- tests/test_forge.py | 36 +- 3 files changed, 711 insertions(+), 336 deletions(-) diff --git a/docs/tutorials/3-Expanded_Query_Builder_Functions.ipynb b/docs/tutorials/3-Expanded_Query_Builder_Functions.ipynb index 40b9c9c..dd0b72d 100644 --- a/docs/tutorials/3-Expanded_Query_Builder_Functions.ipynb +++ b/docs/tutorials/3-Expanded_Query_Builder_Functions.ipynb @@ -36,8 +36,8 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### match_range\n", - "`match_range()` is the same as `match_field()` except that the value is a range. Strings are allowed as ranges at your own risk (they're evaluated based on alphabetical order, but can sometimes have unexpected results)." + "### match_exists\n", + "`match_exists()` matches entries where there is any value in the field. As long as the field exists in the entry, it's a match." 
] }, { @@ -48,7 +48,7 @@ { "data": { "text/plain": [ - "" + "" ] }, "execution_count": 3, @@ -57,254 +57,338 @@ } ], "source": [ - "mdf.match_range(\"mdf.scroll_id\", 0, 10)" + "mdf.match_exists(\"services.globus_publish\")" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "[{'data': {'endpoint_path': 'globus://e38ee745-6d04-11e5-ba46-22000b92c6ec/MDF/mdf_connect/prod/data/surface_diffusion_bcc011_v1/',\n", + " 'link': 'https://www.globus.org/app/transfer?origin_id=e38ee745-6d04-11e5-ba46-22000b92c6ec&origin_path=/MDF/mdf_connect/prod/data/surface_diffusion_bcc011_v1/'},\n", + " 'dc': {'creators': [{'affiliations': ['Department of Physics and Materials Research Laboratory, University of Illinois at Urbana-Champaign, 1110 W. Green Street, Urbana, IL 61801, USA'],\n", + " 'creatorName': 'Ondrejcek, Michal',\n", + " 'familyName': 'Ondrejcek',\n", + " 'givenName': 'Michal'},\n", + " {'affiliations': ['Department of Physics and Materials Research Laboratory, University of Illinois at Urbana-Champaign, 1110 W. Green Street, Urbana, IL 61801, USA'],\n", + " 'creatorName': 'Rajappan, Mahesh',\n", + " 'familyName': 'Rajappan',\n", + " 'givenName': 'Mahesh'},\n", + " {'affiliations': ['Department of Physics and Materials Research Laboratory, University of Illinois at Urbana-Champaign, 1110 W. Green Street, Urbana, IL 61801, USA'],\n", + " 'creatorName': 'Swiech, Wacek',\n", + " 'familyName': 'Swiech',\n", + " 'givenName': 'Wacek'},\n", + " {'affiliations': ['Department of Physics and Materials Research Laboratory, University of Illinois at Urbana-Champaign, 1110 W. 
Green Street, Urbana, IL 61801, USA'],\n", + " 'creatorName': 'Swiech, Wacek',\n", + " 'familyName': 'Swiech',\n", + " 'givenName': 'Wacek'}],\n", + " 'descriptions': [{'description': 'Surface mass diffusion on a smooth surfaces provide basic quantities, such as diffusion coefficients Ds relevant to surface kinetics. By step fluctuation experiments on ~300 nm thick films of four bcc(011) metals grown on (11—20) alpha-Al2O3, using Low Energy Electron Microscopy (LEEM), we determine the coefficient Ds for V, Nb, Ta and Mo in the temperature range above 0.5 Tm, with Tm being the melting temperature. The fluctuation amplitudes and step relaxation times are obtained as function of wave number q by calculating the Fourier components of the step edge displacement. The temperature dependence for Mo and Nb reveals both surface self-diffusion process and bulk vacancy diffusion also. The diffusion coefficients of V, Nb overlap well as functions of T/Tm but are an order of magnitude higher than that of Mo(011). 
The difference and a factor ~10+3 in prefactor probably arise from the native submonolayer oxidation of the column 5 metals.',\n", + " 'descriptionType': 'Other'}],\n", + " 'publicationYear': '2018',\n", + " 'publisher': 'University of Illinois at Urbana-Champaign',\n", + " 'relatedIdentifiers': [{'relatedIdentifier': '10.1103/PhysRevB.73.035418',\n", + " 'relatedIdentifierType': 'DOI',\n", + " 'relationType': 'IsPartOf'},\n", + " {'relatedIdentifier': '10.1063/1.2356108',\n", + " 'relatedIdentifierType': 'DOI',\n", + " 'relationType': 'IsPartOf'},\n", + " {'relatedIdentifier': '10.1080/14786430500398433',\n", + " 'relatedIdentifierType': 'DOI',\n", + " 'relationType': 'IsPartOf'},\n", + " {'relatedIdentifier': '10.1016/j.susc.2006.07.035',\n", + " 'relatedIdentifierType': 'DOI',\n", + " 'relationType': 'IsPartOf'},\n", + " {'relatedIdentifier': '10.1080/09500830310001646671',\n", + " 'relatedIdentifierType': 'DOI',\n", + " 'relationType': 'IsPartOf'}],\n", + " 'resourceType': {'resourceType': 'Dataset',\n", + " 'resourceTypeGeneral': 'Dataset'},\n", + " 'titles': [{'title': 'Surface Mass Diffusion on bcc(011) Planes; V, Nb, Ta and Mo'}]},\n", + " 'mdf': {'ingest_date': '2018-11-02T19:44:46.339540Z',\n", + " 'mdf_id': '5bdca92e2ef3881fbd3b18f0',\n", + " 'resource_type': 'dataset',\n", + " 'scroll_id': 0,\n", + " 'source_id': 'surface_diffusion_bcc011_v1',\n", + " 'source_name': 'surface_diffusion_bcc011',\n", + " 'version': 1},\n", + " 'services': {'globus_publish': 'https://publish.globus.org/jspui/handle/ITEM/1209',\n", + " 'mdf_search': 'This dataset was ingested to MDF Search.'}}]" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mdf.search(limit=10)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### exclude_range\n", - "Similarly, you can exclude a range with `exclude_range()`." + "### match_not_exists\n", + "`match_not_exists()` is the opposite of `match_exists()`. 
Any entry without the given field is a match." ] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "" + "" ] }, - "execution_count": 4, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "mdf.exclude_range(\"mdf.scroll_id\", 100, 199)" + "mdf.match_not_exists(\"services.mrr\")" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "[{'dc': {'contributors': [{'affiliations': ['University of Arizona'],\n", - " 'contributorName': 'Downs, Robert',\n", - " 'contributorType': 'ContactPerson',\n", - " 'familyName': 'Downs',\n", - " 'givenName': 'Robert'}],\n", - " 'creators': [{'affiliations': ['University of Arizona'],\n", - " 'creatorName': 'Downs, Robert',\n", - " 'familyName': 'Downs',\n", - " 'givenName': 'Robert'},\n", - " {'affiliations': ['University of Arizona'],\n", - " 'creatorName': 'Hall-Wallace, Michelle',\n", - " 'familyName': 'Hall-Wallace',\n", - " 'givenName': 'Michelle'}],\n", - " 'dates': [{'date': '2017-08-04T18:47:01.096482Z', 'dateType': 'Collected'}],\n", - " 'descriptions': [{'description': 'A crystal structure database that includes every structure published in the American Mineralogist, The Canadian Mineralogist, European Journal of Mineralogy and Physics and Chemistry of Minerals, as well as selected datasets from other journals.',\n", - " 'descriptionType': 'Other'}],\n", - " 'publicationYear': '2003',\n", - " 'publisher': 'MDF (placeholder)',\n", - " 'resourceType': {'resourceType': 'JSON', 'resourceTypeGeneral': 'Dataset'},\n", - " 'subjects': [{'subject': 'crystal structure'}, {'subject': 'minerals'}],\n", - " 'titles': [{'title': 'The American Mineralogist Crystal Structure Database'}]},\n", - " 'mdf': {'ingest_date': '2018-03-26T22:23:18.021883Z',\n", - " 'mdf_id': '5ab972d634a2262cce368d5c',\n", - " 'resource_type': 'dataset',\n", - " 
'scroll_id': 0,\n", - " 'source_name': 'amcs_v1',\n", + "[{'cip': {'bv': '141.1',\n", + " 'energy': '-3.19',\n", + " 'forcefield': 'ffield.CuOCH.comb3',\n", + " 'gv': '37.1',\n", + " 'mpid': 'mp-510752',\n", + " 'totenergy': '-1839.6948'},\n", + " 'files': [{'data_type': 'ASCII text, with very long lines, with no line terminators',\n", + " 'filename': 'classical_interatomic_potentials.json',\n", + " 'globus': 'globus://e38ee745-6d04-11e5-ba46-22000b92c6ec/MDF/mdf_connect/prod/data/cip_v1/classical_interatomic_potentials.json',\n", + " 'length': 1841203,\n", + " 'mime_type': 'text/plain',\n", + " 'sha512': '96635ee0c15d1d0187b18805653a02b1a6dfa5648db82153467045de18adcc08c753e2897d2b48a78a2167a442219e9aeff6b1103732c2158facac8fa4911b33',\n", + " 'url': 'https://e38ee745-6d04-11e5-ba46-22000b92c6ec.e.globus.org/MDF/mdf_connect/prod/data/cip_v1/classical_interatomic_potentials.json'}],\n", + " 'material': {'composition': 'Cu288 O288', 'elements': ['Cu', 'O']},\n", + " 'mdf': {'ingest_date': '2018-10-29T17:47:57.468388Z',\n", + " 'mdf_id': '5bd747cf2ef3880b0f2139bb',\n", + " 'parent_id': '5bd747cd2ef3880b0f2135d1',\n", + " 'resource_type': 'record',\n", + " 'scroll_id': 1002,\n", + " 'source_id': 'cip_v1',\n", + " 'source_name': 'cip',\n", " 'version': 1}},\n", - " {'crystal_structure': {'number_of_atoms': 8,\n", - " 'space_group_number': 225,\n", - " 'volume': 241.385429855744},\n", - " 'files': [{'data_type': 'ASCII text',\n", - " 'filename': '03294.cif',\n", - " 'globus': 'globus://e38ee745-6d04-11e5-ba46-22000b92c6ec/MDF/mdf_connect/prod/data/amcs_v1/03294.cif',\n", - " 'length': 4184,\n", + " {'cip': {'bv': '300.9',\n", + " 'energy': '-2.67',\n", + " 'forcefield': 'ffield.CuOCH.comb3',\n", + " 'gv': '103.8',\n", + " 'mpid': 'mp-703914',\n", + " 'totenergy': '-1538.4116'},\n", + " 'files': [{'data_type': 'ASCII text, with very long lines, with no line terminators',\n", + " 'filename': 'classical_interatomic_potentials.json',\n", + " 'globus': 
'globus://e38ee745-6d04-11e5-ba46-22000b92c6ec/MDF/mdf_connect/prod/data/cip_v1/classical_interatomic_potentials.json',\n", + " 'length': 1841203,\n", " 'mime_type': 'text/plain',\n", - " 'sha512': '37037fe5f5665da7582b178907c598f9527af12b12fcb933a3259fdc68612553385a282749c9b0ce9e1e00fa3cbd6e76896f4dab4f09486aebb4bd990f966fc0',\n", - " 'url': 'https://e38ee745-6d04-11e5-ba46-22000b92c6ec.e.globus.org/MDF/mdf_connect/prod/data/amcs_v1/03294.cif'}],\n", - " 'material': {'composition': 'K3.2Na0.8Cl4', 'elements': ['K', 'Cl', 'Na']},\n", - " 'mdf': {'ingest_date': '2018-03-26T22:23:18.021883Z',\n", - " 'mdf_id': '5ab972d634a2262cce368d5d',\n", - " 'parent_id': '5ab972d634a2262cce368d5c',\n", + " 'sha512': '96635ee0c15d1d0187b18805653a02b1a6dfa5648db82153467045de18adcc08c753e2897d2b48a78a2167a442219e9aeff6b1103732c2158facac8fa4911b33',\n", + " 'url': 'https://e38ee745-6d04-11e5-ba46-22000b92c6ec.e.globus.org/MDF/mdf_connect/prod/data/cip_v1/classical_interatomic_potentials.json'}],\n", + " 'material': {'composition': 'Cu192 O384', 'elements': ['Cu', 'O']},\n", + " 'mdf': {'ingest_date': '2018-10-29T17:47:57.468388Z',\n", + " 'mdf_id': '5bd747cf2ef3880b0f2139bf',\n", + " 'parent_id': '5bd747cd2ef3880b0f2135d1',\n", " 'resource_type': 'record',\n", - " 'scroll_id': 1,\n", - " 'source_name': 'amcs_v1'}},\n", - " {'crystal_structure': {'number_of_atoms': 52,\n", - " 'space_group_number': 12,\n", - " 'volume': 709.8954060972311},\n", - " 'files': [{'data_type': 'ASCII text',\n", - " 'filename': '18904.cif',\n", - " 'globus': 'globus://e38ee745-6d04-11e5-ba46-22000b92c6ec/MDF/mdf_connect/prod/data/amcs_v1/18904.cif',\n", - " 'length': 1543,\n", + " 'scroll_id': 1006,\n", + " 'source_id': 'cip_v1',\n", + " 'source_name': 'cip',\n", + " 'version': 1}},\n", + " {'cip': {'bv': '603.0',\n", + " 'energy': '-3.64',\n", + " 'forcefield': 'ffield.CuOCH.comb3',\n", + " 'gv': '317.7',\n", + " 'mpid': 'mp-760432',\n", + " 'totenergy': '-2186.0068'},\n", + " 'files': [{'data_type': 'ASCII 
text, with very long lines, with no line terminators',\n", + " 'filename': 'classical_interatomic_potentials.json',\n", + " 'globus': 'globus://e38ee745-6d04-11e5-ba46-22000b92c6ec/MDF/mdf_connect/prod/data/cip_v1/classical_interatomic_potentials.json',\n", + " 'length': 1841203,\n", " 'mime_type': 'text/plain',\n", - " 'sha512': '6cae6f3deaf8723cc33f3b55faee8c19dc21db919f86977b8f5868a9e2cfc1a754a762d669f27e92a3d8006813d416f55c4d6d29b5e9c910173b0536f305761d',\n", - " 'url': 'https://e38ee745-6d04-11e5-ba46-22000b92c6ec.e.globus.org/MDF/mdf_connect/prod/data/amcs_v1/18904.cif'}],\n", - " 'material': {'composition': 'K4Al4.0Si12O32',\n", - " 'elements': ['O', 'K', 'Al', 'Si']},\n", - " 'mdf': {'ingest_date': '2018-03-26T22:23:18.021883Z',\n", - " 'mdf_id': '5ab972d634a2262cce368d5f',\n", - " 'parent_id': '5ab972d634a2262cce368d5c',\n", + " 'sha512': '96635ee0c15d1d0187b18805653a02b1a6dfa5648db82153467045de18adcc08c753e2897d2b48a78a2167a442219e9aeff6b1103732c2158facac8fa4911b33',\n", + " 'url': 'https://e38ee745-6d04-11e5-ba46-22000b92c6ec.e.globus.org/MDF/mdf_connect/prod/data/cip_v1/classical_interatomic_potentials.json'}],\n", + " 'material': {'composition': 'Cu240 O360', 'elements': ['Cu', 'O']},\n", + " 'mdf': {'ingest_date': '2018-10-29T17:47:57.468388Z',\n", + " 'mdf_id': '5bd747cf2ef3880b0f2139c2',\n", + " 'parent_id': '5bd747cd2ef3880b0f2135d1',\n", " 'resource_type': 'record',\n", - " 'scroll_id': 3,\n", - " 'source_name': 'amcs_v1'}},\n", - " {'crystal_structure': {'number_of_atoms': 56,\n", - " 'space_group_number': 227,\n", - " 'volume': 542.1966878632827},\n", - " 'files': [{'data_type': 'ASCII text',\n", - " 'filename': '02176.cif',\n", - " 'globus': 'globus://e38ee745-6d04-11e5-ba46-22000b92c6ec/MDF/mdf_connect/prod/data/amcs_v1/02176.cif',\n", - " 'length': 4980,\n", + " 'scroll_id': 1009,\n", + " 'source_id': 'cip_v1',\n", + " 'source_name': 'cip',\n", + " 'version': 1}},\n", + " {'cip': {'bv': '335.6',\n", + " 'energy': '-3.38',\n", + " 
'forcefield': 'ffield.CuOCH.comb3',\n", + " 'gv': '127.0',\n", + " 'mpid': 'mp-760084',\n", + " 'totenergy': '-1369.2882'},\n", + " 'files': [{'data_type': 'ASCII text, with very long lines, with no line terminators',\n", + " 'filename': 'classical_interatomic_potentials.json',\n", + " 'globus': 'globus://e38ee745-6d04-11e5-ba46-22000b92c6ec/MDF/mdf_connect/prod/data/cip_v1/classical_interatomic_potentials.json',\n", + " 'length': 1841203,\n", " 'mime_type': 'text/plain',\n", - " 'sha512': 'edaff24b2f3e46f7323d049da9fa19d5e35b01ccd8bb9d619163d1877a735eb20a49b66ed40ac627c0fcafd9d17985523062495bb959773328f5bc8d97c197c3',\n", - " 'url': 'https://e38ee745-6d04-11e5-ba46-22000b92c6ec.e.globus.org/MDF/mdf_connect/prod/data/amcs_v1/02176.cif'}],\n", - " 'material': {'composition': 'Mg8Al16O32', 'elements': ['O', 'Mg', 'Al']},\n", - " 'mdf': {'ingest_date': '2018-03-26T22:23:18.021883Z',\n", - " 'mdf_id': '5ab972d634a2262cce368d60',\n", - " 'parent_id': '5ab972d634a2262cce368d5c',\n", + " 'sha512': '96635ee0c15d1d0187b18805653a02b1a6dfa5648db82153467045de18adcc08c753e2897d2b48a78a2167a442219e9aeff6b1103732c2158facac8fa4911b33',\n", + " 'url': 'https://e38ee745-6d04-11e5-ba46-22000b92c6ec.e.globus.org/MDF/mdf_connect/prod/data/cip_v1/classical_interatomic_potentials.json'}],\n", + " 'material': {'composition': 'Cu216 O189', 'elements': ['Cu', 'O']},\n", + " 'mdf': {'ingest_date': '2018-10-29T17:47:57.468388Z',\n", + " 'mdf_id': '5bd747cf2ef3880b0f2139c7',\n", + " 'parent_id': '5bd747cd2ef3880b0f2135d1',\n", " 'resource_type': 'record',\n", - " 'scroll_id': 4,\n", - " 'source_name': 'amcs_v1'}},\n", - " {'crystal_structure': {'number_of_atoms': 44,\n", - " 'space_group_number': 14,\n", - " 'volume': 571.0766745749353},\n", - " 'files': [{'data_type': 'ASCII text',\n", - " 'filename': '10564.cif',\n", - " 'globus': 'globus://e38ee745-6d04-11e5-ba46-22000b92c6ec/MDF/mdf_connect/prod/data/amcs_v1/10564.cif',\n", - " 'length': 1355,\n", + " 'scroll_id': 1014,\n", + " 
'source_id': 'cip_v1',\n", + " 'source_name': 'cip',\n", + " 'version': 1}},\n", + " {'cip': {'bv': '5.1',\n", + " 'energy': '-5.66',\n", + " 'forcefield': 'ffield.CuOCH.comb3',\n", + " 'gv': '6.0',\n", + " 'mpid': 'mp-20066',\n", + " 'totenergy': '-4349.2305'},\n", + " 'files': [{'data_type': 'ASCII text, with very long lines, with no line terminators',\n", + " 'filename': 'classical_interatomic_potentials.json',\n", + " 'globus': 'globus://e38ee745-6d04-11e5-ba46-22000b92c6ec/MDF/mdf_connect/prod/data/cip_v1/classical_interatomic_potentials.json',\n", + " 'length': 1841203,\n", " 'mime_type': 'text/plain',\n", - " 'sha512': 'acffedef53bc4af7c9fc0b3085adb00e487e1006a5f6be5c5b0c00d6a6135e2467209c5c3f498c99a2de76f6beb51d01b707ef4a0ab738b1c79cfcf01f1134b2',\n", - " 'url': 'https://e38ee745-6d04-11e5-ba46-22000b92c6ec.e.globus.org/MDF/mdf_connect/prod/data/amcs_v1/10564.cif'}],\n", - " 'material': {'composition': 'Cu12As4O28', 'elements': ['O', 'Cu', 'As']},\n", - " 'mdf': {'ingest_date': '2018-03-26T22:23:18.021883Z',\n", - " 'mdf_id': '5ab972d634a2262cce368d66',\n", - " 'parent_id': '5ab972d634a2262cce368d5c',\n", + " 'sha512': '96635ee0c15d1d0187b18805653a02b1a6dfa5648db82153467045de18adcc08c753e2897d2b48a78a2167a442219e9aeff6b1103732c2158facac8fa4911b33',\n", + " 'url': 'https://e38ee745-6d04-11e5-ba46-22000b92c6ec.e.globus.org/MDF/mdf_connect/prod/data/cip_v1/classical_interatomic_potentials.json'}],\n", + " 'material': {'composition': 'C256 O512', 'elements': ['C', 'O']},\n", + " 'mdf': {'ingest_date': '2018-10-29T17:47:57.468388Z',\n", + " 'mdf_id': '5bd747cf2ef3880b0f2139cb',\n", + " 'parent_id': '5bd747cd2ef3880b0f2135d1',\n", " 'resource_type': 'record',\n", - " 'scroll_id': 10,\n", - " 'source_name': 'amcs_v1'}},\n", - " {'dc': {'contributors': [{'affiliations': ['Brigham Young University'],\n", - " 'contributorName': 'Hart, Gus',\n", - " 'contributorType': 'ContactPerson',\n", - " 'familyName': 'Hart',\n", - " 'givenName': 'Gus'}],\n", - " 'creators': 
[{'affiliations': [''],\n", - " 'creatorName': 'Hart, Gus',\n", - " 'familyName': 'Hart',\n", - " 'givenName': 'Gus'},\n", - " {'affiliations': ['Brigham Young University'],\n", - " 'creatorName': 'Nelson, Lance',\n", - " 'familyName': 'Nelson',\n", - " 'givenName': 'Lance'},\n", - " {'affiliations': [''],\n", - " 'creatorName': 'Ozoliņš, Vidvuds',\n", - " 'familyName': 'Ozoliņš',\n", - " 'givenName': 'Vidvuds'},\n", - " {'affiliations': [''],\n", - " 'creatorName': 'Reese, Shane',\n", - " 'familyName': 'Reese',\n", - " 'givenName': 'Shane'},\n", - " {'affiliations': [''],\n", - " 'creatorName': 'Zhou, Fei',\n", - " 'familyName': 'Zhou',\n", - " 'givenName': 'Fei'}],\n", - " 'dates': [{'date': '2017-08-09T16:34:52.925299Z', 'dateType': 'Collected'}],\n", - " 'descriptions': [{'description': '4k DFT calculations for solid AgPd, CuPt and AgPt FCC superstructures. DFT/PBE energy, forces and stresses for cell sizes 1-16 across all compositions including primitive cells.',\n", - " 'descriptionType': 'Other'}],\n", - " 'publicationYear': '2013',\n", - " 'publisher': 'MDF (placeholder)',\n", - " 'relatedIdentifiers': [{'relatedIdentifier': 'https://journals.aps.org/prb/abstract/10.1103/PhysRevB.88.155105',\n", - " 'relatedIdentifierType': 'DOI',\n", - " 'relationType': 'IsPartOf'}],\n", - " 'resourceType': {'resourceType': 'JSON', 'resourceTypeGeneral': 'Dataset'},\n", - " 'subjects': [{'subject': 'tar_bz2'}],\n", - " 'titles': [{'title': 'Cluster expansion made easy with Bayesian compressive sensing'}]},\n", - " 'mdf': {'ingest_date': '2018-03-27T14:11:28.073432Z',\n", - " 'mdf_id': '5aba511034a226348355ce1b',\n", - " 'resource_type': 'dataset',\n", - " 'scroll_id': 0,\n", - " 'source_name': 'bfcc13_v1',\n", + " 'scroll_id': 1018,\n", + " 'source_id': 'cip_v1',\n", + " 'source_name': 'cip',\n", " 'version': 1}},\n", - " {'crystal_structure': {'number_of_atoms': 5,\n", - " 'space_group_number': 12,\n", - " 'volume': 72.0681447541587},\n", - " 'files': [{'data_type': 
'ASCII text',\n", - " 'filename': 'OUTCAR.str36',\n", - " 'globus': 'globus://e38ee745-6d04-11e5-ba46-22000b92c6ec/MDF/mdf_connect/prod/data/bfcc13_v1/bfcc13/Cu_Pt.OUTCARS/OUTCAR.str36',\n", - " 'length': 3263105,\n", + " {'cip': {'bv': '-1.7',\n", + " 'energy': '-5.6',\n", + " 'forcefield': 'ffield.CuOCH.comb3',\n", + " 'gv': '-0.6',\n", + " 'mpid': 'mp-556660',\n", + " 'totenergy': '-7254.3956'},\n", + " 'files': [{'data_type': 'ASCII text, with very long lines, with no line terminators',\n", + " 'filename': 'classical_interatomic_potentials.json',\n", + " 'globus': 'globus://e38ee745-6d04-11e5-ba46-22000b92c6ec/MDF/mdf_connect/prod/data/cip_v1/classical_interatomic_potentials.json',\n", + " 'length': 1841203,\n", " 'mime_type': 'text/plain',\n", - " 'sha512': '6951816745fdf7d2e9173d746a19c1f62deb336bb29c5b5ada640b2f387262bc06f4504f83d23aadb0e91b5b4eb07a2f229f4bc8484527947cd984e9892f1bfa',\n", - " 'url': 'https://e38ee745-6d04-11e5-ba46-22000b92c6ec.e.globus.org/MDF/mdf_connect/prod/data/bfcc13_v1/bfcc13/Cu_Pt.OUTCARS/OUTCAR.str36'}],\n", - " 'material': {'composition': 'Cu2Pt3', 'elements': ['Pt', 'Cu']},\n", - " 'mdf': {'ingest_date': '2018-03-27T14:11:28.073432Z',\n", - " 'mdf_id': '5aba511034a226348355ce1c',\n", - " 'parent_id': '5aba511034a226348355ce1b',\n", + " 'sha512': '96635ee0c15d1d0187b18805653a02b1a6dfa5648db82153467045de18adcc08c753e2897d2b48a78a2167a442219e9aeff6b1103732c2158facac8fa4911b33',\n", + " 'url': 'https://e38ee745-6d04-11e5-ba46-22000b92c6ec.e.globus.org/MDF/mdf_connect/prod/data/cip_v1/classical_interatomic_potentials.json'}],\n", + " 'material': {'composition': 'C648 O648', 'elements': ['C', 'O']},\n", + " 'mdf': {'ingest_date': '2018-10-29T17:47:57.468388Z',\n", + " 'mdf_id': '5bd747cf2ef3880b0f2139cc',\n", + " 'parent_id': '5bd747cd2ef3880b0f2135d1',\n", " 'resource_type': 'record',\n", - " 'scroll_id': 1,\n", - " 'source_name': 'bfcc13_v1'}},\n", - " {'crystal_structure': {'number_of_atoms': 9,\n", - " 'space_group_number': 12,\n", 
- " 'volume': 134.8999346410581},\n", - " 'files': [{'data_type': 'ASCII text',\n", - " 'filename': 'OUTCAR.str645',\n", - " 'globus': 'globus://e38ee745-6d04-11e5-ba46-22000b92c6ec/MDF/mdf_connect/prod/data/bfcc13_v1/bfcc13/Cu_Pt.OUTCARS/OUTCAR.str645',\n", - " 'length': 1650783,\n", + " 'scroll_id': 1019,\n", + " 'source_id': 'cip_v1',\n", + " 'source_name': 'cip',\n", + " 'version': 1}},\n", + " {'cip': {'bv': '168.9',\n", + " 'energy': '-3.22',\n", + " 'forcefield': 'ffield.CuOCH.comb3',\n", + " 'gv': '13.0',\n", + " 'mpid': 'mp-557082',\n", + " 'totenergy': '-5208.6095'},\n", + " 'files': [{'data_type': 'ASCII text, with very long lines, with no line terminators',\n", + " 'filename': 'classical_interatomic_potentials.json',\n", + " 'globus': 'globus://e38ee745-6d04-11e5-ba46-22000b92c6ec/MDF/mdf_connect/prod/data/cip_v1/classical_interatomic_potentials.json',\n", + " 'length': 1841203,\n", " 'mime_type': 'text/plain',\n", - " 'sha512': 'a39d988a45785e9312e8dfbd6f9a7f80cd3fe87873324e7c69a854147318fe8cde07bfb8abe0e02eb0d3d87e2c4909d0c873909c7e23508b1496a4787a4b5896',\n", - " 'url': 'https://e38ee745-6d04-11e5-ba46-22000b92c6ec.e.globus.org/MDF/mdf_connect/prod/data/bfcc13_v1/bfcc13/Cu_Pt.OUTCARS/OUTCAR.str645'}],\n", - " 'material': {'composition': 'Cu2Pt7', 'elements': ['Pt', 'Cu']},\n", - " 'mdf': {'ingest_date': '2018-03-27T14:11:28.073432Z',\n", - " 'mdf_id': '5aba511034a226348355ce1d',\n", - " 'parent_id': '5aba511034a226348355ce1b',\n", + " 'sha512': '96635ee0c15d1d0187b18805653a02b1a6dfa5648db82153467045de18adcc08c753e2897d2b48a78a2167a442219e9aeff6b1103732c2158facac8fa4911b33',\n", + " 'url': 'https://e38ee745-6d04-11e5-ba46-22000b92c6ec.e.globus.org/MDF/mdf_connect/prod/data/cip_v1/classical_interatomic_potentials.json'}],\n", + " 'material': {'composition': 'H1080 O540', 'elements': ['H', 'O']},\n", + " 'mdf': {'ingest_date': '2018-10-29T17:47:57.468388Z',\n", + " 'mdf_id': '5bd747cf2ef3880b0f2139d2',\n", + " 'parent_id': 
'5bd747cd2ef3880b0f2135d1',\n", " 'resource_type': 'record',\n", - " 'scroll_id': 2,\n", - " 'source_name': 'bfcc13_v1'}},\n", - " {'crystal_structure': {'number_of_atoms': 11,\n", - " 'space_group_number': 2,\n", - " 'volume': 156.45812198249544},\n", - " 'files': [{'data_type': 'ASCII text',\n", - " 'filename': 'OUTCAR.str3268',\n", - " 'globus': 'globus://e38ee745-6d04-11e5-ba46-22000b92c6ec/MDF/mdf_connect/prod/data/bfcc13_v1/bfcc13/Cu_Pt.OUTCARS/OUTCAR.str3268',\n", - " 'length': 18298966,\n", + " 'scroll_id': 1025,\n", + " 'source_id': 'cip_v1',\n", + " 'source_name': 'cip',\n", + " 'version': 1}},\n", + " {'cip': {'bv': '159.7',\n", + " 'energy': '-3.22',\n", + " 'forcefield': 'ffield.CuOCH.comb3',\n", + " 'gv': '15.1',\n", + " 'mpid': 'mp-558226',\n", + " 'totenergy': '-5209.7568'},\n", + " 'files': [{'data_type': 'ASCII text, with very long lines, with no line terminators',\n", + " 'filename': 'classical_interatomic_potentials.json',\n", + " 'globus': 'globus://e38ee745-6d04-11e5-ba46-22000b92c6ec/MDF/mdf_connect/prod/data/cip_v1/classical_interatomic_potentials.json',\n", + " 'length': 1841203,\n", " 'mime_type': 'text/plain',\n", - " 'sha512': 'c6dfc4f286f6877afd6fa31e0a2b4abd501075fee0a3f1dbb47353bf43d8a42f8676bf11b6943daaf271a7e5f070cae0cd95cb651344deb26d523e89f8795fe7',\n", - " 'url': 'https://e38ee745-6d04-11e5-ba46-22000b92c6ec.e.globus.org/MDF/mdf_connect/prod/data/bfcc13_v1/bfcc13/Cu_Pt.OUTCARS/OUTCAR.str3268'}],\n", - " 'material': {'composition': 'Cu5Pt6', 'elements': ['Pt', 'Cu']},\n", - " 'mdf': {'ingest_date': '2018-03-27T14:11:28.073432Z',\n", - " 'mdf_id': '5aba511034a226348355ce1f',\n", - " 'parent_id': '5aba511034a226348355ce1b',\n", + " 'sha512': '96635ee0c15d1d0187b18805653a02b1a6dfa5648db82153467045de18adcc08c753e2897d2b48a78a2167a442219e9aeff6b1103732c2158facac8fa4911b33',\n", + " 'url': 'https://e38ee745-6d04-11e5-ba46-22000b92c6ec.e.globus.org/MDF/mdf_connect/prod/data/cip_v1/classical_interatomic_potentials.json'}],\n", + " 
'material': {'composition': 'H1080 O540', 'elements': ['H', 'O']},\n", + " 'mdf': {'ingest_date': '2018-10-29T17:47:57.468388Z',\n", + " 'mdf_id': '5bd747cf2ef3880b0f2139d4',\n", + " 'parent_id': '5bd747cd2ef3880b0f2135d1',\n", " 'resource_type': 'record',\n", - " 'scroll_id': 4,\n", - " 'source_name': 'bfcc13_v1'}},\n", - " {'crystal_structure': {'number_of_atoms': 12,\n", - " 'space_group_number': 1,\n", - " 'volume': 164.19035220804295},\n", - " 'files': [{'data_type': 'ASCII text',\n", - " 'filename': 'OUTCAR.str6788',\n", - " 'globus': 'globus://e38ee745-6d04-11e5-ba46-22000b92c6ec/MDF/mdf_connect/prod/data/bfcc13_v1/bfcc13/Cu_Pt.OUTCARS/OUTCAR.str6788',\n", - " 'length': 1135486,\n", + " 'scroll_id': 1027,\n", + " 'source_id': 'cip_v1',\n", + " 'source_name': 'cip',\n", + " 'version': 1}},\n", + " {'cip': {'bv': '17996.9',\n", + " 'energy': '-2.5',\n", + " 'forcefield': 'ffield.CuOCH.comb3',\n", + " 'gv': '-3143.8',\n", + " 'mpid': 'mp-673658',\n", + " 'totenergy': '-3847.4315'},\n", + " 'files': [{'data_type': 'ASCII text, with very long lines, with no line terminators',\n", + " 'filename': 'classical_interatomic_potentials.json',\n", + " 'globus': 'globus://e38ee745-6d04-11e5-ba46-22000b92c6ec/MDF/mdf_connect/prod/data/cip_v1/classical_interatomic_potentials.json',\n", + " 'length': 1841203,\n", + " 'mime_type': 'text/plain',\n", + " 'sha512': '96635ee0c15d1d0187b18805653a02b1a6dfa5648db82153467045de18adcc08c753e2897d2b48a78a2167a442219e9aeff6b1103732c2158facac8fa4911b33',\n", + " 'url': 'https://e38ee745-6d04-11e5-ba46-22000b92c6ec.e.globus.org/MDF/mdf_connect/prod/data/cip_v1/classical_interatomic_potentials.json'}],\n", + " 'material': {'composition': 'H1024 O512', 'elements': ['H', 'O']},\n", + " 'mdf': {'ingest_date': '2018-10-29T17:47:57.468388Z',\n", + " 'mdf_id': '5bd747cf2ef3880b0f2139d6',\n", + " 'parent_id': '5bd747cd2ef3880b0f2135d1',\n", + " 'resource_type': 'record',\n", + " 'scroll_id': 1029,\n", + " 'source_id': 'cip_v1',\n", + " 
'source_name': 'cip',\n", + " 'version': 1}},\n", + " {'cip': {'bv': '68.9',\n", + " 'energy': '-3.24',\n", + " 'forcefield': 'ffield.CuOCH.comb3',\n", + " 'gv': '-4.2',\n", + " 'mpid': 'mp-696735',\n", + " 'totenergy': '-3147.6329'},\n", + " 'files': [{'data_type': 'ASCII text, with very long lines, with no line terminators',\n", + " 'filename': 'classical_interatomic_potentials.json',\n", + " 'globus': 'globus://e38ee745-6d04-11e5-ba46-22000b92c6ec/MDF/mdf_connect/prod/data/cip_v1/classical_interatomic_potentials.json',\n", + " 'length': 1841203,\n", " 'mime_type': 'text/plain',\n", - " 'sha512': 'da8c61f1b6990ef9fde4405e1aadc44a547951f8b50f15378fac3403bb631a337707ef060df3bd3166cd76a0f98d56f3e15a221e321bbcd8358ead12ffd19391',\n", - " 'url': 'https://e38ee745-6d04-11e5-ba46-22000b92c6ec.e.globus.org/MDF/mdf_connect/prod/data/bfcc13_v1/bfcc13/Cu_Pt.OUTCARS/OUTCAR.str6788'}],\n", - " 'material': {'composition': 'Cu7Pt5', 'elements': ['Pt', 'Cu']},\n", - " 'mdf': {'ingest_date': '2018-03-27T14:11:28.073432Z',\n", - " 'mdf_id': '5aba511034a226348355ce20',\n", - " 'parent_id': '5aba511034a226348355ce1b',\n", + " 'sha512': '96635ee0c15d1d0187b18805653a02b1a6dfa5648db82153467045de18adcc08c753e2897d2b48a78a2167a442219e9aeff6b1103732c2158facac8fa4911b33',\n", + " 'url': 'https://e38ee745-6d04-11e5-ba46-22000b92c6ec.e.globus.org/MDF/mdf_connect/prod/data/cip_v1/classical_interatomic_potentials.json'}],\n", + " 'material': {'composition': 'H648 O324', 'elements': ['H', 'O']},\n", + " 'mdf': {'ingest_date': '2018-10-29T17:47:57.468388Z',\n", + " 'mdf_id': '5bd747cf2ef3880b0f2139dd',\n", + " 'parent_id': '5bd747cd2ef3880b0f2135d1',\n", " 'resource_type': 'record',\n", - " 'scroll_id': 5,\n", - " 'source_name': 'bfcc13_v1'}}]" + " 'scroll_id': 1036,\n", + " 'source_id': 'cip_v1',\n", + " 'source_name': 'cip',\n", + " 'version': 1}}]" ] }, - "execution_count": 5, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -317,24 +401,72 @@ "cell_type": 
"markdown", "metadata": {}, "source": [ - "### exclusive_match\n", - "If you want a key to have _exactly_ a certain value, use `exclusive_match()`. This is most useful in the case of lists." + "### match_range\n", + "`match_range()` is the same as `match_field()` except that the value is a range. Strings are allowed as ranges at your own risk (they're evaluated based on alphabetical order, but can sometimes have unexpected results)." ] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "[{'cip_v1': {'bv': '117.4',\n", - " 'energy': '-2.46',\n", - " 'forcefield': 'ffield.TiO.comb3',\n", - " 'gv': '34.5',\n", - " 'mpid': 'mp-12957',\n", - " 'totenergy': '-2515.3108'},\n", + "" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mdf.match_range(\"mdf.scroll_id\", 0, 10)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### exclude_range\n", + "Similarly, you can exclude a range with `exclude_range()`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mdf.exclude_range(\"mdf.scroll_id\", 100, 199)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[{'cip': {'bv': '104.6',\n", + " 'energy': '-2.85',\n", + " 'forcefield': 'CuAg.eam.alloy',\n", + " 'gv': '29.5',\n", + " 'mpid': 'mp-10597',\n", + " 'totenergy': '-45.53844'},\n", " 'files': [{'data_type': 'ASCII text, with very long lines, with no line terminators',\n", " 'filename': 'classical_interatomic_potentials.json',\n", " 'globus': 'globus://e38ee745-6d04-11e5-ba46-22000b92c6ec/MDF/mdf_connect/prod/data/cip_v1/classical_interatomic_potentials.json',\n", @@ -342,19 +474,21 @@ " 'mime_type': 'text/plain',\n", " 'sha512': '96635ee0c15d1d0187b18805653a02b1a6dfa5648db82153467045de18adcc08c753e2897d2b48a78a2167a442219e9aeff6b1103732c2158facac8fa4911b33',\n", " 'url': 'https://e38ee745-6d04-11e5-ba46-22000b92c6ec.e.globus.org/MDF/mdf_connect/prod/data/cip_v1/classical_interatomic_potentials.json'}],\n", - " 'material': {'composition': 'O1024', 'elements': ['O']},\n", - " 'mdf': {'ingest_date': '2018-03-27T14:38:34.068719Z',\n", - " 'mdf_id': '5aba576a34a226348355dd4a',\n", - " 'parent_id': '5aba576a34a226348355dcfa',\n", + " 'material': {'composition': 'Ag16', 'elements': ['Ag']},\n", + " 'mdf': {'ingest_date': '2018-10-29T17:47:57.468388Z',\n", + " 'mdf_id': '5bd747cd2ef3880b0f2135d8',\n", + " 'parent_id': '5bd747cd2ef3880b0f2135d1',\n", " 'resource_type': 'record',\n", - " 'scroll_id': 80,\n", - " 'source_name': 'cip_v1'}},\n", - " {'cip_v1': {'bv': '0.0',\n", - " 'energy': '-2.58',\n", - " 'forcefield': 'ffield.TiO.comb3',\n", - " 'gv': '0.0',\n", - " 'mpid': 'mp-610917',\n", - " 'totenergy': '-991.44286'},\n", + " 'scroll_id': 7,\n", + " 
'source_id': 'cip_v1',\n", + " 'source_name': 'cip',\n", + " 'version': 1}},\n", + " {'cip': {'bv': '138.4',\n", + " 'energy': '-3.54',\n", + " 'forcefield': 'CuAg.eam.alloy',\n", + " 'gv': '55.2',\n", + " 'mpid': 'mp-30',\n", + " 'totenergy': '-113.28'},\n", " 'files': [{'data_type': 'ASCII text, with very long lines, with no line terminators',\n", " 'filename': 'classical_interatomic_potentials.json',\n", " 'globus': 'globus://e38ee745-6d04-11e5-ba46-22000b92c6ec/MDF/mdf_connect/prod/data/cip_v1/classical_interatomic_potentials.json',\n", @@ -362,19 +496,205 @@ " 'mime_type': 'text/plain',\n", " 'sha512': '96635ee0c15d1d0187b18805653a02b1a6dfa5648db82153467045de18adcc08c753e2897d2b48a78a2167a442219e9aeff6b1103732c2158facac8fa4911b33',\n", " 'url': 'https://e38ee745-6d04-11e5-ba46-22000b92c6ec.e.globus.org/MDF/mdf_connect/prod/data/cip_v1/classical_interatomic_potentials.json'}],\n", - " 'material': {'composition': 'O384', 'elements': ['O']},\n", - " 'mdf': {'ingest_date': '2018-03-27T14:38:34.068719Z',\n", - " 'mdf_id': '5aba576a34a226348355dd4b',\n", - " 'parent_id': '5aba576a34a226348355dcfa',\n", + " 'material': {'composition': 'Cu32', 'elements': ['Cu']},\n", + " 'mdf': {'ingest_date': '2018-10-29T17:47:57.468388Z',\n", + " 'mdf_id': '5bd747cd2ef3880b0f2135d4',\n", + " 'parent_id': '5bd747cd2ef3880b0f2135d1',\n", " 'resource_type': 'record',\n", - " 'scroll_id': 81,\n", - " 'source_name': 'cip_v1'}},\n", - " {'cip_v1': {'bv': '13565.4',\n", - " 'energy': '-2.08',\n", - " 'forcefield': 'ffield.TiO.comb3',\n", - " 'gv': '757.8',\n", - " 'mpid': 'mp-560602',\n", - " 'totenergy': '-1350.9046'},\n", + " 'scroll_id': 3,\n", + " 'source_id': 'cip_v1',\n", + " 'source_name': 'cip',\n", + " 'version': 1}},\n", + " {'files': [{'data_type': 'TIFF image data, big-endian, direntries=16, height=0, bps=8, PhotometricIntepretation=RGB Palette, description=ImageJ=1.50i, width=0',\n", + " 'filename': 'eds mapping 2 - pure ti, 250 nm sio2, 32h, 800c - si map.tif',\n", + " 
'globus': 'globus://e38ee745-6d04-11e5-ba46-22000b92c6ec/MDF/mdf_connect/prod/data/si_ti_oxidation_v1/Early_oxidation_behavior_of_Si-coated_titanium/Dataset/Titanium Oxidation/eds mapping 2 - pure ti, 250 nm sio2, 32h, 800c - si map.tif',\n", + " 'length': 198582,\n", + " 'mime_type': 'image/tiff',\n", + " 'sha512': '4cce4d98d4e5a32fdb4e346e42df05e44d15e95bfbe70521f03ab806c6aac07c3a45e6095543397300cfd6efe4f468f33cd9700a3aea680468612f044be9d318',\n", + " 'url': 'https://e38ee745-6d04-11e5-ba46-22000b92c6ec.e.globus.org/MDF/mdf_connect/prod/data/si_ti_oxidation_v1/Early_oxidation_behavior_of_Si-coated_titanium/Dataset/Titanium Oxidation/eds mapping 2 - pure ti, 250 nm sio2, 32h, 800c - si map.tif'}],\n", + " 'image': {'height': 384, 'megapixels': 0.196608, 'width': 512},\n", + " 'mdf': {'ingest_date': '2018-10-30T21:44:27.860096Z',\n", + " 'mdf_id': '5bd8d0bb2ef38836a62135d3',\n", + " 'parent_id': '5bd8d0bb2ef38836a62135d1',\n", + " 'repositories': ['MCPub',\n", + " 'DOE',\n", + " 'Department of Energy Software Innovation Center for Integrated Multi-Scale Modeling of Structural Metals',\n", + " 'U.S. 
Department of Energy',\n", + " 'Materials Commons',\n", + " 'Center for Predictive Integrated Structural Materials Science',\n", + " 'PRISMS'],\n", + " 'resource_type': 'record',\n", + " 'scroll_id': 2,\n", + " 'source_id': 'si_ti_oxidation_v1',\n", + " 'source_name': 'si_ti_oxidation',\n", + " 'version': 1}},\n", + " {'files': [{'data_type': 'TIFF image data, big-endian, direntries=16, height=0, bps=8, PhotometricIntepretation=RGB Palette, description=ImageJ=1.50i, width=0',\n", + " 'filename': 'eds mapping 1 - ti5si3, 32h, 800c - ti map.tif',\n", + " 'globus': 'globus://e38ee745-6d04-11e5-ba46-22000b92c6ec/MDF/mdf_connect/prod/data/si_ti_oxidation_v1/Early_oxidation_behavior_of_Si-coated_titanium/Dataset/Titanium Oxidation/eds mapping 1 - ti5si3, 32h, 800c - ti map.tif',\n", + " 'length': 198438,\n", + " 'mime_type': 'image/tiff',\n", + " 'sha512': 'a25127f8789c4be2f8277f153102ae3c85e78d374cf6ed6a3346afb29e5625b4d5539a75c8124bc32db183bcd9cb0c6062558f0ed8480ef16a89fbf3190b674d',\n", + " 'url': 'https://e38ee745-6d04-11e5-ba46-22000b92c6ec.e.globus.org/MDF/mdf_connect/prod/data/si_ti_oxidation_v1/Early_oxidation_behavior_of_Si-coated_titanium/Dataset/Titanium Oxidation/eds mapping 1 - ti5si3, 32h, 800c - ti map.tif'}],\n", + " 'image': {'height': 384, 'megapixels': 0.196608, 'width': 512},\n", + " 'mdf': {'ingest_date': '2018-10-30T21:44:27.860096Z',\n", + " 'mdf_id': '5bd8d0bb2ef38836a62135d8',\n", + " 'parent_id': '5bd8d0bb2ef38836a62135d1',\n", + " 'repositories': ['MCPub',\n", + " 'DOE',\n", + " 'Department of Energy Software Innovation Center for Integrated Multi-Scale Modeling of Structural Metals',\n", + " 'U.S. 
Department of Energy',\n", + " 'Materials Commons',\n", + " 'Center for Predictive Integrated Structural Materials Science',\n", + " 'PRISMS'],\n", + " 'resource_type': 'record',\n", + " 'scroll_id': 7,\n", + " 'source_id': 'si_ti_oxidation_v1',\n", + " 'source_name': 'si_ti_oxidation',\n", + " 'version': 1}},\n", + " {'files': [{'data_type': 'TIFF image data, little-endian, direntries=17, height=192, bps=8, compression=none, PhotometricIntepretation=RGB Palette, orientation=upper-left, width=256',\n", + " 'filename': 'eds mapping 1 - pure ti, 250 nm si, 64h, 800c - si map.tif',\n", + " 'globus': 'globus://e38ee745-6d04-11e5-ba46-22000b92c6ec/MDF/mdf_connect/prod/data/si_ti_oxidation_v1/Early_oxidation_behavior_of_Si-coated_titanium/Dataset/Titanium Oxidation/eds mapping 1 - pure ti, 250 nm si, 64h, 800c - si map.tif',\n", + " 'length': 51186,\n", + " 'mime_type': 'image/tiff',\n", + " 'sha512': 'c09e04fdff3b905d6e5b5f8671e88bd9c10ef612b240060f363957631a969e9052bb838cb1700bcc4a3d37e70f14774e840165cee79c606650aa2c371020a170',\n", + " 'url': 'https://e38ee745-6d04-11e5-ba46-22000b92c6ec.e.globus.org/MDF/mdf_connect/prod/data/si_ti_oxidation_v1/Early_oxidation_behavior_of_Si-coated_titanium/Dataset/Titanium Oxidation/eds mapping 1 - pure ti, 250 nm si, 64h, 800c - si map.tif'}],\n", + " 'image': {'height': 192, 'megapixels': 0.049152, 'width': 256},\n", + " 'mdf': {'ingest_date': '2018-10-30T21:44:27.860096Z',\n", + " 'mdf_id': '5bd8d0bb2ef38836a62135db',\n", + " 'parent_id': '5bd8d0bb2ef38836a62135d1',\n", + " 'repositories': ['MCPub',\n", + " 'DOE',\n", + " 'Department of Energy Software Innovation Center for Integrated Multi-Scale Modeling of Structural Metals',\n", + " 'U.S. 
Department of Energy',\n", + " 'Materials Commons',\n", + " 'Center for Predictive Integrated Structural Materials Science',\n", + " 'PRISMS'],\n", + " 'resource_type': 'record',\n", + " 'scroll_id': 10,\n", + " 'source_id': 'si_ti_oxidation_v1',\n", + " 'source_name': 'si_ti_oxidation',\n", + " 'version': 1}},\n", + " {'crystal_structure': {'number_of_atoms': 52.0,\n", + " 'space_group_number': 1,\n", + " 'stoichiometry': 'A',\n", + " 'volume': 874.9719553644692},\n", + " 'files': [{'data_type': 'XML 1.0 document, ASCII text, with very long lines',\n", + " 'filename': 'vasprun.xml',\n", + " 'globus': 'globus://e38ee745-6d04-11e5-ba46-22000b92c6ec/MDF/mdf_connect/prod/data/sluschi_v1/sluschi/Dir_VolSearch/vasprun.xml',\n", + " 'length': 952785,\n", + " 'mime_type': 'text/xml',\n", + " 'sha512': '7bbebd8e6d61acabdd7970e7a9a546e3d17a18acc625c2ee5d0040f55107f29760e24432a9c5d6f63083b78d1afdbdba49f9147963660d7bac0226a469b015b5',\n", + " 'url': 'https://e38ee745-6d04-11e5-ba46-22000b92c6ec.e.globus.org/MDF/mdf_connect/prod/data/sluschi_v1/sluschi/Dir_VolSearch/vasprun.xml'}],\n", + " 'material': {'composition': 'Al52', 'elements': ['Al']},\n", + " 'mdf': {'ingest_date': '2018-11-06T16:26:56.068991Z',\n", + " 'mdf_id': '5be1c0d02ef3882dbe74efe3',\n", + " 'parent_id': '5be1c0d02ef3882dbe74efdf',\n", + " 'resource_type': 'record',\n", + " 'scroll_id': 4,\n", + " 'source_id': 'sluschi_v1',\n", + " 'source_name': 'sluschi',\n", + " 'version': 1}},\n", + " {'crystal_structure': {'number_of_atoms': 52.0,\n", + " 'space_group_number': 1,\n", + " 'stoichiometry': 'A',\n", + " 'volume': 869.0963548140206},\n", + " 'files': [{'data_type': 'gzip compressed data, was \"OUTCAR\", last modified: Sat Dec 5 23:59:16 2015, from Unix',\n", + " 'filename': 'OUTCAR.gz',\n", + " 'globus': 'globus://e38ee745-6d04-11e5-ba46-22000b92c6ec/MDF/mdf_connect/prod/data/sluschi_v1/sluschi/Dir_VolSearch/3/OUTCAR.gz',\n", + " 'length': 1624555,\n", + " 'mime_type': 'application/gzip',\n", + " 'sha512': 
'22e258209a806cc4cce71bcb48e089e6a3ea5174e1d8833df19b14fd126dcd32dea9fc18836b1e9d4a4cdee329117092a5b6d9c6e02c7896342f68d5fd38ca32',\n", + " 'url': 'https://e38ee745-6d04-11e5-ba46-22000b92c6ec.e.globus.org/MDF/mdf_connect/prod/data/sluschi_v1/sluschi/Dir_VolSearch/3/OUTCAR.gz'}],\n", + " 'material': {'composition': 'Al52', 'elements': ['Al']},\n", + " 'mdf': {'ingest_date': '2018-11-06T16:26:56.068991Z',\n", + " 'mdf_id': '5be1c0d02ef3882dbe74efe6',\n", + " 'parent_id': '5be1c0d02ef3882dbe74efdf',\n", + " 'resource_type': 'record',\n", + " 'scroll_id': 7,\n", + " 'source_id': 'sluschi_v1',\n", + " 'source_name': 'sluschi',\n", + " 'version': 1}},\n", + " {'files': [{'data_type': 'TIFF image data, big-endian, direntries=14, height=0, bps=1, compression=none, PhotometricIntepretation=BlackIsZero, width=0',\n", + " 'filename': 'V_Ds.tif',\n", + " 'globus': 'globus://e38ee745-6d04-11e5-ba46-22000b92c6ec/MDF/mdf_connect/prod/data/surface_diffusion_bcc011_v1/bcc011/bcc011/figures/V_Ds.tif',\n", + " 'length': 579990,\n", + " 'mime_type': 'image/tiff',\n", + " 'sha512': 'db5b1008205a8e3acb627ee42d33efa80614ce0de75a23c50bf772458a6ff995d9841559d1f296d5e50114413a124893f8879da3b771d2adbe2b04fb066bff27',\n", + " 'url': 'https://e38ee745-6d04-11e5-ba46-22000b92c6ec.e.globus.org/MDF/mdf_connect/prod/data/surface_diffusion_bcc011_v1/bcc011/bcc011/figures/V_Ds.tif'}],\n", + " 'image': {'height': 2056, 'megapixels': 4.506752, 'width': 2192},\n", + " 'mdf': {'ingest_date': '2018-11-02T19:44:46.339540Z',\n", + " 'mdf_id': '5bdca92e2ef3881fbd3b18f1',\n", + " 'parent_id': '5bdca92e2ef3881fbd3b18f0',\n", + " 'resource_type': 'record',\n", + " 'scroll_id': 1,\n", + " 'source_id': 'surface_diffusion_bcc011_v1',\n", + " 'source_name': 'surface_diffusion_bcc011',\n", + " 'version': 1}},\n", + " {'files': [{'data_type': 'TIFF image data, big-endian',\n", + " 'filename': 'V_Deff.tif',\n", + " 'globus': 
'globus://e38ee745-6d04-11e5-ba46-22000b92c6ec/MDF/mdf_connect/prod/data/surface_diffusion_bcc011_v1/bcc011/bcc011/figures/V_Deff.tif',\n", + " 'length': 14801100,\n", + " 'mime_type': 'image/tiff',\n", + " 'sha512': 'd3696c48de329238b2440ac4679df73f8e680b0f24df7cadda6fc2c2750cd87b34095216c80a0e41281d43e142a1c5330f794bece18fa138ae3b7d62a19ab0cd',\n", + " 'url': 'https://e38ee745-6d04-11e5-ba46-22000b92c6ec.e.globus.org/MDF/mdf_connect/prod/data/surface_diffusion_bcc011_v1/bcc011/bcc011/figures/V_Deff.tif'}],\n", + " 'image': {'height': 2112, 'megapixels': 4.933632, 'width': 2336},\n", + " 'mdf': {'ingest_date': '2018-11-02T19:44:46.339540Z',\n", + " 'mdf_id': '5bdca92e2ef3881fbd3b18f3',\n", + " 'parent_id': '5bdca92e2ef3881fbd3b18f0',\n", + " 'resource_type': 'record',\n", + " 'scroll_id': 3,\n", + " 'source_id': 'surface_diffusion_bcc011_v1',\n", + " 'source_name': 'surface_diffusion_bcc011',\n", + " 'version': 1}},\n", + " {'files': [{'data_type': 'TIFF image data, big-endian, direntries=17, height=0, bps=8, compression=none, PhotometricIntepretation=RGB Palette, width=0',\n", + " 'filename': 'bcc_univ.tiff',\n", + " 'globus': 'globus://e38ee745-6d04-11e5-ba46-22000b92c6ec/MDF/mdf_connect/prod/data/surface_diffusion_bcc011_v1/bcc011/bcc011/figures/bcc_univ.tiff',\n", + " 'length': 4656670,\n", + " 'mime_type': 'image/tiff',\n", + " 'sha512': '3bd43cbec6ec90db04d841c66a28b627ba2e5674cb55be5fd66a2b2a956bed409a8e6442c3269da01a98c73e0dbeef3dfb3b5b92e4e0557259201a538b4be3c4',\n", + " 'url': 'https://e38ee745-6d04-11e5-ba46-22000b92c6ec.e.globus.org/MDF/mdf_connect/prod/data/surface_diffusion_bcc011_v1/bcc011/bcc011/figures/bcc_univ.tiff'}],\n", + " 'image': {'height': 2112, 'megapixels': 4.654848, 'width': 2204},\n", + " 'mdf': {'ingest_date': '2018-11-02T19:44:46.339540Z',\n", + " 'mdf_id': '5bdca92e2ef3881fbd3b18f4',\n", + " 'parent_id': '5bdca92e2ef3881fbd3b18f0',\n", + " 'resource_type': 'record',\n", + " 'scroll_id': 4,\n", + " 'source_id': 
'surface_diffusion_bcc011_v1',\n", + " 'source_name': 'surface_diffusion_bcc011',\n", + " 'version': 1}}]" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mdf.search(limit=10)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### exclusive_match\n", + "If you want a key to have _exactly_ a certain value, use `exclusive_match()`. This is most useful in the case of lists." + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[{'cip': {'bv': '120859.5',\n", + " 'energy': '-0.73',\n", + " 'forcefield': 'ffield.comb',\n", + " 'gv': '-555602.4',\n", + " 'mpid': 'mp-973916',\n", + " 'totenergy': '-747.15009'},\n", " 'files': [{'data_type': 'ASCII text, with very long lines, with no line terminators',\n", " 'filename': 'classical_interatomic_potentials.json',\n", " 'globus': 'globus://e38ee745-6d04-11e5-ba46-22000b92c6ec/MDF/mdf_connect/prod/data/cip_v1/classical_interatomic_potentials.json',\n", @@ -382,19 +702,21 @@ " 'mime_type': 'text/plain',\n", " 'sha512': '96635ee0c15d1d0187b18805653a02b1a6dfa5648db82153467045de18adcc08c753e2897d2b48a78a2167a442219e9aeff6b1103732c2158facac8fa4911b33',\n", " 'url': 'https://e38ee745-6d04-11e5-ba46-22000b92c6ec.e.globus.org/MDF/mdf_connect/prod/data/cip_v1/classical_interatomic_potentials.json'}],\n", - " 'material': {'composition': 'O648', 'elements': ['O']},\n", - " 'mdf': {'ingest_date': '2018-03-27T14:38:34.068719Z',\n", - " 'mdf_id': '5aba576a34a226348355dd4d',\n", - " 'parent_id': '5aba576a34a226348355dcfa',\n", + " 'material': {'composition': 'O1024', 'elements': ['O']},\n", + " 'mdf': {'ingest_date': '2018-10-29T17:47:57.468388Z',\n", + " 'mdf_id': '5bd747cf2ef3880b0f213a21',\n", + " 'parent_id': '5bd747cd2ef3880b0f2135d1',\n", " 'resource_type': 'record',\n", - " 'scroll_id': 83,\n", - " 'source_name': 'cip_v1'}},\n", - " {'cip_v1': {'bv': '0.0',\n", - " 
'energy': '-1.45',\n", + " 'scroll_id': 1104,\n", + " 'source_id': 'cip_v1',\n", + " 'source_name': 'cip',\n", + " 'version': 1}},\n", + " {'cip': {'bv': '0.0',\n", + " 'energy': '-1.2',\n", " 'forcefield': 'SiO.tersoff',\n", " 'gv': '0.0',\n", - " 'mpid': 'mp-560602',\n", - " 'totenergy': '-277.97382'},\n", + " 'mpid': 'mp-12957',\n", + " 'totenergy': '-153.72849'},\n", " 'files': [{'data_type': 'ASCII text, with very long lines, with no line terminators',\n", " 'filename': 'classical_interatomic_potentials.json',\n", " 'globus': 'globus://e38ee745-6d04-11e5-ba46-22000b92c6ec/MDF/mdf_connect/prod/data/cip_v1/classical_interatomic_potentials.json',\n", @@ -402,19 +724,43 @@ " 'mime_type': 'text/plain',\n", " 'sha512': '96635ee0c15d1d0187b18805653a02b1a6dfa5648db82153467045de18adcc08c753e2897d2b48a78a2167a442219e9aeff6b1103732c2158facac8fa4911b33',\n", " 'url': 'https://e38ee745-6d04-11e5-ba46-22000b92c6ec.e.globus.org/MDF/mdf_connect/prod/data/cip_v1/classical_interatomic_potentials.json'}],\n", - " 'material': {'composition': 'O192', 'elements': ['O']},\n", - " 'mdf': {'ingest_date': '2018-03-27T14:38:34.068719Z',\n", - " 'mdf_id': '5aba576a34a226348355de1f',\n", - " 'parent_id': '5aba576a34a226348355dcfa',\n", + " 'material': {'composition': 'O128', 'elements': ['O']},\n", + " 'mdf': {'ingest_date': '2018-10-29T17:47:57.468388Z',\n", + " 'mdf_id': '5bd747ce2ef3880b0f2136f3',\n", + " 'parent_id': '5bd747cd2ef3880b0f2135d1',\n", " 'resource_type': 'record',\n", - " 'scroll_id': 293,\n", - " 'source_name': 'cip_v1'}},\n", - " {'cip_v1': {'bv': '0.0',\n", - " 'energy': '-1.38',\n", + " 'scroll_id': 290,\n", + " 'source_id': 'cip_v1',\n", + " 'source_name': 'cip',\n", + " 'version': 1}},\n", + " {'cip': {'bv': '0.0',\n", + " 'energy': '-1.24',\n", " 'forcefield': 'SiO.tersoff',\n", " 'gv': '0.0',\n", - " 'mpid': 'mp-607540',\n", - " 'totenergy': '-22.135599'},\n", + " 'mpid': 'mp-611836',\n", + " 'totenergy': '-39.651137'},\n", + " 'files': [{'data_type': 'ASCII text, 
with very long lines, with no line terminators',\n", + " 'filename': 'classical_interatomic_potentials.json',\n", + " 'globus': 'globus://e38ee745-6d04-11e5-ba46-22000b92c6ec/MDF/mdf_connect/prod/data/cip_v1/classical_interatomic_potentials.json',\n", + " 'length': 1841203,\n", + " 'mime_type': 'text/plain',\n", + " 'sha512': '96635ee0c15d1d0187b18805653a02b1a6dfa5648db82153467045de18adcc08c753e2897d2b48a78a2167a442219e9aeff6b1103732c2158facac8fa4911b33',\n", + " 'url': 'https://e38ee745-6d04-11e5-ba46-22000b92c6ec.e.globus.org/MDF/mdf_connect/prod/data/cip_v1/classical_interatomic_potentials.json'}],\n", + " 'material': {'composition': 'O32', 'elements': ['O']},\n", + " 'mdf': {'ingest_date': '2018-10-29T17:47:57.468388Z',\n", + " 'mdf_id': '5bd747ce2ef3880b0f2136f5',\n", + " 'parent_id': '5bd747cd2ef3880b0f2135d1',\n", + " 'resource_type': 'record',\n", + " 'scroll_id': 292,\n", + " 'source_id': 'cip_v1',\n", + " 'source_name': 'cip',\n", + " 'version': 1}},\n", + " {'cip': {'bv': '0.0',\n", + " 'energy': '-1.2',\n", + " 'forcefield': 'SiO.tersoff',\n", + " 'gv': '0.0',\n", + " 'mpid': 'mp-973916',\n", + " 'totenergy': '-153.4653'},\n", " 'files': [{'data_type': 'ASCII text, with very long lines, with no line terminators',\n", " 'filename': 'classical_interatomic_potentials.json',\n", " 'globus': 'globus://e38ee745-6d04-11e5-ba46-22000b92c6ec/MDF/mdf_connect/prod/data/cip_v1/classical_interatomic_potentials.json',\n", @@ -422,19 +768,21 @@ " 'mime_type': 'text/plain',\n", " 'sha512': '96635ee0c15d1d0187b18805653a02b1a6dfa5648db82153467045de18adcc08c753e2897d2b48a78a2167a442219e9aeff6b1103732c2158facac8fa4911b33',\n", " 'url': 'https://e38ee745-6d04-11e5-ba46-22000b92c6ec.e.globus.org/MDF/mdf_connect/prod/data/cip_v1/classical_interatomic_potentials.json'}],\n", - " 'material': {'composition': 'O16', 'elements': ['O']},\n", - " 'mdf': {'ingest_date': '2018-03-27T14:38:34.068719Z',\n", - " 'mdf_id': '5aba576a34a226348355de20',\n", - " 'parent_id': 
'5aba576a34a226348355dcfa',\n", + " 'material': {'composition': 'O128', 'elements': ['O']},\n", + " 'mdf': {'ingest_date': '2018-10-29T17:47:57.468388Z',\n", + " 'mdf_id': '5bd747ce2ef3880b0f2136f8',\n", + " 'parent_id': '5bd747cd2ef3880b0f2135d1',\n", " 'resource_type': 'record',\n", - " 'scroll_id': 294,\n", - " 'source_name': 'cip_v1'}},\n", - " {'cip_v1': {'bv': '0.0',\n", + " 'scroll_id': 295,\n", + " 'source_id': 'cip_v1',\n", + " 'source_name': 'cip',\n", + " 'version': 1}},\n", + " {'cip': {'bv': '0.0',\n", " 'energy': '-2.58',\n", - " 'forcefield': 'ffield.CuOCH.comb3',\n", + " 'forcefield': 'ffield.TiO.comb3',\n", " 'gv': '0.0',\n", - " 'mpid': 'mp-610917',\n", - " 'totenergy': '-991.44286'},\n", + " 'mpid': 'mp-611836',\n", + " 'totenergy': '-1032.753'},\n", " 'files': [{'data_type': 'ASCII text, with very long lines, with no line terminators',\n", " 'filename': 'classical_interatomic_potentials.json',\n", " 'globus': 'globus://e38ee745-6d04-11e5-ba46-22000b92c6ec/MDF/mdf_connect/prod/data/cip_v1/classical_interatomic_potentials.json',\n", @@ -442,16 +790,18 @@ " 'mime_type': 'text/plain',\n", " 'sha512': '96635ee0c15d1d0187b18805653a02b1a6dfa5648db82153467045de18adcc08c753e2897d2b48a78a2167a442219e9aeff6b1103732c2158facac8fa4911b33',\n", " 'url': 'https://e38ee745-6d04-11e5-ba46-22000b92c6ec.e.globus.org/MDF/mdf_connect/prod/data/cip_v1/classical_interatomic_potentials.json'}],\n", - " 'material': {'composition': 'O384', 'elements': ['O']},\n", - " 'mdf': {'ingest_date': '2018-03-27T14:38:34.068719Z',\n", - " 'mdf_id': '5aba576c34a226348355e0b0',\n", - " 'parent_id': '5aba576a34a226348355dcfa',\n", + " 'material': {'composition': 'O400', 'elements': ['O']},\n", + " 'mdf': {'ingest_date': '2018-10-29T17:47:57.468388Z',\n", + " 'mdf_id': '5bd747cd2ef3880b0f213623',\n", + " 'parent_id': '5bd747cd2ef3880b0f2135d1',\n", " 'resource_type': 'record',\n", - " 'scroll_id': 950,\n", - " 'source_name': 'cip_v1'}},\n", - " {'cip_v1': {'bv': '13565.4',\n", + " 
'scroll_id': 82,\n", + " 'source_id': 'cip_v1',\n", + " 'source_name': 'cip',\n", + " 'version': 1}},\n", + " {'cip': {'bv': '13565.4',\n", " 'energy': '-2.08',\n", - " 'forcefield': 'ffield.CuOCH.comb3',\n", + " 'forcefield': 'ffield.TiO.comb3',\n", " 'gv': '757.8',\n", " 'mpid': 'mp-560602',\n", " 'totenergy': '-1350.9046'},\n", @@ -463,15 +813,17 @@ " 'sha512': '96635ee0c15d1d0187b18805653a02b1a6dfa5648db82153467045de18adcc08c753e2897d2b48a78a2167a442219e9aeff6b1103732c2158facac8fa4911b33',\n", " 'url': 'https://e38ee745-6d04-11e5-ba46-22000b92c6ec.e.globus.org/MDF/mdf_connect/prod/data/cip_v1/classical_interatomic_potentials.json'}],\n", " 'material': {'composition': 'O648', 'elements': ['O']},\n", - " 'mdf': {'ingest_date': '2018-03-27T14:38:34.068719Z',\n", - " 'mdf_id': '5aba576c34a226348355e0b2',\n", - " 'parent_id': '5aba576a34a226348355dcfa',\n", + " 'mdf': {'ingest_date': '2018-10-29T17:47:57.468388Z',\n", + " 'mdf_id': '5bd747cd2ef3880b0f213624',\n", + " 'parent_id': '5bd747cd2ef3880b0f2135d1',\n", " 'resource_type': 'record',\n", - " 'scroll_id': 952,\n", - " 'source_name': 'cip_v1'}},\n", - " {'cip_v1': {'bv': '-613291.1',\n", + " 'scroll_id': 83,\n", + " 'source_id': 'cip_v1',\n", + " 'source_name': 'cip',\n", + " 'version': 1}},\n", + " {'cip': {'bv': '-613291.1',\n", " 'energy': '-1.64',\n", - " 'forcefield': 'ffield.CuOCH.comb3',\n", + " 'forcefield': 'ffield.TiO.comb3',\n", " 'gv': '39619.1',\n", " 'mpid': 'mp-607540',\n", " 'totenergy': '-471.55491'},\n", @@ -483,18 +835,20 @@ " 'sha512': '96635ee0c15d1d0187b18805653a02b1a6dfa5648db82153467045de18adcc08c753e2897d2b48a78a2167a442219e9aeff6b1103732c2158facac8fa4911b33',\n", " 'url': 'https://e38ee745-6d04-11e5-ba46-22000b92c6ec.e.globus.org/MDF/mdf_connect/prod/data/cip_v1/classical_interatomic_potentials.json'}],\n", " 'material': {'composition': 'O288', 'elements': ['O']},\n", - " 'mdf': {'ingest_date': '2018-03-27T14:38:34.068719Z',\n", - " 'mdf_id': '5aba576c34a226348355e0b3',\n", - " 
'parent_id': '5aba576a34a226348355dcfa',\n", + " 'mdf': {'ingest_date': '2018-10-29T17:47:57.468388Z',\n", + " 'mdf_id': '5bd747cd2ef3880b0f213625',\n", + " 'parent_id': '5bd747cd2ef3880b0f2135d1',\n", " 'resource_type': 'record',\n", - " 'scroll_id': 953,\n", - " 'source_name': 'cip_v1'}},\n", - " {'cip_v1': {'bv': '-75.4',\n", - " 'energy': '-2.46',\n", - " 'forcefield': 'ffield.CuOCH.comb3',\n", - " 'gv': '-11.8',\n", - " 'mpid': 'mp-973916',\n", - " 'totenergy': '-2522.4124'},\n", + " 'scroll_id': 84,\n", + " 'source_id': 'cip_v1',\n", + " 'source_name': 'cip',\n", + " 'version': 1}},\n", + " {'cip': {'bv': '11.0',\n", + " 'energy': '-2.47723',\n", + " 'forcefield': 'ffield.reax.cho',\n", + " 'gv': '-1.2',\n", + " 'mpid': 'mp-12957',\n", + " 'totenergy': '-317.1044374'},\n", " 'files': [{'data_type': 'ASCII text, with very long lines, with no line terminators',\n", " 'filename': 'classical_interatomic_potentials.json',\n", " 'globus': 'globus://e38ee745-6d04-11e5-ba46-22000b92c6ec/MDF/mdf_connect/prod/data/cip_v1/classical_interatomic_potentials.json',\n", @@ -502,19 +856,21 @@ " 'mime_type': 'text/plain',\n", " 'sha512': '96635ee0c15d1d0187b18805653a02b1a6dfa5648db82153467045de18adcc08c753e2897d2b48a78a2167a442219e9aeff6b1103732c2158facac8fa4911b33',\n", " 'url': 'https://e38ee745-6d04-11e5-ba46-22000b92c6ec.e.globus.org/MDF/mdf_connect/prod/data/cip_v1/classical_interatomic_potentials.json'}],\n", - " 'material': {'composition': 'O1024', 'elements': ['O']},\n", - " 'mdf': {'ingest_date': '2018-03-27T14:38:34.068719Z',\n", - " 'mdf_id': '5aba576c34a226348355e0b4',\n", - " 'parent_id': '5aba576a34a226348355dcfa',\n", + " 'material': {'composition': 'O128', 'elements': ['O']},\n", + " 'mdf': {'ingest_date': '2018-10-29T17:47:57.468388Z',\n", + " 'mdf_id': '5bd747ce2ef3880b0f21383c',\n", + " 'parent_id': '5bd747cd2ef3880b0f2135d1',\n", " 'resource_type': 'record',\n", - " 'scroll_id': 954,\n", - " 'source_name': 'cip_v1'}},\n", - " {'cip_v1': {'bv': '0.0',\n", - 
" 'energy': '-2.56',\n", - " 'forcefield': 'ffield.comb',\n", + " 'scroll_id': 619,\n", + " 'source_id': 'cip_v1',\n", + " 'source_name': 'cip',\n", + " 'version': 1}},\n", + " {'cip': {'bv': '0.0',\n", + " 'energy': '-2.58',\n", + " 'forcefield': 'ffield.ZnO.comb3',\n", " 'gv': '0.0',\n", - " 'mpid': 'mp-611836',\n", - " 'totenergy': '-1023.0162'},\n", + " 'mpid': 'mp-610917',\n", + " 'totenergy': '-991.44286'},\n", " 'files': [{'data_type': 'ASCII text, with very long lines, with no line terminators',\n", " 'filename': 'classical_interatomic_potentials.json',\n", " 'globus': 'globus://e38ee745-6d04-11e5-ba46-22000b92c6ec/MDF/mdf_connect/prod/data/cip_v1/classical_interatomic_potentials.json',\n", @@ -522,16 +878,40 @@ " 'mime_type': 'text/plain',\n", " 'sha512': '96635ee0c15d1d0187b18805653a02b1a6dfa5648db82153467045de18adcc08c753e2897d2b48a78a2167a442219e9aeff6b1103732c2158facac8fa4911b33',\n", " 'url': 'https://e38ee745-6d04-11e5-ba46-22000b92c6ec.e.globus.org/MDF/mdf_connect/prod/data/cip_v1/classical_interatomic_potentials.json'}],\n", - " 'material': {'composition': 'O400', 'elements': ['O']},\n", - " 'mdf': {'ingest_date': '2018-03-27T14:38:34.068719Z',\n", - " 'mdf_id': '5aba576d34a226348355e147',\n", - " 'parent_id': '5aba576a34a226348355dcfa',\n", + " 'material': {'composition': 'O384', 'elements': ['O']},\n", + " 'mdf': {'ingest_date': '2018-10-29T17:47:57.468388Z',\n", + " 'mdf_id': '5bd747ce2ef3880b0f21386a',\n", + " 'parent_id': '5bd747cd2ef3880b0f2135d1',\n", " 'resource_type': 'record',\n", - " 'scroll_id': 1101,\n", - " 'source_name': 'cip_v1'}}]" + " 'scroll_id': 665,\n", + " 'source_id': 'cip_v1',\n", + " 'source_name': 'cip',\n", + " 'version': 1}},\n", + " {'cip': {'bv': '0.0',\n", + " 'energy': '-2.58',\n", + " 'forcefield': 'ffield.comb3.NiAlO',\n", + " 'gv': '0.0',\n", + " 'mpid': 'mp-610917',\n", + " 'totenergy': '-991.44286'},\n", + " 'files': [{'data_type': 'ASCII text, with very long lines, with no line terminators',\n", + " 'filename': 
'classical_interatomic_potentials.json',\n", + " 'globus': 'globus://e38ee745-6d04-11e5-ba46-22000b92c6ec/MDF/mdf_connect/prod/data/cip_v1/classical_interatomic_potentials.json',\n", + " 'length': 1841203,\n", + " 'mime_type': 'text/plain',\n", + " 'sha512': '96635ee0c15d1d0187b18805653a02b1a6dfa5648db82153467045de18adcc08c753e2897d2b48a78a2167a442219e9aeff6b1103732c2158facac8fa4911b33',\n", + " 'url': 'https://e38ee745-6d04-11e5-ba46-22000b92c6ec.e.globus.org/MDF/mdf_connect/prod/data/cip_v1/classical_interatomic_potentials.json'}],\n", + " 'material': {'composition': 'O384', 'elements': ['O']},\n", + " 'mdf': {'ingest_date': '2018-10-29T17:47:57.468388Z',\n", + " 'mdf_id': '5bd747ce2ef3880b0f2138a0',\n", + " 'parent_id': '5bd747cd2ef3880b0f2135d1',\n", + " 'resource_type': 'record',\n", + " 'scroll_id': 719,\n", + " 'source_id': 'cip_v1',\n", + " 'source_name': 'cip',\n", + " 'version': 1}}]" ] }, - "execution_count": 6, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } diff --git a/mdf_forge/forge.py b/mdf_forge/forge.py index 65c194c..1afd523 100644 --- a/mdf_forge/forge.py +++ b/mdf_forge/forge.py @@ -280,7 +280,7 @@ def reset_query(self): # * Expanded functions # *********************************************** - def exists(self, field, required=True, new_group=False): + def match_exists(self, field, required=True, new_group=False): """Require a field to exist in the results. Matches will have some value in ``field``. @@ -301,7 +301,7 @@ def exists(self, field, required=True, new_group=False): """ return self.match_field(field, "*", required=required, new_group=new_group) - def not_exists(self, field, required=True, new_group=False): + def match_not_exists(self, field, new_group=False): """Require a field to not exist in the results. Matches will not have ``field`` present. @@ -311,8 +311,6 @@ def not_exists(self, field, required=True, new_group=False): using the dot syntax. 
For example, ``"mdf.source_name"`` is the ``source_name`` field of the ``mdf`` dictionary. - required (bool): If ``True``, will add term with ``AND``. - If ``False``, will use ``OR``. **Default:** ``True``. new_group (bool): If ``True``, will separate the term into a new parenthetical group. If ``False``, will not. **Default:** ``False``. @@ -320,7 +318,7 @@ def not_exists(self, field, required=True, new_group=False): Returns: Forge: Self """ - return self.exclude_field(field, "*", required=required, new_group=new_group) + return self.exclude_field(field, "*", new_group=new_group) def match_range(self, field, start=None, stop=None, inclusive=True, required=True, new_group=False): @@ -358,7 +356,7 @@ def match_range(self, field, start=None, stop=None, inclusive=True, stop = "*" # *-* is the same as field exists if start == "*" and stop == "*": - return self.exists(field, required=required, new_group=new_group) + return self.match_exists(field, required=required, new_group=new_group) if inclusive: value = "[" + str(start) + " TO " + str(stop) + "]" @@ -366,8 +364,7 @@ def match_range(self, field, start=None, stop=None, inclusive=True, value = "{" + str(start) + " TO " + str(stop) + "}" return self.match_field(field, value, required=required, new_group=new_group) - def exclude_range(self, field, start="*", stop="*", inclusive=True, - required=True, new_group=False): + def exclude_range(self, field, start="*", stop="*", inclusive=True, new_group=False): """Exclude a ``field:[some range]`` term from the query. Matches will not have any ``value`` in the range in the ``field``. @@ -386,8 +383,6 @@ def exclude_range(self, field, start="*", stop="*", inclusive=True, If ``False``, the ``start`` and ``stop`` values will not be excluded from the search. **Default:** ``True``. - required (bool): If ``True``, will add term with ``AND``. - If ``False``, will use ``OR``. **Default:** ``True``. new_group (bool): If ``True``, will separate the term into a new parenthetical group. 
If ``False``, will not. **Default:** ``False``. @@ -402,7 +397,7 @@ def exclude_range(self, field, start="*", stop="*", inclusive=True, stop = "*" # *-* is the same as field doesn't exist if start == "*" and stop == "*": - return self.not_exists(field, required=required, new_group=new_group) + return self.match_not_exists(field, new_group=new_group) if inclusive: value = "[" + str(start) + " TO " + str(stop) + "]" diff --git a/tests/test_forge.py b/tests/test_forge.py index 4dfcf93..cb04124 100644 --- a/tests/test_forge.py +++ b/tests/test_forge.py @@ -2,7 +2,6 @@ import re import types -import globus_sdk from globus_sdk.exc import SearchAPIError from mdf_forge import forge import mdf_toolbox @@ -239,14 +238,6 @@ def test_query_cleaning(): assert q10.clean_query() == "term OR term2" -# Test properties -def test_forge_properties(): - f = forge.Forge(index="mdf") - assert type(f.search_client) is globus_sdk.SearchClient - assert type(f.transfer_client) is globus_sdk.TransferClient - assert type(f.mdf_authorizer) is globus_sdk.RefreshTokenAuthorizer - - # Sample results for download testing example_result1 = { "mdf": { @@ -355,14 +346,16 @@ def check_field(res, field, regex): if key == "[]": dict_path += "[0]" else: - dict_path += "['{}']".format(key) + dict_path += ".get('{}', {})".format(key, "{}") # If no results, set matches to false all_match = (len(res) > 0) only_match = (len(res) > 0) some_match = False for r in res: vals = eval("r"+dict_path) - if type(vals) is not list: + if vals == {}: + vals = [] + elif type(vals) is not list: vals = [vals] # If a result does not contain the value, no match if regex not in vals and not any([re.search(str(regex), value) for value in vals]): @@ -389,13 +382,6 @@ def check_field(res, field, regex): return -1 -def test_forge_alt_clients(): - f = forge.Forge(index="mdf") - assert isinstance(f.search_client, globus_sdk.SearchClient) - f2 = forge.Forge(index="mdf", clients={"search": globus_sdk.TransferClient()}) - assert 
isinstance(f2.search_client, globus_sdk.TransferClient) - - def test_forge_match_field(): f = forge.Forge(index="mdf") # Basic usage @@ -423,6 +409,20 @@ def test_forge_exclude_field(): assert check_field(res1, "material.elements", "Al") == -1 +def test_forge_match_exists(): + f = forge.Forge(index="mdf") + # Basic usage + f.match_exists("services.citrine") + assert check_field(f.search(), "services.citrine", ".*") == 0 + + +def test_forge_match_not_exists(): + f = forge.Forge(index="mdf") + # Basic usage + f.match_not_exists("services.citrine") + assert check_field(f.search(), "services.citrine", ".*") == -1 + + def test_forge_match_range(): # Single-value use f = forge.Forge(index="mdf") From 26325a09238d34553729c3a0c11d22dbd1a880b7 Mon Sep 17 00:00:00 2001 From: jgaff Date: Fri, 18 Jan 2019 11:53:06 -0600 Subject: [PATCH 5/5] Increment version number --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 3a0baff..8ecd969 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ setup( name='mdf_forge', - version='0.6.5', + version='0.6.6', packages=['mdf_forge'], description='Materials Data Facility python package', long_description=("Forge is the Materials Data Facility Python package"