diff --git a/README.md b/README.md
index f911a95..ce25ae8 100644
--- a/README.md
+++ b/README.md
@@ -30,5 +30,5 @@ If you find a bug or want a feature, feel free to open an issue here on GitHub (
# Support
-This work was performed under financial assistance award 70NANB14H012 from U.S. Department of Commerce, National Institute of Standards and Technology as part of the [Center for Hierarchical Material Design (CHiMaD)](http://chimad.northwestern.edu). This work was also supported by the National Science Foundation as part of the [Midwest Big Data Hub](http://midwestbigdatahub.org) under NSF Award Number: 1636950 "BD Spokes: SPOKE: MIDWEST: Collaborative: Integrative Materials Design (IMaD): Leverage, Innovate, and Disseminate".
+This work was performed under financial assistance award 70NANB14H012 from the U.S. Department of Commerce, National Institute of Standards and Technology as part of the [Center for Hierarchical Materials Design (CHiMaD)](http://chimad.northwestern.edu). This work was additionally performed under financial assistance award 70NANB19H005 from the U.S. Department of Commerce, National Institute of Standards and Technology as part of the Center for Hierarchical Materials Design (CHiMaD). This work was also supported by the National Science Foundation as part of the [Midwest Big Data Hub](http://midwestbigdatahub.org) under NSF Award Number: 1636950 "BD Spokes: SPOKE: MIDWEST: Collaborative: Integrative Materials Design (IMaD): Leverage, Innovate, and Disseminate".
diff --git a/docs/examples/Example_Statistics-MDF_Datasets.ipynb b/docs/examples/Example_Statistics-MDF_Datasets.ipynb
index 2bca79b..e823ee1 100644
--- a/docs/examples/Example_Statistics-MDF_Datasets.ipynb
+++ b/docs/examples/Example_Statistics-MDF_Datasets.ipynb
@@ -41,7 +41,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "100%|██████████| 259/259 [02:55<00:00, 1.48it/s]\n"
+ "100%|██████████| 373/373 [03:21<00:00, 1.85it/s]\n"
]
}
],
@@ -51,7 +51,7 @@
"# Now, let's pull out the source_name, title, and number of records for each dataset.\n",
"mdf_resources = []\n",
"for r in tqdm(res):\n",
- " q = \"mdf.parent_id:\" + r[\"mdf\"][\"mdf_id\"]\n",
+ " q = \"mdf.resource_type:record AND mdf.source_name:\" + r[\"mdf\"][\"source_name\"]\n",
" x, info = mdf.search(q, advanced=True, info=True, limit=0)\n",
" mdf_resources.append((r['mdf']['source_name'], r['dc'][\"titles\"][0]['title'], info[\"total_query_matches\"]))\n",
"df = pd.DataFrame(mdf_resources, columns=['source_name', 'title', 'num_records'])"
@@ -66,7 +66,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "Number of data resources: 259\n"
+ "Number of data resources: 373\n"
]
},
{
@@ -97,13 +97,19 @@
" \n",
"
\n",
" \n",
+ " 372 | \n",
+ " sstein_stein_bandgap_2019 | \n",
+ " Machine learning of optical properties of mate... | \n",
+ " 478111 | \n",
+ "
\n",
+ " \n",
" 78 | \n",
" oqmd | \n",
" The Open Quantum Materials Database | \n",
" 395348 | \n",
"
\n",
" \n",
- " 167 | \n",
+ " 338 | \n",
" stein_bandgap_2019 | \n",
" Machine learning of optical properties of mate... | \n",
" 180900 | \n",
@@ -121,7 +127,7 @@
" 31488 | \n",
"
\n",
" \n",
- " 82 | \n",
+ " 249 | \n",
" nist_xps_db | \n",
" NIST X-ray Photoelectron Spectroscopy Database | \n",
" 29189 | \n",
@@ -139,7 +145,7 @@
" 19842 | \n",
"
\n",
" \n",
- " 163 | \n",
+ " 330 | \n",
" w_14 | \n",
" Accuracy and transferability of Gaussian appro... | \n",
" 9693 | \n",
@@ -151,7 +157,7 @@
" 3783 | \n",
"
\n",
" \n",
- " 79 | \n",
+ " 246 | \n",
" cip | \n",
" Evaluation and comparison of classical interat... | \n",
" 3291 | \n",
@@ -163,7 +169,7 @@
" 1618 | \n",
"
\n",
" \n",
- " 164 | \n",
+ " 331 | \n",
" surface_crystal_energy | \n",
" Data from: Surface energies of elemental crystals | \n",
" 1216 | \n",
@@ -175,55 +181,49 @@
" 1073 | \n",
"
\n",
" \n",
- " 160 | \n",
+ " 327 | \n",
" mdr_item_1496 | \n",
" Ultrahigh Carbon Steel Micrographs | \n",
" 1007 | \n",
"
\n",
- " \n",
- " 36 | \n",
- " mdr_item_1378 | \n",
- " Fitting database entries for a modified embedd... | \n",
- " 707 | \n",
- "
\n",
" \n",
"\n",
""
],
"text/plain": [
" source_name \\\n",
+ "372 sstein_stein_bandgap_2019 \n",
"78 oqmd \n",
- "167 stein_bandgap_2019 \n",
+ "338 stein_bandgap_2019 \n",
"75 h2o_13 \n",
"74 ab_initio_solute_database \n",
- "82 nist_xps_db \n",
+ "249 nist_xps_db \n",
"4 jarvis \n",
"6 amcs \n",
- "163 w_14 \n",
+ "330 w_14 \n",
"76 bfcc13 \n",
- "79 cip \n",
+ "246 cip \n",
"2 sluschi \n",
- "164 surface_crystal_energy \n",
+ "331 surface_crystal_energy \n",
"5 khazana_polymer \n",
- "160 mdr_item_1496 \n",
- "36 mdr_item_1378 \n",
+ "327 mdr_item_1496 \n",
"\n",
" title num_records \n",
+ "372 Machine learning of optical properties of mate... 478111 \n",
"78 The Open Quantum Materials Database 395348 \n",
- "167 Machine learning of optical properties of mate... 180900 \n",
+ "338 Machine learning of optical properties of mate... 180900 \n",
"75 Machine-learning approach for one- and two-bod... 45482 \n",
"74 High-throughput Ab-initio Dilute Solute Diffus... 31488 \n",
- "82 NIST X-ray Photoelectron Spectroscopy Database 29189 \n",
+ "249 NIST X-ray Photoelectron Spectroscopy Database 29189 \n",
"4 JARVIS - Joint Automated Repository for Variou... 26559 \n",
"6 The American Mineralogist Crystal Structure Da... 19842 \n",
- "163 Accuracy and transferability of Gaussian appro... 9693 \n",
+ "330 Accuracy and transferability of Gaussian appro... 9693 \n",
"76 Cluster expansion made easy with Bayesian comp... 3783 \n",
- "79 Evaluation and comparison of classical interat... 3291 \n",
+ "246 Evaluation and comparison of classical interat... 3291 \n",
"2 Solid and Liquid in Ultra Small Coexistence wi... 1618 \n",
- "164 Data from: Surface energies of elemental crystals 1216 \n",
+ "331 Data from: Surface energies of elemental crystals 1216 \n",
"5 Khazana (Polymer) 1073 \n",
- "160 Ultrahigh Carbon Steel Micrographs 1007 \n",
- "36 Fitting database entries for a modified embedd... 707 "
+ "327 Ultrahigh Carbon Steel Micrographs 1007 "
]
},
"execution_count": 4,
@@ -245,7 +245,7 @@
{
"data": {
"text/plain": [
- "752831"
+ "1230958"
]
},
"execution_count": 5,
diff --git a/docs/tutorials/4-General_Helper_Functions.ipynb b/docs/tutorials/4-General_Helper_Functions.ipynb
index 3a7c2a3..02d67c8 100644
--- a/docs/tutorials/4-General_Helper_Functions.ipynb
+++ b/docs/tutorials/4-General_Helper_Functions.ipynb
@@ -135,27 +135,44 @@
{
"data": {
"text/plain": [
- "{'cip': {'bv': '79.0',\n",
- " 'energy': '-3.36',\n",
- " 'forcefield': 'Al99.eam.alloy',\n",
- " 'gv': '29.4',\n",
- " 'mpid': 'mp-134',\n",
- " 'totenergy': '-107.52'},\n",
- " 'files': [{'data_type': 'ASCII text, with very long lines, with no line terminators',\n",
- " 'filename': 'classical_interatomic_potentials.json',\n",
- " 'globus': 'globus://e38ee745-6d04-11e5-ba46-22000b92c6ec/MDF/mdf_connect/prod/data/cip_v1/classical_interatomic_potentials.json',\n",
- " 'length': 1841203,\n",
+ "{'crystal_structure': {'number_of_atoms': 108.0,\n",
+ " 'space_group_number': 225,\n",
+ " 'stoichiometry': 'A',\n",
+ " 'volume': 1779.162},\n",
+ " 'files': [{'data_type': 'ASCII text',\n",
+ " 'filename': 'INCAR',\n",
+ " 'globus': 'globus://e38ee745-6d04-11e5-ba46-22000b92c6ec/MDF/mdf_connect/prod/data/ab_initio_solute_database_v1-2/data/FCC_solute_AlCu_20140918T204831/perfect_stat/INCAR',\n",
+ " 'length': 169,\n",
" 'mime_type': 'text/plain',\n",
- " 'sha512': '96635ee0c15d1d0187b18805653a02b1a6dfa5648db82153467045de18adcc08c753e2897d2b48a78a2167a442219e9aeff6b1103732c2158facac8fa4911b33',\n",
- " 'url': 'https://e38ee745-6d04-11e5-ba46-22000b92c6ec.e.globus.org/MDF/mdf_connect/prod/data/cip_v1/classical_interatomic_potentials.json'}],\n",
- " 'material': {'composition': 'Al32', 'elements': ['Al']},\n",
- " 'mdf': {'ingest_date': '2018-10-29T17:47:57.468388Z',\n",
- " 'mdf_id': '5bd747cf2ef3880b0f213904',\n",
- " 'parent_id': '5bd747cd2ef3880b0f2135d1',\n",
+ " 'sha512': 'da3b28318b6c8496dda80d81f89176edc55997c2b75dafbcf92fdd8bb6c30d0dc27d2c3cfff8383e541ccd85bd84629da60b1e68da6411b5974f31bd85de0f8a',\n",
+ " 'url': 'https://e38ee745-6d04-11e5-ba46-22000b92c6ec.e.globus.org/MDF/mdf_connect/prod/data/ab_initio_solute_database_v1-2/data/FCC_solute_AlCu_20140918T204831/perfect_stat/INCAR'},\n",
+ " {'data_type': 'ASCII text',\n",
+ " 'filename': 'CONTCAR',\n",
+ " 'globus': 'globus://e38ee745-6d04-11e5-ba46-22000b92c6ec/MDF/mdf_connect/prod/data/ab_initio_solute_database_v1-2/data/FCC_solute_AlCu_20140918T204831/perfect_stat/CONTCAR',\n",
+ " 'length': 3348,\n",
+ " 'mime_type': 'text/plain',\n",
+ " 'sha512': '613498249ad2d01dc3cb4aa37a41bd63ba2c95a599ab0b0cb43f4e30c3f5381a8c2a1404d3c20d24da434b0785189bb603859a99f5abbcc242cc87cc9c3e0ca8',\n",
+ " 'url': 'https://e38ee745-6d04-11e5-ba46-22000b92c6ec.e.globus.org/MDF/mdf_connect/prod/data/ab_initio_solute_database_v1-2/data/FCC_solute_AlCu_20140918T204831/perfect_stat/CONTCAR'},\n",
+ " {'data_type': 'ASCII text',\n",
+ " 'filename': 'KPOINTS',\n",
+ " 'globus': 'globus://e38ee745-6d04-11e5-ba46-22000b92c6ec/MDF/mdf_connect/prod/data/ab_initio_solute_database_v1-2/data/FCC_solute_AlCu_20140918T204831/perfect_stat/KPOINTS',\n",
+ " 'length': 42,\n",
+ " 'mime_type': 'text/plain',\n",
+ " 'sha512': '56f819a7cff23127409c48d69cef684f578600b02b9727fc3ab46aa297bb201b890dabcfc4b232088fb4d0cb938514283986f8eae68a85bdf303960d2f9058dd',\n",
+ " 'url': 'https://e38ee745-6d04-11e5-ba46-22000b92c6ec.e.globus.org/MDF/mdf_connect/prod/data/ab_initio_solute_database_v1-2/data/FCC_solute_AlCu_20140918T204831/perfect_stat/KPOINTS'},\n",
+ " {'data_type': 'ASCII text',\n",
+ " 'filename': 'POSCAR',\n",
+ " 'globus': 'globus://e38ee745-6d04-11e5-ba46-22000b92c6ec/MDF/mdf_connect/prod/data/ab_initio_solute_database_v1-2/data/FCC_solute_AlCu_20140918T204831/perfect_stat/POSCAR',\n",
+ " 'length': 3348,\n",
+ " 'mime_type': 'text/plain',\n",
+ " 'sha512': '613498249ad2d01dc3cb4aa37a41bd63ba2c95a599ab0b0cb43f4e30c3f5381a8c2a1404d3c20d24da434b0785189bb603859a99f5abbcc242cc87cc9c3e0ca8',\n",
+ " 'url': 'https://e38ee745-6d04-11e5-ba46-22000b92c6ec.e.globus.org/MDF/mdf_connect/prod/data/ab_initio_solute_database_v1-2/data/FCC_solute_AlCu_20140918T204831/perfect_stat/POSCAR'}],\n",
+ " 'material': {'composition': 'Al108', 'elements': ['Al']},\n",
+ " 'mdf': {'ingest_date': '2018-11-24T08:12:11.852893Z',\n",
" 'resource_type': 'record',\n",
- " 'scroll_id': 819,\n",
- " 'source_id': 'cip_v1',\n",
- " 'source_name': 'cip',\n",
+ " 'scroll_id': 28093,\n",
+ " 'source_id': 'ab_initio_solute_database_v1.2',\n",
+ " 'source_name': 'ab_initio_solute_database',\n",
" 'version': 1}}"
]
},
@@ -182,7 +199,7 @@
" 'limit': 10,\n",
" 'query': '( NOT mdf.source_name:sluschi AND material.elements:Al AND NOT mdf.source_name:oqmd)',\n",
" 'retries': 0,\n",
- " 'total_query_matches': 14885}"
+ " 'total_query_matches': 14886}"
]
},
"execution_count": 8,
@@ -210,7 +227,7 @@
{
"data": {
"text/plain": [
- ""
+ ""
]
},
"execution_count": 9,
@@ -288,7 +305,7 @@
" 'cip.mpid': 'text',\n",
" 'cip.totenergy': 'text',\n",
" 'crystal_structure.cross_reference.icsd': 'long',\n",
- " 'crystal_structure.number_of_atoms': 'float',\n",
+ " 'crystal_structure.number_of_atoms': 'long',\n",
" 'crystal_structure.space_group_number': 'long',\n",
" 'crystal_structure.stoichiometry': 'text',\n",
" 'crystal_structure.volume': 'float',\n",
@@ -348,6 +365,7 @@
" 'files.mime_type': 'text',\n",
" 'files.sha512': 'text',\n",
" 'files.url': 'text',\n",
+ " 'image.format': 'text',\n",
" 'image.height': 'long',\n",
" 'image.megapixels': 'float',\n",
" 'image.width': 'long',\n",
@@ -359,8 +377,6 @@
" 'jarvis.__custom.id_desc': 'text',\n",
" 'jarvis.__custom.landing_page_desc': 'text',\n",
" 'jarvis.__custom.total_energy_desc': 'text',\n",
- " 'jarvis.band_gap.mbj': 'float',\n",
- " 'jarvis.band_gap.optb88vdw': 'float',\n",
" 'jarvis.bandgap.mbj': 'float',\n",
" 'jarvis.bandgap.optb88vdw': 'float',\n",
" 'jarvis.crossreference.materials_project': 'text',\n",
@@ -375,8 +391,7 @@
" 'material.elements': 'text',\n",
" 'mdf.ingest_date': 'date',\n",
" 'mdf.mdf_id': 'text',\n",
- " 'mdf.parent_id': 'text',\n",
- " 'mdf.repositories': 'text',\n",
+ " 'mdf.organizations': 'text',\n",
" 'mdf.resource_type': 'text',\n",
" 'mdf.scroll_id': 'long',\n",
" 'mdf.source_id': 'text',\n",
@@ -413,7 +428,7 @@
" 'origin.name': 'text',\n",
" 'origin.type': 'text',\n",
" 'services.citrine': 'text',\n",
- " 'services.globus_publish': 'text',\n",
+ " 'services.mdf_publish': 'text',\n",
" 'services.mdf_search': 'text',\n",
" 'services.mrr': 'text'}"
]
@@ -446,8 +461,7 @@
"text/plain": [
"{'mdf.ingest_date': 'date',\n",
" 'mdf.mdf_id': 'text',\n",
- " 'mdf.parent_id': 'text',\n",
- " 'mdf.repositories': 'text',\n",
+ " 'mdf.organizations': 'text',\n",
" 'mdf.resource_type': 'text',\n",
" 'mdf.scroll_id': 'long',\n",
" 'mdf.source_id': 'text',\n",
@@ -544,6 +558,118 @@
"mdf.describe_field(\"record\", field=\"mdf.source_name\", raw=True)"
]
},
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### describe_organization\n",
+ "To learn more about an organization registered with MDF, use `describe_organization()`. This method can tell you more about an organization, including the provided description, homepage, and submission rules. When you call `describe_organization()`, you just pass in the name or alias of an organization (capitalization doesn't matter)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ " Argonne National Laboratory\n",
+ "\taliases: ANL\n",
+ "\tcanonical_name: Argonne National Laboratory\n",
+ "\tdescription: Argonne serves America as a science and energy laboratory distinguished by the breadth of our R&D capabilities in concert with our powerful suite of experimental and computational facilities.\n",
+ "\thomepage: https://www.anl.gov/\n",
+ "\tparent_organizations: None\n",
+ "\tpermission_groups: public\n"
+ ]
+ }
+ ],
+ "source": [
+ "mdf.describe_organization(\"argonne national laboratory\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ " Center for Hierarchical Materials Design\n",
+ "\taliases: CHiMaD\n",
+ "\tcanonical_name: Center for Hierarchical Materials Design\n",
+ "\tdescription: Center for Hierarchical Materials Design (CHiMaD) is a NIST-sponsored center of excellence for advanced materials research focusing on developing the next generation of computational tools, databases and experimental techniques in order to enable the accelerated design of novel materials and their integration to industry, one of the primary goals of the U.S. Government's Materials Genome Initiative (MGI).\n",
+ "\thomepage: http://chimad.northwestern.edu/\n",
+ "\tparent_organizations: National Institute of Standards and Technology\n",
+ "\tpermission_groups: public\n"
+ ]
+ }
+ ],
+ "source": [
+ "mdf.describe_organization(\"CHiMaD\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "You can also get a brief overview of an organization without the technical details by setting `summary=True`. `describe_organization()` also supports the `raw` argument to get results back as a dictionary (`raw` overrides `summary`)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ " National Institute of Standards and Technology\n",
+ "\taliases: NIST\n",
+ "\tdescription: The National Institute of Standards and Technology (NIST) was founded in 1901 and is now part of the U.S. Department of Commerce. NIST is one of the nation's oldest physical science laboratories.\n",
+ "\thomepage: https://www.nist.gov/\n"
+ ]
+ }
+ ],
+ "source": [
+ "mdf.describe_organization(\"NIST\", summary=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "{'error': None,\n",
+ " 'organization': {'aliases': ['NIST MDR', 'MDR'],\n",
+ " 'canonical_name': 'NIST Materials Data Repository',\n",
+ " 'description': 'The National Institute of Standards and Technology has created a materials science data repository as part of an effort in coordination with the Materials Genome Initiative (MGI) to establish data exchange protocols and mechanisms that will foster data sharing and reuse across a wide community of researchers, with the goal of enhancing the quality of materials data and models.',\n",
+ " 'homepage': 'https://materialsdata.nist.gov/',\n",
+ " 'parent_organizations': ['National Institute of Standards and Technology'],\n",
+ " 'permission_groups': ['public']},\n",
+ " 'status_code': 200,\n",
+ " 'success': True}"
+ ]
+ },
+ "execution_count": 19,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "mdf.describe_organization(\"NIST MDR\", raw=True)"
+ ]
+ },
{
"cell_type": "markdown",
"metadata": {},
@@ -561,7 +687,7 @@
},
{
"cell_type": "code",
- "execution_count": 16,
+ "execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
@@ -570,7 +696,7 @@
},
{
"cell_type": "code",
- "execution_count": 17,
+ "execution_count": 21,
"metadata": {
"scrolled": true
},
@@ -578,23 +704,22 @@
{
"data": {
"text/plain": [
- "{'data': {'endpoint_path': 'globus://e38ee745-6d04-11e5-ba46-22000b92c6ec/MDF/mdf_connect/prod/data/mdr_item_772_v1/',\n",
- " 'link': 'https://www.globus.org/app/transfer?origin_id=e38ee745-6d04-11e5-ba46-22000b92c6ec&origin_path=/MDF/mdf_connect/prod/data/mdr_item_772_v1/'},\n",
- " 'dc': {'alternateIdentifiers': [{'alternateIdentifier': 'http://hdl.handle.net/11115/163',\n",
+ "{'data': {'endpoint_path': 'globus://e38ee745-6d04-11e5-ba46-22000b92c6ec/MDF/mdf_connect/prod/data/mdr_item_775_v1/',\n",
+ " 'link': 'https://app.globus.org/file-manager?origin_id=e38ee745-6d04-11e5-ba46-22000b92c6ec&origin_path=/MDF/mdf_connect/prod/data/mdr_item_775_v1/'},\n",
+ " 'dc': {'alternateIdentifiers': [{'alternateIdentifier': 'http://hdl.handle.net/11115/166',\n",
" 'alternateIdentifierType': 'Handle'},\n",
- " {'alternateIdentifier': '772',\n",
+ " {'alternateIdentifier': '775',\n",
" 'alternateIdentifierType': 'NIST DSpace ID'}],\n",
- " 'creators': [{'creatorName': 'Lewandowski and A. Awadallah, J.J.',\n",
- " 'familyName': 'Lewandowski and A. Awadallah',\n",
+ " 'creators': [{'creatorName': 'Valencia and P.N. Quested, J.J.',\n",
+ " 'familyName': 'Valencia and P.N. Quested',\n",
" 'givenName': 'J.J.'}],\n",
" 'publicationYear': '2013',\n",
" 'publisher': 'NIST Materials Data Repository',\n",
" 'resourceType': {'resourceType': 'Dataset',\n",
" 'resourceTypeGeneral': 'Dataset'},\n",
- " 'titles': [{'title': 'Hydrostatic Extrusion of Metals and Alloys'}]},\n",
- " 'mdf': {'ingest_date': '2018-11-15T19:06:23.862425Z',\n",
- " 'mdf_id': '5bedc3af2ef38842b9953c09',\n",
- " 'repositories': ['National Institute of Standards and Technology',\n",
+ " 'titles': [{'title': 'Thermophysical Properties'}]},\n",
+ " 'mdf': {'ingest_date': '2018-11-15T19:09:44.202046Z',\n",
+ " 'organizations': ['National Institute of Standards and Technology',\n",
" 'U.S. Department of Commerce',\n",
" 'DOC',\n",
" 'MDR',\n",
@@ -603,14 +728,14 @@
" 'NIST MDR'],\n",
" 'resource_type': 'dataset',\n",
" 'scroll_id': 0,\n",
- " 'source_id': 'mdr_item_772_v1',\n",
- " 'source_name': 'mdr_item_772',\n",
+ " 'source_id': 'mdr_item_775_v1.1',\n",
+ " 'source_name': 'mdr_item_775',\n",
" 'version': 1},\n",
" 'services': {'mdf_search': 'This dataset was ingested to MDF Search.',\n",
" 'mrr': 'This dataset was registered with the MRR.'}}"
]
},
- "execution_count": 17,
+ "execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
@@ -629,7 +754,7 @@
},
{
"cell_type": "code",
- "execution_count": 18,
+ "execution_count": 22,
"metadata": {
"scrolled": true
},
@@ -637,34 +762,33 @@
{
"data": {
"text/plain": [
- "{'data': {'endpoint_path': 'globus://e38ee745-6d04-11e5-ba46-22000b92c6ec/MDF/mdf_connect/prod/data/khazana_vasp_v4/',\n",
- " 'link': 'https://www.globus.org/app/transfer?origin_id=e38ee745-6d04-11e5-ba46-22000b92c6ec&origin_path=/MDF/mdf_connect/prod/data/khazana_vasp_v4/'},\n",
- " 'dc': {'contributors': [{'affiliations': ['University of Connecticut'],\n",
- " 'contributorName': 'Ramprasad, Rampi',\n",
+ "{'data': {'endpoint_path': 'globus://e38ee745-6d04-11e5-ba46-22000b92c6ec/MDF/mdf_connect/prod/data/schleife_al_channel_v1-1/',\n",
+ " 'link': 'https://app.globus.org/file-manager?origin_id=e38ee745-6d04-11e5-ba46-22000b92c6ec&origin_path=/MDF/mdf_connect/prod/data/schleife_al_channel_v1-1/'},\n",
+ " 'dc': {'contributors': [{'affiliations': ['University of Illinois Urbana-Champaign'],\n",
+ " 'contributorName': 'Schleife, Andre',\n",
" 'contributorType': 'ContactPerson',\n",
- " 'familyName': 'Ramprasad',\n",
- " 'givenName': 'Rampi'}],\n",
- " 'creators': [{'affiliations': ['University of Connecticut'],\n",
- " 'creatorName': 'Ramprasad, Rampi'}],\n",
- " 'dates': [{'date': '2017-08-04T19:25:05.718973Z', 'dateType': 'Collected'}],\n",
- " 'descriptions': [{'description': 'A computational materials knowledgebase',\n",
- " 'descriptionType': 'Other'}],\n",
- " 'publicationYear': '2016',\n",
+ " 'familyName': 'Schleife',\n",
+ " 'givenName': 'Andre'}],\n",
+ " 'creators': [{'affiliations': ['University of Illinois Urbana-Champaign'],\n",
+ " 'creatorName': 'Schleife, Andre',\n",
+ " 'familyName': 'Schleife',\n",
+ " 'givenName': 'Andre'}],\n",
+ " 'dates': [{'date': '2017-10-10T15:45:40.065761Z', 'dateType': 'Collected'}],\n",
+ " 'publicationYear': '2015',\n",
" 'publisher': 'MDF (placeholder)',\n",
" 'resourceType': {'resourceType': 'JSON', 'resourceTypeGeneral': 'Dataset'},\n",
- " 'subjects': [{'subject': 'DFT'}, {'subject': 'VASP'}],\n",
- " 'titles': [{'title': 'Khazana (VASP)'}]},\n",
- " 'mdf': {'ingest_date': '2018-11-05T21:42:40.557765Z',\n",
- " 'mdf_id': '5be0b9502ef388136874efdf',\n",
+ " 'subjects': [{'subject': 'data_link'}],\n",
+ " 'titles': [{'title': 'Schleife Al 256 Channel'}]},\n",
+ " 'mdf': {'ingest_date': '2018-11-30T21:04:03.431302Z',\n",
" 'resource_type': 'dataset',\n",
" 'scroll_id': 0,\n",
- " 'source_id': 'khazana_vasp_v4',\n",
- " 'source_name': 'khazana_vasp',\n",
- " 'version': 4},\n",
+ " 'source_id': 'schleife_al_channel_v1.1',\n",
+ " 'source_name': 'schleife_al_channel',\n",
+ " 'version': 1},\n",
" 'services': {'mdf_search': 'This dataset was ingested to MDF Search.'}}"
]
},
- "execution_count": 18,
+ "execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
@@ -686,7 +810,7 @@
"metadata": {},
"source": [
"### aggregate\n",
- "Queries submitted with `search()` are limited to returning 10,000 results. If this limit is too low, you can use `aggregate()` to retrieve _all_ results from a query, no matter how many. Please be careful with this function, as you can easily accidentally retrieve a very large number of results without meaning to. Consider using `search(your_query, limit=0, info=True)` (see above) first to discover how many results you will get beforehand."
+ "Queries submitted with `search()` are limited to returning 10,000 results. If this limit is too low, you can use `aggregate()` to retrieve _all_ results from a query, no matter how many. Please be careful with this function, as you can easily accidentally retrieve a very large number of results without meaning to. Consider using `search(your_query, limit=0, info=True)` first to discover how many results you will get beforehand (see [Query info](#Query-info) above for more information)."
]
},
{
@@ -698,7 +822,7 @@
},
{
"cell_type": "code",
- "execution_count": 19,
+ "execution_count": 23,
"metadata": {},
"outputs": [
{
@@ -724,7 +848,7 @@
},
{
"cell_type": "code",
- "execution_count": 20,
+ "execution_count": 24,
"metadata": {},
"outputs": [
{
diff --git a/docs/tutorials/5-Field-Specific_Helper_Functions.ipynb b/docs/tutorials/5-Field-Specific_Helper_Functions.ipynb
index 5b27731..68a598f 100644
--- a/docs/tutorials/5-Field-Specific_Helper_Functions.ipynb
+++ b/docs/tutorials/5-Field-Specific_Helper_Functions.ipynb
@@ -50,7 +50,7 @@
{
"data": {
"text/plain": [
- ""
+ ""
]
},
"execution_count": 3,
@@ -74,33 +74,31 @@
"text/plain": [
"{'crystal_structure': {'number_of_atoms': 4,\n",
" 'space_group_number': 225,\n",
- " 'volume': 93.2374},\n",
+ " 'volume': 66.7282},\n",
" 'dft': {'converged': True,\n",
- " 'cutoff_energy': 520.0,\n",
+ " 'cutoff_energy': 247.5,\n",
" 'exchange_correlation_functional': 'PBE'},\n",
" 'files': [{'data_type': 'ASCII text, with very long lines, with no line terminators',\n",
- " 'filename': '332513.json',\n",
- " 'globus': 'globus://e38ee745-6d04-11e5-ba46-22000b92c6ec/MDF/mdf_connect/prod/data/oqmd_v13/332513.json',\n",
- " 'length': 10403,\n",
+ " 'filename': '670930.json',\n",
+ " 'globus': 'globus://e38ee745-6d04-11e5-ba46-22000b92c6ec/MDF/mdf_connect/prod/data/oqmd_v13/670930.json',\n",
+ " 'length': 11138,\n",
" 'mime_type': 'text/plain',\n",
- " 'sha512': 'a4732793bff687ea39f43f741d6e3f380bb3608d38114420569282005efaee92e21799e6d6c3a4c63d7cf8b42d6388a81bc992d2a71709fd4d4f05cb3e6bb077',\n",
- " 'url': 'https://e38ee745-6d04-11e5-ba46-22000b92c6ec.e.globus.org/MDF/mdf_connect/prod/data/oqmd_v13/332513.json'}],\n",
- " 'material': {'composition': 'Mn1Pa1Pm2', 'elements': ['Mn', 'Pa', 'Pm']},\n",
+ " 'sha512': 'a87051ace78249013ba17d331586f2470868e5e573e0bce347d22205f2a224a25772690ca8c7b78dce1743c98a2d585a09d74a062bc05bd8331de77d4c40d54d',\n",
+ " 'url': 'https://e38ee745-6d04-11e5-ba46-22000b92c6ec.e.globus.org/MDF/mdf_connect/prod/data/oqmd_v13/670930.json'}],\n",
+ " 'material': {'composition': 'Be2Lu1Sn1', 'elements': ['Be', 'Lu', 'Sn']},\n",
" 'mdf': {'ingest_date': '2018-11-09T19:44:43.687681Z',\n",
- " 'mdf_id': '5be5e3af2ef388650efd6705',\n",
+ " 'mdf_id': '5be5e3af2ef388650efd6708',\n",
" 'parent_id': '5be5e3ab2ef388650efd6704',\n",
" 'resource_type': 'record',\n",
- " 'scroll_id': 1,\n",
+ " 'scroll_id': 4,\n",
" 'source_id': 'oqmd_v13',\n",
" 'source_name': 'oqmd',\n",
" 'version': 13},\n",
" 'oqmd': {'band_gap': {'units': 'eV', 'value': 0.0},\n",
- " 'configuration': 'standard',\n",
- " 'delta_e': {'units': 'eV/atom', 'value': 0.729781857801725},\n",
+ " 'configuration': 'fine_relax',\n",
" 'magnetic_moment': {'units': 'bohr/atom'},\n",
- " 'stability': {'units': 'eV/atom', 'value': 0.729382215301725},\n",
- " 'total_energy': {'units': 'eV/atom', 'value': -6.27350557},\n",
- " 'volume_pa': {'units': 'angstrom^3/atom', 'value': 23.3093}}}"
+ " 'total_energy': {'units': 'eV/atom', 'value': -3.7894355225},\n",
+ " 'volume_pa': {'units': 'angstrom^3/atom', 'value': 16.6821}}}"
]
},
"execution_count": 4,
@@ -117,8 +115,8 @@
"cell_type": "markdown",
"metadata": {},
"source": [
- "### match_elements\n",
- "`match_elements()` matches values against the `\"materials.elements\"` field. It is equivalent to chaining `match_field(\"materials.elements\", value)` for each value."
+ "### match_records\n",
+ "`match_records()` matches one or more specific records from a specific dataset. It matches the `source_name` against the `\"mdf.source_name\"` field and the `scroll_id` values against the `\"mdf.scroll_id\"` field."
]
},
{
@@ -129,7 +127,7 @@
{
"data": {
"text/plain": [
- ""
+ ""
]
},
"execution_count": 5,
@@ -138,7 +136,7 @@
}
],
"source": [
- "mdf.match_elements([\"Al\", \"Cu\"])"
+ "mdf.match_records(\"cip\", [1003, 1006])"
]
},
{
@@ -151,12 +149,12 @@
{
"data": {
"text/plain": [
- "{'cip': {'bv': '121.8',\n",
- " 'energy': '-3.63',\n",
- " 'forcefield': 'AlCu.eam.alloy',\n",
- " 'gv': '43.5',\n",
- " 'mpid': 'mp-12777',\n",
- " 'totenergy': '-464.16852'},\n",
+ "{'cip': {'bv': '300.9',\n",
+ " 'energy': '-2.67',\n",
+ " 'forcefield': 'ffield.CuOCH.comb3',\n",
+ " 'gv': '103.8',\n",
+ " 'mpid': 'mp-703914',\n",
+ " 'totenergy': '-1538.4116'},\n",
" 'files': [{'data_type': 'ASCII text, with very long lines, with no line terminators',\n",
" 'filename': 'classical_interatomic_potentials.json',\n",
" 'globus': 'globus://e38ee745-6d04-11e5-ba46-22000b92c6ec/MDF/mdf_connect/prod/data/cip_v1/classical_interatomic_potentials.json',\n",
@@ -164,12 +162,12 @@
" 'mime_type': 'text/plain',\n",
" 'sha512': '96635ee0c15d1d0187b18805653a02b1a6dfa5648db82153467045de18adcc08c753e2897d2b48a78a2167a442219e9aeff6b1103732c2158facac8fa4911b33',\n",
" 'url': 'https://e38ee745-6d04-11e5-ba46-22000b92c6ec.e.globus.org/MDF/mdf_connect/prod/data/cip_v1/classical_interatomic_potentials.json'}],\n",
- " 'material': {'composition': 'Al32 Cu96', 'elements': ['Al', 'Cu']},\n",
+ " 'material': {'composition': 'Cu192 O384', 'elements': ['Cu', 'O']},\n",
" 'mdf': {'ingest_date': '2018-10-29T17:47:57.468388Z',\n",
- " 'mdf_id': '5bd747d32ef3880b0f2142a4',\n",
+ " 'mdf_id': '5bd747cf2ef3880b0f2139bf',\n",
" 'parent_id': '5bd747cd2ef3880b0f2135d1',\n",
" 'resource_type': 'record',\n",
- " 'scroll_id': 3283,\n",
+ " 'scroll_id': 1006,\n",
" 'source_id': 'cip_v1',\n",
" 'source_name': 'cip',\n",
" 'version': 1}}"
@@ -181,7 +179,7 @@
}
],
"source": [
- "res = mdf.search(limit=10)\n",
+ "res = mdf.search()\n",
"res[0]"
]
},
@@ -189,8 +187,8 @@
"cell_type": "markdown",
"metadata": {},
"source": [
- "### match_resource_types\n",
- "`match_resource_types()` matches values against the `\"mdf.resource_types\"` field. It is equivalent to chaining `match_field(\"mdf.resource_types\", value)` for each value."
+ "### match_elements\n",
+ "`match_elements()` matches values against the `\"material.elements\"` field. It is equivalent to chaining `match_field(\"material.elements\", value)` for each value."
]
},
{
@@ -201,7 +199,7 @@
{
"data": {
"text/plain": [
- ""
+ ""
]
},
"execution_count": 7,
@@ -210,7 +208,7 @@
}
],
"source": [
- "mdf.match_resource_types(\"record\")"
+ "mdf.match_elements([\"Al\", \"Cu\"])"
]
},
{
@@ -223,12 +221,12 @@
{
"data": {
"text/plain": [
- "{'cip': {'bv': '30535.3',\n",
- " 'energy': '-0.37',\n",
- " 'forcefield': 'Fe-P.eam.fs',\n",
- " 'gv': '-1878841.7',\n",
- " 'mpid': 'mp-130',\n",
- " 'totenergy': '-17.804778'},\n",
+ "{'cip': {'bv': '121.8',\n",
+ " 'energy': '-3.63',\n",
+ " 'forcefield': 'AlCu.eam.alloy',\n",
+ " 'gv': '43.5',\n",
+ " 'mpid': 'mp-12777',\n",
+ " 'totenergy': '-464.16852'},\n",
" 'files': [{'data_type': 'ASCII text, with very long lines, with no line terminators',\n",
" 'filename': 'classical_interatomic_potentials.json',\n",
" 'globus': 'globus://e38ee745-6d04-11e5-ba46-22000b92c6ec/MDF/mdf_connect/prod/data/cip_v1/classical_interatomic_potentials.json',\n",
@@ -236,12 +234,12 @@
" 'mime_type': 'text/plain',\n",
" 'sha512': '96635ee0c15d1d0187b18805653a02b1a6dfa5648db82153467045de18adcc08c753e2897d2b48a78a2167a442219e9aeff6b1103732c2158facac8fa4911b33',\n",
" 'url': 'https://e38ee745-6d04-11e5-ba46-22000b92c6ec.e.globus.org/MDF/mdf_connect/prod/data/cip_v1/classical_interatomic_potentials.json'}],\n",
- " 'material': {'composition': 'P48', 'elements': ['P']},\n",
+ " 'material': {'composition': 'Al32 Cu96', 'elements': ['Al', 'Cu']},\n",
" 'mdf': {'ingest_date': '2018-10-29T17:47:57.468388Z',\n",
- " 'mdf_id': '5bd747cd2ef3880b0f21369b',\n",
+ " 'mdf_id': '5bd747d32ef3880b0f2142a4',\n",
" 'parent_id': '5bd747cd2ef3880b0f2135d1',\n",
" 'resource_type': 'record',\n",
- " 'scroll_id': 202,\n",
+ " 'scroll_id': 3283,\n",
" 'source_id': 'cip_v1',\n",
" 'source_name': 'cip',\n",
" 'version': 1}}"
@@ -261,8 +259,8 @@
"cell_type": "markdown",
"metadata": {},
"source": [
- "### match_ids\n",
- "`match_ids()` matches values against the `\"mdf.mdf_id\"` field. It is equivalent to chaining `match_field(\"mdf.mdf_id\", value)` for each value."
+ "### match_resource_types\n",
+ "`match_resource_types()` matches values against the `\"mdf.resource_type\"` field. It is equivalent to chaining `match_field(\"mdf.resource_type\", value)` for each value."
]
},
{
@@ -273,7 +271,7 @@
{
"data": {
"text/plain": [
- ""
+ ""
]
},
"execution_count": 9,
@@ -282,8 +280,7 @@
}
],
"source": [
- "an_id = res[1][\"mdf\"][\"mdf_id\"]\n",
- "mdf.match_ids(an_id)"
+ "mdf.match_resource_types(\"record\")"
]
},
{
@@ -296,28 +293,28 @@
{
"data": {
"text/plain": [
- "[{'cip': {'bv': '59842.7',\n",
- " 'energy': '-3.29',\n",
- " 'forcefield': 'Fe-P.eam.fs',\n",
- " 'gv': '-28195.7',\n",
- " 'mpid': 'mp-778',\n",
- " 'totenergy': '-237.05557'},\n",
- " 'files': [{'data_type': 'ASCII text, with very long lines, with no line terminators',\n",
- " 'filename': 'classical_interatomic_potentials.json',\n",
- " 'globus': 'globus://e38ee745-6d04-11e5-ba46-22000b92c6ec/MDF/mdf_connect/prod/data/cip_v1/classical_interatomic_potentials.json',\n",
- " 'length': 1841203,\n",
- " 'mime_type': 'text/plain',\n",
- " 'sha512': '96635ee0c15d1d0187b18805653a02b1a6dfa5648db82153467045de18adcc08c753e2897d2b48a78a2167a442219e9aeff6b1103732c2158facac8fa4911b33',\n",
- " 'url': 'https://e38ee745-6d04-11e5-ba46-22000b92c6ec.e.globus.org/MDF/mdf_connect/prod/data/cip_v1/classical_interatomic_potentials.json'}],\n",
- " 'material': {'composition': 'P24 Fe48', 'elements': ['Fe', 'P']},\n",
- " 'mdf': {'ingest_date': '2018-10-29T17:47:57.468388Z',\n",
- " 'mdf_id': '5bd747cd2ef3880b0f2136a1',\n",
- " 'parent_id': '5bd747cd2ef3880b0f2135d1',\n",
- " 'resource_type': 'record',\n",
- " 'scroll_id': 208,\n",
- " 'source_id': 'cip_v1',\n",
- " 'source_name': 'cip',\n",
- " 'version': 1}}]"
+ "{'cip': {'bv': '30535.3',\n",
+ " 'energy': '-0.37',\n",
+ " 'forcefield': 'Fe-P.eam.fs',\n",
+ " 'gv': '-1878841.7',\n",
+ " 'mpid': 'mp-130',\n",
+ " 'totenergy': '-17.804778'},\n",
+ " 'files': [{'data_type': 'ASCII text, with very long lines, with no line terminators',\n",
+ " 'filename': 'classical_interatomic_potentials.json',\n",
+ " 'globus': 'globus://e38ee745-6d04-11e5-ba46-22000b92c6ec/MDF/mdf_connect/prod/data/cip_v1/classical_interatomic_potentials.json',\n",
+ " 'length': 1841203,\n",
+ " 'mime_type': 'text/plain',\n",
+ " 'sha512': '96635ee0c15d1d0187b18805653a02b1a6dfa5648db82153467045de18adcc08c753e2897d2b48a78a2167a442219e9aeff6b1103732c2158facac8fa4911b33',\n",
+ " 'url': 'https://e38ee745-6d04-11e5-ba46-22000b92c6ec.e.globus.org/MDF/mdf_connect/prod/data/cip_v1/classical_interatomic_potentials.json'}],\n",
+ " 'material': {'composition': 'P48', 'elements': ['P']},\n",
+ " 'mdf': {'ingest_date': '2018-10-29T17:47:57.468388Z',\n",
+ " 'mdf_id': '5bd747cd2ef3880b0f21369b',\n",
+ " 'parent_id': '5bd747cd2ef3880b0f2135d1',\n",
+ " 'resource_type': 'record',\n",
+ " 'scroll_id': 202,\n",
+ " 'source_id': 'cip_v1',\n",
+ " 'source_name': 'cip',\n",
+ " 'version': 1}}"
]
},
"execution_count": 10,
@@ -326,7 +323,8 @@
}
],
"source": [
- "mdf.search()"
+ "res = mdf.search(limit=10)\n",
+ "res[0]"
]
},
{
@@ -349,7 +347,7 @@
{
"data": {
"text/plain": [
- ""
+ ""
]
},
"execution_count": 11,
@@ -444,7 +442,7 @@
{
"data": {
"text/plain": [
- ""
+ ""
]
},
"execution_count": 13,
@@ -539,7 +537,7 @@
{
"data": {
"text/plain": [
- ""
+ ""
]
},
"execution_count": 15,
@@ -644,7 +642,7 @@
{
"data": {
"text/plain": [
- ""
+ ""
]
},
"execution_count": 19,
@@ -745,36 +743,33 @@
{
"data": {
"text/plain": [
- "{'crystal_structure': {'cross_reference': {'icsd': 150823},\n",
- " 'number_of_atoms': 4,\n",
- " 'space_group_number': 225,\n",
- " 'volume': 49.3454},\n",
+ "{'crystal_structure': {'cross_reference': {'icsd': 57668},\n",
+ " 'number_of_atoms': 5,\n",
+ " 'space_group_number': 164,\n",
+ " 'volume': 72.5641},\n",
" 'dft': {'converged': True,\n",
" 'cutoff_energy': 520.0,\n",
" 'exchange_correlation_functional': 'PBE'},\n",
" 'files': [{'data_type': 'ASCII text, with very long lines, with no line terminators',\n",
- " 'filename': '1815.json',\n",
- " 'globus': 'globus://e38ee745-6d04-11e5-ba46-22000b92c6ec/MDF/mdf_connect/prod/data/oqmd_v13/1815.json',\n",
- " 'length': 11693,\n",
+ " 'filename': '19430.json',\n",
+ " 'globus': 'globus://e38ee745-6d04-11e5-ba46-22000b92c6ec/MDF/mdf_connect/prod/data/oqmd_v13/19430.json',\n",
+ " 'length': 11181,\n",
" 'mime_type': 'text/plain',\n",
- " 'sha512': '3f26300e0c9d4ce4a53ac5169b3cb8720927263f34d3654e3134fe52e3c0069c41c2db2d38f26cffd28d48073e266914d9a1fd517c344e022b11d4dfe94876a8',\n",
- " 'url': 'https://e38ee745-6d04-11e5-ba46-22000b92c6ec.e.globus.org/MDF/mdf_connect/prod/data/oqmd_v13/1815.json'}],\n",
- " 'material': {'composition': 'Al1Cu3', 'elements': ['Al', 'Cu']},\n",
+ " 'sha512': '8015dc8ce744634f5e9be41ddd37379c444ef17f1aaeae57bfbab544125fa787d1f2ed937ba0e9b7be535f88dd0155e05de6541057ab2eaab6c383e85eb9495a',\n",
+ " 'url': 'https://e38ee745-6d04-11e5-ba46-22000b92c6ec.e.globus.org/MDF/mdf_connect/prod/data/oqmd_v13/19430.json'}],\n",
+ " 'material': {'composition': 'Al3Cu2', 'elements': ['Al', 'Cu']},\n",
" 'mdf': {'ingest_date': '2018-11-09T19:44:43.687681Z',\n",
- " 'mdf_id': '5be5e4122ef388650efdf50d',\n",
+ " 'mdf_id': '5be5e7632ef388650e02b0e4',\n",
" 'parent_id': '5be5e3ab2ef388650efd6704',\n",
" 'resource_type': 'record',\n",
- " 'scroll_id': 36361,\n",
+ " 'scroll_id': 346591,\n",
" 'source_id': 'oqmd_v13',\n",
" 'source_name': 'oqmd',\n",
" 'version': 13},\n",
" 'oqmd': {'band_gap': {'units': 'eV', 'value': 0.0},\n",
- " 'configuration': 'static',\n",
- " 'delta_e': {'units': 'eV/atom', 'value': -0.1675233825},\n",
" 'magnetic_moment': {'units': 'bohr/atom'},\n",
- " 'stability': {'units': 'eV/atom', 'value': 0.02138741875},\n",
- " 'total_energy': {'units': 'eV/atom', 'value': -3.8909277975},\n",
- " 'volume_pa': {'units': 'angstrom^3/atom', 'value': 12.3364}}}"
+ " 'total_energy': {'units': 'eV/atom', 'value': -3.885221738},\n",
+ " 'volume_pa': {'units': 'angstrom^3/atom', 'value': 14.5128}}}"
]
},
"execution_count": 21,
diff --git a/mdf_forge/forge.py b/mdf_forge/forge.py
index 749a9d6..b480238 100644
--- a/mdf_forge/forge.py
+++ b/mdf_forge/forge.py
@@ -1,3 +1,4 @@
+import json
import os
import re
from urllib.parse import urlparse
@@ -31,6 +32,7 @@ class Forge(mdf_toolbox.AggregateHelper, mdf_toolbox.SearchHelper):
# "Protected" variables (for dev/debugging)
_schemas_url = "https://api.materialsdatafacility.org/schemas/"
+ _organizations_url = "https://api.materialsdatafacility.org/organizations/"
def __init__(self, index=__default_index, local_ep=None, anonymous=False,
clear_old_tokens=False, **kwargs):
@@ -118,11 +120,6 @@ def match_source_names(self, source_names):
for src in source_names:
match = re.search("_v[0-9]+\\.[0-9]+$", src)
- # TODO: Remove legacy-form support
- if not match:
- match = (re.search("_v[0-9]+-[0-9]+$", src)
- or re.search("_v[0-9]+$", src))
-
if match:
sanitized_names.append(src[:match.start()])
else:
@@ -136,25 +133,36 @@ def match_source_names(self, source_names):
self.match_field(field="mdf.source_name", value=src, required=False, new_group=False)
return self
- def match_ids(self, mdf_ids):
- """Match the IDs in the given ``mdf_id`` list.
+ def match_records(self, source_name, scroll_ids):
+ """Match specific records from a given dataset.
+ Multiple records may be matched, but only one dataset per call.
Arguments:
- mdf_ids (str or list of str): The IDs to match.
+ source_name (str): The ``source_name`` of the records' dataset. The ``source_id``
+ is also accepted for convenience.
+ scroll_ids (int or list of int): The ``scroll_id`` values of the records to match.
Returns:
- Forge: Self
+ Forge: self
"""
- # If no IDs are supplied, nothing to match
- if not mdf_ids:
+ if not source_name or not scroll_ids:
return self
- if isinstance(mdf_ids, str):
- mdf_ids = [mdf_ids]
- # First ID should be in new group and required
- self.match_field(field="mdf.mdf_id", value=mdf_ids[0], required=True, new_group=True)
- # Other IDs should stay in that group, and not be required
- for mid in mdf_ids[1:]:
- self.match_field(field="mdf.mdf_id", value=mid, required=False, new_group=False)
+ if isinstance(scroll_ids, int):
+ scroll_ids = [scroll_ids]
+ # If passed source_id, strip version info
+ match = re.search("_v[0-9]+\\.[0-9]+$", source_name)
+ if not match:
+ match = (re.search("_v[0-9]+-[0-9]+$", source_name)
+ or re.search("_v[0-9]+$", source_name))
+ if match:
+ source_name = source_name[:match.start()]
+ # source_name is required, starts new group
+ # First scroll_id opens a new (nested) required group; remaining scroll_ids are OR'd into it
+ # (source:source AND (scroll:scroll0 OR scroll:scroll1 ... ))
+ self.match_field(field="mdf.source_name", value=source_name, required=True, new_group=True)
+ self.match_field(field="mdf.scroll_id", value=scroll_ids[0], required=True, new_group=True)
+ for scroll in scroll_ids[1:]:
+ self.match_field(field="mdf.scroll_id", value=scroll, required=False, new_group=False)
return self
def match_elements(self, elements, match_all=True):
@@ -467,25 +475,17 @@ def fetch_datasets_from_results(self, entries=None, query=None, reset_query=True
entries = [entries]
elif isinstance(entries, tuple):
entries = entries[0]
- ds_ids = set()
-
- # For every entry, extract the appropriate ID
- for entry in entries:
- # For records, extract the parent_id
- # Most entries should be records here
- if entry["mdf"]["resource_type"] == "record":
- ds_ids.add(entry["mdf"]["parent_id"])
- # For datasets, extract the mdf_id
- elif entry["mdf"]["resource_type"] == "dataset":
- ds_ids.add(entry["mdf"]["mdf_id"])
- # For anything else (collection), do nothing
- else:
- pass
+ # If no entries, error
+ if len(entries) == 0:
+ raise ValueError("No entries provided or found")
- # If no ids are preset, raise an error
- if len(ds_ids) == 0:
- raise AttributeError('No dataset records found in these entries')
- return self.match_ids(list(ds_ids)).search()
+ # Extract source_name from every entry, make unique, skip invalid entries
+ ds_ids = set([entry["mdf"]["source_name"] for entry in entries
+ if entry.get("mdf", {}).get("source_name")])
+ if not ds_ids:
+ return []
+
+ return self.match_source_names(ds_ids).match_resource_types("dataset").search()
def get_dataset_version(self, source_name):
"""Get the version of a certain dataset.
@@ -957,3 +957,95 @@ def describe_field(self, resource_type, field=None, raw=False):
else:
mdf_toolbox.print_jsonschema(schema)
return
+
+ def describe_organization(self, organization, summary=False, raw=False):
+ """Fetch and display the description of an organization registered with MDF.
+
+ Arguments:
+ organization (str): The organization to describe.
+ This value can also be ``"list"`` to list all organizations' names,
+ or ``"all"`` to fetch the metadata for every organization (not recommended).
+ summary (bool): When ``True``, will summarize the organization metadata. The
+ summary just contains the non-technical information about the
+ organization itself.
+ When ``False``, will print all of the metadata.
+ This parameter has no effect if ``raw=True``.
+ **Default:** ``False``
+ raw (bool): When ``False``, will format and print the organization metadata.
+ When ``True``, will return the raw JSON dictionary instead.
+ For human consumption, ``False`` is recommended.
+ **Default:** ``False``
+ """
+ res = requests.get(self._organizations_url+organization)
+ # Check for success
+ error = None
+ org_res = None
+ try:
+ json_res = res.json()
+ except Exception:
+ if res.status_code < 300:
+ error = "Error decoding {} response: {}".format(res.status_code, res.content)
+ else:
+ error = ("Error {}. MDF may be experiencing technical difficulties."
+ .format(res.status_code))
+ else:
+ if res.status_code >= 300:
+ error = "Error {}: {}".format(res.status_code, json_res["error"])
+ else:
+ # Support "all" and "list" keywords
+ org_res = json_res.get("organization",
+ json_res.get("all_organizations",
+ json_res.get("organization_list", {})))
+
+ # Return if raw=True
+ if raw:
+ return {
+ "success": error is None,
+ "error": error,
+ "organization": org_res,
+ "status_code": res.status_code
+ }
+ # Otherwise, print the result
+ else:
+ if error is not None:
+ print(error)
+ else:
+ # Support "all" and "list"
+ if not isinstance(org_res, list):
+ org_res = [org_res]
+ for org in org_res:
+ # Only "list" is non-dict, just print org name and continue
+ if not isinstance(org, dict):
+ print(org)
+ continue
+
+ print("\n", org["canonical_name"])
+ # If user just wants a summary, pop the non-summary keys
+ # Essentially, the summary is non-technical info,
+ # just describing the org itself - not in MDF context
+ if summary:
+ org.pop("canonical_name", None) # Already printed
+ org.pop("permission_groups", None)
+ org.pop("acl", None)
+ org.pop("data_destinations", None)
+ org.pop("curation", None)
+ org.pop("project_blocks", None)
+ org.pop("required_fields", None)
+ org.pop("services", None)
+ # Don't display "None" parents
+ if not org.get("parent_organizations"):
+ org.pop("parent_organizations", None)
+
+ # Print dict as key: value
+ # All values besides "services" are max single-depth containers
+ for k, v in org.items():
+ if not v:
+ v = "None"
+ # "services", just prettyprint the dict
+ if isinstance(v, dict):
+ print("\t{}: {}".format(k, json.dumps(v, indent=4)))
+ elif isinstance(v, list):
+ print("\t{}: {}".format(k, ", ".join([(x or "None") for x in v])))
+ else:
+ print("\t{}: {}".format(k, str(v)))
+ return
diff --git a/setup.py b/setup.py
index cbec70d..5a0df73 100644
--- a/setup.py
+++ b/setup.py
@@ -2,7 +2,7 @@
setup(
name='mdf_forge',
- version='0.7.2',
+ version='0.7.3',
packages=['mdf_forge'],
description='Materials Data Facility python package',
long_description=("Forge is the Materials Data Facility Python package"
diff --git a/tests/test_forge.py b/tests/test_forge.py
index 9cac316..b3c89e9 100644
--- a/tests/test_forge.py
+++ b/tests/test_forge.py
@@ -2,7 +2,9 @@
import re
import types
+import mdf_toolbox
import pytest
+
from mdf_forge import Forge
@@ -126,7 +128,7 @@ def check_field(res, field, regex):
elif type(vals) is not list:
vals = [vals]
# If a result does not contain the value, no match
- if regex not in vals and not any([re.search(str(regex), value) for value in vals]):
+ if regex not in vals and not any([re.search(str(regex), str(value)) for value in vals]):
all_match = False
only_match = False
# If a result contains other values, inclusive match
@@ -171,29 +173,24 @@ def test_forge_match_source_names():
assert f.match_source_names("") == f
-def test_forge_match_ids():
- # Get a couple IDs
+def test_forge_test_match_records():
f = Forge(index="mdf")
- res0 = f.search("mdf.source_name:khazana_vasp", advanced=True, limit=2)
- id1 = res0[0]["mdf"]["mdf_id"]
- id2 = res0[1]["mdf"]["mdf_id"]
+ # One record
+ f.match_records("cip", 1006)
+ res = f.search()
+ assert len(res) == 1
+ assert check_field(res, "mdf.source_name", "cip") == 0
+ assert check_field(res, "mdf.scroll_id", 1006) == 0
- # One ID
- f.match_ids(id1)
- res1 = f.search()
- assert res1 != []
- assert check_field(res1, "mdf.mdf_id", id1) == 0
+ # Multi-record, strip version info
+ f.match_records("cip_v3.4", [1006, 1002])
+ res = f.search()
+ assert len(res) == 2
+ assert check_field(res, "mdf.source_name", "cip") == 0
+ assert check_field(res, "mdf.scroll_id", 1006) == 2
- # Multi-ID
- f.match_ids([id1, id2])
- res2 = f.search()
- # res1 is a subset of res2
- assert len(res2) > len(res1)
- assert all([r1 in res2 for r1 in res1])
- assert check_field(res2, "mdf.mdf_id", id2) == 2
-
- # No id
- assert f.match_ids("") == f
+ # No args
+ assert f.match_records("", "") == f
def test_forge_match_elements():
@@ -295,21 +292,19 @@ def test_forge_match_resource_types():
assert f.match_resource_types("") == f
-# TODO: Enable this test once Organizations are deployed and actually in-use on Prod index
-@pytest.mark.xfail
def test_forge_match_organizations():
f = Forge(index="mdf")
# One repo
- f.match_organizations("DOE")
+ f.match_organizations("NIST")
res1 = f.search()
assert res1 != []
- check_val1 = check_field(res1, "mdf.organizations", "DOE")
+ check_val1 = check_field(res1, "mdf.organizations", "NIST")
assert check_val1 == 1
# Multi-repo
- f.match_organizations(["NIST", "DOE"], match_all=False)
+ f.match_organizations(["NIST", "PRISMS"], match_all=False)
res2 = f.search()
- assert check_field(res2, "mdf.organizations", "DOE") == 2
+ assert check_field(res2, "mdf.organizations", "PRISMS") == 2
assert check_field(res2, "mdf.organizations", "NIST") == 2
# No repos
@@ -398,11 +393,11 @@ def test_forge_fetch_datasets_from_results():
# Fetch single dataset
res1 = f.fetch_datasets_from_results(res01[0])
- assert res1[0] == oqmd
+ assert mdf_toolbox.insensitive_comparison(res1[0], oqmd)
# Fetch dataset with results + info
res2 = f.fetch_datasets_from_results(res02)
- assert res2[0] == oqmd
+ assert mdf_toolbox.insensitive_comparison(res2[0], oqmd)
# Fetch multiple datasets
rtemp = res01+res03
@@ -413,7 +408,7 @@ def test_forge_fetch_datasets_from_results():
# Fetch dataset from dataset
res4 = f.fetch_datasets_from_results(res04)
- assert res4 == res04
+ assert mdf_toolbox.insensitive_comparison(res4, res04)
# Fetch entries from current query
f.match_source_names("nist_xps_db")
@@ -421,9 +416,7 @@ def test_forge_fetch_datasets_from_results():
# Fetch nothing
unknown_entry = {"mdf": {"resource_type": "unknown"}}
- with pytest.raises(AttributeError) as excinfo:
- assert f.fetch_datasets_from_results(unknown_entry) == []
- assert 'No dataset records found' in str(excinfo.value)
+ assert f.fetch_datasets_from_results(unknown_entry) == []
def test_forge_http_download(capsys):
@@ -653,3 +646,52 @@ def test_describe_field(capsys):
f.describe_field("dataset", field="foo.bar")
out, err = capsys.readouterr()
assert "Error: Field 'foo' (from 'foo.bar')" in out
+
+
+def test_describe_organization(capsys):
+ f = Forge()
+ # Basic usage (with raw=True)
+ res = f.describe_organization("Argonne National Laboratory", raw=True)
+ assert res["success"]
+ assert isinstance(res["organization"], dict)
+ assert res["organization"]["canonical_name"] == "Argonne National Laboratory"
+ assert "ANL" in res["organization"]["aliases"]
+ # List
+ res = f.describe_organization("list", raw=True)
+ assert isinstance(res["organization"], list)
+ assert "Center for Hierarchical Materials Design" in res["organization"]
+ # All
+ res = f.describe_organization("all", raw=True)
+ assert isinstance(res["organization"], list)
+ assert isinstance(res["organization"][0], dict)
+ # Print to stdout
+ f.describe_organization("CHiMaD")
+ out, err = capsys.readouterr()
+ assert "canonical_name: Center for Hierarchical Materials Design" in out
+ assert "aliases: CHiMaD" in out
+ assert "permission_groups: public" in out
+ # List
+ f.describe_organization("list")
+ out, err = capsys.readouterr()
+ assert "Center for Hierarchical Materials Design" in out
+ assert "CHiMaD" not in out
+ assert "Argonne National Laboratory" in out
+ assert "ANL" not in out
+ # Summary flag
+ f.describe_organization("chimad", summary=True)
+ out, err = capsys.readouterr()
+ assert "canonical_name: Center for Hierarchical Materials Design" not in out
+ assert "Center for Hierarchical Materials Design" in out
+ assert "aliases: CHiMaD" in out
+ assert "permission_groups: public" not in out
+
+ # Errors
+ # Invalid org
+ res = f.describe_organization("foobar", raw=True)
+ assert res["success"] is False
+ assert "Error 404" in res["error"]
+ assert res["status_code"] == 404
+ # stdout
+ res = f.describe_organization("foobar")
+ out, err = capsys.readouterr()
+ assert "Error 404" in out