From 50ac1e8d8b48665c2bd1304792007a17e8bf4f3c Mon Sep 17 00:00:00 2001
From: Michael Kuchnik <mkuchnik@meta.com>
Date: Mon, 26 Feb 2024 15:50:08 +0000
Subject: [PATCH] Revert breaking metadata changes

---
 datasets/1.0/bigcode-the-stack/metadata.json    | 17 ++++++++++++-----
 .../distribution_bad_contained_in/metadata.json | 11 ++---------
 .../0.8/distribution_bad_type/metadata.json     | 11 ++---------
 .../0.8/distribution_missing_name/metadata.json | 11 ++---------
 .../metadata.json                               | 11 ++---------
 .../graphs/0.8/metadata_bad_type/metadata.json  | 11 ++---------
 .../metadata.json                               | 11 ++---------
 .../graphs/0.8/mlfield_bad_source/metadata.json | 11 ++---------
 .../graphs/0.8/mlfield_bad_type/metadata.json   | 11 ++---------
 .../mlfield_missing_property_name/metadata.json | 11 ++---------
 .../0.8/mlfield_missing_source/metadata.json    | 11 ++---------
 .../metadata.json                               | 11 ++---------
 .../metadata.json                               | 11 ++---------
 .../0.8/recordset_wrong_join/metadata.json      | 11 ++---------
 14 files changed, 38 insertions(+), 122 deletions(-)

diff --git a/datasets/1.0/bigcode-the-stack/metadata.json b/datasets/1.0/bigcode-the-stack/metadata.json
index ece9b34bb..c690ad550 100644
--- a/datasets/1.0/bigcode-the-stack/metadata.json
+++ b/datasets/1.0/bigcode-the-stack/metadata.json
@@ -47,18 +47,25 @@
   "name": "bigcode-the-stack",
   "conformsTo": "http://mlcommons.org/croissant/1.0",
   "description": "The Stack contains over 6TB of permissively-licensed source code files covering 358 programming languages. The dataset was created as part of the BigCode Project, an open scientific collaboration working on the responsible development of Large Language Models for Code (Code LLMs). The Stack serves as a pre-training dataset for Code LLMs, i.e., code-generating AI systems which enable the synthesis of programs from natural language descriptions as well as other from code snippets.",
+  "citeAs": "@article{Kocetkov2022TheStack, title={The Stack: 3 TB of permissively licensed source code}, author={Kocetkov, Denis and Li, Raymond and Ben Allal, Loubna and Li, Jia and Mou,Chenghao and Mu\u00f1oz Ferrandis, Carlos and Jernite, Yacine and Mitchell, Margaret and Hughes, Sean and Wolf, Thomas and Bahdanau, Dzmitry and von Werra, Leandro and de Vries, Harm}, journal={Preprint}, year={2022} }",
   "creator": [
     {
-      "@type": "Organization",
-      "name": "Harm de Vries"
+      "@type": "Person",
+      "name": "Harm de Vries",
+      "email": "harm.devries@servicenow.com"
     },
     {
-      "@type": "Organization",
-      "name": "Leandro von Werra"
+      "@type": "Person",
+      "name": "Leandro von Werra",
+      "email": "leandro@huggingface.co"
     }
   ],
-  "citeAs": "@article{Kocetkov2022TheStack, title={The Stack: 3 TB of permissively licensed source code}, author={Kocetkov, Denis and Li, Raymond and Ben Allal, Loubna and Li, Jia and Mou,Chenghao and Mu\u00f1oz Ferrandis, Carlos and Jernite, Yacine and Mitchell, Margaret and Hughes, Sean and Wolf, Thomas and Bahdanau, Dzmitry and von Werra, Leandro and de Vries, Harm}, journal={Preprint}, year={2022} }",
+  "keywords": [
+    "crowdsourced",
+    "expert-generated"
+  ],
   "license": "other",
+  "sameAs": "https://www.bigcode-project.org/docs/about/the-stack/",
   "sdLicense": "https://www.apache.org/licenses/LICENSE-2.0",
   "url": "https://huggingface.co/datasets/bigcode/the-stack",
   "distribution": [
diff --git a/python/mlcroissant/mlcroissant/_src/tests/graphs/0.8/distribution_bad_contained_in/metadata.json b/python/mlcroissant/mlcroissant/_src/tests/graphs/0.8/distribution_bad_contained_in/metadata.json
index 2a0d998ae..73ccc7f84 100644
--- a/python/mlcroissant/mlcroissant/_src/tests/graphs/0.8/distribution_bad_contained_in/metadata.json
+++ b/python/mlcroissant/mlcroissant/_src/tests/graphs/0.8/distribution_bad_contained_in/metadata.json
@@ -2,10 +2,8 @@
   "@context": {
     "@language": "en",
     "@vocab": "https://schema.org/",
-    "citeAs": "cr:citeAs",
     "column": "ml:column",
     "conformsTo": "dct:conformsTo",
-    "cr": "http://mlcommons.org/croissant/",
     "data": {
       "@id": "ml:data",
       "@type": "@json"
@@ -20,15 +18,11 @@
     "extract": "ml:extract",
     "field": "ml:field",
     "fileProperty": "ml:fileProperty",
-    "fileObject": "cr:fileObject",
-    "fileSet": "cr:fileSet",
     "format": "ml:format",
     "includes": "ml:includes",
     "isEnumeration": "ml:isEnumeration",
-    "isLiveDataset": "cr:isLiveDataset",
     "jsonPath": "ml:jsonPath",
-    "key": "cr:key",
-    "md5": "cr:md5",
+    "ml": "http://mlcommons.org/schema/",
     "parentField": "ml:parentField",
     "path": "ml:path",
     "personalSensitiveInformation": "ml:personalSensitiveInformation",
@@ -41,8 +35,7 @@
     "separator": "ml:separator",
     "source": "ml:source",
     "subField": "ml:subField",
-    "transform": "ml:transform",
-    "ml": "http://mlcommons.org/schema/"
+    "transform": "ml:transform"
   },
   "@type": "sc:Dataset",
   "name": "mydataset",
diff --git a/python/mlcroissant/mlcroissant/_src/tests/graphs/0.8/distribution_bad_type/metadata.json b/python/mlcroissant/mlcroissant/_src/tests/graphs/0.8/distribution_bad_type/metadata.json
index 948b0de7e..43014951a 100644
--- a/python/mlcroissant/mlcroissant/_src/tests/graphs/0.8/distribution_bad_type/metadata.json
+++ b/python/mlcroissant/mlcroissant/_src/tests/graphs/0.8/distribution_bad_type/metadata.json
@@ -2,10 +2,8 @@
   "@context": {
     "@language": "en",
     "@vocab": "https://schema.org/",
-    "citeAs": "cr:citeAs",
     "column": "ml:column",
     "conformsTo": "dct:conformsTo",
-    "cr": "http://mlcommons.org/croissant/",
     "data": {
       "@id": "ml:data",
       "@type": "@json"
@@ -20,15 +18,11 @@
     "extract": "ml:extract",
     "field": "ml:field",
     "fileProperty": "ml:fileProperty",
-    "fileObject": "cr:fileObject",
-    "fileSet": "cr:fileSet",
     "format": "ml:format",
     "includes": "ml:includes",
     "isEnumeration": "ml:isEnumeration",
-    "isLiveDataset": "cr:isLiveDataset",
     "jsonPath": "ml:jsonPath",
-    "key": "cr:key",
-    "md5": "cr:md5",
+    "ml": "http://mlcommons.org/schema/",
     "parentField": "ml:parentField",
     "path": "ml:path",
     "personalSensitiveInformation": "ml:personalSensitiveInformation",
@@ -41,8 +35,7 @@
     "separator": "ml:separator",
     "source": "ml:source",
     "subField": "ml:subField",
-    "transform": "ml:transform",
-    "ml": "http://mlcommons.org/schema/"
+    "transform": "ml:transform"
   },
   "@type": "sc:Dataset",
   "name": "mydataset",
diff --git a/python/mlcroissant/mlcroissant/_src/tests/graphs/0.8/distribution_missing_name/metadata.json b/python/mlcroissant/mlcroissant/_src/tests/graphs/0.8/distribution_missing_name/metadata.json
index f969f6959..2e85d5000 100644
--- a/python/mlcroissant/mlcroissant/_src/tests/graphs/0.8/distribution_missing_name/metadata.json
+++ b/python/mlcroissant/mlcroissant/_src/tests/graphs/0.8/distribution_missing_name/metadata.json
@@ -2,10 +2,8 @@
   "@context": {
     "@language": "en",
     "@vocab": "https://schema.org/",
-    "citeAs": "cr:citeAs",
     "column": "ml:column",
     "conformsTo": "dct:conformsTo",
-    "cr": "http://mlcommons.org/croissant/",
     "data": {
       "@id": "ml:data",
       "@type": "@json"
@@ -20,15 +18,11 @@
     "extract": "ml:extract",
     "field": "ml:field",
     "fileProperty": "ml:fileProperty",
-    "fileObject": "cr:fileObject",
-    "fileSet": "cr:fileSet",
     "format": "ml:format",
     "includes": "ml:includes",
     "isEnumeration": "ml:isEnumeration",
-    "isLiveDataset": "cr:isLiveDataset",
     "jsonPath": "ml:jsonPath",
-    "key": "cr:key",
-    "md5": "cr:md5",
+    "ml": "http://mlcommons.org/schema/",
     "parentField": "ml:parentField",
     "path": "ml:path",
     "personalSensitiveInformation": "ml:personalSensitiveInformation",
@@ -41,8 +35,7 @@
     "separator": "ml:separator",
     "source": "ml:source",
     "subField": "ml:subField",
-    "transform": "ml:transform",
-    "ml": "http://mlcommons.org/schema/"
+    "transform": "ml:transform"
   },
   "@type": "sc:Dataset",
   "name": "mydataset",
diff --git a/python/mlcroissant/mlcroissant/_src/tests/graphs/0.8/distribution_missing_property_content_url/metadata.json b/python/mlcroissant/mlcroissant/_src/tests/graphs/0.8/distribution_missing_property_content_url/metadata.json
index 7b8a38dca..05c395e90 100644
--- a/python/mlcroissant/mlcroissant/_src/tests/graphs/0.8/distribution_missing_property_content_url/metadata.json
+++ b/python/mlcroissant/mlcroissant/_src/tests/graphs/0.8/distribution_missing_property_content_url/metadata.json
@@ -2,10 +2,8 @@
   "@context": {
     "@language": "en",
     "@vocab": "https://schema.org/",
-    "citeAs": "cr:citeAs",
     "column": "ml:column",
     "conformsTo": "dct:conformsTo",
-    "cr": "http://mlcommons.org/croissant/",
     "data": {
       "@id": "ml:data",
       "@type": "@json"
@@ -20,15 +18,11 @@
     "extract": "ml:extract",
     "field": "ml:field",
     "fileProperty": "ml:fileProperty",
-    "fileObject": "cr:fileObject",
-    "fileSet": "cr:fileSet",
     "format": "ml:format",
     "includes": "ml:includes",
     "isEnumeration": "ml:isEnumeration",
-    "isLiveDataset": "cr:isLiveDataset",
     "jsonPath": "ml:jsonPath",
-    "key": "cr:key",
-    "md5": "cr:md5",
+    "ml": "http://mlcommons.org/schema/",
     "parentField": "ml:parentField",
     "path": "ml:path",
     "personalSensitiveInformation": "ml:personalSensitiveInformation",
@@ -41,8 +35,7 @@
     "separator": "ml:separator",
     "source": "ml:source",
     "subField": "ml:subField",
-    "transform": "ml:transform",
-    "ml": "http://mlcommons.org/schema/"
+    "transform": "ml:transform"
   },
   "@type": "sc:Dataset",
   "name": "mydataset",
diff --git a/python/mlcroissant/mlcroissant/_src/tests/graphs/0.8/metadata_bad_type/metadata.json b/python/mlcroissant/mlcroissant/_src/tests/graphs/0.8/metadata_bad_type/metadata.json
index 6ac1fc674..fc225b1a2 100644
--- a/python/mlcroissant/mlcroissant/_src/tests/graphs/0.8/metadata_bad_type/metadata.json
+++ b/python/mlcroissant/mlcroissant/_src/tests/graphs/0.8/metadata_bad_type/metadata.json
@@ -2,10 +2,8 @@
   "@context": {
     "@language": "en",
     "@vocab": "https://schema.org/",
-    "citeAs": "cr:citeAs",
     "column": "ml:column",
     "conformsTo": "dct:conformsTo",
-    "cr": "http://mlcommons.org/croissant/",
     "data": {
       "@id": "ml:data",
       "@type": "@json"
@@ -20,15 +18,11 @@
     "extract": "ml:extract",
     "field": "ml:field",
     "fileProperty": "ml:fileProperty",
-    "fileObject": "cr:fileObject",
-    "fileSet": "cr:fileSet",
     "format": "ml:format",
     "includes": "ml:includes",
     "isEnumeration": "ml:isEnumeration",
-    "isLiveDataset": "cr:isLiveDataset",
     "jsonPath": "ml:jsonPath",
-    "key": "cr:key",
-    "md5": "cr:md5",
+    "ml": "http://mlcommons.org/schema/",
     "parentField": "ml:parentField",
     "path": "ml:path",
     "personalSensitiveInformation": "ml:personalSensitiveInformation",
@@ -41,8 +35,7 @@
     "separator": "ml:separator",
     "source": "ml:source",
     "subField": "ml:subField",
-    "transform": "ml:transform",
-    "ml": "http://mlcommons.org/schema/"
+    "transform": "ml:transform"
   },
   "@type": "sc:WRONG_TYPE",
   "name": "mydataset",
diff --git a/python/mlcroissant/mlcroissant/_src/tests/graphs/0.8/metadata_missing_property_name/metadata.json b/python/mlcroissant/mlcroissant/_src/tests/graphs/0.8/metadata_missing_property_name/metadata.json
index 803aa6d42..f6e313627 100644
--- a/python/mlcroissant/mlcroissant/_src/tests/graphs/0.8/metadata_missing_property_name/metadata.json
+++ b/python/mlcroissant/mlcroissant/_src/tests/graphs/0.8/metadata_missing_property_name/metadata.json
@@ -2,10 +2,8 @@
   "@context": {
     "@language": "en",
     "@vocab": "https://schema.org/",
-    "citeAs": "cr:citeAs",
     "column": "ml:column",
     "conformsTo": "dct:conformsTo",
-    "cr": "http://mlcommons.org/croissant/",
     "data": {
       "@id": "ml:data",
       "@type": "@json"
@@ -20,15 +18,11 @@
     "extract": "ml:extract",
     "field": "ml:field",
     "fileProperty": "ml:fileProperty",
-    "fileObject": "cr:fileObject",
-    "fileSet": "cr:fileSet",
     "format": "ml:format",
     "includes": "ml:includes",
     "isEnumeration": "ml:isEnumeration",
-    "isLiveDataset": "cr:isLiveDataset",
     "jsonPath": "ml:jsonPath",
-    "key": "cr:key",
-    "md5": "cr:md5",
+    "ml": "http://mlcommons.org/schema/",
     "parentField": "ml:parentField",
     "path": "ml:path",
     "personalSensitiveInformation": "ml:personalSensitiveInformation",
@@ -41,8 +35,7 @@
     "separator": "ml:separator",
     "source": "ml:source",
     "subField": "ml:subField",
-    "transform": "ml:transform",
-    "ml": "http://mlcommons.org/schema/"
+    "transform": "ml:transform"
   },
   "@type": "sc:Dataset",
   "description": "This is a description.",
diff --git a/python/mlcroissant/mlcroissant/_src/tests/graphs/0.8/mlfield_bad_source/metadata.json b/python/mlcroissant/mlcroissant/_src/tests/graphs/0.8/mlfield_bad_source/metadata.json
index 901a06504..281b26082 100644
--- a/python/mlcroissant/mlcroissant/_src/tests/graphs/0.8/mlfield_bad_source/metadata.json
+++ b/python/mlcroissant/mlcroissant/_src/tests/graphs/0.8/mlfield_bad_source/metadata.json
@@ -2,10 +2,8 @@
   "@context": {
     "@language": "en",
     "@vocab": "https://schema.org/",
-    "citeAs": "cr:citeAs",
     "column": "ml:column",
     "conformsTo": "dct:conformsTo",
-    "cr": "http://mlcommons.org/croissant/",
     "data": {
       "@id": "ml:data",
       "@type": "@json"
@@ -20,15 +18,11 @@
     "extract": "ml:extract",
     "field": "ml:field",
     "fileProperty": "ml:fileProperty",
-    "fileObject": "cr:fileObject",
-    "fileSet": "cr:fileSet",
     "format": "ml:format",
     "includes": "ml:includes",
     "isEnumeration": "ml:isEnumeration",
-    "isLiveDataset": "cr:isLiveDataset",
     "jsonPath": "ml:jsonPath",
-    "key": "cr:key",
-    "md5": "cr:md5",
+    "ml": "http://mlcommons.org/schema/",
     "parentField": "ml:parentField",
     "path": "ml:path",
     "personalSensitiveInformation": "ml:personalSensitiveInformation",
@@ -41,8 +35,7 @@
     "separator": "ml:separator",
     "source": "ml:source",
     "subField": "ml:subField",
-    "transform": "ml:transform",
-    "ml": "http://mlcommons.org/schema/"
+    "transform": "ml:transform"
   },
   "@type": "sc:Dataset",
   "name": "mydataset",
diff --git a/python/mlcroissant/mlcroissant/_src/tests/graphs/0.8/mlfield_bad_type/metadata.json b/python/mlcroissant/mlcroissant/_src/tests/graphs/0.8/mlfield_bad_type/metadata.json
index 5b0abb3b4..6e3446e8e 100644
--- a/python/mlcroissant/mlcroissant/_src/tests/graphs/0.8/mlfield_bad_type/metadata.json
+++ b/python/mlcroissant/mlcroissant/_src/tests/graphs/0.8/mlfield_bad_type/metadata.json
@@ -2,10 +2,8 @@
   "@context": {
     "@language": "en",
     "@vocab": "https://schema.org/",
-    "citeAs": "cr:citeAs",
     "column": "ml:column",
     "conformsTo": "dct:conformsTo",
-    "cr": "http://mlcommons.org/croissant/",
     "data": {
       "@id": "ml:data",
       "@type": "@json"
@@ -20,15 +18,11 @@
     "extract": "ml:extract",
     "field": "ml:field",
     "fileProperty": "ml:fileProperty",
-    "fileObject": "cr:fileObject",
-    "fileSet": "cr:fileSet",
     "format": "ml:format",
     "includes": "ml:includes",
     "isEnumeration": "ml:isEnumeration",
-    "isLiveDataset": "cr:isLiveDataset",
     "jsonPath": "ml:jsonPath",
-    "key": "cr:key",
-    "md5": "cr:md5",
+    "ml": "http://mlcommons.org/schema/",
     "parentField": "ml:parentField",
     "path": "ml:path",
     "personalSensitiveInformation": "ml:personalSensitiveInformation",
@@ -41,8 +35,7 @@
     "separator": "ml:separator",
     "source": "ml:source",
     "subField": "ml:subField",
-    "transform": "ml:transform",
-    "ml": "http://mlcommons.org/schema/"
+    "transform": "ml:transform"
   },
   "@type": "sc:Dataset",
   "name": "mydataset",
diff --git a/python/mlcroissant/mlcroissant/_src/tests/graphs/0.8/mlfield_missing_property_name/metadata.json b/python/mlcroissant/mlcroissant/_src/tests/graphs/0.8/mlfield_missing_property_name/metadata.json
index 2fe884eb3..e2278e7e5 100644
--- a/python/mlcroissant/mlcroissant/_src/tests/graphs/0.8/mlfield_missing_property_name/metadata.json
+++ b/python/mlcroissant/mlcroissant/_src/tests/graphs/0.8/mlfield_missing_property_name/metadata.json
@@ -2,10 +2,8 @@
   "@context": {
     "@language": "en",
     "@vocab": "https://schema.org/",
-    "citeAs": "cr:citeAs",
     "column": "ml:column",
     "conformsTo": "dct:conformsTo",
-    "cr": "http://mlcommons.org/croissant/",
     "data": {
       "@id": "ml:data",
       "@type": "@json"
@@ -20,15 +18,11 @@
     "extract": "ml:extract",
     "field": "ml:field",
     "fileProperty": "ml:fileProperty",
-    "fileObject": "cr:fileObject",
-    "fileSet": "cr:fileSet",
     "format": "ml:format",
     "includes": "ml:includes",
     "isEnumeration": "ml:isEnumeration",
-    "isLiveDataset": "cr:isLiveDataset",
     "jsonPath": "ml:jsonPath",
-    "key": "cr:key",
-    "md5": "cr:md5",
+    "ml": "http://mlcommons.org/schema/",
     "parentField": "ml:parentField",
     "path": "ml:path",
     "personalSensitiveInformation": "ml:personalSensitiveInformation",
@@ -41,8 +35,7 @@
     "separator": "ml:separator",
     "source": "ml:source",
     "subField": "ml:subField",
-    "transform": "ml:transform",
-    "ml": "http://mlcommons.org/schema/"
+    "transform": "ml:transform"
   },
   "@type": "sc:Dataset",
   "name": "mydataset",
diff --git a/python/mlcroissant/mlcroissant/_src/tests/graphs/0.8/mlfield_missing_source/metadata.json b/python/mlcroissant/mlcroissant/_src/tests/graphs/0.8/mlfield_missing_source/metadata.json
index bdf821269..d7147662f 100644
--- a/python/mlcroissant/mlcroissant/_src/tests/graphs/0.8/mlfield_missing_source/metadata.json
+++ b/python/mlcroissant/mlcroissant/_src/tests/graphs/0.8/mlfield_missing_source/metadata.json
@@ -2,10 +2,8 @@
   "@context": {
     "@language": "en",
     "@vocab": "https://schema.org/",
-    "citeAs": "cr:citeAs",
     "column": "ml:column",
     "conformsTo": "dct:conformsTo",
-    "cr": "http://mlcommons.org/croissant/",
     "data": {
       "@id": "ml:data",
       "@type": "@json"
@@ -20,15 +18,11 @@
     "extract": "ml:extract",
     "field": "ml:field",
     "fileProperty": "ml:fileProperty",
-    "fileObject": "cr:fileObject",
-    "fileSet": "cr:fileSet",
     "format": "ml:format",
     "includes": "ml:includes",
     "isEnumeration": "ml:isEnumeration",
-    "isLiveDataset": "cr:isLiveDataset",
     "jsonPath": "ml:jsonPath",
-    "key": "cr:key",
-    "md5": "cr:md5",
+    "ml": "http://mlcommons.org/schema/",
     "parentField": "ml:parentField",
     "path": "ml:path",
     "personalSensitiveInformation": "ml:personalSensitiveInformation",
@@ -40,8 +34,7 @@
     "sc": "https://schema.org/",
     "separator": "ml:separator",
     "subField": "ml:subField",
-    "transform": "ml:transform",
-    "ml": "http://mlcommons.org/schema/"
+    "transform": "ml:transform"
   },
   "@type": "sc:Dataset",
   "name": "mydataset",
diff --git a/python/mlcroissant/mlcroissant/_src/tests/graphs/0.8/recordset_missing_context_for_datatype/metadata.json b/python/mlcroissant/mlcroissant/_src/tests/graphs/0.8/recordset_missing_context_for_datatype/metadata.json
index 50981630c..6d04dacf2 100644
--- a/python/mlcroissant/mlcroissant/_src/tests/graphs/0.8/recordset_missing_context_for_datatype/metadata.json
+++ b/python/mlcroissant/mlcroissant/_src/tests/graphs/0.8/recordset_missing_context_for_datatype/metadata.json
@@ -2,10 +2,8 @@
   "@context": {
     "@language": "en",
     "@vocab": "https://schema.org/",
-    "citeAs": "cr:citeAs",
     "column": "ml:column",
     "conformsTo": "dct:conformsTo",
-    "cr": "http://mlcommons.org/croissant/",
     "data": {
       "@id": "ml:data",
       "@type": "@json"
@@ -16,15 +14,11 @@
     "extract": "ml:extract",
     "field": "ml:field",
     "fileProperty": "ml:fileProperty",
-    "fileObject": "cr:fileObject",
-    "fileSet": "cr:fileSet",
     "format": "ml:format",
     "includes": "ml:includes",
     "isEnumeration": "ml:isEnumeration",
-    "isLiveDataset": "cr:isLiveDataset",
     "jsonPath": "ml:jsonPath",
-    "key": "cr:key",
-    "md5": "cr:md5",
+    "ml": "http://mlcommons.org/schema/",
     "parentField": "ml:parentField",
     "path": "ml:path",
     "personalSensitiveInformation": "ml:personalSensitiveInformation",
@@ -37,8 +31,7 @@
     "separator": "ml:separator",
     "source": "ml:source",
     "subField": "ml:subField",
-    "transform": "ml:transform",
-    "ml": "http://mlcommons.org/schema/"
+    "transform": "ml:transform"
   },
   "@type": "sc:Dataset",
   "name": "mydataset",
diff --git a/python/mlcroissant/mlcroissant/_src/tests/graphs/0.8/recordset_missing_property_name/metadata.json b/python/mlcroissant/mlcroissant/_src/tests/graphs/0.8/recordset_missing_property_name/metadata.json
index 495e02a13..caef30d72 100644
--- a/python/mlcroissant/mlcroissant/_src/tests/graphs/0.8/recordset_missing_property_name/metadata.json
+++ b/python/mlcroissant/mlcroissant/_src/tests/graphs/0.8/recordset_missing_property_name/metadata.json
@@ -2,10 +2,8 @@
   "@context": {
     "@language": "en",
     "@vocab": "https://schema.org/",
-    "citeAs": "cr:citeAs",
     "column": "ml:column",
     "conformsTo": "dct:conformsTo",
-    "cr": "http://mlcommons.org/croissant/",
     "data": {
       "@id": "ml:data",
       "@type": "@json"
@@ -20,15 +18,11 @@
     "extract": "ml:extract",
     "field": "ml:field",
     "fileProperty": "ml:fileProperty",
-    "fileObject": "cr:fileObject",
-    "fileSet": "cr:fileSet",
     "format": "ml:format",
     "includes": "ml:includes",
     "isEnumeration": "ml:isEnumeration",
-    "isLiveDataset": "cr:isLiveDataset",
     "jsonPath": "ml:jsonPath",
-    "key": "cr:key",
-    "md5": "cr:md5",
+    "ml": "http://mlcommons.org/schema/",
     "parentField": "ml:parentField",
     "path": "ml:path",
     "personalSensitiveInformation": "ml:personalSensitiveInformation",
@@ -41,8 +35,7 @@
     "separator": "ml:separator",
     "source": "ml:source",
     "subField": "ml:subField",
-    "transform": "ml:transform",
-    "ml": "http://mlcommons.org/schema/"
+    "transform": "ml:transform"
   },
   "@type": "sc:Dataset",
   "name": "mydataset",
diff --git a/python/mlcroissant/mlcroissant/_src/tests/graphs/0.8/recordset_wrong_join/metadata.json b/python/mlcroissant/mlcroissant/_src/tests/graphs/0.8/recordset_wrong_join/metadata.json
index 07f48f2c4..ca3bf6639 100644
--- a/python/mlcroissant/mlcroissant/_src/tests/graphs/0.8/recordset_wrong_join/metadata.json
+++ b/python/mlcroissant/mlcroissant/_src/tests/graphs/0.8/recordset_wrong_join/metadata.json
@@ -2,10 +2,8 @@
   "@context": {
     "@language": "en",
     "@vocab": "https://schema.org/",
-    "citeAs": "cr:citeAs",
     "column": "ml:column",
     "conformsTo": "dct:conformsTo",
-    "cr": "http://mlcommons.org/croissant/",
     "data": {
       "@id": "ml:data",
       "@type": "@json"
@@ -20,15 +18,11 @@
     "extract": "ml:extract",
     "field": "ml:field",
     "fileProperty": "ml:fileProperty",
-    "fileObject": "cr:fileObject",
-    "fileSet": "cr:fileSet",
     "format": "ml:format",
     "includes": "ml:includes",
     "isEnumeration": "ml:isEnumeration",
-    "isLiveDataset": "cr:isLiveDataset",
     "jsonPath": "ml:jsonPath",
-    "key": "cr:key",
-    "md5": "cr:md5",
+    "ml": "http://mlcommons.org/schema/",
     "parentField": "ml:parentField",
     "path": "ml:path",
     "personalSensitiveInformation": "ml:personalSensitiveInformation",
@@ -41,8 +35,7 @@
     "separator": "ml:separator",
     "source": "ml:source",
     "subField": "ml:subField",
-    "transform": "ml:transform",
-    "ml": "http://mlcommons.org/schema/"
+    "transform": "ml:transform"
   },
   "@type": "sc:Dataset",
   "name": "mydataset",