-
Notifications
You must be signed in to change notification settings - Fork 46
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge remote-tracking branch 'origin/main' into benjelloun-patch-2
- Loading branch information
Showing
35 changed files
with
1,022 additions
and
662 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -132,6 +132,6 @@ The Task Force is open to anyone (as is the parent [Datasets working group](http | |
The Task Force is co-chaired by [Omar Benjelloun](mailto:[email protected]) and [Elena Simperl](mailto:[email protected]). | ||
|
||
## Contributors | ||
Albert Villanova (Hugging Face), Andrew Zaldivar (Google), Baishan Guo (Meta), Carole Jean-Wu (Meta), Ce Zhang (ETH Zurich), Costanza Conforti (Google), D. Sculley (Kaggle), Dan Brickley (Schema.Org), Eduardo Arino de la Rubia (Meta), Edward Lockhart (Deepmind), Elena Simperl (King's College London), Goeff Thomas (Kaggle), Joaquin Vanschoren (TU/Eindhoven, OpenML), Jos van der Velde (TU/Eindhoven, OpenML), Julien Chaumond (Hugging Face), Kurt Bollacker (MLCommons), Lora Aroyo (Google), Luis Oala (Dotphoton), Meg Risdal (Kaggle), Natasha Noy (Google), Newsha Ardalani (Meta), Omar Benjelloun (Google), Peter Mattson (MLCommons), Pierre Marcenac (Google), Pierre Ruyssen (Google), Pieter Gijsbers (TU/Eindhoven, OpenML), Prabhant Singh (TU/Eindhoven, OpenML), Quentin Lhoest (Hugging Face), Steffen Vogler (Bayer), Taniya Das (TU/Eindhoven, OpenML) | ||
Albert Villanova (Hugging Face), Andrew Zaldivar (Google), Baishan Guo (Meta), Carole Jean-Wu (Meta), Ce Zhang (ETH Zurich), Costanza Conforti (Google), D. Sculley (Kaggle), Dan Brickley (Schema.Org), Eduardo Arino de la Rubia (Meta), Edward Lockhart (Deepmind), Elena Simperl (King's College London), Goeff Thomas (Kaggle), Joan Giner-Miguelez (UOC), Joaquin Vanschoren (TU/Eindhoven, OpenML), Jos van der Velde (TU/Eindhoven, OpenML), Julien Chaumond (Hugging Face), Kurt Bollacker (MLCommons), Lora Aroyo (Google), Luis Oala (Dotphoton), Meg Risdal (Kaggle), Natasha Noy (Google), Newsha Ardalani (Meta), Omar Benjelloun (Google), Peter Mattson (MLCommons), Pierre Marcenac (Google), Pierre Ruyssen (Google), Pieter Gijsbers (TU/Eindhoven, OpenML), Prabhant Singh (TU/Eindhoven, OpenML), Quentin Lhoest (Hugging Face), Steffen Vogler (Bayer), Taniya Das (TU/Eindhoven, OpenML) | ||
|
||
Thank you for supporting Croissant! 🙂 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
113 changes: 113 additions & 0 deletions
113
python/mlcroissant/mlcroissant/_src/core/dataclasses.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,113 @@ | ||
"""Utils to overload Python built-in dataclasses.""" | ||
|
||
from __future__ import annotations | ||
|
||
import dataclasses | ||
from typing import Any, Callable, Literal | ||
|
||
from rdflib import term | ||
|
||
from mlcroissant._src.core.context import Context | ||
from mlcroissant._src.core.types import Json | ||
|
||
|
||
class JsonldField(dataclasses.Field):
    """Overloads dataclasses.Field with JSON-LD-specific attributes.

    On top of the regular `dataclasses.Field` state, each instance stores the
    keyword-only arguments passed to `__init__` as same-named attributes:
    `cardinality`, `description`, `from_jsonld`, `input_types`, `to_jsonld`,
    `required` and `url`.
    """

    def __init__(
        self,
        *args,
        cardinality: Literal["ONE", "MANY"],
        description: str,
        from_jsonld: Callable[[Context, Json], Any] | None,
        input_types: list[Any],
        to_jsonld: Callable[[Context, Json], Any] | None,
        required: bool,
        url: term.URIRef | Callable[[Context], term.URIRef],
    ):
        """Sets all args and kwargs.

        Args:
            *args: Positional arguments forwarded unchanged to
                `dataclasses.Field.__init__`.
            cardinality: Either "ONE" or "MANY".
            description: Human-readable description of the field.
            from_jsonld: Optional converter called as `from_jsonld(ctx, value)`
                by `call_from_jsonld`.
            input_types: List of types accepted for the field.
            to_jsonld: Optional converter called as `to_jsonld(ctx, value)` by
                `call_to_jsonld`.
            required: Whether the field is required.
            url: Either a `term.URIRef`, or a callable that builds one from a
                `Context` (see `call_url`).
        """
        super().__init__(*args)
        self.cardinality = cardinality
        self.description = description
        self.from_jsonld = from_jsonld
        self.input_types = input_types
        self.to_jsonld = to_jsonld
        self.required = required
        self.url = url

    def call_from_jsonld(self, ctx: Context, value: Any):
        """Calls `from_jsonld` in the field.

        Returns:
            `self.from_jsonld(ctx, value)` when `value` is truthy and a
            converter is set; otherwise `value` unchanged.
        """
        # NOTE: the check is on truthiness, so falsy values (None, 0, "", [],
        # False, ...) are returned as-is and never reach the converter.
        if value and self.from_jsonld:
            return self.from_jsonld(ctx, value)
        else:
            return value

    def call_to_jsonld(self, ctx: Context, value: Any):
        """Calls `to_jsonld` in the field.

        Returns:
            `self.to_jsonld(ctx, value)` when `value` is truthy and a converter
            is set; otherwise `value` unchanged.
        """
        # NOTE: same truthiness rule as `call_from_jsonld`: falsy values are
        # passed through without conversion.
        if value and self.to_jsonld:
            return self.to_jsonld(ctx, value)
        else:
            return value

    def call_url(self, ctx: Context) -> term.URIRef:
        """Resolves `url` in the field.

        Returns:
            `self.url` itself when it already is a `term.URIRef`; otherwise the
            result of calling `self.url(ctx)`.
        """
        url = self.url
        if isinstance(url, term.URIRef):
            return url
        else:
            return url(ctx)
|
||
|
||
def jsonld_field(
    default=dataclasses.MISSING,
    default_factory=dataclasses.MISSING,
    init=True,
    repr=True,
    hash=None,
    compare=True,
    metadata=None,
    kw_only=dataclasses.MISSING,
    cardinality="ONE",
    description="",
    from_jsonld=None,
    input_types=None,
    to_jsonld=None,
    required=False,
    url=None,
):
    """Overloads dataclasses.field with specific attributes.

    The first eight parameters (`default` through `kw_only`) are forwarded to
    `dataclasses.Field`; the remaining ones are stored on the returned
    `JsonldField`.

    Args:
        default: Default value of the field.
        default_factory: Zero-argument callable producing the default value.
        init: Forwarded to `dataclasses.Field`.
        repr: Forwarded to `dataclasses.Field`.
        hash: Forwarded to `dataclasses.Field`.
        compare: Forwarded to `dataclasses.Field`.
        metadata: Forwarded to `dataclasses.Field`.
        kw_only: Forwarded to `dataclasses.Field`.
        cardinality: "ONE" or "MANY".
        description: Human-readable description of the field.
        from_jsonld: Optional converter `(ctx, value) -> Any`.
        input_types: Non-empty list of accepted types (mandatory).
        to_jsonld: Optional converter `(ctx, value) -> Any`.
        required: Whether the field is required.
        url: A URI reference, or a callable building one from a context
            (mandatory).

    Returns:
        The configured `JsonldField`.

    Raises:
        ValueError: If both `default` and `default_factory` are given, if
            `input_types` is not a non-empty list, or if `url` is falsy.
    """
    import sys

    if (
        default is not dataclasses.MISSING
        and default_factory is not dataclasses.MISSING
    ):
        raise ValueError("cannot specify both default and default_factory")
    if not input_types or not isinstance(input_types, list):
        raise ValueError(f"input type should be a non-empty list. Got: {input_types}")
    if not url:
        raise ValueError(f"Provide a url. Got: {url}")

    field_args = [
        default,
        default_factory,
        init,
        repr,
        hash,
        compare,
        metadata,
        kw_only,
    ]
    if sys.version_info >= (3, 14):
        # Python 3.14 added a trailing `doc` positional parameter to
        # `dataclasses.Field.__init__`; without it the call raises TypeError.
        field_args.append(None)

    return JsonldField(
        *field_args,
        cardinality=cardinality,
        description=description,
        from_jsonld=from_jsonld,
        input_types=input_types,
        to_jsonld=to_jsonld,
        required=required,
        url=url,
    )
|
||
|
||
def jsonld_fields(cls_or_instance) -> list[JsonldField]:
    """Filters the JSON-LD fields.

    Args:
        cls_or_instance: A dataclass, or an instance of one, as accepted by
            `dataclasses.fields`.

    Returns:
        The fields of `cls_or_instance` that are `JsonldField` instances, in
        the order `dataclasses.fields` yields them. Plain `dataclasses.Field`
        entries are left out.
    """
    return [
        field
        for field in dataclasses.fields(cls_or_instance)
        if isinstance(field, JsonldField)
    ]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.