Skip to content

Commit

Permalink
Filter by org badge (#3155)
Browse files Browse the repository at this point in the history
Fixes [#1431](datagouv/data.gouv.fr#1431)

Filter by org badge in
- [x] datasets api GET
- [x] datasets api search
- [x] reuses api GET
- [x] reuses api search
- [x] dataservices api GET
- [x] ~~dataservices api search~~ (no search in apiv2 for dataservices,
yet?)
- [x] organizations api GET
- [x] organizations search

---------

Co-authored-by: Thibaud Dauce <[email protected]>
  • Loading branch information
magopian and ThibaudDauce authored Nov 4, 2024
1 parent 94bf618 commit 1f966a6
Show file tree
Hide file tree
Showing 25 changed files with 420 additions and 138 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
- Fix flaky "duplicated email" importing fixtures tests [#3176](https://github.com/opendatateam/udata/pull/3176)
- Fix deprecated CircleCI config [#3181](https://github.com/opendatateam/udata/pull/3181)
- Use proper RESTful Hydra API endpoints [#3178](https://github.com/opendatateam/udata/pull/3178)
- Add a "filter by organization badge" for datasets, dataservices, reuses and organizations [#3155](https://github.com/opendatateam/udata/pull/3155)

## 9.2.4 (2024-10-22)

Expand Down
180 changes: 143 additions & 37 deletions udata/api_fields.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
import functools
from typing import Any, Dict

import flask_restx.fields as restx_fields
import mongoengine
import mongoengine.fields as mongo_fields
Expand Down Expand Up @@ -114,13 +117,15 @@ def constructor_read(**kwargs):
# But we want to keep the `constructor_write` to allow changing the list.
def constructor_write(**kwargs):
return restx_fields.List(field_write, **kwargs)

elif isinstance(
field, (mongo_fields.GenericReferenceField, mongoengine.fields.GenericLazyReferenceField)
):

def constructor(**kwargs):
return restx_fields.Nested(lazy_reference, **kwargs)
elif isinstance(field, (mongo_fields.ReferenceField, mongo_fields.LazyReferenceField)):

elif isinstance(field, mongo_fields.ReferenceField | mongo_fields.LazyReferenceField):
# For reference we accept while writing a String representing the ID of the referenced model.
# For reading, if the user supplied a `nested_fields` (RestX model), we use it to convert
# the referenced model, if not we return a String (and RestX will call the `str()` of the model
Expand All @@ -142,13 +147,15 @@ def constructor_read(**kwargs):

def constructor(**kwargs):
return restx_fields.Nested(nested_fields, **kwargs)

elif hasattr(field.document_type_obj, "__read_fields__"):

def constructor_read(**kwargs):
return restx_fields.Nested(field.document_type_obj.__read_fields__, **kwargs)

def constructor_write(**kwargs):
return restx_fields.Nested(field.document_type_obj.__write_fields__, **kwargs)

else:
raise ValueError(
f"EmbeddedDocumentField `{key}` requires a `nested_fields` param to serialize/deserialize or a `@generate_fields()` definition."
Expand Down Expand Up @@ -200,47 +207,61 @@ def wrapper(cls):
read_fields = {}
write_fields = {}
ref_fields = {}
sortables = kwargs.get("additionalSorts", [])
sortables = kwargs.get("additional_sorts", [])

filterables = []
additional_filters = get_fields_with_additional_filters(
kwargs.get("additional_filters", {})
)

read_fields["id"] = restx_fields.String(required=True, readonly=True)

for key, field, info in get_fields(cls):
sortable_key = info.get("sortable", False)
if sortable_key:
sortables.append(
{"key": sortable_key if isinstance(sortable_key, str) else key, "value": key}
{
"key": sortable_key if isinstance(sortable_key, str) else key,
"value": key,
}
)

filterable = info.get("filterable", None)

if filterable is not None:
if "key" not in filterable:
filterable["key"] = key
if "column" not in filterable:
filterable["column"] = key

if "constraints" not in filterable:
filterable["constraints"] = []
if isinstance(
field, (mongo_fields.ReferenceField, mongo_fields.LazyReferenceField)
) or (
isinstance(field, mongo_fields.ListField)
and isinstance(
field.field,
(mongo_fields.ReferenceField, mongo_fields.LazyReferenceField),
)
):
filterable["constraints"].append("objectid")
filterables.append(compute_filter(key, field, info, filterable))

additional_filter = additional_filters.get(key, None)
if additional_filter:
if not isinstance(
field, mongo_fields.ReferenceField | mongo_fields.LazyReferenceField
):
raise Exception("Cannot use additional_filters on not a ref.")

ref_model = field.document_type

for child in additional_filter.get("children", []):
inner_field = getattr(ref_model, child["key"])

if "type" not in filterable:
filterable["type"] = str
if isinstance(field, mongo_fields.BooleanField):
filterable["type"] = boolean
column = f"{key}__{child['key']}"
child["key"] = f"{key}_{child['key']}"
filterable = compute_filter(column, inner_field, info, child)

# We may add more information later here:
# - type of mongo query to execute (right now only simple =)
# Since MongoDB is not capable of doing joins with a column like `organization__slug` we need to
# do a custom filter by splitting the query in two.

filterables.append(filterable)
def query(filterable, query, value):
# We use the computed `filterable["column"]` here because the `compute_filter` function
# could have added a default filter at the end (for example `organization__badges` converted
# in `organization__badges__kind`)
parts = filterable["column"].split("__", 1)
models = ref_model.objects.filter(**{parts[1]: value}).only("id")
return query.filter(**{f"{parts[0]}__in": models})

# do a query-based filter instead of a column based one
filterable["query"] = functools.partial(query, filterable)

filterables.append(filterable)

read, write = convert_db_to_field(key, field, info)

Expand Down Expand Up @@ -330,9 +351,10 @@ def make_lambda(method):

for filterable in filterables:
parser.add_argument(
filterable["key"],
filterable.get("label", filterable["key"]),
type=filterable["type"],
location="args",
choices=filterable.get("choices", None),
)

cls.__index_parser__ = parser
Expand Down Expand Up @@ -360,25 +382,31 @@ def apply_sort_filters_and_pagination(base_query):
base_query = base_query.search_text(phrase_query)

for filterable in filterables:
if args.get(filterable["key"]) is not None:
for constraint in filterable["constraints"]:
filter = args.get(filterable.get("label", filterable["key"]))
if filter is not None:
for constraint in filterable.get("constraints", []):
if constraint == "objectid" and not ObjectId.is_valid(
args[filterable["key"]]
):
api.abort(400, f'`{filterable["key"]}` must be an identifier')

base_query = base_query.filter(
**{
filterable["column"]: args[filterable["key"]],
}
)
query = filterable.get("query", None)
if query:
base_query = filterable["query"](base_query, filter)
else:
base_query = base_query.filter(
**{
filterable["column"]: filter,
}
)

if paginable:
base_query = base_query.paginate(args["page"], args["page_size"])

return base_query

cls.apply_sort_filters_and_pagination = apply_sort_filters_and_pagination
cls.__additional_class_info__ = kwargs
return cls

return wrapper
Expand Down Expand Up @@ -417,12 +445,12 @@ def patch(obj, request):
value = model_attribute.from_input(value)
elif isinstance(model_attribute, mongoengine.fields.ListField) and isinstance(
model_attribute.field,
(mongo_fields.ReferenceField, mongo_fields.LazyReferenceField),
mongo_fields.ReferenceField | mongo_fields.LazyReferenceField,
):
# TODO `wrap_primary_key` do Mongo request, do a first pass to fetch all documents before calling it (to avoid multiple queries).
value = [wrap_primary_key(key, model_attribute.field, id) for id in value]
elif isinstance(
model_attribute, (mongo_fields.ReferenceField, mongo_fields.LazyReferenceField)
model_attribute, mongo_fields.ReferenceField | mongo_fields.LazyReferenceField
):
value = wrap_primary_key(key, model_attribute, value)
elif isinstance(
Expand Down Expand Up @@ -517,3 +545,81 @@ def wrap_primary_key(
raise ValueError(
f"Unknown ID field type {id_field.__class__} for {document_type} (ID field name is {id_field_name}, value was {value})"
)


def get_fields_with_additional_filters(additional_filters: Dict[str, str]) -> Dict[str, Any]:
    """
    Group the declared additional filters by the field they are attached to.

    `additional_filters` maps a label to a dotted key made of exactly two parts,
    for example {"organization_badge": "organization.badges"}. The result is keyed
    by the first part (`organization`) so that the filters can be looked up while
    looping over the model fields (`title`, `organization`…). Each entry holds a
    `children` list with one dict per filter: its `label` (used as the name of the
    parser arg, e.g. `?organization_badge=public-service`), its `key` (the second
    part of the dotted key) and its `type` (always `str`).

    Raises an `Exception` when a key is not made of exactly two dot-separated parts.
    """
    grouped: dict = {}
    for label, key in additional_filters.items():
        segments = key.split(".")
        # Only the "parent.child" form is supported for now.
        if len(segments) != 2:
            raise Exception(f"Do not support `additional_filters` without two parts: {key}.")

        parent, child = segments
        entry = grouped.setdefault(parent, {"children": []})
        entry["children"].append({"label": label, "key": child, "type": str})

    return grouped


def compute_filter(column: str, field, info, filterable):
    """
    Fill in the missing entries of a `filterable` definition and return it.

    The `filterable` dict is completed in place with:
    - `key`: the param key in the URL (defaults to `column`);
    - `column`: the column to filter on; when `field` is an embedded document (or a
      list of embedded documents) whose class declares a `default_filterable_field`
      in its `__additional_class_info__`, that field name is appended to the column
      (for example `organization__badges` becomes `organization__badges__kind`);
    - `constraints`: gets `"objectid"` appended for references and lists of references;
    - `type`: `boolean` for a `BooleanField`, `str` otherwise (only when not provided);
    - `choices`: the `choices` of `info`, overridden by the field ones when it has some.

    :param column: name of the column the filter applies to.
    :param field: the mongoengine field backing the filter.
    :param info: the field info dict, only read for its optional `choices` entry.
    :param filterable: the (possibly partial) filter definition to complete.
    :returns: the same `filterable` dict.
    """
    # "key" is the param key in the URL
    if "key" not in filterable:
        filterable["key"] = column

    # If we do a filter on an embedded document, find the class of this document
    # to see if it declares a default filter value. A plain embedded field exposes
    # the class directly while the list variant exposes it through its inner field.
    embed_document_type = None
    if isinstance(field, mongo_fields.EmbeddedDocumentField):
        embed_document_type = field.document_type
    elif isinstance(field, mongo_fields.EmbeddedDocumentListField):
        embed_document_type = field.field.document_type

    embed_info = getattr(embed_document_type, "__additional_class_info__", None)
    default_filterable_field = (
        embed_info.get("default_filterable_field", None) if embed_info else None
    )

    if default_filterable_field:
        # There is a default filterable field so append it to the column and replace the
        # field to use the inner one (for example using the `kind` `StringField` instead of
        # the embed `Badge` field.)
        filterable["column"] = f"{column}__{default_filterable_field}"
        field = getattr(embed_document_type, default_filterable_field)
    else:
        filterable["column"] = column

    if "constraints" not in filterable:
        filterable["constraints"] = []

    if isinstance(field, mongo_fields.ReferenceField | mongo_fields.LazyReferenceField) or (
        isinstance(field, mongo_fields.ListField)
        and isinstance(field.field, mongo_fields.ReferenceField | mongo_fields.LazyReferenceField)
    ):
        # References are filtered by ID, so the supplied value must be a valid ObjectId.
        filterable["constraints"].append("objectid")

    if "type" not in filterable:
        if isinstance(field, mongo_fields.BooleanField):
            filterable["type"] = boolean
        else:
            filterable["type"] = str

    # Choices declared on the field itself take precedence over the ones from `info`.
    filterable["choices"] = info.get("choices", None)
    if hasattr(field, "choices") and field.choices:
        filterable["choices"] = field.choices

    return filterable
8 changes: 3 additions & 5 deletions udata/core/badges/factories.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,12 @@

from udata.factories import ModelFactory

from .models import Badge


def badge_factory(model):
def badge_factory(model_):
class BadgeFactory(ModelFactory):
class Meta:
model = Badge
model = model_._fields["badges"].field.document_type

kind = FuzzyChoice(model.__badges__.keys())
kind = FuzzyChoice(model_.__badges__)

return BadgeFactory
3 changes: 0 additions & 3 deletions udata/core/badges/forms.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
from udata.forms import ModelForm, fields, validators
from udata.i18n import lazy_gettext as _
from udata.models import Badge

__all__ = ("badge_form",)

Expand All @@ -9,8 +8,6 @@ def badge_form(model):
"""A form factory for a given model badges"""

class BadgeForm(ModelForm):
model_class = Badge

kind = fields.RadioField(
_("Kind"),
[validators.DataRequired()],
Expand Down
Loading

0 comments on commit 1f966a6

Please sign in to comment.