From 7288786eb16615cfab78c839c6ad5364a768bb92 Mon Sep 17 00:00:00 2001 From: Jacob Nesbitt Date: Fri, 17 Jan 2025 13:30:44 -0500 Subject: [PATCH] Use `Unaccent` with dandiset search filter --- dandiapi/api/views/dandiset.py | 109 ++++++++++++++++++++------------- 1 file changed, 68 insertions(+), 41 deletions(-) diff --git a/dandiapi/api/views/dandiset.py b/dandiapi/api/views/dandiset.py index 3df6a975d..f0d974624 100644 --- a/dandiapi/api/views/dandiset.py +++ b/dandiapi/api/views/dandiset.py @@ -5,9 +5,10 @@ from allauth.socialaccount.models import SocialAccount from django.contrib.auth.models import User +from django.contrib.postgres.lookups import Unaccent from django.db import transaction -from django.db.models import Count, Max, OuterRef, QuerySet, Subquery, Sum -from django.db.models.functions import Coalesce +from django.db.models import Count, Max, OuterRef, QuerySet, Subquery, Sum, TextField +from django.db.models.functions import Cast, Coalesce from django.db.models.query_utils import Q from django.http import Http404 from drf_yasg.utils import no_body, swagger_auto_schema @@ -17,6 +18,7 @@ from rest_framework.generics import get_object_or_404 from rest_framework.response import Response from rest_framework.serializers import ValidationError +from rest_framework.settings import api_settings as drf_settings from rest_framework.viewsets import ReadOnlyModelViewSet from dandiapi.api.asset_paths import get_root_paths_many @@ -56,11 +58,12 @@ if TYPE_CHECKING: from rest_framework.request import Request + from rest_framework.views import APIView from dandiapi.api.models.upload import Upload -class DandisetFilterBackend(filters.OrderingFilter): +class DandisetOrderingFilter(filters.OrderingFilter): ordering_fields = ['id', 'name', 'modified', 'size'] ordering_description = ( 'Which field to use when ordering the results. ' @@ -69,51 +72,75 @@ class DandisetFilterBackend(filters.OrderingFilter): def filter_queryset(self, request, queryset, view): orderings = self.get_ordering(request, queryset, view) - if orderings: - ordering = orderings[0] - # ordering can be either 'created' or '-created', so test for both - if ordering.endswith('id'): - return queryset.order_by(ordering) - if ordering.endswith('name'): - # name refers to the name of the most recent version, so a subquery is required - latest_version = Version.objects.filter(dandiset=OuterRef('pk')).order_by( - '-created' - )[:1] - queryset = queryset.annotate(name=Subquery(latest_version.values('metadata__name'))) - return queryset.order_by(ordering) - if ordering.endswith('modified'): - # modified refers to the modification timestamp of the most - # recent version, so a subquery is required - latest_version = Version.objects.filter(dandiset=OuterRef('pk')).order_by( - '-created' - )[:1] - # get the `modified` field of the most recent version. - # '_version' is appended because the Dandiset model already has a `modified` field - queryset = queryset.annotate( - modified_version=Subquery(latest_version.values('modified')) - ) - return queryset.order_by(f'{ordering}_version') - if ordering.endswith('size'): - latest_version = Version.objects.filter(dandiset=OuterRef('pk')).order_by( - '-created' - )[:1] - queryset = queryset.annotate( - size=Subquery( - latest_version.annotate( - size=Coalesce(Sum('assets__blob__size'), 0) - + Coalesce(Sum('assets__zarr__size'), 0) - ).values('size') - ) + if not orderings: + return queryset + ordering = orderings[0] + + # ordering can be either 'created' or '-created', so test for both + if ordering.endswith('id'): + return queryset.order_by(ordering) + + if ordering.endswith('name'): + # name refers to the name of the most recent version, so a subquery is required + latest_version = Version.objects.filter(dandiset=OuterRef('pk')).order_by('-created')[ + :1 + ] + queryset = queryset.annotate(name=Subquery(latest_version.values('metadata__name'))) + return queryset.order_by(ordering) + + if ordering.endswith('modified'): + # modified refers to the modification timestamp of the most + # recent version, so a subquery is required + latest_version = Version.objects.filter(dandiset=OuterRef('pk')).order_by('-created')[ + :1 + ] + # get the `modified` field of the most recent version. + # '_version' is appended because the Dandiset model already has a `modified` field + queryset = queryset.annotate( + modified_version=Subquery(latest_version.values('modified')) + ) + return queryset.order_by(f'{ordering}_version') + + if ordering.endswith('size'): + latest_version = Version.objects.filter(dandiset=OuterRef('pk')).order_by('-created')[ + :1 + ] + queryset = queryset.annotate( + size=Subquery( + latest_version.annotate( + size=Coalesce(Sum('assets__blob__size'), 0) + + Coalesce(Sum('assets__zarr__size'), 0) + ).values('size') ) - return queryset.order_by(ordering) + ) + return queryset.order_by(ordering) + return queryset +class DandisetSearchFilter(filters.BaseFilterBackend): + search_param = drf_settings.SEARCH_PARAM + + def get_search_term(self, request): + param = request.query_params.get(self.search_param, '') + param = param.replace('\x00', '') # strip null characters + + return param # noqa: RET504 + + def filter_queryset(self, request: Request, queryset: QuerySet, view: APIView) -> QuerySet: + search_term = self.get_search_term(request=request) + if not search_term: + return queryset + + return queryset.alias( + search_field=Unaccent(Cast('versions__metadata', TextField())) + ).filter(search_field__icontains=search_term) + + class DandisetViewSet(ReadOnlyModelViewSet): serializer_class = DandisetDetailSerializer pagination_class = DandiPagination - filter_backends = [filters.SearchFilter, DandisetFilterBackend] - search_fields = ['versions__metadata'] + filter_backends = [DandisetSearchFilter, DandisetOrderingFilter] lookup_value_regex = Dandiset.IDENTIFIER_REGEX # This is to maintain consistency with the auto-generated names shown in swagger.