From 2e996eada4a2eec38f547dbc828b7bbf20d0e573 Mon Sep 17 00:00:00 2001
From: munishmangla98
Date: Thu, 16 Jan 2025 14:40:15 +0530
Subject: [PATCH 1/7] all comments removed

---
 backend/dataset/views.py | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/backend/dataset/views.py b/backend/dataset/views.py
index 7e6b4227c..40ce6541e 100644
--- a/backend/dataset/views.py
+++ b/backend/dataset/views.py
@@ -257,7 +257,6 @@ def list(self, request, *args, **kwargs):
         queryset = DatasetInstance.objects.filter(
             organisation_id=request.user.organization
         ).filter(Q(public_to_managers=True) | Q(users__id=request.user.id))
-
         if "dataset_visibility" in request.query_params:
             dataset_visibility = request.query_params["dataset_visibility"]
             if dataset_visibility == "all_public_datasets":
@@ -267,18 +266,15 @@ def list(self, request, *args, **kwargs):
                 queryset = queryset.filter(public_to_managers=True)
             elif dataset_visibility == "my_datasets":
                 queryset = queryset.filter(users__id=request.user.id)
-
         # Filter the queryset based on the query params
         if "dataset_type" in dict(request.query_params):
             queryset = queryset.filter(
                 dataset_type__exact=request.query_params["dataset_type"]
             )
-
         # Serialize the distinct items and sort by instance ID
         serializer = DatasetInstanceSerializer(
             queryset.distinct().order_by("instance_id"), many=True
         )
-
         # Add status fields to the serializer data
         for dataset_instance in serializer.data:
             # Get the task statuses for the dataset instance
             (
                 dataset_instance_status,
                 dataset_instance_date,
@@ -288,13 +284,11 @@ def list(self, request, *args, **kwargs):
                 dataset_instance_time,
                 dataset_instance_result,
             ) = get_dataset_upload_status(dataset_instance["instance_id"])
-
             # Add the task status and time to the dataset instance response
             dataset_instance["last_upload_status"] = dataset_instance_status
             dataset_instance["last_upload_date"] = dataset_instance_date
             dataset_instance["last_upload_time"] = dataset_instance_time
             dataset_instance["last_upload_result"] = dataset_instance_result
-
         return Response(serializer.data)
 
     @is_organization_owner

From c4e50943664867fb9c540ede746c66435052bd25 Mon Sep 17 00:00:00 2001
From: munishmangla98
Date: Thu, 16 Jan 2025 15:29:23 +0530
Subject: [PATCH 2/7] all comments removed

---
 backend/dataset/serializers.py |  32 +++++++
 backend/dataset/views.py       | 164 ++++++++++++++++++++++++---------
 2 files changed, 151 insertions(+), 45 deletions(-)

diff --git a/backend/dataset/serializers.py b/backend/dataset/serializers.py
index a6152f3c2..d262c2501 100644
--- a/backend/dataset/serializers.py
+++ b/backend/dataset/serializers.py
@@ -11,6 +11,38 @@ class DatasetInstanceSerializer(serializers.ModelSerializer):
     class Meta:
         model = DatasetInstance
         fields = "__all__"
+
+
+class DatasetInstanceSerializerOptimized(serializers.ModelSerializer):
+    created_by = UserProfileSerializer(read_only=True)
+
+    class Meta:
+        model = DatasetInstance
+        fields = [
+            "id",
+            "instance_id",
+            "name",
+            "description",
+            "dataset_type",
+            "is_archived",
+            "public_to_managers",
+            "organisation_id",
+            "workspace_id",
+            "users",
+            "created_by",
+            "created_at",
+            "last_updated",
+            "file_format",
+            "file_path",
+            "size",
+            "record_count",
+            "status",
+            "last_upload_status",
+            "last_upload_date",
+            "last_upload_time",
+            "last_upload_result",
+        ]
+
 
 
 class DatasetInstanceUploadSerializer(serializers.Serializer):

diff --git a/backend/dataset/views.py b/backend/dataset/views.py
index 40ce6541e..a8d282c03 100644
--- a/backend/dataset/views.py
+++ b/backend/dataset/views.py
@@ -244,52 +244,126 @@ def retrieve(self, request, pk, *args, **kwargs):
             ),
         ],
     )
-    def list(self, request, *args, **kwargs):
-        # Org Owners and superusers see all datasets
-        if request.user.is_superuser:
-            queryset = DatasetInstance.objects.all()
-        elif request.user.role == User.ORGANIZATION_OWNER:
-            queryset = DatasetInstance.objects.filter(
-                organisation_id=request.user.organization
-            )
-        # Managers only see datasets that they are added to and public datasets
-        else:
-            queryset = DatasetInstance.objects.filter(
-                organisation_id=request.user.organization
-            ).filter(Q(public_to_managers=True) | Q(users__id=request.user.id))
-        if "dataset_visibility" in request.query_params:
-            dataset_visibility = request.query_params["dataset_visibility"]
-            if dataset_visibility == "all_public_datasets":
-                if (request.user.role == User.WORKSPACE_MANAGER) and (
-                    request.user.is_superuser == False
-                ):
-                    queryset = queryset.filter(public_to_managers=True)
-            elif dataset_visibility == "my_datasets":
-                queryset = queryset.filter(users__id=request.user.id)
-        # Filter the queryset based on the query params
-        if "dataset_type" in dict(request.query_params):
-            queryset = queryset.filter(
-                dataset_type__exact=request.query_params["dataset_type"]
-            )
-        # Serialize the distinct items and sort by instance ID
-        serializer = DatasetInstanceSerializer(
-            queryset.distinct().order_by("instance_id"), many=True
-        )
-        # Add status fields to the serializer data
-        for dataset_instance in serializer.data:
-            # Get the task statuses for the dataset instance
-            (
-                dataset_instance_status,
-                dataset_instance_date,
-                dataset_instance_time,
-                dataset_instance_result,
-            ) = get_dataset_upload_status(dataset_instance["instance_id"])
-            # Add the task status and time to the dataset instance response
-            dataset_instance["last_upload_status"] = dataset_instance_status
-            dataset_instance["last_upload_date"] = dataset_instance_date
-            dataset_instance["last_upload_time"] = dataset_instance_time
-            dataset_instance["last_upload_result"] = dataset_instance_result
-        return Response(serializer.data)
+
+
+    # def list(self, request, *args, **kwargs):
+    #     # Org Owners and superusers see all datasets
+    #     if request.user.is_superuser:
+    #         queryset = DatasetInstance.objects.all()
+    #     elif request.user.role == User.ORGANIZATION_OWNER:
+    #         queryset = DatasetInstance.objects.filter(
+    #             organisation_id=request.user.organization
+    #         )
+    #     # Managers only see datasets that they are added to and public datasets
+    #     else:
+    #         queryset = DatasetInstance.objects.filter(
+    #             organisation_id=request.user.organization
+    #         ).filter(Q(public_to_managers=True) | Q(users__id=request.user.id))
+    #     if "dataset_visibility" in request.query_params:
+    #         dataset_visibility = request.query_params["dataset_visibility"]
+    #         if dataset_visibility == "all_public_datasets":
+    #             if (request.user.role == User.WORKSPACE_MANAGER) and (
+    #                 request.user.is_superuser == False
+    #             ):
+    #                 queryset = queryset.filter(public_to_managers=True)
+    #         elif dataset_visibility == "my_datasets":
+    #             queryset = queryset.filter(users__id=request.user.id)
+    #     # Filter the queryset based on the query params
+    #     if "dataset_type" in dict(request.query_params):
+    #         queryset = queryset.filter(
+    #             dataset_type__exact=request.query_params["dataset_type"]
+    #         )
+    #     # Serialize the distinct items and sort by instance ID
+    #     serializer = DatasetInstanceSerializer(
+    #         queryset.distinct().order_by("instance_id"), many=True
+    #     )
+    #     # Add status fields to the serializer data
+    #     for dataset_instance in serializer.data:
+    #         # Get the task statuses for the dataset instance
+    #         (
+    #             dataset_instance_status,
+    #             dataset_instance_date,
+    #             dataset_instance_time,
+    #             dataset_instance_result,
+    #         ) = get_dataset_upload_status(dataset_instance["instance_id"])
+    #         # Add the task status and time to the dataset instance response
+    #         dataset_instance["last_upload_status"] = dataset_instance_status
+    #         dataset_instance["last_upload_date"] = dataset_instance_date
+    #         dataset_instance["last_upload_time"] = dataset_instance_time
+    #         dataset_instance["last_upload_result"] = dataset_instance_result
+    #     return Response(serializer.data)
+
+
+    # def get_queryset(self):
+    def list_optimized(self, request):
+        try:
+            # Base queryset determination based on user role
+            if request.user.is_superuser:
+                queryset = DatasetInstance.objects.all()
+            elif request.user.role == User.ORGANIZATION_OWNER:
+                queryset = DatasetInstance.objects.filter(
+                    organisation_id=request.user.organization
+                )
+            else:
+                queryset = DatasetInstance.objects.filter(
+                    organisation_id=request.user.organization
+                ).filter(Q(public_to_managers=True) | Q(users__id=request.user.id))
+
+            # Apply optional filters based on query parameters
+            if "dataset_visibility" in request.query_params:
+                dataset_visibility = request.query_params["dataset_visibility"]
+                if dataset_visibility == "all_public_datasets":
+                    if (
+                        request.user.role == User.WORKSPACE_MANAGER
+                        and not request.user.is_superuser
+                    ):
+                        queryset = queryset.filter(public_to_managers=True)
+                elif dataset_visibility == "my_datasets":
+                    queryset = queryset.filter(users__id=request.user.id)
+
+            if "dataset_type" in request.query_params:
+                queryset = queryset.filter(
+                    dataset_type__exact=request.query_params["dataset_type"]
+                )
+
+            if "archived_datasets" in request.query_params:
+                archived_datasets = request.query_params["archived_datasets"] == "true"
+                queryset = queryset.filter(is_archived=archived_datasets)
+
+            # Add sorting by custom criteria
+            if (
+                "sort_type" in request.query_params
+                and request.query_params["sort_type"] == "recently_updated"
+            ):
+                queryset = queryset.order_by(F("last_updated").desc(nulls_last=True))
+            else:
+                queryset = queryset.order_by(F("instance_id").asc())
+
+            # Serialize the distinct items using the optimized serializer
+            serializer = DatasetInstanceSerializerOptimized(queryset.distinct(), many=True)
+
+            # Add additional status fields to each dataset instance
+            for dataset_instance in serializer.data:
+                (
+                    dataset_instance_status,
+                    dataset_instance_date,
+                    dataset_instance_time,
+                    dataset_instance_result,
+                ) = get_dataset_upload_status(dataset_instance["id"])
+
+                dataset_instance["last_upload_status"] = dataset_instance_status
+                dataset_instance["last_upload_date"] = dataset_instance_date
+                dataset_instance["last_upload_time"] = dataset_instance_time
+                dataset_instance["last_upload_result"] = dataset_instance_result
+
+            return Response(serializer.data, status=status.HTTP_200_OK)
+        except Exception as e:
+            return Response(
+                {"message": "An error occurred: {}".format(str(e))},
+                status=status.HTTP_400_BAD_REQUEST,
+            )
+
+
 
     @is_organization_owner
     @action(methods=["GET"], detail=True, name="Download Dataset in CSV format")

From 689861ba50fc25ed49e409c3dbee333b45c0f3a6 Mon Sep 17 00:00:00 2001
From: munishmangla98
Date: Thu, 16 Jan 2025 15:44:32 +0530
Subject: [PATCH 3/7] optimized dataset code

---
 backend/dataset/admin.py       | 2 +-
 backend/dataset/serializers.py | 2 --
 2 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/backend/dataset/admin.py b/backend/dataset/admin.py
index 140171494..0ecdb9a6d 100644
--- a/backend/dataset/admin.py
+++ b/backend/dataset/admin.py
@@ -1,4 +1,4 @@
-import resource
+# import resource
 from django.contrib import admin
 from import_export.admin import ImportExportActionModelAdmin
 from .resources import *

diff --git a/backend/dataset/serializers.py b/backend/dataset/serializers.py
index d262c2501..5393a0fc9 100644
--- a/backend/dataset/serializers.py
+++ b/backend/dataset/serializers.py
@@ -14,8 +14,6 @@ class Meta:
 
 
 class DatasetInstanceSerializerOptimized(serializers.ModelSerializer):
-    created_by = UserProfileSerializer(read_only=True)
-
     class Meta:
         model = DatasetInstance
         fields = [

From 872cef8d9e1eb4cde3cfb4c93991390dab3e9c5f Mon Sep 17 00:00:00 2001
From: munishmangla98
Date: Fri, 17 Jan 2025 10:43:08 +0530
Subject: [PATCH 4/7] removed 5 fields from dataset serializer

---
 backend/dataset/serializers.py | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/backend/dataset/serializers.py b/backend/dataset/serializers.py
index 5393a0fc9..727244e20 100644
--- a/backend/dataset/serializers.py
+++ b/backend/dataset/serializers.py
@@ -26,7 +26,6 @@ class Meta:
             "public_to_managers",
             "organisation_id",
             "workspace_id",
-            "users",
             "created_by",
             "created_at",
             "last_updated",
@@ -34,11 +33,7 @@ class Meta:
             "file_path",
             "size",
             "record_count",
-            "status",
-            "last_upload_status",
-            "last_upload_date",
-            "last_upload_time",
-            "last_upload_result",
+            "status"
         ]
 
 

From 00ca005f243ce99af78330aa381e4a09811d6dc6 Mon Sep 17 00:00:00 2001
From: munishmangla98
Date: Sat, 18 Jan 2025 08:53:04 +0530
Subject: [PATCH 5/7] Uncomment list view

---
 backend/dataset/views.py | 108 +++++++++++++++++++--------------------
 1 file changed, 54 insertions(+), 54 deletions(-)

diff --git a/backend/dataset/views.py b/backend/dataset/views.py
index a8d282c03..9a96338f1 100644
--- a/backend/dataset/views.py
+++ b/backend/dataset/views.py
@@ -246,52 +246,52 @@ def retrieve(self, request, pk, *args, **kwargs):
     )
 
 
-    # def list(self, request, *args, **kwargs):
-    #     # Org Owners and superusers see all datasets
-    #     if request.user.is_superuser:
-    #         queryset = DatasetInstance.objects.all()
-    #     elif request.user.role == User.ORGANIZATION_OWNER:
-    #         queryset = DatasetInstance.objects.filter(
-    #             organisation_id=request.user.organization
-    #         )
-    #     # Managers only see datasets that they are added to and public datasets
-    #     else:
-    #         queryset = DatasetInstance.objects.filter(
-    #             organisation_id=request.user.organization
-    #         ).filter(Q(public_to_managers=True) | Q(users__id=request.user.id))
-    #     if "dataset_visibility" in request.query_params:
-    #         dataset_visibility = request.query_params["dataset_visibility"]
-    #         if dataset_visibility == "all_public_datasets":
-    #             if (request.user.role == User.WORKSPACE_MANAGER) and (
-    #                 request.user.is_superuser == False
-    #             ):
-    #                 queryset = queryset.filter(public_to_managers=True)
-    #         elif dataset_visibility == "my_datasets":
-    #             queryset = queryset.filter(users__id=request.user.id)
-    #     # Filter the queryset based on the query params
-    #     if "dataset_type" in dict(request.query_params):
-    #         queryset = queryset.filter(
-    #             dataset_type__exact=request.query_params["dataset_type"]
-    #         )
-    #     # Serialize the distinct items and sort by instance ID
-    #     serializer = DatasetInstanceSerializer(
-    #         queryset.distinct().order_by("instance_id"), many=True
-    #     )
-    #     # Add status fields to the serializer data
-    #     for dataset_instance in serializer.data:
-    #         # Get the task statuses for the dataset instance
-    #         (
-    #             dataset_instance_status,
-    #             dataset_instance_date,
-    #             dataset_instance_time,
-    #             dataset_instance_result,
-    #         ) = get_dataset_upload_status(dataset_instance["instance_id"])
-    #         # Add the task status and time to the dataset instance response
-    #         dataset_instance["last_upload_status"] = dataset_instance_status
-    #         dataset_instance["last_upload_date"] = dataset_instance_date
-    #         dataset_instance["last_upload_time"] = dataset_instance_time
-    #         dataset_instance["last_upload_result"] = dataset_instance_result
-    #     return Response(serializer.data)
+    def list(self, request, *args, **kwargs):
+        # Org Owners and superusers see all datasets
+        if request.user.is_superuser:
+            queryset = DatasetInstance.objects.all()
+        elif request.user.role == User.ORGANIZATION_OWNER:
+            queryset = DatasetInstance.objects.filter(
+                organisation_id=request.user.organization
+            )
+        # Managers only see datasets that they are added to and public datasets
+        else:
+            queryset = DatasetInstance.objects.filter(
+                organisation_id=request.user.organization
+            ).filter(Q(public_to_managers=True) | Q(users__id=request.user.id))
+        if "dataset_visibility" in request.query_params:
+            dataset_visibility = request.query_params["dataset_visibility"]
+            if dataset_visibility == "all_public_datasets":
+                if (request.user.role == User.WORKSPACE_MANAGER) and (
+                    request.user.is_superuser == False
+                ):
+                    queryset = queryset.filter(public_to_managers=True)
+            elif dataset_visibility == "my_datasets":
+                queryset = queryset.filter(users__id=request.user.id)
+        # Filter the queryset based on the query params
+        if "dataset_type" in dict(request.query_params):
+            queryset = queryset.filter(
+                dataset_type__exact=request.query_params["dataset_type"]
+            )
+        # Serialize the distinct items and sort by instance ID
+        serializer = DatasetInstanceSerializer(
+            queryset.distinct().order_by("instance_id"), many=True
+        )
+        # Add status fields to the serializer data
+        for dataset_instance in serializer.data:
+            # Get the task statuses for the dataset instance
+            (
+                dataset_instance_status,
+                dataset_instance_date,
+                dataset_instance_time,
+                dataset_instance_result,
+            ) = get_dataset_upload_status(dataset_instance["instance_id"])
+            # Add the task status and time to the dataset instance response
+            dataset_instance["last_upload_status"] = dataset_instance_status
+            dataset_instance["last_upload_date"] = dataset_instance_date
+            dataset_instance["last_upload_time"] = dataset_instance_time
+            dataset_instance["last_upload_result"] = dataset_instance_result
+        return Response(serializer.data)
 
 
     # def get_queryset(self):
@@ -308,7 +308,7 @@ def list_optimized(self, request):
                 queryset = DatasetInstance.objects.filter(
                     organisation_id=request.user.organization
                 ).filter(Q(public_to_managers=True) | Q(users__id=request.user.id))
-            
+
             # Apply optional filters based on query parameters
             if "dataset_visibility" in request.query_params:
                 dataset_visibility = request.query_params["dataset_visibility"]
@@ -320,16 +320,16 @@ def list_optimized(self, request):
                         queryset = queryset.filter(public_to_managers=True)
                 elif dataset_visibility == "my_datasets":
                     queryset = queryset.filter(users__id=request.user.id)
-            
+
            if "dataset_type" in request.query_params:
                 queryset = queryset.filter(
                     dataset_type__exact=request.query_params["dataset_type"]
                 )
-            
+
             if "archived_datasets" in request.query_params:
                 archived_datasets = request.query_params["archived_datasets"] == "true"
                 queryset = queryset.filter(is_archived=archived_datasets)
-            
+
             # Add sorting by custom criteria
             if (
                 "sort_type" in request.query_params
@@ -338,10 +338,10 @@ def list_optimized(self, request):
                 queryset = queryset.order_by(F("last_updated").desc(nulls_last=True))
             else:
                 queryset = queryset.order_by(F("instance_id").asc())
-            
+
             # Serialize the distinct items using the optimized serializer
             serializer = DatasetInstanceSerializerOptimized(queryset.distinct(), many=True)
-            
+
             # Add additional status fields to each dataset instance
             for dataset_instance in serializer.data:
                 (
@@ -350,12 +350,12 @@ def list_optimized(self, request):
                     dataset_instance_time,
                     dataset_instance_result,
                 ) = get_dataset_upload_status(dataset_instance["id"])
-            
+
                 dataset_instance["last_upload_status"] = dataset_instance_status
                 dataset_instance["last_upload_date"] = dataset_instance_date
                 dataset_instance["last_upload_time"] = dataset_instance_time
                 dataset_instance["last_upload_result"] = dataset_instance_result
-            
+
             return Response(serializer.data, status=status.HTTP_200_OK)
         except Exception as e:
             return Response(

From 6cd95765b6d6039cc0da92fcd12095a61c6c5293 Mon Sep 17 00:00:00 2001
From: munishmangla98
Date: Fri, 24 Jan 2025 10:59:25 +0530
Subject: [PATCH 6/7] dataset optimized

---
 backend/dataset/serializers.py      |  18 +--
 backend/dataset/views.py            | 187 ++++++++++++++++++----------
 backend/shoonya_backend/settings.py |   1 +
 backend/utils/dataset_utils.py      |  15 +++
 4 files changed, 141 insertions(+), 80 deletions(-)
 create mode 100644 backend/utils/dataset_utils.py

diff --git a/backend/dataset/serializers.py b/backend/dataset/serializers.py
index 727244e20..3cd04221d 100644
--- a/backend/dataset/serializers.py
+++ b/backend/dataset/serializers.py
@@ -17,23 +17,13 @@ class DatasetInstanceSerializerOptimized(serializers.ModelSerializer):
     class Meta:
         model = DatasetInstance
         fields = [
-            "id",
             "instance_id",
-            "name",
-            "description",
+            "parent_instance_id",
+            "instance_name",
+            "instance_description",
             "dataset_type",
-            "is_archived",
             "public_to_managers",
-            "organisation_id",
-            "workspace_id",
-            "created_by",
-            "created_at",
-            "last_updated",
-            "file_format",
-            "file_path",
-            "size",
-            "record_count",
-            "status"
+            "organisation_id"
         ]
 
 

diff --git a/backend/dataset/views.py b/backend/dataset/views.py
index 9a96338f1..3591989e8 100644
--- a/backend/dataset/views.py
+++ b/backend/dataset/views.py
@@ -38,6 +38,11 @@
 from . import resources
 from .models import *
 from .serializers import *
+from django.db.models import Prefetch, Q, F
+from utils.dataset_utils import get_batch_dataset_upload_status
+from rest_framework.response import Response
+from rest_framework.decorators import action
+from rest_framework import status
 from .tasks import upload_data_to_data_instance, deduplicate_dataset_instance_items
 import dataset
 from tasks.models import (
@@ -186,6 +191,22 @@ def get_dataset_upload_status(dataset_instance_pk):
 
 # Create your views here.
 
+# def get_batch_dataset_upload_status(instance_ids):
+#     """
+#     Batch fetch upload status for a list of dataset instance IDs.
+#     Replace this with actual logic to retrieve status from your database.
+#     """
+#     # Mock data for testing
+#     status_data = {}
+#     for instance_id in instance_ids:
+#         status_data[instance_id] = {
+#             "last_upload_status": "Completed",
+#             "last_upload_date": "2023-01-01",
+#             "last_upload_time": "12:00:00",
+#             "last_upload_result": "Success",
+#         }
+#     return status_data
+
 class DatasetInstanceViewSet(viewsets.ModelViewSet):
     """
     ViewSet for Dataset Instance
@@ -295,74 +316,108 @@ def list(self, request, *args, **kwargs):
 
 
     # def get_queryset(self):
+    @action(detail=False, methods=["get"], url_path="optimized-list")
+    # def list_optimized(self, request):
+    #     # Base queryset determination based on user role
+    #     if request.user.is_superuser:
+    #         queryset = DatasetInstance.objects.all()
+    #     elif request.user.role == User.ORGANIZATION_OWNER:
+    #         queryset = DatasetInstance.objects.filter(
+    #             organisation_id=request.user.organization
+    #         )
+    #     else:
+    #         queryset = DatasetInstance.objects.filter(
+    #             organisation_id=request.user.organization
+    #         ).filter(Q(public_to_managers=True) | Q(users__id=request.user.id))
+    #     # Apply optional filters based on query parameters
+    #     if "dataset_visibility" in request.query_params:
+    #         dataset_visibility = request.query_params["dataset_visibility"]
+    #         if dataset_visibility == "all_public_datasets":
+    #             if (
+    #                 request.user.role == User.WORKSPACE_MANAGER
+    #                 and not request.user.is_superuser
+    #             ):
+    #                 queryset = queryset.filter(public_to_managers=True)
+    #         elif dataset_visibility == "my_datasets":
+    #             queryset = queryset.filter(users__id=request.user.id)
+    #     if "dataset_type" in request.query_params:
+    #         queryset = queryset.filter(
+    #             dataset_type__exact=request.query_params["dataset_type"]
+    #         )
+    #     if "archived_datasets" in request.query_params:
+    #         archived_datasets = request.query_params["archived_datasets"] == "true"
+    #         queryset = queryset.filter(is_archived=archived_datasets)
+    #     # Add sorting by custom criteria
+    #     if (
+    #         "sort_type" in request.query_params
+    #         and request.query_params["sort_type"] == "recently_updated"
+    #     ):
+    #         queryset = queryset.order_by(F("last_updated").desc(nulls_last=True))
+    #     else:
+    #         queryset = queryset.order_by(F("instance_id").asc())
+    #     # Serialize the distinct items using the optimized serializer
+    #     serializer = DatasetInstanceSerializerOptimized(queryset.distinct(), many=True)
+    #     # Add additional status fields to each dataset instance
+    #     for dataset_instance in serializer.data:
+    #         (
+    #             dataset_instance_status,
+    #             dataset_instance_date,
+    #             dataset_instance_time,
+    #             dataset_instance_result,
+    #         ) = get_dataset_upload_status(dataset_instance["instance_id"])
+    #         dataset_instance["last_upload_status"] = dataset_instance_status
+    #         dataset_instance["last_upload_date"] = dataset_instance_date
+    #         dataset_instance["last_upload_time"] = dataset_instance_time
+    #         dataset_instance["last_upload_result"] = dataset_instance_result
+    #     return Response(serializer.data, status=status.HTTP_200_OK)
     def list_optimized(self, request):
-        try:
-            # Base queryset determination based on user role
-            if request.user.is_superuser:
-                queryset = DatasetInstance.objects.all()
-            elif request.user.role == User.ORGANIZATION_OWNER:
-                queryset = DatasetInstance.objects.filter(
-                    organisation_id=request.user.organization
-                )
-            else:
-                queryset = DatasetInstance.objects.filter(
-                    organisation_id=request.user.organization
-                ).filter(Q(public_to_managers=True) | Q(users__id=request.user.id))
-
-            # Apply optional filters based on query parameters
-            if "dataset_visibility" in request.query_params:
-                dataset_visibility = request.query_params["dataset_visibility"]
-                if dataset_visibility == "all_public_datasets":
-                    if (
-                        request.user.role == User.WORKSPACE_MANAGER
-                        and not request.user.is_superuser
-                    ):
-                        queryset = queryset.filter(public_to_managers=True)
-                elif dataset_visibility == "my_datasets":
-                    queryset = queryset.filter(users__id=request.user.id)
-
-            if "dataset_type" in request.query_params:
-                queryset = queryset.filter(
-                    dataset_type__exact=request.query_params["dataset_type"]
-                )
-
-            if "archived_datasets" in request.query_params:
-                archived_datasets = request.query_params["archived_datasets"] == "true"
-                queryset = queryset.filter(is_archived=archived_datasets)
-
-            # Add sorting by custom criteria
-            if (
-                "sort_type" in request.query_params
-                and request.query_params["sort_type"] == "recently_updated"
-            ):
-                queryset = queryset.order_by(F("last_updated").desc(nulls_last=True))
-            else:
-                queryset = queryset.order_by(F("instance_id").asc())
-
-            # Serialize the distinct items using the optimized serializer
-            serializer = DatasetInstanceSerializerOptimized(queryset.distinct(), many=True)
-
-            # Add additional status fields to each dataset instance
-            for dataset_instance in serializer.data:
-                (
-                    dataset_instance_status,
-                    dataset_instance_date,
-                    dataset_instance_time,
-                    dataset_instance_result,
-                ) = get_dataset_upload_status(dataset_instance["id"])
-
-                dataset_instance["last_upload_status"] = dataset_instance_status
-                dataset_instance["last_upload_date"] = dataset_instance_date
-                dataset_instance["last_upload_time"] = dataset_instance_time
-                dataset_instance["last_upload_result"] = dataset_instance_result
-
-            return Response(serializer.data, status=status.HTTP_200_OK)
-        except Exception as e:
-            return Response(
-                {"message": "An error occurred: {}".format(str(e))},
-                status=status.HTTP_400_BAD_REQUEST,
-            )
+        # Base queryset determination based on user role
+        queryset = DatasetInstance.objects.all()
+        if request.user.is_superuser:
+            queryset = queryset
+        elif request.user.role == User.ORGANIZATION_OWNER:
+            queryset = queryset.filter(
+                organisation_id=request.user.organization
+            )
+        else:
+            queryset = queryset.filter(
+                organisation_id=request.user.organization
+            ).filter(Q(public_to_managers=True) | Q(users__id=request.user.id))
+        # Apply filters using request query parameters
+        dataset_visibility = request.query_params.get("dataset_visibility")
+        if dataset_visibility == "all_public_datasets":
+            queryset = queryset.filter(public_to_managers=True)
+        elif dataset_visibility == "my_datasets":
+            queryset = queryset.filter(users__id=request.user.id)
+        dataset_type = request.query_params.get("dataset_type")
+        if dataset_type:
+            queryset = queryset.filter(dataset_type__exact=dataset_type)
+        archived_datasets = request.query_params.get("archived_datasets")
+        if archived_datasets == "true":
+            queryset = queryset.filter(is_archived=True)
+        elif archived_datasets == "false":
+            queryset = queryset.filter(is_archived=False)
+        # Sort by criteria
+        sort_type = request.query_params.get("sort_type")
+        if sort_type == "recently_updated":
+            queryset = queryset.order_by(F("last_updated").desc(nulls_last=True))
+        else:
+            queryset = queryset.order_by("instance_id")
+        # Optimize related field loading
+        queryset = queryset.prefetch_related(
+            Prefetch("users"),  # Prefetch the related users
+        )
+        # Serialize the data
+        serializer = DatasetInstanceSerializerOptimized(queryset.distinct(), many=True)
+        # Batch process upload status for all datasets
+        instance_ids = [instance["instance_id"] for instance in serializer.data]
+        status_data = get_batch_dataset_upload_status(instance_ids)
+        # Annotate upload status in the response
+        for dataset_instance in serializer.data:
+            instance_id = dataset_instance["instance_id"]
+            if instance_id in status_data:
+                dataset_instance.update(status_data[instance_id])
+        return Response(serializer.data, status=status.HTTP_200_OK)
 
 
     @is_organization_owner

diff --git a/backend/shoonya_backend/settings.py b/backend/shoonya_backend/settings.py
index 2915ce894..281194cb8 100644
--- a/backend/shoonya_backend/settings.py
+++ b/backend/shoonya_backend/settings.py
@@ -42,6 +42,7 @@
     "0.0.0.0",
     "backend.shoonya.ai4bharat.org",
     "backend.shoonya2.ai4bharat.org",
+    "127.0.0.1"
 ]
 
 # Application definition

diff --git a/backend/utils/dataset_utils.py b/backend/utils/dataset_utils.py
new file mode 100644
index 000000000..0cf950fec
--- /dev/null
+++ b/backend/utils/dataset_utils.py
@@ -0,0 +1,15 @@
+def get_batch_dataset_upload_status(instance_ids):
+    """
+    Batch fetch upload status for a list of dataset instance IDs.
+    Replace this with actual logic to retrieve status from your database.
+    """
+    # Mock data for testing
+    status_data = {}
+    for instance_id in instance_ids:
+        status_data[instance_id] = {
+            "last_upload_status": "Completed",
+            "last_upload_date": "2023-01-01",
+            "last_upload_time": "12:00:00",
+            "last_upload_result": "Success",
+        }
+    return status_data
\ No newline at end of file

From 77032adbb8f6139b1208990f7eef84e6804f46b8 Mon Sep 17 00:00:00 2001
From: munishmangla98
Date: Fri, 24 Jan 2025 11:03:21 +0530
Subject: [PATCH 7/7] dataset optimized

---
 backend/dataset/views.py | 53 ----------------------------------------
 1 file changed, 53 deletions(-)

diff --git a/backend/dataset/views.py b/backend/dataset/views.py
index 3591989e8..fc0407ec8 100644
--- a/backend/dataset/views.py
+++ b/backend/dataset/views.py
@@ -317,59 +317,6 @@ def list(self, request, *args, **kwargs):
 
     # def get_queryset(self):
     @action(detail=False, methods=["get"], url_path="optimized-list")
-    # def list_optimized(self, request):
-    #     # Base queryset determination based on user role
-    #     if request.user.is_superuser:
-    #         queryset = DatasetInstance.objects.all()
-    #     elif request.user.role == User.ORGANIZATION_OWNER:
-    #         queryset = DatasetInstance.objects.filter(
-    #             organisation_id=request.user.organization
-    #         )
-    #     else:
-    #         queryset = DatasetInstance.objects.filter(
-    #             organisation_id=request.user.organization
-    #         ).filter(Q(public_to_managers=True) | Q(users__id=request.user.id))
-    #     # Apply optional filters based on query parameters
-    #     if "dataset_visibility" in request.query_params:
-    #         dataset_visibility = request.query_params["dataset_visibility"]
-    #         if dataset_visibility == "all_public_datasets":
-    #             if (
-    #                 request.user.role == User.WORKSPACE_MANAGER
-    #                 and not request.user.is_superuser
-    #             ):
-    #                 queryset = queryset.filter(public_to_managers=True)
-    #         elif dataset_visibility == "my_datasets":
-    #             queryset = queryset.filter(users__id=request.user.id)
-    #     if "dataset_type" in request.query_params:
-    #         queryset = queryset.filter(
-    #             dataset_type__exact=request.query_params["dataset_type"]
-    #         )
-    #     if "archived_datasets" in request.query_params:
-    #         archived_datasets = request.query_params["archived_datasets"] == "true"
-    #         queryset = queryset.filter(is_archived=archived_datasets)
-    #     # Add sorting by custom criteria
-    #     if (
-    #         "sort_type" in request.query_params
-    #         and request.query_params["sort_type"] == "recently_updated"
-    #     ):
-    #         queryset = queryset.order_by(F("last_updated").desc(nulls_last=True))
-    #     else:
-    #         queryset = queryset.order_by(F("instance_id").asc())
-    #     # Serialize the distinct items using the optimized serializer
-    #     serializer = DatasetInstanceSerializerOptimized(queryset.distinct(), many=True)
-    #     # Add additional status fields to each dataset instance
-    #     for dataset_instance in serializer.data:
-    #         (
-    #             dataset_instance_status,
-    #             dataset_instance_date,
-    #             dataset_instance_time,
-    #             dataset_instance_result,
-    #         ) = get_dataset_upload_status(dataset_instance["instance_id"])
-    #         dataset_instance["last_upload_status"] = dataset_instance_status
-    #         dataset_instance["last_upload_date"] = dataset_instance_date
-    #         dataset_instance["last_upload_time"] = dataset_instance_time
-    #         dataset_instance["last_upload_result"] = dataset_instance_result
-    #     return Response(serializer.data, status=status.HTTP_200_OK)
     def list_optimized(self, request):
         # Base queryset determination based on user role
         queryset = DatasetInstance.objects.all()
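A note on the helper introduced in PATCH 6/7: backend/utils/dataset_utils.py ships get_batch_dataset_upload_status() as a mock that returns the same hard-coded values for every instance id, and its docstring asks for the real lookup to be filled in. A minimal sketch of one possible non-mock implementation follows. It assumes the upload tasks are recorded by django-celery-results and that each TaskResult row stores the dataset instance id inside task_args, and it guesses the dotted task name from the upload_data_to_data_instance import in views.py — all three are assumptions, not something the patches above confirm.

# Sketch only: assumes django-celery-results bookkeeping and that task_args
# contains the dataset instance id; the task_name below is a guess based on
# the upload_data_to_data_instance import in backend/dataset/views.py.
from django_celery_results.models import TaskResult


def get_batch_dataset_upload_status(instance_ids):
    status_data = {}
    # Newest results first, so the first hit per instance id is its latest upload
    results = TaskResult.objects.filter(
        task_name="dataset.tasks.upload_data_to_data_instance"
    ).order_by("-date_done")
    for task in results:
        task_args = task.task_args or ""
        for instance_id in instance_ids:
            if instance_id in status_data:
                continue  # already found the most recent result for this id
            # task_args is serialized text, so a substring match is a heuristic
            if str(instance_id) in task_args:
                done = task.date_done
                status_data[instance_id] = {
                    "last_upload_status": task.status,
                    "last_upload_date": done.strftime("%d-%m-%Y") if done else None,
                    "last_upload_time": done.strftime("%H:%M:%S") if done else None,
                    "last_upload_result": task.result,
                }
    return status_data

This keeps the single-pass, batch-oriented shape that list_optimized relies on, instead of calling get_dataset_upload_status once per dataset the way the old list view did.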