all comments removed #1142

Closed · wants to merge 7 commits into from
2 changes: 1 addition & 1 deletion backend/dataset/admin.py

@@ -1,4 +1,4 @@
-import resource
+# import resource
 from django.contrib import admin
 from import_export.admin import ImportExportActionModelAdmin
 from .resources import *
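A note on this change: the standard-library resource module is Unix-only and is not referenced elsewhere in the file, so commenting out the import removes dead code and avoids an ImportError on platforms that lack the module. The PR does not state its motivation, so this reading is an inference.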
15 changes: 15 additions & 0 deletions backend/dataset/serializers.py

@@ -11,6 +11,21 @@ class DatasetInstanceSerializer(serializers.ModelSerializer):
     class Meta:
         model = DatasetInstance
         fields = "__all__"
+
+
+class DatasetInstanceSerializerOptimized(serializers.ModelSerializer):
+    class Meta:
+        model = DatasetInstance
+        fields = [
+            "instance_id",
+            "parent_instance_id",
+            "instance_name",
+            "instance_description",
+            "dataset_type",
+            "public_to_managers",
+            "organisation_id",
+        ]
+
 
 
 class DatasetInstanceUploadSerializer(serializers.Serializer):
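Where the base DatasetInstanceSerializer emits every model field via fields = "__all__", the optimized variant whitelists only the seven columns the listing needs, shrinking the serialized payload. A minimal sketch of the difference, purely illustrative (it assumes at least one DatasetInstance row exists):

# Illustrative comparison of the two serializers on a single row.
instance = DatasetInstance.objects.first()
full = DatasetInstanceSerializer(instance).data           # every model field
slim = DatasetInstanceSerializerOptimized(instance).data  # 7 whitelisted fields
assert set(slim) <= set(full)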
82 changes: 76 additions & 6 deletions backend/dataset/views.py

@@ -38,6 +38,11 @@
 from . import resources
 from .models import *
 from .serializers import *
+from django.db.models import Prefetch, Q, F
+from utils.dataset_utils import get_batch_dataset_upload_status
+from rest_framework.response import Response
+from rest_framework.decorators import action
+from rest_framework import status
 from .tasks import upload_data_to_data_instance, deduplicate_dataset_instance_items
 import dataset
 from tasks.models import (
@@ -186,6 +191,22 @@ def get_dataset_upload_status(dataset_instance_pk):
 
 
 # Create your views here.
+# def get_batch_dataset_upload_status(instance_ids):
+#     """
+#     Batch fetch upload status for a list of dataset instance IDs.
+#     Replace this with actual logic to retrieve status from your database.
+#     """
+#     # Mock data for testing
+#     status_data = {}
+#     for instance_id in instance_ids:
+#         status_data[instance_id] = {
+#             "last_upload_status": "Completed",
+#             "last_upload_date": "2023-01-01",
+#             "last_upload_time": "12:00:00",
+#             "last_upload_result": "Success",
+#         }
+#     return status_data
+
 class DatasetInstanceViewSet(viewsets.ModelViewSet):
     """
     ViewSet for Dataset Instance
@@ -244,6 +265,8 @@ def retrieve(self, request, pk, *args, **kwargs):
             ),
         ],
    )
+
+
    def list(self, request, *args, **kwargs):
        # Org Owners and superusers see all datasets
        if request.user.is_superuser:
@@ -257,7 +280,6 @@ def list(self, request, *args, **kwargs):
            queryset = DatasetInstance.objects.filter(
                organisation_id=request.user.organization
            ).filter(Q(public_to_managers=True) | Q(users__id=request.user.id))
-
        if "dataset_visibility" in request.query_params:
            dataset_visibility = request.query_params["dataset_visibility"]
            if dataset_visibility == "all_public_datasets":
@@ -267,18 +289,15 @@
                queryset = queryset.filter(public_to_managers=True)
            elif dataset_visibility == "my_datasets":
                queryset = queryset.filter(users__id=request.user.id)
-
        # Filter the queryset based on the query params
        if "dataset_type" in dict(request.query_params):
            queryset = queryset.filter(
                dataset_type__exact=request.query_params["dataset_type"]
            )
-
        # Serialize the distinct items and sort by instance ID
        serializer = DatasetInstanceSerializer(
            queryset.distinct().order_by("instance_id"), many=True
        )
-
        # Add status fields to the serializer data
        for dataset_instance in serializer.data:
            # Get the task statuses for the dataset instance
@@ -288,14 +307,65 @@ def list(self, request, *args, **kwargs):
                dataset_instance_status,
                dataset_instance_date,
                dataset_instance_time,
                dataset_instance_result,
            ) = get_dataset_upload_status(dataset_instance["instance_id"])
-
            # Add the task status and time to the dataset instance response
            dataset_instance["last_upload_status"] = dataset_instance_status
            dataset_instance["last_upload_date"] = dataset_instance_date
            dataset_instance["last_upload_time"] = dataset_instance_time
            dataset_instance["last_upload_result"] = dataset_instance_result
-
        return Response(serializer.data)
 
+
+    # def get_queryset(self):
+    @action(detail=False, methods=["get"], url_path="optimized-list")
+    def list_optimized(self, request):
+        # Base queryset determination based on user role
+        queryset = DatasetInstance.objects.all()
+        if request.user.is_superuser:
+            queryset = queryset
+        elif request.user.role == User.ORGANIZATION_OWNER:
+            queryset = queryset.filter(
+                organisation_id=request.user.organization
+            )
+        else:
+            queryset = queryset.filter(
+                organisation_id=request.user.organization
+            ).filter(Q(public_to_managers=True) | Q(users__id=request.user.id))
+        # Apply filters using request query parameters
+        dataset_visibility = request.query_params.get("dataset_visibility")
+        if dataset_visibility == "all_public_datasets":
+            queryset = queryset.filter(public_to_managers=True)
+        elif dataset_visibility == "my_datasets":
+            queryset = queryset.filter(users__id=request.user.id)
+        dataset_type = request.query_params.get("dataset_type")
+        if dataset_type:
+            queryset = queryset.filter(dataset_type__exact=dataset_type)
+        archived_datasets = request.query_params.get("archived_datasets")
+        if archived_datasets == "true":
+            queryset = queryset.filter(is_archived=True)
+        elif archived_datasets == "false":
+            queryset = queryset.filter(is_archived=False)
+        # Sort by criteria
+        sort_type = request.query_params.get("sort_type")
+        if sort_type == "recently_updated":
+            queryset = queryset.order_by(F("last_updated").desc(nulls_last=True))
+        else:
+            queryset = queryset.order_by("instance_id")
+        # Optimize related field loading
+        queryset = queryset.prefetch_related(
+            Prefetch("users"),  # Prefetch the related users
+        )
+        # Serialize the data
+        serializer = DatasetInstanceSerializerOptimized(queryset.distinct(), many=True)
+        # Batch process upload status for all datasets
+        instance_ids = [instance["instance_id"] for instance in serializer.data]
+        status_data = get_batch_dataset_upload_status(instance_ids)
+        # Annotate upload status in the response
+        for dataset_instance in serializer.data:
+            instance_id = dataset_instance["instance_id"]
+            if instance_id in status_data:
+                dataset_instance.update(status_data[instance_id])
+        return Response(serializer.data, status=status.HTTP_200_OK)
+
+
     @is_organization_owner
     @action(methods=["GET"], detail=True, name="Download Dataset in CSV format")
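The design intent visible in this diff: the existing list() calls get_dataset_upload_status once per dataset instance, so its cost grows linearly with the number of rows, while list_optimized serializes a fixed field whitelist and fetches all statuses in one batched call. A usage sketch follows; the base URL, router prefix (/data/instances/), auth scheme, and query-parameter values are all assumptions, not shown in this PR:

# Hypothetical client call; base URL, router prefix, and auth are assumptions.
import requests

resp = requests.get(
    "http://127.0.0.1:8000/data/instances/optimized-list/",
    params={"dataset_type": "TranslationPair", "sort_type": "recently_updated"},
    headers={"Authorization": "Token <your-token>"},
)
resp.raise_for_status()
for item in resp.json():
    print(item["instance_id"], item.get("last_upload_status"))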
1 change: 1 addition & 0 deletions backend/shoonya_backend/settings.py

@@ -42,6 +42,7 @@
     "0.0.0.0",
     "backend.shoonya.ai4bharat.org",
     "backend.shoonya2.ai4bharat.org",
+    "127.0.0.1"
 ]
 
 # Application definition
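Django rejects any request whose Host header is missing from ALLOWED_HOSTS with a 400 (DisallowedHost), so this entry lets a local development server accept loopback requests. A quick check, assuming runserver is listening on the default port 8000 (an assumption):

# Hypothetical smoke test: a 400 here would mean the host is still disallowed.
import http.client

conn = http.client.HTTPConnection("127.0.0.1", 8000)
conn.request("GET", "/", headers={"Host": "127.0.0.1"})
print(conn.getresponse().status)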
15 changes: 15 additions & 0 deletions backend/utils/dataset_utils.py

@@ -0,0 +1,15 @@
+def get_batch_dataset_upload_status(instance_ids):
+    """
+    Batch fetch upload status for a list of dataset instance IDs.
+    Replace this with actual logic to retrieve status from your database.
+    """
+    # Mock data for testing
+    status_data = {}
+    for instance_id in instance_ids:
+        status_data[instance_id] = {
+            "last_upload_status": "Completed",
+            "last_upload_date": "2023-01-01",
+            "last_upload_time": "12:00:00",
+            "last_upload_result": "Success",
+        }
+    return status_data
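As the docstring itself notes, this mock returns hard-coded values and should eventually be replaced. A minimal non-mock sketch that reuses the existing per-instance helper get_dataset_upload_status from dataset/views.py; the deferred import is an assumption made to dodge the circular import (views.py imports this module at load time), and a genuinely batched version would instead query the underlying task-status table once:

def get_batch_dataset_upload_status(instance_ids):
    """Fetch upload status for many dataset instances in one pass."""
    # Deferred import: dataset/views.py imports this module at load time,
    # so a module-level import here would be circular (assumed layout).
    from dataset.views import get_dataset_upload_status

    status_data = {}
    for instance_id in instance_ids:
        # The per-instance helper returns a (status, date, time, result) tuple.
        upload_status, upload_date, upload_time, upload_result = (
            get_dataset_upload_status(instance_id)
        )
        status_data[instance_id] = {
            "last_upload_status": upload_status,
            "last_upload_date": upload_date,
            "last_upload_time": upload_time,
            "last_upload_result": upload_result,
        }
    return status_data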