From 92ac80f0b49428eaa34711513017c9122a6dc4d1 Mon Sep 17 00:00:00 2001 From: Tom Searle Date: Fri, 29 Nov 2024 10:41:42 +0000 Subject: [PATCH] CU-8696ent8k: make prep_docs AnnotatedEntity addition atomic. --- webapp/api/api/data_utils.py | 17 +++++++------- webapp/api/api/utils.py | 16 +++++++------ webapp/api/api/views.py | 44 +++++++++++++++++++----------------- 3 files changed, 41 insertions(+), 36 deletions(-) diff --git a/webapp/api/api/data_utils.py b/webapp/api/api/data_utils.py index f6864703..8e29719c 100644 --- a/webapp/api/api/data_utils.py +++ b/webapp/api/api/data_utils.py @@ -6,6 +6,7 @@ from typing import Dict from django.contrib.auth.models import User +from django.db import transaction from django.db.models import Q from core.settings import MEDIA_ROOT @@ -42,14 +43,14 @@ def dataset_from_file(dataset: Dataset): "The 'name' column are document IDs, and the 'text' column is the text you're " "collecting annotations for") - - for i, row in enumerate(df.iterrows()): - row = row[1] - document = Document() - document.name = row['name'] - document.text = sanitise_input(row['text']) - document.dataset = dataset - document.save() + with transaction.atomic(): + for i, row in enumerate(df.iterrows()): + row = row[1] + document = Document() + document.name = row['name'] + document.text = sanitise_input(row['text']) + document.dataset = dataset + document.save() def sanitise_input(text: str): diff --git a/webapp/api/api/utils.py b/webapp/api/api/utils.py index f434bb1a..8459175d 100644 --- a/webapp/api/api/utils.py +++ b/webapp/api/api/utils.py @@ -5,6 +5,7 @@ from background_task import background from django.contrib.auth.models import User +from django.db import transaction from django.db.models.signals import post_save from django.dispatch import receiver from medcat.cat import CAT @@ -251,13 +252,14 @@ def prep_docs(project_id: List[int], doc_ids: List[int], user_id: int): logger.info(f'Running MedCAT model for project {project.id}:{project.name} over 
doc: {doc.id}') spacy_doc = cat(doc.text) anns = AnnotatedEntity.objects.filter(document=doc).filter(project=project) - add_annotations(spacy_doc=spacy_doc, - user=user, - project=project, - document=doc, - cat=cat, - existing_annotations=anns) - # add doc to prepared_documents + with transaction.atomic(): + add_annotations(spacy_doc=spacy_doc, + user=user, + project=project, + document=doc, + cat=cat, + existing_annotations=anns) + # add doc to prepared_documents project.prepared_documents.add(doc) project.save() logger.info('Prepared all docs for project: %s, docs processed: %s', diff --git a/webapp/api/api/views.py b/webapp/api/api/views.py index 6a415125..2e35ce3b 100644 --- a/webapp/api/api/views.py +++ b/webapp/api/api/views.py @@ -5,6 +5,7 @@ from background_task.models import Task, CompletedTask from django.contrib.auth.views import PasswordResetView from django.core.exceptions import ObjectDoesNotExist +from django.db import transaction from django.http import HttpResponseBadRequest, HttpResponseServerError, HttpResponse from django.shortcuts import render from django.utils import timezone @@ -248,7 +249,6 @@ def prepare_documents(request): 'description': 'Missing CUI filter file, %s, cannot be found on the filesystem, ' 'but is still set on the project. 
To fix remove and reset the ' 'cui filter file' % project.cuis_file}, status=500) - try: for d_id in d_ids: document = Document.objects.get(id=d_id) @@ -264,26 +264,28 @@ def prepare_documents(request): is_validated = document in project.validated_documents.all() - # If the document is not already annotated, annotate it - if (len(anns) == 0 and not is_validated) or update: - # Based on the project id get the right medcat - cat = get_medcat(project=project) - logger.info('loaded medcat model for project: %s', project.id) - - # Set CAT filters - cat.config.linking['filters']['cuis'] = cuis - - spacy_doc = cat(document.text) - add_annotations(spacy_doc=spacy_doc, - user=user, - project=project, - document=document, - cat=cat, - existing_annotations=anns) - - # add doc to prepared_documents - project.prepared_documents.add(document) - project.save() + with transaction.atomic(): + # If the document is not already annotated, annotate it + if (len(anns) == 0 and not is_validated) or update: + # Based on the project id get the right medcat + cat = get_medcat(project=project) + logger.info('loaded medcat model for project: %s', project.id) + + # Set CAT filters + cat.config.linking['filters']['cuis'] = cuis + + spacy_doc = cat(document.text) + + add_annotations(spacy_doc=spacy_doc, + user=user, + project=project, + document=document, + cat=cat, + existing_annotations=anns) + + # add doc to prepared_documents + project.prepared_documents.add(document) + project.save() except Exception as e: stack = traceback.format_exc()