Skip to content

Commit

Permalink
Index canvases and annotations on manifests (#944)
Browse files Browse the repository at this point in the history
  • Loading branch information
blms committed Nov 3, 2023
1 parent cf949e7 commit f4470dc
Showing 1 changed file with 40 additions and 8 deletions.
48 changes: 40 additions & 8 deletions apps/iiif/manifests/documents.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,14 @@
from django_elasticsearch_dsl import Document, fields
from django_elasticsearch_dsl.registries import registry
from elasticsearch_dsl import analyzer
from django.db.models.query import Prefetch
from django.utils.html import strip_tags
from unidecode import unidecode

from apps.iiif.annotations.models import Annotation
from apps.iiif.canvases.models import Canvas
from apps.iiif.kollections.models import Collection
from .models import Manifest
from apps.iiif.manifests.models import Manifest

# TODO: Better English stemming (e.g. Rome to match Roman), multilingual stemming.
stemmer = analyzer(
Expand All @@ -25,9 +28,15 @@ class ManifestDocument(Document):
# fields to map explicitly in Elasticsearch
authors = fields.KeywordField(multi=True) # only used for faceting/filtering
author = fields.TextField() # only used for searching
collections = fields.NestedField(properties={
"label": fields.KeywordField(),
})
canvas_set = fields.NestedField(
properties={
"result": fields.TextField(analyzer=stemmer),
"position": fields.IntegerField(),
"thumbnail": fields.KeywordField(),
"pid": fields.KeywordField(),
}
) # canvas_set.result = OCR annotation text on each canvas
collections = fields.NestedField(properties={"label": fields.KeywordField()})
date_earliest = fields.DateField()
date_latest = fields.DateField()
has_pdf = fields.BooleanField()
Expand All @@ -38,10 +47,12 @@ class ManifestDocument(Document):

class Index:
    """Settings for Elasticsearch"""

    # name of the Elasticsearch index used for this document type
    name = "manifests"

class Django:
"""Settings for automatically pulling data from Django"""

model = Manifest

# fields to map dynamically in Elasticsearch
Expand All @@ -57,7 +68,7 @@ class Django:
"publisher",
"viewingdirection",
]
related_models = [Collection]
related_models = [Collection, Canvas, Annotation]

def prepare_authors(self, instance):
"""convert authors string into list"""
Expand Down Expand Up @@ -88,12 +99,33 @@ def prepare_summary(self, instance):

def get_queryset(self):
    """Return the queryset used for indexing, with related objects prefetched.

    Prefetching improves performance by loading related rows in bulk
    instead of querying them separately for every manifest:

    - ``collections``, ``image_server`` and ``languages`` are prefetched
      directly on the manifest.
    - ``canvas_set`` is prefetched together with each canvas's
      ``annotation_set``; each annotation's ``owner`` is pulled in with
      ``select_related`` so it does not trigger a further query.
    """
    # innermost level: annotations, with their owner joined in
    annotations = Annotation.objects.select_related("owner")
    # middle level: canvases, each carrying its prefetched annotations
    canvases = Canvas.objects.prefetch_related(
        Prefetch("annotation_set", queryset=annotations),
    )
    return (
        super()
        .get_queryset()
        .prefetch_related(
            "collections",
            "image_server",
            "languages",
            Prefetch("canvas_set", queryset=canvases),
        )
    )

def get_instances_from_related(self, related_instance):
    """Return the manifest(s) to re-index when a related object changes.

    :param related_instance: a ``Collection``, ``Canvas`` or ``Annotation``
        instance (the models listed in ``related_models``)
    :return: for a collection, the queryset of its manifests; for a canvas,
        its manifest; for an annotation, the manifest of its canvas.
        ``None`` when the instance is of another type or an annotation has
        no canvas.
    """
    if isinstance(related_instance, Collection):
        # many to many relationship: a collection exposes all of its
        # manifests through a manager
        return related_instance.manifests.all()
    if isinstance(related_instance, Canvas):
        # single related object (not a manager): the canvas's manifest
        return related_instance.manifest
    if isinstance(related_instance, Annotation):
        # single related objects again: annotation -> canvas -> manifest
        canvas = related_instance.canvas
        # NOTE(review): guards against an annotation without a canvas,
        # assuming the relation can be unset -- confirm against the model
        return canvas.manifest if canvas is not None else None
    # unrelated type: nothing to re-index
    return None

0 comments on commit f4470dc

Please sign in to comment.