Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve index update #645

Draft
wants to merge 6 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 34 additions & 0 deletions src/core/management/commands/update_index_async.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from django.core.cache import cache
from django.core.management.base import BaseCommand


Expand All @@ -7,8 +8,41 @@ class Command(BaseCommand):
" command"
)

def add_arguments(self, parser):
    """Register the command-line options of this command.

    ``--force`` is a boolean switch: passing it sets ``options["force"]``
    to ``True``; omitting it leaves the default ``False``.
    """
    parser.add_argument(
        "--force",
        # Without ``store_true`` argparse expects a value after the flag and
        # stores it as a string, so even ``--force False`` would be truthy.
        action="store_true",
        help="Force the update of the search index",
        dest="force",
        default=False,
    )

def handle(self, *args, **options):
    """Queue a search index update when the search mapping has changed.

    With ``options["force"]`` set, the update task is always queued.
    Otherwise a digest of the current indexed mapping is compared with the
    one stored in the cache under ``"search_mapping_hash"``; the task is
    only queued (and the cached digest refreshed) when they differ.
    """
    import hashlib
    import json

    from core.tasks import update_search_index
    from extended_search.management.commands.create_index_mapping_json import (
        get_indexed_mapping_dict,
    )

    if not options["force"]:
        # Digest of the mapping that was current at the last update, if any.
        cached_hash = cache.get("search_mapping_hash")

        # The mapping values are nested dicts, which are unhashable, and the
        # built-in hash() of strings is salted per process. A SHA-256 of the
        # canonical JSON form is stable across processes instead.
        serialized_mapping = json.dumps(get_indexed_mapping_dict(), sort_keys=True)
        current_hash = hashlib.sha256(
            serialized_mapping.encode("utf-8")
        ).hexdigest()

        if cached_hash == current_hash:
            # Mapping unchanged since the last recorded update: nothing to do.
            return

        # NOTE(review): the PR author is not settled on using the cache for
        # this; the negative impact of the value not being in the cache is
        # higher than "just a longer page load time". Alternatives are open
        # for discussion.
        cache.set("search_mapping_hash", current_hash)

    update_search_index.delay()

Expand Down
4 changes: 0 additions & 4 deletions src/core/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,6 @@ def ingest_uk_staff_locations(self):

@celery_app.task(bind=True)
def update_search_index(self):
    """Run the ``update_index`` command twice: schema-only, then in full."""
    # First pass is `update_index --schema-only`, second is plain `update_index`.
    for extra_options in ({"schema_only": True}, {}):
        call_command("update_index", **extra_options)


Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
from pathlib import Path

from django.core.management.base import BaseCommand
from wagtail.search.backends import get_search_backend

from extended_search.index import get_indexed_models


# Path: src/extended_search/management/commands/create_index_mapping_json.py
# The generated mapping JSON lives in the same directory as this module.
JSON_FILE = Path(__file__).parent / "indexed_mapping.json"


def get_sorted_mapping(search_backend, model):
    """Return the backend mapping for ``model`` with keys sorted at every level.

    The mapping is obtained from
    ``search_backend.mapping_class(model).get_mapping()``. Nested dicts are
    rebuilt recursively with sorted keys; any other value is kept as is.
    """

    def _sorted_copy(node):
        # Rebuild the dict in key order, descending into nested dicts only.
        return {
            key: _sorted_copy(value) if isinstance(value, dict) else value
            for key, value in sorted(node.items())
        }

    raw_mapping = search_backend.mapping_class(model).get_mapping()
    return _sorted_copy(raw_mapping)


def get_indexed_mapping_dict():
    """
    Build a dict keyed by ``str(model)`` holding each indexed model's
    sorted search mapping.

    Models whose app label is "testapp" are left out.
    """
    backend = get_search_backend()
    mappings = {}

    for indexed_model in get_indexed_models():
        if indexed_model._meta.app_label == "testapp":
            continue
        mappings[str(indexed_model)] = get_sorted_mapping(backend, indexed_model)

    return mappings


class Command(BaseCommand):
    """Management command that dumps the indexed mapping to ``JSON_FILE``."""

    help = "Create test JSON containing the mapping for all indexed models"

    def handle(self, *args, **options):
        """Serialize the indexed mapping dict and write it to ``JSON_FILE``."""
        import json

        # Serialize before touching the file: opening it for writing first
        # would truncate the existing JSON even if building the mapping fails.
        serialized = json.dumps(get_indexed_mapping_dict(), indent=4)
        JSON_FILE.write_text(serialized, encoding="utf-8")
Loading