From 9cd20f9bad0e979e3dab3ba4b9bf51c9d1fc1baf Mon Sep 17 00:00:00 2001 From: Haresh Kainth Date: Fri, 10 Jan 2025 16:58:16 +0000 Subject: [PATCH 01/11] chore:initialize Sentry only when SENTRY_DSN is provided. Added conditional initialization of Sentry to ensure it is only configured when a SENTRY_DSN environment variable is set. Updated the logging configuration to include a Sentry-specific logger for error-level messages. This improves flexibility and avoids unnecessary overhead when Sentry is not used. --- fbr/settings.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/fbr/settings.py b/fbr/settings.py index 2af8bef..b9b0859 100644 --- a/fbr/settings.py +++ b/fbr/settings.py @@ -43,7 +43,19 @@ "DJANGO_SECRET_KEY", default="find-business-regulations-secret-key" ) +# Only init sentry if the SENTRY_DSN is provided +if os.environ.get("SENTRY_DSN"): + import sentry_sdk + + from sentry_sdk.integrations.django import DjangoIntegration + + sentry_sdk.init( + os.environ.get("SENTRY_DSN"), + integrations=[DjangoIntegration()], + ) + DEBUG = env("DEBUG", default=False) + DJANGO_ADMIN = env("DJANGO_ADMIN", default=False) ALLOWED_HOSTS = env.list("ALLOWED_HOSTS", default=["localhost"]) @@ -273,6 +285,13 @@ }, } +if os.environ.get("SENTRY_DSN"): + LOGGING["loggers"]["sentry_sdk"] = { + "level": "ERROR", + "handlers": ["asim"], + "propagate": False, + } + # Django Log Formatter ASIM settings # See https://github.com/uktrade/django-log-formatter-asim#settings DLFA_TRACE_HEADERS = ("X-B3-TraceId", "X-B3-SpanId") From c54372fdf1153a55173003b6a45876b01a155d4d Mon Sep 17 00:00:00 2001 From: Haresh Kainth Date: Fri, 10 Jan 2025 18:13:59 +0000 Subject: [PATCH 02/11] chore:remove Django settings and config, add documentation for tests. Django settings and related configuration files have been completely removed, including `base.py`, `live.py`, and `local.py`. 
Documentation for running search tests and rebuilding the cache has been added to provide guidance for developers. --- app/search/tests/test_search.py | 8 +- docs/search-tests.md | 16 ++ fbr/config/__init__.py | 3 - fbr/config/settings/base.py | 288 -------------------------------- fbr/config/settings/live.py | 21 --- fbr/config/settings/local.py | 10 -- 6 files changed, 19 insertions(+), 327 deletions(-) create mode 100644 docs/search-tests.md delete mode 100644 fbr/config/__init__.py delete mode 100644 fbr/config/settings/base.py delete mode 100644 fbr/config/settings/live.py delete mode 100644 fbr/config/settings/local.py diff --git a/app/search/tests/test_search.py b/app/search/tests/test_search.py index 1f8fcbb..4676de5 100644 --- a/app/search/tests/test_search.py +++ b/app/search/tests/test_search.py @@ -1,10 +1,12 @@ # flake8: noqa - +import os import re import unittest from unittest.mock import MagicMock, call, patch +import django + from app.search.config import SearchDocumentConfig from app.search.utils.search import create_search_query from app.search.utils.terms import sanitize_input @@ -382,7 +384,3 @@ def test_single_or_and_search_operator_query(self, mock_search_query): # Assert the OR and AND operation was applied mock_query1.__or__.assert_called_with(mock_query2) # mock_query2.__and__.assert_called_with(mock_query3) # TODO:fix assert - - -if __name__ == "__main__": - unittest.main() diff --git a/docs/search-tests.md b/docs/search-tests.md new file mode 100644 index 0000000..66d75c7 --- /dev/null +++ b/docs/search-tests.md @@ -0,0 +1,16 @@ +# Run Search Tests + +This document describes how to run the search tests. + +## Introduction + +The search tests are a collection of tests that check the search functionality of the application. The tests are written +in Python using the `pytest` testing framework. The tests are run using the `pytest` command. 
+ +## Running the tests + +To run the search tests, use the following command: + +```bash +$ python manage.py test +``` diff --git a/fbr/config/__init__.py b/fbr/config/__init__.py deleted file mode 100644 index 6e43984..0000000 --- a/fbr/config/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from .celery import celery_app - -__all__ = ("celery_app",) diff --git a/fbr/config/settings/base.py b/fbr/config/settings/base.py deleted file mode 100644 index 7d20553..0000000 --- a/fbr/config/settings/base.py +++ /dev/null @@ -1,288 +0,0 @@ -"""Django base settings for Find business regulations project. - -Environment: -We use django-environ but do not read a `.env` file. Locally we provide -docker-compose an environment from `local.env` file in the project root, -using `env_file` field in `docker-compose.yml`. There is a `local.env.example` -in this repo. When deployed, the service retrieves these values from the -environment. - -NB: Some settings acquired using `env()` deliberately *do not* have defaults -as we want to get an `ImproperlyConfigured` exception. This highlights badly -configured deployments. -""" - -import os - -from pathlib import Path -from typing import Any - -import dj_database_url -import environ - -from django_log_formatter_asim import ASIMFormatter - -# Define the root directory (i.e. ) -root = environ.Path(__file__) - 4 # i.e. Repository root -SITE_ROOT = Path(root()) -# Define the project base directory (i.e. 
/fbr) -BASE_DIR: Path = Path(root(), "fbr") - -# Get environment variables -env = environ.Env( - DEBUG=(bool, False), -) - -# Must be provided by the environment -SECRET_KEY = env( - "DJANGO_SECRET_KEY", default="find-business-regulations-secret-key" -) - -DEBUG = env("DEBUG", default=False) -DJANGO_ADMIN = env("DJANGO_ADMIN", default=False) -ALLOWED_HOSTS = env.list("ALLOWED_HOSTS", default=["localhost"]) -ENVIRONMENT = env( - "COPILOT_ENVIRONMENT_NAME", default="local" -) # TODO: Change to APP_ENV, updates required in deploy repo - -# Application definition -DJANGO_APPS = [ - "django_celery_beat", - "django.contrib.admin", - "django.contrib.auth", - "django.contrib.contenttypes", - "django.contrib.sessions", - "django.contrib.messages", - "django.contrib.staticfiles", - "rest_framework", -] - -LOCAL_APPS = [ - "core", - "search", -] - -THIRD_PARTY_APPS: list = [ - "webpack_loader", -] - -INSTALLED_APPS = DJANGO_APPS + LOCAL_APPS + THIRD_PARTY_APPS - -MIDDLEWARE = [ - "django.middleware.security.SecurityMiddleware", - "whitenoise.middleware.WhiteNoiseMiddleware", - "django.contrib.sessions.middleware.SessionMiddleware", - "django.middleware.common.CommonMiddleware", - "django.middleware.csrf.CsrfViewMiddleware", - "django.contrib.auth.middleware.AuthenticationMiddleware", - "django.contrib.messages.middleware.MessageMiddleware", - "django.middleware.clickjacking.XFrameOptionsMiddleware", -] - -DEFAULT_AUTO_FIELD = "django.db.models.AutoField" - -ROOT_URLCONF = "config.urls" - -TEMPLATES = [ - { - "BACKEND": "django.template.backends.django.DjangoTemplates", - "DIRS": [ - os.path.join(BASE_DIR, "templates"), - ], - "APP_DIRS": True, - "OPTIONS": { - "context_processors": [ - "django.template.context_processors.debug", - "django.template.context_processors.request", - "django.contrib.auth.context_processors.auth", - "django.contrib.messages.context_processors.messages", - "config.context_processors.google_tag_manager", - ], - }, - }, -] - -WSGI_APPLICATION = 
"config.wsgi.application" - -DATABASES: dict = {} -if DATABASE_URL := env("DATABASE_URL", default=None): - DATABASES = { - "default": { - **dj_database_url.parse( - DATABASE_URL, - engine="postgresql", - ), - "ENGINE": "django.db.backends.postgresql", - } - } -else: - DATABASES = { - "default": { - "ENGINE": "django.db.backends.sqlite3", - "NAME": SITE_ROOT / "db.sqlite3", - } - } - -AUTH_PASSWORD_VALIDATORS = [ - { - "NAME": "django.contrib.auth.password_validation.UserAttributeSimilarityValidator", # noqa: E501 - }, - { - "NAME": "django.contrib.auth.password_validation.MinimumLengthValidator", # noqa: E501 - }, - { - "NAME": "django.contrib.auth.password_validation.CommonPasswordValidator", # noqa: E501 - }, - { - "NAME": "django.contrib.auth.password_validation.NumericPasswordValidator", # noqa: E501 - }, -] - -REDIS_ENDPOINT = env("REDIS_ENDPOINT", default="") - -# Celery -CELERY_BROKER_URL = env("REDIS_ENDPOINT", default="") -if CELERY_BROKER_URL and CELERY_BROKER_URL.startswith("rediss://"): - CELERY_BROKER_URL = f"{CELERY_BROKER_URL}?ssl_cert_reqs=CERT_REQUIRED" -CELERY_RESULT_BACKEND = CELERY_BROKER_URL -CELERY_ACCEPT_CONTENT = ["application/json"] -CELERY_RESULT_SERIALIZER = "json" -CELERY_BROKER_CONNECTION_RETRY_ON_STARTUP = True -CELERY_BEAT_SCHEDULER = "django_celery_beat.schedulers.DatabaseScheduler" -CELERY_RESULT_EXTENDED = True -CELERY_TASK_TIME_LIMIT = ( - 450 # Maximum runtime for a task in seconds (e.g., 7.5 minutes) -) -CELERY_TASK_SOFT_TIME_LIMIT = ( - 270 # Optional: Grace period before forced termination -) -CELERY_TIMEZONE = "UTC" -CELERY_ENABLE_UTC = True -USE_DEPRECATED_PYTZ = True - -# Internationalisation -LANGUAGE_CODE = "en-gb" -TIME_ZONE = "Europe/London" -USE_I18N = True -USE_TZ = True - -# Static files -STATICFILES_FINDERS = [ - "django.contrib.staticfiles.finders.FileSystemFinder", - "django.contrib.staticfiles.finders.AppDirectoriesFinder", -] -STATIC_ROOT = BASE_DIR / "static" -STATIC_URL = "static/" - -STORAGES = { - 
"staticfiles": { - "BACKEND": "whitenoise.storage.CompressedManifestStaticFilesStorage", - }, -} - -# Webpack - -STATICFILES_DIRS = [ - SITE_ROOT / "front_end/", -] - -WEBPACK_LOADER = { - "DEFAULT": { - "CACHE": not DEBUG, - "BUNDLE_DIR_NAME": "webpack_bundles/", # must end with slash - "STATS_FILE": os.path.join(SITE_ROOT, "webpack-stats.json"), - "POLL_INTERVAL": 0.1, - "TIMEOUT": None, - "LOADER_CLASS": "webpack_loader.loader.WebpackLoader", - } -} - - -# TODO: Use redis for cache? -CACHES = { - "default": { - "BACKEND": "django.core.cache.backends.locmem.LocMemCache", - } -} - -# Logging -LOGGING: dict[str, Any] = { - "version": 1, - "disable_existing_loggers": False, - "formatters": { - "asim_formatter": { - "()": ASIMFormatter, - }, - "simple": { - "style": "{", - "format": "{asctime} {levelname} {message}", - }, - }, - "handlers": { - "asim": { - "class": "logging.StreamHandler", - "formatter": "asim_formatter", - }, - "console": { - "class": "logging.StreamHandler", - "formatter": "simple", - }, - }, - "root": { - "handlers": ["console"], - "level": "INFO", - }, - "loggers": { - "django": { - "handlers": ["console"], - "level": "INFO", - "propagate": False, - }, - }, -} - -# Django Log Formatter ASIM settings -# See https://github.com/uktrade/django-log-formatter-asim#settings -DLFA_TRACE_HEADERS = ("X-B3-TraceId", "X-B3-SpanId") - -# Set the correct handlers when running in DBT Platform -# console handler set as default as it's easier to read -LOGGING["root"]["handlers"] = ["asim"] -LOGGING["loggers"]["django"]["handlers"] = ["asim"] - -# ------------------------------------------------------------------------------ -# The Find business regulations zone - specific service settings. 
-# ------------------------------------------------------------------------------ - -# Service - -SERVICE_NAME: str = "Find business regulations" -SERVICE_NAME_SEARCH: str = "Find business regulations" -CONTACT_EMAIL: str = "findbusinessregulations@businessandtrade.gov.uk" - -# Cookies -ANALYTICS_CONSENT_NAME: str = "analytics_consent" - -# DBT Data API -# DBT_DATA_API_URL = env( -# "DBT_DATA_API_URL", -# default="https://data.api.trade.gov.uk/v1/datasets/market-barriers/versions/v1.0.10/data?format=json", # noqa: E501 -# ) - -# HOSTNAME -HOSTNAME_MAP = { - "local": "http://localhost:8081", - "dev": "https://dev.find-business-regulations.uktrade.digital/", - "staging": "https://staging.find-business-regulations.uktrade.digital/", - "prod": "https://find-business-regulations.uktrade.digital/", -} - -HOSTNAME = HOSTNAME_MAP.get(ENVIRONMENT.lower(), HOSTNAME_MAP["prod"]) - -# Google Analytics (GA) -# Note: please consult the performance team before changing these settings -COOKIE_PREFERENCES_SET_NAME: str = "cookie_preferences_set" -COOKIE_ACCEPTED_GA_NAME: str = "accepted_ga_cookies" -GOOGLE_ANALYTICS_TAG_MANAGER_ID = env( - "GOOGLE_ANALYTICS_TAG_MANAGER_ID", default=None -) diff --git a/fbr/config/settings/live.py b/fbr/config/settings/live.py deleted file mode 100644 index c5e637a..0000000 --- a/fbr/config/settings/live.py +++ /dev/null @@ -1,21 +0,0 @@ -import os - -import dj_database_url -import sentry_sdk - -from dbt_copilot_python.database import database_url_from_env -from dbt_copilot_python.network import setup_allowed_hosts -from sentry_sdk.integrations.django import DjangoIntegration - -from .base import * # noqa - -ALLOWED_HOSTS = setup_allowed_hosts(ALLOWED_HOSTS) # noqa - -DATABASES["default"] = dj_database_url.config( # noqa - default=database_url_from_env("DATABASE_CREDENTIALS") -) - -sentry_sdk.init( - os.environ.get("SENTRY_DSN"), - integrations=[DjangoIntegration()], -) diff --git a/fbr/config/settings/local.py b/fbr/config/settings/local.py deleted 
file mode 100644 index 840912c..0000000 --- a/fbr/config/settings/local.py +++ /dev/null @@ -1,10 +0,0 @@ -"""Local django settings.""" - -from .base import * # noqa - -# Applications that are required to load before DJANGO_APPS -BASE_APPS = [ - "whitenoise.runserver_nostatic", # Serve static files via WhiteNoise -] - -INSTALLED_APPS = BASE_APPS + INSTALLED_APPS # noqa From 4ed04aa7ec6f05e951c28079df13429f4b50d526 Mon Sep 17 00:00:00 2001 From: Haresh Kainth Date: Fri, 10 Jan 2025 18:14:22 +0000 Subject: [PATCH 03/11] chore:add documentation for cache rebuild methods This commit introduces a new document, `cache-rebuild.md`, which outlines three methods to rebuild the cache: using `make` commands, the automated Celery task, and the Django management command. It provides clear instructions for each method to ensure regular updates and data consistency in the application. --- docs/cache-rebuild.md | 46 +++++++++++++++++++++++++++++++++ fbr/config/settings/__init__.py | 0 2 files changed, 46 insertions(+) create mode 100644 docs/cache-rebuild.md delete mode 100644 fbr/config/settings/__init__.py diff --git a/docs/cache-rebuild.md b/docs/cache-rebuild.md new file mode 100644 index 0000000..6cada82 --- /dev/null +++ b/docs/cache-rebuild.md @@ -0,0 +1,46 @@ +# Cache rebuild + +## Introduction +There are a few ways to rebuild the cache. The cache is a collection of data from legislation and data workspace. +The cache data is stored in a postgres database that is used to store the data that is used to build the search index. + +This document outlines the steps to rebuild the cache using three different methods. + +## Rebuild the cache locally using the `make` command +The `make setup_local_force_rebuild` command is a simple way to rebuild the cache locally including the entire service.
+The `make` command is a build automation tool that automatically builds executable programs and libraries from source +code by reading files called `Makefiles` which specify how to derive the target program. + +However, to rebuild the cache onlly, you can use the `make rebuild_cache` command. + +## Rebuild the cache on environment using the automated celery task +The cache can be rebuilt on the environment using the automated celery task. +The task is scheduled to run every 24 hours as a cron job. The task is defined in the `tasks.py` file in the +`celery_worker` directory. However, the actual task is defined in the `celery_app.py` file in the `fbr` directory. + +At present it is set to run at 10:00 PM every day. To change the time, edit the following code: +```python +celery_app.conf.beat_schedule = { + "schedule-fbr-cache-task": { + "task": "celery_worker.tasks.rebuild_cache", + "schedule": crontab(hour="22", minute="00"), # Runs daily at 10:00 PM + }, +} +``` + +## Rebuild the cache on environment using the django management command +The cache can be rebuilt on the environment using the django management command. +The command is defined in the `management/commands` directory in the `fbr` directory. + +To run the command, use the following command: +```bash +$ poetry run python manage.py rebuild_cache +``` + +## Conclusion +The cache is a collection of data from legislation and data workspace. The cache data is stored in a postgres database +that is used to store the data that is used to build the search index. The cache can be rebuilt using the `make` +command, the automated celery task, or the django management command. The automated celery task is scheduled to run +every 24 hours as a cron job. The cache can be rebuilt on the environment using the automated celery task or the django +management command. The cache is an important part of the application and should be rebuilt regularly to ensure that the +data is up to date.
diff --git a/fbr/config/settings/__init__.py b/fbr/config/settings/__init__.py deleted file mode 100644 index e69de29..0000000 From c9d9c9561ae6b1ae80a4748cc55499594e54f6c2 Mon Sep 17 00:00:00 2001 From: Haresh Kainth Date: Mon, 13 Jan 2025 09:17:58 +0000 Subject: [PATCH 04/11] chore:add search query examples and comment out Redis/Celery services Added a new markdown file providing examples of search queries for testing. Commented out Redis and Celery services in the `docker-compose.yml` file, likely for future reconsideration or debugging purposes. Fixed a minor typo in the cache rebuild documentation. --- docker-compose.yml | 118 +++++++++++++++++----------------- docs/cache-rebuild.md | 2 +- docs/search-query-examples.md | 70 ++++++++++++++++++++ 3 files changed, 130 insertions(+), 60 deletions(-) create mode 100644 docs/search-query-examples.md diff --git a/docker-compose.yml b/docker-compose.yml index 5f5c15d..bebbdf6 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -2,8 +2,8 @@ volumes: postgres_data: driver: local - redis_data: - driver: local +# redis_data: +# driver: local services: db: @@ -42,63 +42,63 @@ services: networks: - proxynet - redis: - image: redis - # Expose port so we can query it for debugging - ports: - - "6379:6379" - - celery-worker: - build: - context: . 
- cache_from: - - fbr/application:latest - image: fbr/application:latest - command: celery --app fbr.celery_app worker --task-events --loglevel INFO - entrypoint: '' - volumes: - - .:/app - healthcheck: - test: [ "CMD-SHELL", "python -m dbt_copilot_python.celery_health_check.healthcheck" ] - interval: 10s - timeout: 5s - retries: 2 - start_period: 5s - depends_on: - - redis - - db - environment: - REDIS_ENDPOINT: redis://redis:6379 - DEBUG: true - DJANGO_SETTINGS_MODULE: fbr.settings - RDS_POSTGRES_CREDENTIALS: '{"password":"postgres","dbname":"fbr","engine":"postgres","port":5432,"dbInstanceIdentifier":"xxx","host":"db","username":"postgres"}' - DATABASE_URL: postgres://postgres:postgres@host.docker.internal:5432/fbr # pragma: allowlist secret - - celery-beats: - build: - context: . - cache_from: - - fbr/application:latest - image: fbr/application:latest - command: celery --app fbr.celery_app beat --loglevel INFO - entrypoint: '' - volumes: - - .:/app - healthcheck: - test: [ "CMD-SHELL", "python -m dbt_copilot_python.celery_health_check.healthcheck" ] - interval: 10s - timeout: 5s - retries: 2 - start_period: 5s - depends_on: - - redis - - db - environment: - REDIS_ENDPOINT: redis://redis:6379 - DEBUG: true - DJANGO_SETTINGS_MODULE: fbr.settings - RDS_POSTGRES_CREDENTIALS: '{"password":"postgres","dbname":"fbr","engine":"postgres","port":5432,"dbInstanceIdentifier":"xxx","host":"db","username":"postgres"}' - DATABASE_URL: postgres://postgres:postgres@host.docker.internal:5432/fbr # pragma: allowlist secret +# redis: +# image: redis +# # Expose port so we can query it for debugging +# ports: +# - "6379:6379" +# +# celery-worker: +# build: +# context: . 
+# cache_from: +# - fbr/application:latest +# image: fbr/application:latest +# command: celery --app fbr.celery_app worker --task-events --loglevel INFO +# entrypoint: '' +# volumes: +# - .:/app +# healthcheck: +# test: [ "CMD-SHELL", "python -m dbt_copilot_python.celery_health_check.healthcheck" ] +# interval: 10s +# timeout: 5s +# retries: 2 +# start_period: 5s +# depends_on: +# - redis +# - db +# environment: +# REDIS_ENDPOINT: redis://redis:6379 +# DEBUG: true +# DJANGO_SETTINGS_MODULE: fbr.settings +# RDS_POSTGRES_CREDENTIALS: '{"password":"postgres","dbname":"fbr","engine":"postgres","port":5432,"dbInstanceIdentifier":"xxx","host":"db","username":"postgres"}' +# DATABASE_URL: postgres://postgres:postgres@host.docker.internal:5432/fbr # pragma: allowlist secret +# +# celery-beats: +# build: +# context: . +# cache_from: +# - fbr/application:latest +# image: fbr/application:latest +# command: celery --app fbr.celery_app beat --loglevel INFO +# entrypoint: '' +# volumes: +# - .:/app +# healthcheck: +# test: [ "CMD-SHELL", "python -m dbt_copilot_python.celery_health_check.healthcheck" ] +# interval: 10s +# timeout: 5s +# retries: 2 +# start_period: 5s +# depends_on: +# - redis +# - db +# environment: +# REDIS_ENDPOINT: redis://redis:6379 +# DEBUG: true +# DJANGO_SETTINGS_MODULE: fbr.settings +# RDS_POSTGRES_CREDENTIALS: '{"password":"postgres","dbname":"fbr","engine":"postgres","port":5432,"dbInstanceIdentifier":"xxx","host":"db","username":"postgres"}' +# DATABASE_URL: postgres://postgres:postgres@host.docker.internal:5432/fbr # pragma: allowlist secret networks: proxynet: diff --git a/docs/cache-rebuild.md b/docs/cache-rebuild.md index 6cada82..ef48e0d 100644 --- a/docs/cache-rebuild.md +++ b/docs/cache-rebuild.md @@ -11,7 +11,7 @@ The `make setup_local_force_rebuild` command is a simple way to rebuild the cach The `make` command is a build automation tool that automatically builds executable programs and libraries from source code by reading files called 
`Makefiles` which specify how to derive the target program. -However, to rebuild the cache onlly, you can use the `make rebuild_cache` command. +However, to rebuild the cache only, you can use the `make rebuild_cache` command. ## Rebuild the cache on environment using the automated celery task The cache can be rebuilt on the environment using the automated celery task. diff --git a/docs/search-query-examples.md b/docs/search-query-examples.md new file mode 100644 index 0000000..81fb9d4 --- /dev/null +++ b/docs/search-query-examples.md @@ -0,0 +1,70 @@ +# Search Query Examples +This document provides examples of search queries that can be used to test the search functionality of the application. + +## Introduction +Using the search functionality of the application is a key feature. The search functionality allows users to search for +regulations and legislation using keywords. + +## Search query examples +The following are examples of search queries that can be used to test the search functionality of the application ( +using the dev environment, however other environments can be used as well): + +- Search for regulations related to "fire" using all document types: + ```bash + $ curl -X GET "https://dev.find-business-regulations.uktrade.digital/?search=fire&page=1" + ``` + +- Search for regulations related to "fire" using only the "legislation" document type: + ```bash + $ curl -X GET "https://dev.find-business-regulations.uktrade.digital/?search=fire&document_type=legislation&page=1" + ``` + +- Search for regulations related to "fire" using only the "guidance" document type: + ```bash + $ curl -X GET "https://dev.find-business-regulations.uktrade.digital/?search=fire&document_type=guidance&page=1" + ``` + +- Search for regulations related to "fire" using only the "standard" document type: + ```bash + $ curl -X GET "https://dev.find-business-regulations.uktrade.digital/?search=fire&document_type=standard&page=1" + ``` + +# Create a python mock query (using local
environment): + +- Single word +```python + @patch("app.search.utils.search.SearchQuery", autospec=True) + def test_single_word_query(self, mock_search_query): + result = create_search_query("test") + mock_search_query.assert_called_with("test", search_type="plain") + self.assertEqual(result, mock_search_query.return_value) + ``` + +- SQL Injection Prevention +```python + @patch("app.search.utils.search.SearchQuery", autospec=True) +def test_sql_injection_prevention(self, mock_search_query): + malicious_input = "test'; DROP TABLE users; --" + sanitized_query = sanitize_input(malicious_input) + config = SearchDocumentConfig(search_query=sanitized_query) + result = create_search_query(config.search_query) + calls = [ + call("test", search_type="plain"), + call("DROP", search_type="plain"), + call("TABLE", search_type="plain"), + call("users", search_type="plain"), + ] + mock_search_query.assert_has_calls(calls, any_order=False) + self.assertIsNotNone(result) + with self.assertRaises(AssertionError): + mock_search_query.assert_called_with("DROP TABLE users;") + ``` + +- Phase Search Query +```python + @patch("app.search.utils.search.SearchQuery", autospec=True) + def test_phrase_search_query(self, mock_search_query): + result = create_search_query('"test trial"') + mock_search_query.assert_called_with("test trial", search_type="phrase") + self.assertEqual(result, mock_search_query.return_value) + ``` From 84914adb76ee81378df7700b5d8afd97a6fcfd5a Mon Sep 17 00:00:00 2001 From: Haresh Kainth Date: Mon, 13 Jan 2025 10:09:45 +0000 Subject: [PATCH 05/11] fix:indentation in search query examples and add document URL guide Adjusted inconsistent indentation in code examples within `search-query-examples.md` for improved readability. Added a new guide `documents-url.md` explaining the structure and usage of document URLs, including examples. 
--- docs/documents-url.md | 33 +++++++++++++++++++++++++++++++++ docs/search-query-examples.md | 34 +++++++++++++++++----------------- 2 files changed, 50 insertions(+), 17 deletions(-) create mode 100644 docs/documents-url.md diff --git a/docs/documents-url.md b/docs/documents-url.md new file mode 100644 index 0000000..002dc23 --- /dev/null +++ b/docs/documents-url.md @@ -0,0 +1,33 @@ +# Documents URL +When selecting a document from the main search page, the user is taken to the document URL. +The document URL is a unique URL that contains the document ID. The document ID is used to retrieve the document from +the database and display it on the page. + +The document URL is constructed as follows (using the dev environment as an example): +``` +https://dev.find-business-regulations.uktrade.digital/document/ +``` + +# Example +For example, if the document ID is `hZtGUlRTTTmz_rtT5RCfsA`, the document URL would be: +``` +https://dev.find-business-regulations.uktrade.digital/document/hZtGUlRTTTmz_rtT5RCfsA +``` + +## Document URL Structure +The document URL is structured as follows: +- The base URL is `https://dev.find-business-regulations.uktrade.digital` +- The path is `/document/` +- The document ID is a unique identifier for the document +- The document ID is used to retrieve the document from the database +- The document is displayed on the page using the document ID +- The document URL is unique for each document +- The document URL is used to access the document directly +- The document URL is used to share the document with others + + +## Conclusion +The document URL is a unique URL that contains the document ID. The document ID is used to retrieve the document from +the database and display it on the page. The document URL is structured as +`https://dev.find-business-regulations.uktrade.digital/document/`. The document URL is unique for each + document and is used to access the document directly and share it with others. 
diff --git a/docs/search-query-examples.md b/docs/search-query-examples.md index 81fb9d4..11623e0 100644 --- a/docs/search-query-examples.md +++ b/docs/search-query-examples.md @@ -33,7 +33,7 @@ using the dev environment, however other environments can be used as well): - Single word ```python - @patch("app.search.utils.search.SearchQuery", autospec=True) + @patch("app.search.utils.search.SearchQuery", autospec=True) def test_single_word_query(self, mock_search_query): result = create_search_query("test") mock_search_query.assert_called_with("test", search_type="plain") @@ -42,22 +42,22 @@ using the dev environment, however other environments can be used as well): - SQL Injection Prevention ```python - @patch("app.search.utils.search.SearchQuery", autospec=True) -def test_sql_injection_prevention(self, mock_search_query): - malicious_input = "test'; DROP TABLE users; --" - sanitized_query = sanitize_input(malicious_input) - config = SearchDocumentConfig(search_query=sanitized_query) - result = create_search_query(config.search_query) - calls = [ - call("test", search_type="plain"), - call("DROP", search_type="plain"), - call("TABLE", search_type="plain"), - call("users", search_type="plain"), - ] - mock_search_query.assert_has_calls(calls, any_order=False) - self.assertIsNotNone(result) - with self.assertRaises(AssertionError): - mock_search_query.assert_called_with("DROP TABLE users;") + @patch("app.search.utils.search.SearchQuery", autospec=True) + def test_sql_injection_prevention(self, mock_search_query): + malicious_input = "test'; DROP TABLE users; --" + sanitized_query = sanitize_input(malicious_input) + config = SearchDocumentConfig(search_query=sanitized_query) + result = create_search_query(config.search_query) + calls = [ + call("test", search_type="plain"), + call("DROP", search_type="plain"), + call("TABLE", search_type="plain"), + call("users", search_type="plain"), + ] + mock_search_query.assert_has_calls(calls, any_order=False) + 
self.assertIsNotNone(result) + with self.assertRaises(AssertionError): + mock_search_query.assert_called_with("DROP TABLE users;") ``` - Phase Search Query From 09dafcc5345d1b8228e8cc89568b5ffde51eb3ba Mon Sep 17 00:00:00 2001 From: Haresh Kainth Date: Mon, 13 Jan 2025 16:59:30 +0000 Subject: [PATCH 06/11] temp:Add test-sentry endpoint for logging integration tests [will be removed in next commit] Introduce a test-sentry endpoint in URLs and core views to send test log messages with different levels (INFO, ERROR, WARNING, DEBUG). This is useful for verifying Sentry logging configuration and integration functionality. --- app/core/views.py | 13 +++++++++++++ fbr/urls.py | 1 + 2 files changed, 14 insertions(+) diff --git a/app/core/views.py b/app/core/views.py index 21bfd13..aa8b4dd 100644 --- a/app/core/views.py +++ b/app/core/views.py @@ -32,6 +32,19 @@ def home(request: HttpRequest) -> HttpResponse: return render(request, template_name="home.html", context=context) +@require_http_methods(["GET"]) +def test_sentry(request): + import logging + + logger = logging.getLogger(__name__) + + logger.info("sentry: [INFO] find business regulations test message") + logger.error("sentry: [ERROR] find business regulations test message") + logger.warning("sentry: [WARNING] find business regulations test message") + logger.debug("sentry: [DEBUG] find business regulations test message") + return HttpResponse("Log message written", status=200) + + @require_safe def health_check(request: HttpRequest) -> HttpResponse: """Healthcheck endpoint. 
diff --git a/fbr/urls.py b/fbr/urls.py index ecd1336..0f788dc 100644 --- a/fbr/urls.py +++ b/fbr/urls.py @@ -134,6 +134,7 @@ def read_cachetempdata(self, request, *args, **kwargs): name="hide-cookie-banner", ), # path("search/", orp_views.search, name="search"), + path("test-sentry/", core_views.test_sentry, name="test-sentry"), ] if settings.DJANGO_ADMIN: From d1edcde87f0c16584d19348f5b4fbc9af9b84249 Mon Sep 17 00:00:00 2001 From: Haresh Kainth Date: Mon, 13 Jan 2025 17:27:27 +0000 Subject: [PATCH 07/11] chore:add Sentry SDK initialization and logging enhancements This commit updates the Sentry SDK setup to use a cleaner and more explicit approach by checking `SENTRY_DSN` once at the beginning. It also adds an info-level log message when Sentry is disabled and improves logging configuration by explicitly adding Sentry to loggers, ensuring better observability. --- fbr/settings.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/fbr/settings.py b/fbr/settings.py index b9b0859..46cad85 100644 --- a/fbr/settings.py +++ b/fbr/settings.py @@ -14,6 +14,7 @@ configured deployments. """ +import logging import os from pathlib import Path @@ -21,9 +22,11 @@ import dj_database_url import environ +import sentry_sdk from dbt_copilot_python.database import database_url_from_env from django_log_formatter_asim import ASIMFormatter +from sentry_sdk.integrations.django import DjangoIntegration # Define the root directory (i.e. ) root = environ.Path(__file__) - 4 # i.e. 
Repository root @@ -44,15 +47,12 @@ ) # Only init sentry if the SENTRY_DSN is provided -if os.environ.get("SENTRY_DSN"): - import sentry_sdk - - from sentry_sdk.integrations.django import DjangoIntegration - - sentry_sdk.init( - os.environ.get("SENTRY_DSN"), - integrations=[DjangoIntegration()], - ) +# Sentry set up: +SENTRY_DSN = os.environ.get("SENTRY_DSN", None) +if SENTRY_DSN: + sentry_sdk.init(dsn=SENTRY_DSN, integrations=[DjangoIntegration()]) +else: + logging.getLogger(__name__).info("SENTRY_DSN not set. Sentry is disabled.") DEBUG = env("DEBUG", default=False) @@ -285,7 +285,8 @@ }, } -if os.environ.get("SENTRY_DSN"): +if SENTRY_DSN: + logging.getLogger(__name__).info("added sentry to logging config") LOGGING["loggers"]["sentry_sdk"] = { "level": "ERROR", "handlers": ["asim"], "propagate": False, From d580386f7abf1774aa2fc93f0e53685f951c82a7 Mon Sep 17 00:00:00 2001 From: Haresh Kainth Date: Mon, 13 Jan 2025 17:47:30 +0000 Subject: [PATCH 08/11] minor:enable logging messages for Sentry initialization status Add logging messages to provide clarity on the Sentry configuration state. Log when Sentry is enabled or disabled, and skip related logging configuration if Sentry is not active. --- fbr/settings.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fbr/settings.py b/fbr/settings.py index 46cad85..1575112 100644 --- a/fbr/settings.py +++ b/fbr/settings.py @@ -51,6 +51,7 @@ SENTRY_DSN = os.environ.get("SENTRY_DSN", None) if SENTRY_DSN: sentry_sdk.init(dsn=SENTRY_DSN, integrations=[DjangoIntegration()]) + logging.getLogger(__name__).info("SENTRY_DSN set. Sentry is enabled.") else: logging.getLogger(__name__).info("SENTRY_DSN not set. Sentry is disabled.") @@ -292,6 +293,10 @@ "handlers": ["asim"], "propagate": False, } +else: + logging.getLogger(__name__).info( + "sentry not enabled. 
skipping sentry logging config" + ) # Django Log Formatter ASIM settings # See https://github.com/uktrade/django-log-formatter-asim#settings From ab9a60095de4a2064e8c8da78083f204b9a273ea Mon Sep 17 00:00:00 2001 From: Haresh Kainth Date: Tue, 14 Jan 2025 16:25:13 +0000 Subject: [PATCH 09/11] update:Sentry setup and refine logging configuration Added `traces_sample_rate` to Sentry initialization and removed redundant Sentry logging configuration. Updated `.gitignore` to exclude sensitive `key.pem` files. These changes improve Sentry performance and enhance security practices. --- .gitignore | 1 + fbr/settings.py | 19 ++++++------------- 2 files changed, 7 insertions(+), 13 deletions(-) diff --git a/.gitignore b/.gitignore index d7082bf..ea717fa 100644 --- a/.gitignore +++ b/.gitignore @@ -116,3 +116,4 @@ webpack-stats.json # Other .DS_Store +/key.pem diff --git a/fbr/settings.py b/fbr/settings.py index 1575112..5c26cbc 100644 --- a/fbr/settings.py +++ b/fbr/settings.py @@ -47,10 +47,15 @@ ) # Only init sentry if the SENTRY_DSN is provided +# TODO: add SENTRY_TRACES_SAMPLE_RATE to secrets default value of 0.0 # Sentry set up: SENTRY_DSN = os.environ.get("SENTRY_DSN", None) if SENTRY_DSN: - sentry_sdk.init(dsn=SENTRY_DSN, integrations=[DjangoIntegration()]) + sentry_sdk.init( + dsn=SENTRY_DSN, + integrations=[DjangoIntegration()], + traces_sample_rate=env("SENTRY_TRACES_SAMPLE_RATE", default=1.0), + ) logging.getLogger(__name__).info("SENTRY_DSN set. Sentry is enabled.") else: logging.getLogger(__name__).info("SENTRY_DSN not set. Sentry is disabled.") @@ -286,18 +291,6 @@ }, } -if SENTRY_DSN: - logging.getLogger(__name__).info("added sentry to logging config") - LOGGING["loggers"]["sentry_sdk"] = { - "level": "ERROR", - "handlers": ["asim"], - "propagate": False, - } -else: - logging.getLogger(__name__).info( - "sentry not enabled. 
skipping sentry logging config" - ) - # Django Log Formatter ASIM settings # See https://github.com/uktrade/django-log-formatter-asim#settings DLFA_TRACE_HEADERS = ("X-B3-TraceId", "X-B3-SpanId") From 68b648304945249899b47e4badf0f7e04be523e1 Mon Sep 17 00:00:00 2001 From: Haresh Kainth Date: Tue, 14 Jan 2025 17:10:29 +0000 Subject: [PATCH 10/11] chore:increase Celery task time limit to 30 minutes. Updated the `CELERY_TASK_TIME_LIMIT` from 15 minutes to 30 minutes to accommodate longer-running tasks. This change ensures smoother operation for tasks requiring extended processing time. --- fbr/settings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fbr/settings.py b/fbr/settings.py index 5c26cbc..5783f56 100644 --- a/fbr/settings.py +++ b/fbr/settings.py @@ -197,7 +197,7 @@ CELERY_BEAT_SCHEDULER = "django_celery_beat.schedulers.DatabaseScheduler" CELERY_RESULT_EXTENDED = True CELERY_TASK_TIME_LIMIT = ( - 900 # Maximum runtime for a task in seconds (e.g., 900/60 = 15 minutes) + 1800 # Maximum runtime for a task in seconds (e.g., 1800/60 = 30 minutes) ) CELERY_TASK_SOFT_TIME_LIMIT = ( 270 # Optional: Grace period before forced termination From 6e3597c432a3010204d87539c04519dbc2172f94 Mon Sep 17 00:00:00 2001 From: Haresh Kainth Date: Tue, 14 Jan 2025 18:06:21 +0000 Subject: [PATCH 11/11] remove:test_sentry endpoint from core views and URLs The test_sentry endpoint and its associated URL were removed as it is no longer needed. This cleanup helps eliminate unused code, improving maintainability and reducing potential clutter in the project. 
--- app/core/views.py | 13 ------------- fbr/urls.py | 1 - 2 files changed, 14 deletions(-) diff --git a/app/core/views.py b/app/core/views.py index aa8b4dd..21bfd13 100644 --- a/app/core/views.py +++ b/app/core/views.py @@ -32,19 +32,6 @@ def home(request: HttpRequest) -> HttpResponse: return render(request, template_name="home.html", context=context) -@require_http_methods(["GET"]) -def test_sentry(request): - import logging - - logger = logging.getLogger(__name__) - - logger.info("sentry: [INFO] find business regulations test message") - logger.error("sentry: [ERROR] find business regulations test message") - logger.warning("sentry: [WARNING] find business regulations test message") - logger.debug("sentry: [DEBUG] find business regulations test message") - return HttpResponse("Log message written", status=200) - - @require_safe def health_check(request: HttpRequest) -> HttpResponse: """Healthcheck endpoint. diff --git a/fbr/urls.py b/fbr/urls.py index 0f788dc..ecd1336 100644 --- a/fbr/urls.py +++ b/fbr/urls.py @@ -134,7 +134,6 @@ def read_cachetempdata(self, request, *args, **kwargs): name="hide-cookie-banner", ), # path("search/", orp_views.search, name="search"), - path("test-sentry/", core_views.test_sentry, name="test-sentry"), ] if settings.DJANGO_ADMIN: