Commit
Merge branch 'main' into feature/security-pen-test
# Conflicts:
#	.gitignore
hareshkainthdbt committed Jan 15, 2025
2 parents d48bcbf + d7cd7be commit 928316e
Showing 13 changed files with 246 additions and 391 deletions.
4 changes: 0 additions & 4 deletions .gitignore
@@ -117,7 +117,3 @@ webpack-stats.json
# Other
.DS_Store
/key.pem
/-out
/cert.crt
/cert.key
/cert.pem
8 changes: 3 additions & 5 deletions app/search/tests/test_search.py
@@ -1,10 +1,12 @@
# flake8: noqa

import os
import re
import unittest

from unittest.mock import MagicMock, call, patch

import django

from app.search.config import SearchDocumentConfig
from app.search.utils.search import create_search_query
from app.search.utils.terms import sanitize_input
@@ -382,7 +384,3 @@ def test_single_or_and_search_operator_query(self, mock_search_query):
# Assert the OR and AND operation was applied
mock_query1.__or__.assert_called_with(mock_query2)
# mock_query2.__and__.assert_called_with(mock_query3) # TODO:fix assert


if __name__ == "__main__":
unittest.main()
118 changes: 59 additions & 59 deletions docker-compose.yml
@@ -2,8 +2,8 @@
volumes:
postgres_data:
driver: local
redis_data:
driver: local
# redis_data:
# driver: local

services:
db:
@@ -43,63 +43,63 @@ services:
networks:
- proxynet

redis:
image: redis
# Expose port so we can query it for debugging
ports:
- "6379:6379"

celery-worker:
build:
context: .
cache_from:
- fbr/application:latest
image: fbr/application:latest
command: celery --app fbr.celery_app worker --task-events --loglevel INFO
entrypoint: ''
volumes:
- .:/app
healthcheck:
test: [ "CMD-SHELL", "python -m dbt_copilot_python.celery_health_check.healthcheck" ]
interval: 10s
timeout: 5s
retries: 2
start_period: 5s
depends_on:
- redis
- db
environment:
REDIS_ENDPOINT: redis://redis:6379
DEBUG: true
DJANGO_SETTINGS_MODULE: fbr.settings
RDS_POSTGRES_CREDENTIALS: '{"password":"postgres","dbname":"fbr","engine":"postgres","port":5432,"dbInstanceIdentifier":"xxx","host":"db","username":"postgres"}'
DATABASE_URL: postgres://postgres:postgres@db:5432/fbr # pragma: allowlist secret

celery-beats:
build:
context: .
cache_from:
- fbr/application:latest
image: fbr/application:latest
command: celery --app fbr.celery_app beat --loglevel INFO
entrypoint: ''
volumes:
- .:/app
healthcheck:
test: [ "CMD-SHELL", "python -m dbt_copilot_python.celery_health_check.healthcheck" ]
interval: 10s
timeout: 5s
retries: 2
start_period: 5s
depends_on:
- redis
- db
environment:
REDIS_ENDPOINT: redis://redis:6379
DEBUG: true
DJANGO_SETTINGS_MODULE: fbr.settings
RDS_POSTGRES_CREDENTIALS: '{"password":"postgres","dbname":"fbr","engine":"postgres","port":5432,"dbInstanceIdentifier":"xxx","host":"db","username":"postgres"}'
DATABASE_URL: postgres://postgres:postgres@db:5432/fbr # pragma: allowlist secret
# redis:
# image: redis
# # Expose port so we can query it for debugging
# ports:
# - "6379:6379"
#
# celery-worker:
# build:
# context: .
# cache_from:
# - fbr/application:latest
# image: fbr/application:latest
# command: celery --app fbr.celery_app worker --task-events --loglevel INFO
# entrypoint: ''
# volumes:
# - .:/app
# healthcheck:
# test: [ "CMD-SHELL", "python -m dbt_copilot_python.celery_health_check.healthcheck" ]
# interval: 10s
# timeout: 5s
# retries: 2
# start_period: 5s
# depends_on:
# - redis
# - db
# environment:
# REDIS_ENDPOINT: redis://redis:6379
# DEBUG: true
# DJANGO_SETTINGS_MODULE: fbr.settings
# RDS_POSTGRES_CREDENTIALS: '{"password":"postgres","dbname":"fbr","engine":"postgres","port":5432,"dbInstanceIdentifier":"xxx","host":"db","username":"postgres"}'
# DATABASE_URL: postgres://postgres:postgres@db:5432/fbr # pragma: allowlist secret
#
# celery-beats:
# build:
# context: .
# cache_from:
# - fbr/application:latest
# image: fbr/application:latest
# command: celery --app fbr.celery_app beat --loglevel INFO
# entrypoint: ''
# volumes:
# - .:/app
# healthcheck:
# test: [ "CMD-SHELL", "python -m dbt_copilot_python.celery_health_check.healthcheck" ]
# interval: 10s
# timeout: 5s
# retries: 2
# start_period: 5s
# depends_on:
# - redis
# - db
# environment:
# REDIS_ENDPOINT: redis://redis:6379
# DEBUG: true
# DJANGO_SETTINGS_MODULE: fbr.settings
# RDS_POSTGRES_CREDENTIALS: '{"password":"postgres","dbname":"fbr","engine":"postgres","port":5432,"dbInstanceIdentifier":"xxx","host":"db","username":"postgres"}'
# DATABASE_URL: postgres://postgres:postgres@db:5432/fbr # pragma: allowlist secret

networks:
proxynet:
46 changes: 46 additions & 0 deletions docs/cache-rebuild.md
@@ -0,0 +1,46 @@
# Cache rebuild

## Introduction
There are a few ways to rebuild the cache. The cache is a collection of data from legislation and the data workspace,
stored in a Postgres database and used to build the search index.

This document outlines the steps to rebuild the cache using three different methods.

## Rebuild the cache locally using the `make` command
The `make setup_local_force_rebuild` command is a simple way to rebuild the entire service locally, including the cache.
`make` is a build automation tool that builds targets by reading files called `Makefiles`, which specify how each target
is derived.

However, to rebuild the cache only, you can use the `make rebuild_cache` command, as shown below.
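A minimal sketch, assuming the targets above are defined in the project `Makefile` and the commands are run from the repository root:
```bash
# Rebuild the whole local service, including the cache
make setup_local_force_rebuild

# Rebuild only the cache
make rebuild_cache
```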

## Rebuild the cache on an environment using the automated Celery task
The cache can be rebuilt on an environment using the automated Celery task.
The task is scheduled to run every 24 hours as a cron job. The task itself is defined in the `tasks.py` file in the
`celery_worker` directory, while its schedule is defined in the `celery_app.py` file in the `fbr` directory.

At present it is set to run at 10:00 PM every day. To change the time, edit the following code (the `crontab` helper
comes from `celery.schedules`):
```python
celery_app.conf.beat_schedule = {
"schedule-fbr-cache-task": {
"task": "celery_worker.tasks.rebuild_cache",
"schedule": crontab(hour="22", minute="00"), # Runs daily at 10:00 PM
},
}
```

## Rebuild the cache on an environment using the Django management command
The cache can be rebuilt on an environment using the Django management command.
The command is defined in the `management/commands` directory in the `fbr` directory.

To run it, use the following command:
```bash
$ poetry run python manage.py rebuild_cache
```

## Conclusion
The cache is a collection of data from legislation and the data workspace, stored in a Postgres database and used to
build the search index. It can be rebuilt using the `make` command, the automated Celery task, or the Django management
command; on an environment, use the Celery task (which runs every 24 hours as a cron job) or the management command.
The cache is an important part of the application and should be rebuilt regularly to keep the data up to date.
33 changes: 33 additions & 0 deletions docs/documents-url.md
@@ -0,0 +1,33 @@
# Documents URL
When selecting a document from the main search page, the user is taken to the document URL.
The document URL is a unique URL that contains the document ID. The document ID is used to retrieve the document from
the database and display it on the page.

The document URL is constructed as follows (using the dev environment as an example):
```
https://dev.find-business-regulations.uktrade.digital/document/<document_id>
```

## Example
For example, if the document ID is `hZtGUlRTTTmz_rtT5RCfsA`, the document URL would be:
```
https://dev.find-business-regulations.uktrade.digital/document/hZtGUlRTTTmz_rtT5RCfsA
```

## Document URL Structure
The document URL is structured as follows:
- The base URL is `https://dev.find-business-regulations.uktrade.digital`
- The path is `/document/<document_id>`
- The document ID is a unique identifier used to retrieve the document from the database and display it on the page
- Because each document ID is unique, the document URL is unique and can be used to access the document directly or to share it with others (see the check below)
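
As a quick check that a document URL resolves, the example above can be requested directly (a minimal sketch; `-I` only fetches the response headers):
```bash
curl -I "https://dev.find-business-regulations.uktrade.digital/document/hZtGUlRTTTmz_rtT5RCfsA"
```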


## Conclusion
The document URL is a unique URL that contains the document ID, which is used to retrieve the document from the
database and display it on the page. The URL is structured as
`https://dev.find-business-regulations.uktrade.digital/document/<document_id>` and, being unique to each document, can
be used to access the document directly or to share it with others.
70 changes: 70 additions & 0 deletions docs/search-query-examples.md
@@ -0,0 +1,70 @@
# Search Query Examples
This document provides examples of search queries that can be used to test the search functionality of the application.

## Introduction
The search functionality is a key feature of the application. It allows users to search for
regulations and legislation using keywords.

## Search query examples
The following are examples of search queries that can be used to test the search functionality of the application
(shown against the dev environment; other environments can be used as well):

- Search for regulations related to "fire" using all document types:
```bash
$ curl -X GET "https://dev.find-business-regulations.uktrade.digital/?search=fire&page=1"
```

- Search for regulations related to "fire" using only the "legislation" document type:
```bash
$ curl -X GET "https://dev.find-business-regulations.uktrade.digital/?search=fire&document_type=legislation&page=1"
```

- Search for regulations related to "fire" using only the "guidance" document type:
```bash
$ curl -X GET "https://dev.find-business-regulations.uktrade.digital/?search=fire&document_type=guidance&page=1"
```

- Search for regulations related to "fire" using only the "standard" document type:
```bash
$ curl -X GET "https://dev.find-business-regulations.uktrade.digital/?search=fire&document_type=standard&page=1"
```

## Create a Python mock query (using the local environment)

The snippets below follow the pattern used in `app/search/tests/test_search.py` and assume its surrounding
`unittest.TestCase` class and imports (`create_search_query`, `sanitize_input`, `SearchDocumentConfig`).

- Single word
```python
@patch("app.search.utils.search.SearchQuery", autospec=True)
def test_single_word_query(self, mock_search_query):
result = create_search_query("test")
mock_search_query.assert_called_with("test", search_type="plain")
self.assertEqual(result, mock_search_query.return_value)
```

- SQL Injection Prevention
```python
@patch("app.search.utils.search.SearchQuery", autospec=True)
def test_sql_injection_prevention(self, mock_search_query):
malicious_input = "test'; DROP TABLE users; --"
sanitized_query = sanitize_input(malicious_input)
config = SearchDocumentConfig(search_query=sanitized_query)
result = create_search_query(config.search_query)
calls = [
call("test", search_type="plain"),
call("DROP", search_type="plain"),
call("TABLE", search_type="plain"),
call("users", search_type="plain"),
]
mock_search_query.assert_has_calls(calls, any_order=False)
self.assertIsNotNone(result)
with self.assertRaises(AssertionError):
mock_search_query.assert_called_with("DROP TABLE users;")
```

- Phrase Search Query
```python
@patch("app.search.utils.search.SearchQuery", autospec=True)
def test_phrase_search_query(self, mock_search_query):
result = create_search_query('"test trial"')
mock_search_query.assert_called_with("test trial", search_type="phrase")
self.assertEqual(result, mock_search_query.return_value)
```
16 changes: 16 additions & 0 deletions docs/search-tests.md
@@ -0,0 +1,16 @@
# Run Search Tests

This document describes how to run the search tests.

## Introduction

The search tests are a collection of tests that check the search functionality of the application. The tests are written
in Python using the `unittest` framework (see `app/search/tests/test_search.py`) and are run through Django's
`manage.py test` command.

## Running the tests

To run the search tests, use the following command:

```bash
$ python manage.py test
```
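
To run only the search tests, the test label can be narrowed to the module added in this changeset (a sketch assuming the default Django test runner and the `app/search/tests/test_search.py` path shown above):

```bash
# Run only the search test module
$ python manage.py test app.search.tests.test_search
```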
3 changes: 0 additions & 3 deletions fbr/config/__init__.py

This file was deleted.

Empty file removed fbr/config/settings/__init__.py
Empty file.