From 611724b03facb35df07df4ab3ffa7914735b260b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Juan=20Carlos=20Jos=C3=A9=20Camacho?= Date: Fri, 15 Dec 2023 09:58:31 -0600 Subject: [PATCH] [DH-4306] Split db methods for cardinality and logs (#269) * [DH-4306] Split db methods for cardinality and logs * Improve query history method for Snowflake and BigQuery * Document query_history endpoint --- README.md | 27 +++++- dataherald/api/__init__.py | 6 +- dataherald/api/fastapi.py | 15 ++- dataherald/db_scanner/__init__.py | 2 + dataherald/db_scanner/models/types.py | 9 ++ .../db_scanner/repository/query_history.py | 31 ++++++ dataherald/db_scanner/services/__init__.py | 0 .../db_scanner/services/abstract_scanner.py | 20 ++++ .../db_scanner/services/base_scanner.py | 28 ++++++ .../db_scanner/services/big_query_scanner.py | 52 ++++++++++ .../services/postgre_sql_scanner.py | 30 ++++++ .../db_scanner/services/snowflake_scanner.py | 53 +++++++++++ dataherald/db_scanner/sqlalchemy.py | 95 ++++++++----------- dataherald/server/fastapi/__init__.py | 13 ++- docs/api.list_query_history.rst | 59 ++++++++++++ docs/api.rst | 1 + docs/api.scan_table_description.rst | 5 +- 17 files changed, 386 insertions(+), 60 deletions(-) create mode 100644 dataherald/db_scanner/repository/query_history.py create mode 100644 dataherald/db_scanner/services/__init__.py create mode 100644 dataherald/db_scanner/services/abstract_scanner.py create mode 100644 dataherald/db_scanner/services/base_scanner.py create mode 100644 dataherald/db_scanner/services/big_query_scanner.py create mode 100644 dataherald/db_scanner/services/postgre_sql_scanner.py create mode 100644 dataherald/db_scanner/services/snowflake_scanner.py create mode 100644 docs/api.list_query_history.rst diff --git a/README.md b/README.md index 94ed107d..4a66f3f0 100644 --- a/README.md +++ b/README.md @@ -263,7 +263,9 @@ Once you have connected to the data warehouse, you should add context to the eng While only the Database scan part is required to start generating SQL, adding verified SQL and string descriptions are also important for the tool to generate accurate SQL. #### Scanning the Database -The database scan is used to gather information about the database including table and column names and identifying low cardinality columns and their values to be stored in the context store and used in the prompts to the LLM. You can trigger a scan of a database from the `POST /api/v1/table-descriptions/sync-schemas` endpoint. Example below +The database scan is used to gather information about the database including table and column names and identifying low cardinality columns and their values to be stored in the context store and used in the prompts to the LLM. +In addition, it retrieves logs, which consist of historical queries associated with each database table. These records are then stored within the query_history collection. The historical queries retrieved encompass data from the past three months and are grouped based on query and user. +You can trigger a scan of a database from the `POST /api/v1/table-descriptions/sync-schemas` endpoint. Example below ``` @@ -279,6 +281,29 @@ curl -X 'POST' \ Since the endpoint identifies low cardinality columns (and their values) it can take time to complete. Therefore while it is possible to trigger a scan on the entire DB by not specifying the `table_names`, we recommend against it for large databases. 
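+
+For large databases it is usually better to scan only the tables you need. The request below is an illustrative sketch, not part of this change: it assumes the `db_connection_id` and optional `table_names` fields accepted by the sync-schemas endpoint, so substitute your own connection id and table names.
+
+```
+curl -X 'POST' \
+  'http://localhost/api/v1/table-descriptions/sync-schemas' \
+  -H 'accept: application/json' \
+  -H 'Content-Type: application/json' \
+  -d '{
+    "db_connection_id": "656e52cb4d1fda50cae7b939",
+    "table_names": ["foo", "bar"]
+  }'
+```
+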
+#### Get logs per db connection
+Once a database has been scanned, you can use this endpoint to retrieve the query logs collected for its tables.
+
+```
+curl -X 'GET' \
+  'http://localhost/api/v1/query-history?db_connection_id=656e52cb4d1fda50cae7b939' \
+  -H 'accept: application/json'
+```
+
+Response example:
+```
+[
+  {
+    "id": "656e52cb4d1fda50cae7b939",
+    "db_connection_id": "656e52cb4d1fda50cae7b939",
+    "table_name": "table_name",
+    "query": "select QUERY_TEXT, USER_NAME, count(*) as occurrences from ....",
+    "user": "user_name",
+    "occurrences": 1
+  }
+]
+```
+
 #### Get a scanned db
 Once a database was scanned you can use this endpoint to retrieve the tables names and columns
diff --git a/dataherald/api/__init__.py b/dataherald/api/__init__.py
index fdbc8f96..7e1f7854 100644
--- a/dataherald/api/__init__.py
+++ b/dataherald/api/__init__.py
@@ -6,7 +6,7 @@
 from dataherald.api.types import Query
 from dataherald.config import Component
-from dataherald.db_scanner.models.types import TableDescription
+from dataherald.db_scanner.models.types import QueryHistory, TableDescription
 from dataherald.sql_database.models.types import DatabaseConnection, SSHSettings
 from dataherald.types import (
     CancelFineTuningRequest,
@@ -125,6 +125,10 @@ def create_response(
     ) -> Response:
         pass
 
+    @abstractmethod
+    def get_query_history(self, db_connection_id: str) -> list[QueryHistory]:
+        pass
+
     @abstractmethod
     def get_responses(self, question_id: str | None = None) -> list[Response]:
         pass
diff --git a/dataherald/api/fastapi.py b/dataherald/api/fastapi.py
index 7c614143..bb3abb78 100644
--- a/dataherald/api/fastapi.py
+++ b/dataherald/api/fastapi.py
@@ -19,11 +19,16 @@
 from dataherald.context_store import ContextStore
 from dataherald.db import DB
 from dataherald.db_scanner import Scanner
-from dataherald.db_scanner.models.types import TableDescription, TableDescriptionStatus
+from dataherald.db_scanner.models.types import (
+    QueryHistory,
+    TableDescription,
+    TableDescriptionStatus,
+)
 from dataherald.db_scanner.repository.base import (
     InvalidColumnNameError,
     TableDescriptionRepository,
 )
+from dataherald.db_scanner.repository.query_history import QueryHistoryRepository
 from dataherald.eval import Evaluator
 from dataherald.finetuning.openai_finetuning import OpenAIFineTuning
 from dataherald.repositories.base import ResponseRepository
@@ -71,6 +76,7 @@ def async_scanning(scanner, database, scanner_request, storage):
         scanner_request.db_connection_id,
         scanner_request.table_names,
         TableDescriptionRepository(storage),
+        QueryHistoryRepository(storage),
     )


@@ -381,6 +387,13 @@ def get_table_description(self, table_description_id: str) -> TableDescription:
             raise HTTPException(status_code=404, detail="Table description not found")
         return result
 
+    @override
+    def get_query_history(self, db_connection_id: str) -> list[QueryHistory]:
+        query_history_repository = QueryHistoryRepository(self.storage)
+        return query_history_repository.find_by(
+            {"db_connection_id": ObjectId(db_connection_id)}
+        )
+
     @override
     def get_responses(self, question_id: str | None = None) -> list[Response]:
         response_repository = ResponseRepository(self.storage)
diff --git a/dataherald/db_scanner/__init__.py b/dataherald/db_scanner/__init__.py
index da5e2c68..a98eecd4 100644
--- a/dataherald/db_scanner/__init__.py
+++ b/dataherald/db_scanner/__init__.py
@@ -3,6 +3,7 @@
 from dataherald.config import Component
 from dataherald.db_scanner.repository.base import TableDescriptionRepository
+from dataherald.db_scanner.repository.query_history import QueryHistoryRepository
 from 
dataherald.sql_database.base import SQLDatabase @@ -14,6 +15,7 @@ def scan( db_connection_id: str, table_names: list[str] | None, repository: TableDescriptionRepository, + query_history_repository: QueryHistoryRepository, ) -> None: """ "Scan a db""" diff --git a/dataherald/db_scanner/models/types.py b/dataherald/db_scanner/models/types.py index cf29700d..c29e912d 100644 --- a/dataherald/db_scanner/models/types.py +++ b/dataherald/db_scanner/models/types.py @@ -45,3 +45,12 @@ def parse_datetime_with_timezone(cls, value): if not value: return None return value.replace(tzinfo=timezone.utc) # Set the timezone to UTC + + +class QueryHistory(BaseModel): + id: str | None + db_connection_id: str + table_name: str + query: str + user: str + occurrences: int = 0 diff --git a/dataherald/db_scanner/repository/query_history.py b/dataherald/db_scanner/repository/query_history.py new file mode 100644 index 00000000..d86120a3 --- /dev/null +++ b/dataherald/db_scanner/repository/query_history.py @@ -0,0 +1,31 @@ +from bson.objectid import ObjectId + +from dataherald.db_scanner.models.types import QueryHistory + +DB_COLLECTION = "query_history" + + +class QueryHistoryRepository: + def __init__(self, storage): + self.storage = storage + + def insert(self, query_history: QueryHistory) -> QueryHistory: + query_history_dict = query_history.dict(exclude={"id"}) + query_history_dict["db_connection_id"] = ObjectId( + query_history.db_connection_id + ) + query_history.id = str( + self.storage.insert_one(DB_COLLECTION, query_history_dict) + ) + return query_history + + def find_by( + self, query: dict, page: int = 1, limit: int = 10 + ) -> list[QueryHistory]: + rows = self.storage.find(DB_COLLECTION, query, page=page, limit=limit) + result = [] + for row in rows: + row["id"] = str(row["_id"]) + row["db_connection_id"] = str(row["db_connection_id"]) + result.append(QueryHistory(**row)) + return result diff --git a/dataherald/db_scanner/services/__init__.py b/dataherald/db_scanner/services/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/dataherald/db_scanner/services/abstract_scanner.py b/dataherald/db_scanner/services/abstract_scanner.py new file mode 100644 index 00000000..27a36056 --- /dev/null +++ b/dataherald/db_scanner/services/abstract_scanner.py @@ -0,0 +1,20 @@ +from abc import ABC, abstractmethod + +from sqlalchemy.sql.schema import Column + +from dataherald.db_scanner.models.types import QueryHistory +from dataherald.sql_database.base import SQLDatabase + + +class AbstractScanner(ABC): + @abstractmethod + def cardinality_values(self, column: Column, db_engine: SQLDatabase) -> list | None: + """Returns a list if it is a catalog otherwise return None""" + pass + + @abstractmethod + def get_logs( + self, table: str, db_engine: SQLDatabase, db_connection_id: str + ) -> list[QueryHistory]: + """Returns a list of logs""" + pass diff --git a/dataherald/db_scanner/services/base_scanner.py b/dataherald/db_scanner/services/base_scanner.py new file mode 100644 index 00000000..9c9af732 --- /dev/null +++ b/dataherald/db_scanner/services/base_scanner.py @@ -0,0 +1,28 @@ +import sqlalchemy +from overrides import override +from sqlalchemy.sql import func +from sqlalchemy.sql.schema import Column + +from dataherald.db_scanner.models.types import QueryHistory +from dataherald.db_scanner.services.abstract_scanner import AbstractScanner +from dataherald.sql_database.base import SQLDatabase + +MIN_CATEGORY_VALUE = 1 +MAX_CATEGORY_VALUE = 100 + + +class BaseScanner(AbstractScanner): + @override + def 
cardinality_values(self, column: Column, db_engine: SQLDatabase) -> list | None: + cardinality_query = sqlalchemy.select([func.distinct(column)]).limit(101) + cardinality = db_engine.engine.execute(cardinality_query).fetchall() + + if MAX_CATEGORY_VALUE > len(cardinality) > MIN_CATEGORY_VALUE: + return [str(category[0]) for category in cardinality] + return None + + @override + def get_logs( + self, table: str, db_engine: SQLDatabase, db_connection_id: str # noqa: ARG002 + ) -> list[QueryHistory]: + return [] diff --git a/dataherald/db_scanner/services/big_query_scanner.py b/dataherald/db_scanner/services/big_query_scanner.py new file mode 100644 index 00000000..0726a837 --- /dev/null +++ b/dataherald/db_scanner/services/big_query_scanner.py @@ -0,0 +1,52 @@ +from datetime import datetime, timedelta + +import sqlalchemy +from overrides import override +from sqlalchemy.sql import func +from sqlalchemy.sql.schema import Column + +from dataherald.db_scanner.models.types import QueryHistory +from dataherald.db_scanner.services.abstract_scanner import AbstractScanner +from dataherald.sql_database.base import SQLDatabase + +MIN_CATEGORY_VALUE = 1 +MAX_CATEGORY_VALUE = 100 +MAX_LOGS = 5_000 + + +class BigQueryScanner(AbstractScanner): + @override + def cardinality_values(self, column: Column, db_engine: SQLDatabase) -> list | None: + rs = db_engine.engine.execute( + f"SELECT APPROX_COUNT_DISTINCT({column.name}) FROM {column.table.name}" # noqa: S608 E501 + ).fetchall() + + if ( + len(rs) > 0 + and len(rs[0]) > 0 + and MIN_CATEGORY_VALUE < rs[0][0] <= MAX_CATEGORY_VALUE + ): + cardinality_query = sqlalchemy.select([func.distinct(column)]).limit(101) + cardinality = db_engine.engine.execute(cardinality_query).fetchall() + return [str(category[0]) for category in cardinality] + + return None + + @override + def get_logs( + self, table: str, db_engine: SQLDatabase, db_connection_id: str + ) -> list[QueryHistory]: + filter_date = (datetime.now() - timedelta(days=90)).strftime("%Y-%m-%d") + rows = db_engine.engine.execute( + f"SELECT query, user_email, count(*) as occurrences FROM `region-us.INFORMATION_SCHEMA.JOBS`, UNNEST(referenced_tables) AS t where job_type = 'QUERY' and statement_type = 'SELECT' and t.table_id = '{table}' and state = 'DONE' and creation_time >='{filter_date}' group by query, user_email ORDER BY occurrences DESC limit {MAX_LOGS}" # noqa: S608 E501 + ).fetchall() + return [ + QueryHistory( + db_connection_id=db_connection_id, + table_name=table, + query=row[0], + user=row[1], + occurrences=row[2], + ) + for row in rows + ] diff --git a/dataherald/db_scanner/services/postgre_sql_scanner.py b/dataherald/db_scanner/services/postgre_sql_scanner.py new file mode 100644 index 00000000..ddb9b505 --- /dev/null +++ b/dataherald/db_scanner/services/postgre_sql_scanner.py @@ -0,0 +1,30 @@ +from overrides import override +from sqlalchemy.sql.schema import Column + +from dataherald.db_scanner.models.types import QueryHistory +from dataherald.db_scanner.services.abstract_scanner import AbstractScanner +from dataherald.sql_database.base import SQLDatabase + +MIN_CATEGORY_VALUE = 1 +MAX_CATEGORY_VALUE = 100 + + +class PostgreSqlScanner(AbstractScanner): + @override + def cardinality_values(self, column: Column, db_engine: SQLDatabase) -> list | None: + rs = db_engine.engine.execute( + f"SELECT n_distinct, most_common_vals::TEXT::TEXT[] FROM pg_catalog.pg_stats WHERE tablename = '{column.table.name}' AND attname = '{column.name}'" # noqa: S608 E501 + ).fetchall() + + if ( + len(rs) > 0 + and 
MIN_CATEGORY_VALUE < rs[0]["n_distinct"] <= MAX_CATEGORY_VALUE + ): + return rs[0]["most_common_vals"] + return None + + @override + def get_logs( + self, table: str, db_engine: SQLDatabase, db_connection_id: str # noqa: ARG002 + ) -> list[QueryHistory]: + return [] diff --git a/dataherald/db_scanner/services/snowflake_scanner.py b/dataherald/db_scanner/services/snowflake_scanner.py new file mode 100644 index 00000000..3285cf90 --- /dev/null +++ b/dataherald/db_scanner/services/snowflake_scanner.py @@ -0,0 +1,53 @@ +from datetime import datetime, timedelta + +import sqlalchemy +from overrides import override +from sqlalchemy.sql import func +from sqlalchemy.sql.schema import Column + +from dataherald.db_scanner.models.types import QueryHistory +from dataherald.db_scanner.services.abstract_scanner import AbstractScanner +from dataherald.sql_database.base import SQLDatabase + +MIN_CATEGORY_VALUE = 1 +MAX_CATEGORY_VALUE = 100 +MAX_LOGS = 5_000 + + +class SnowflakeScanner(AbstractScanner): + @override + def cardinality_values(self, column: Column, db_engine: SQLDatabase) -> list | None: + rs = db_engine.engine.execute( + f"select HLL({column.name}) from {column.table.name}" # noqa: S608 E501 + ).fetchall() + + if ( + len(rs) > 0 + and len(rs[0]) > 0 + and MIN_CATEGORY_VALUE < rs[0][0] <= MAX_CATEGORY_VALUE + ): + cardinality_query = sqlalchemy.select([func.distinct(column)]).limit(101) + cardinality = db_engine.engine.execute(cardinality_query).fetchall() + return [str(category[0]) for category in cardinality] + + return None + + @override + def get_logs( + self, table: str, db_engine: SQLDatabase, db_connection_id: str + ) -> list[QueryHistory]: + database_name = db_engine.engine.url.database.split("/")[0] + filter_date = (datetime.now() - timedelta(days=90)).strftime("%Y-%m-%d") + rows = db_engine.engine.execute( + f"select QUERY_TEXT, USER_NAME, count(*) as occurrences from TABLE(INFORMATION_SCHEMA.QUERY_HISTORY()) where DATABASE_NAME = '{database_name}' and QUERY_TYPE = 'SELECT' and EXECUTION_STATUS = 'SUCCESS' and START_TIME > '{filter_date}' and QUERY_TEXT like '%FROM {table}%' and QUERY_TEXT not like '%QUERY_HISTORY%' group by QUERY_TEXT, USER_NAME ORDER BY occurrences DESC limit {MAX_LOGS}" # noqa: S608 E501 + ).fetchall() + return [ + QueryHistory( + db_connection_id=db_connection_id, + table_name=table, + query=row[0], + user=row[1], + occurrences=row[2], + ) + for row in rows + ] diff --git a/dataherald/db_scanner/sqlalchemy.py b/dataherald/db_scanner/sqlalchemy.py index dd29195d..646cdf70 100644 --- a/dataherald/db_scanner/sqlalchemy.py +++ b/dataherald/db_scanner/sqlalchemy.py @@ -1,3 +1,4 @@ +import logging from datetime import datetime from typing import Any, List @@ -5,7 +6,6 @@ from overrides import override from sqlalchemy import MetaData, inspect from sqlalchemy.schema import CreateTable -from sqlalchemy.sql import func from dataherald.db_scanner import Scanner from dataherald.db_scanner.models.types import ( @@ -14,14 +14,26 @@ TableDescriptionStatus, ) from dataherald.db_scanner.repository.base import TableDescriptionRepository +from dataherald.db_scanner.repository.query_history import QueryHistoryRepository +from dataherald.db_scanner.services.abstract_scanner import AbstractScanner +from dataherald.db_scanner.services.base_scanner import BaseScanner +from dataherald.db_scanner.services.big_query_scanner import BigQueryScanner +from dataherald.db_scanner.services.postgre_sql_scanner import PostgreSqlScanner +from dataherald.db_scanner.services.snowflake_scanner import 
SnowflakeScanner from dataherald.sql_database.base import SQLDatabase MIN_CATEGORY_VALUE = 1 MAX_CATEGORY_VALUE = 60 MAX_SIZE_LETTERS = 50 +logger = logging.getLogger(__name__) + class SqlAlchemyScanner(Scanner): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.scanner_service: AbstractScanner = None + @override def synchronizing( self, @@ -81,6 +93,7 @@ def get_processed_column( # noqa: PLR0911 ).limit(1) field_size = db_engine.engine.execute(field_size_query).first() + # Check if the column is empty if not field_size: field_size = [""] if len(str(str(field_size[0]))) > MAX_SIZE_LETTERS: @@ -89,64 +102,15 @@ def get_processed_column( # noqa: PLR0911 data_type=str(column["type"]), low_cardinality=False, ) - - # special case for PostgreSQL table - read query planner statistics from the pg_stats view - # TODO doesn't work for views, only tables - if db_engine.engine.driver == "psycopg2": - # TODO escape table and column names - rs = db_engine.engine.execute( - f"SELECT n_distinct, most_common_vals::TEXT::TEXT[] FROM pg_catalog.pg_stats WHERE tablename = '{table}' AND attname = '{column['name']}'" # noqa: S608 E501 - ).fetchall() - if ( - len(rs) > 0 - and MIN_CATEGORY_VALUE < rs[0]["n_distinct"] <= MAX_CATEGORY_VALUE - ): - return ColumnDetail( - name=column["name"], - data_type=str(column["type"]), - low_cardinality=True, - categories=rs[0]["most_common_vals"], - ) - return ColumnDetail( - name=column["name"], - data_type=str(column["type"]), - low_cardinality=False, - ) - - try: - cardinality_query = sqlalchemy.select( - [func.distinct(dynamic_meta_table.c[column["name"]])] - ).limit(200) - cardinality = db_engine.engine.execute(cardinality_query).fetchall() - except Exception: - return ColumnDetail( - name=column["name"], - data_type=str(column["type"]), - low_cardinality=False, - ) - - if len(cardinality) > MAX_CATEGORY_VALUE: - return ColumnDetail( - name=column["name"], - data_type=str(column["type"]), - low_cardinality=False, - ) - - query = sqlalchemy.select( - [ - dynamic_meta_table.c[column["name"]], - sqlalchemy.func.count(dynamic_meta_table.c[column["name"]]), - ] - ).group_by(dynamic_meta_table.c[column["name"]]) - - # get rows - categories = db_engine.engine.execute(query).fetchall() - if MIN_CATEGORY_VALUE < len(categories) <= MAX_CATEGORY_VALUE: + category_values = self.scanner_service.cardinality_values( + dynamic_meta_table.c[column["name"]], db_engine + ) + if category_values: return ColumnDetail( name=column["name"], data_type=str(column["type"]), low_cardinality=True, - categories=[str(category[0]) for category in categories], + categories=category_values, ) return ColumnDetail( name=column["name"], @@ -211,7 +175,17 @@ def scan( db_connection_id: str, table_names: list[str] | None, repository: TableDescriptionRepository, + query_history_repository: QueryHistoryRepository, ) -> None: + services = { + "snowflake": SnowflakeScanner, + "bigquery": BigQueryScanner, + "psycopg2": PostgreSqlScanner, + } + self.scanner_service = BaseScanner() + if db_engine.engine.driver in services.keys(): + self.scanner_service = services[db_engine.engine.driver]() + inspector = inspect(db_engine.engine) meta = MetaData(bind=db_engine.engine) MetaData.reflect(meta, views=True) @@ -242,3 +216,14 @@ def scan( error_message=f"{e}", ) ) + try: + logger.info(f"Get logs table: {table}") + query_history = self.scanner_service.get_logs( + table, db_engine, db_connection_id + ) + if len(query_history) > 0: + for query in query_history: + 
query_history_repository.insert(query)
+
+            except Exception:  # noqa: S112
+                continue
diff --git a/dataherald/server/fastapi/__init__.py b/dataherald/server/fastapi/__init__.py
index fe180f56..0eb5d126 100644
--- a/dataherald/server/fastapi/__init__.py
+++ b/dataherald/server/fastapi/__init__.py
@@ -10,7 +10,7 @@
 import dataherald
 from dataherald.api.types import Query
 from dataherald.config import Settings
-from dataherald.db_scanner.models.types import TableDescription
+from dataherald.db_scanner.models.types import QueryHistory, TableDescription
 from dataherald.sql_database.models.types import DatabaseConnection, SSHSettings
 from dataherald.types import (
     CancelFineTuningRequest,
@@ -101,6 +101,13 @@ def __init__(self, settings: Settings):
             tags=["Table descriptions"],
         )
 
+        self.router.add_api_route(
+            "/api/v1/query-history",
+            self.get_query_history,
+            methods=["GET"],
+            tags=["Query history"],
+        )
+
         self.router.add_api_route(
             "/api/v1/golden-records/{golden_record_id}",
             self.delete_golden_record,
@@ -320,6 +327,10 @@ def get_table_description(self, table_description_id: str) -> TableDescription:
         """Get description"""
         return self._api.get_table_description(table_description_id)
 
+    def get_query_history(self, db_connection_id: str) -> list[QueryHistory]:
+        """List query history"""
+        return self._api.get_query_history(db_connection_id)
+
     def get_responses(self, question_id: str | None = None) -> list[Response]:
         """List responses"""
         return self._api.get_responses(question_id)
diff --git a/docs/api.list_query_history.rst b/docs/api.list_query_history.rst
new file mode 100644
index 00000000..48736977
--- /dev/null
+++ b/docs/api.list_query_history.rst
@@ -0,0 +1,59 @@
+List query history
+=======================
+
+After executing the **POST sync-schemas** endpoint, you will be able to access the `query_history` rows specific to each database
+connection. This data can be used to generate new Golden records, although currently this process must be carried out
+manually through the **POST golden-records** endpoint.
+
+Request this ``GET`` endpoint::
+
+    /api/v1/query-history
+
+**Parameters**
+
+.. csv-table::
+  :header: "Name", "Type", "Description"
+  :widths: 20, 20, 60
+
+  "db_connection_id", "string", "The database connection to retrieve the query logs for, ``Required``"
+
+
+**Responses**
+
+HTTP 200 code response
+
+.. code-block:: rst
+
+   [
+     {
+       "id": "string",
+       "db_connection_id": "string",
+       "table_name": "string",
+       "query": "string",
+       "user": "string",
+       "occurrences": "integer"
+     }
+   ]
+
+**Request example**
+
+.. code-block:: rst
+
+   curl -X 'GET' \
+     'http://localhost/api/v1/query-history?db_connection_id=656e52cb4d1fda50cae7b939' \
+     -H 'accept: application/json'
+
+**Response example**
+
+.. code-block:: rst
+
+   [
+     {
+       "id": "656e52da4d1fda50cae7b93a",
+       "db_connection_id": "656e52cb4d1fda50cae7b939",
+       "table_name": "foo",
+       "query": "select QUERY_TEXT, USER_NAME, count(*) as occurrences from bar ...",
+       "user": "user_name",
+       "occurrences": 1
+     }
+   ]
diff --git a/docs/api.rst b/docs/api.rst
index d546b2fc..8b6b5aec 100644
--- a/docs/api.rst
+++ b/docs/api.rst
@@ -180,6 +180,7 @@ Related endpoints are:
    api.list_table_description
    api.get_table_description
    api.update_table_descriptions
+   api.list_query_history
    api.add_instructions
    api.list_instructions
diff --git a/docs/api.scan_table_description.rst b/docs/api.scan_table_description.rst
index 97b86f52..1ccc9f8e 100644
--- a/docs/api.scan_table_description.rst
+++ b/docs/api.scan_table_description.rst
@@ -4,7 +4,10 @@ Create a table description
 =======================
 
 Once you have set your db credentials request this endpoint to scan your database. It maps
-all tables and columns so It will help the SQL Agent to generate an accurate answer.
+all tables and columns so it will help the SQL Agent to generate an accurate answer. In addition, it retrieves logs,
+which consist of historical queries associated with each database table. These records are then stored within the
+query_history collection. The historical queries retrieved cover the past three months and are grouped
+based on query and user.
 
 It can scan all db tables or if you specify a `table_names` then It will only scan those tables.
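
The api.list_query_history page added above notes that the collected history can seed new Golden records, though only manually through the golden-records endpoint. The snippet below is a minimal, illustrative sketch of that review loop; it assumes a local deployment at `http://localhost` and the sample connection id used in the docs, reads only the documented fields of the `GET /api/v1/query-history` response, and leaves golden-record creation as the manual step it currently is.

```python
# Illustrative sketch only -- assumes a local deployment and the sample
# connection id from the docs above; not part of the patch itself.
import requests

BASE_URL = "http://localhost"
DB_CONNECTION_ID = "656e52cb4d1fda50cae7b939"

# Fetch the query history the scanner stored for this connection.
history = requests.get(
    f"{BASE_URL}/api/v1/query-history",
    params={"db_connection_id": DB_CONNECTION_ID},
    timeout=30,
).json()

# Review the most frequently run queries first; each one is a candidate
# for a golden record once it is paired with the question it answers and
# submitted manually through the golden-records endpoint.
for row in sorted(history, key=lambda r: r["occurrences"], reverse=True):
    print(f"{row['table_name']} ({row['occurrences']} occurrences, user {row['user']})")
    print(f"  {row['query']}")
```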