Skip to content

Commit

Permalink
Add get_response_local_file endpoint to API and not use AWS S3 (#251)
Browse files Browse the repository at this point in the history
* Add get_response_local_file endpoint to API and not use AWS S3

* Updated to remove the standalone "/api/v1/responses/{response_id}/localfile" route and associated get_response_local_file API method. Driven now solely by config.

---------

Co-authored-by: Paul Markus <[email protected]>
  • Loading branch information
ppmarkus and pmrockhampton authored Nov 27, 2023
1 parent 9e6ad59 commit ee22f5a
Show file tree
Hide file tree
Showing 6 changed files with 30 additions and 11 deletions.
1 change: 1 addition & 0 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -39,3 +39,4 @@ MONGODB_DB_PASSWORD = 'admin'
# The encryption key is used to encrypt database connection info before storing in Mongo. Please refer to the README on how to set it.
S3_AWS_ACCESS_KEY_ID=
S3_AWS_SECRET_ACCESS_KEY=
ONLY_STORE_CSV_FILES_LOCALLY = True # Set to True if you only want to save generated CSV files locally instead of in S3. Note that if stored locally they should be treated as ephemeral, i.e., they will disappear when the engine is restarted.
1 change: 1 addition & 0 deletions dataherald/api/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,7 @@ def get_response_file(
) -> FileResponse:
pass


@abstractmethod
def delete_golden_record(self, golden_record_id: str) -> dict:
    """Delete the golden record identified by ``golden_record_id``.

    Abstract API contract only — NOTE(review): presumably returns a
    status/confirmation dict; confirm the exact shape against the
    concrete implementation (e.g. the FastAPI backend).
    """
    pass
Expand Down
20 changes: 13 additions & 7 deletions dataherald/api/fastapi.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@
TableDescriptionRequest,
UpdateInstruction,
)
from dataherald.config import Settings
from dataherald.utils.s3 import S3

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -408,14 +409,19 @@ def get_response_file(
raise HTTPException(status_code=400, detail=str(e)) from e

if not result:
raise HTTPException(
status_code=404, detail="Question, response, or db_connection not found"
)

s3 = S3()
raise HTTPException(status_code=404, detail="Question, response, or db_connection not found")

# Check if the file is to be returned from server (locally) or from S3
if Settings().only_store_csv_files_locally:
file_location = result.csv_file_path
# check if the file exists
if not os.path.exists(file_location):
raise HTTPException(status_code=404, detail="CSV file not found. Possibly deleted/removed from server.")
else:
s3 = S3()

file_location = s3.download(result.csv_file_path, db_connection.file_storage)
background_tasks.add_task(delete_file, file_location)
file_location = s3.download(result.csv_file_path, db_connection.file_storage)
background_tasks.add_task(delete_file, file_location)

return FileResponse(
file_location,
Expand Down
1 change: 1 addition & 0 deletions dataherald/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ class Settings(BaseSettings):
encrypt_key: str = os.environ.get("ENCRYPT_KEY")
s3_aws_access_key_id: str | None = os.environ.get("S3_AWS_ACCESS_KEY_ID")
s3_aws_secret_access_key: str | None = os.environ.get("S3_AWS_SECRET_ACCESS_KEY")
only_store_csv_files_locally: str | None = os.environ.get("ONLY_STORE_CSV_FILES_LOCALLY")

def require(self, key: str) -> Any:
val = self[key]
Expand Down
14 changes: 10 additions & 4 deletions dataherald/sql_generator/create_sql_query_status.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@
from dataherald.types import Response, SQLQueryResult
from dataherald.utils.s3 import S3

from dataherald.config import Settings



def format_error_message(response: Response, error_message: str) -> Response:
# Remove the complete query
Expand Down Expand Up @@ -40,10 +43,13 @@ def create_csv_file(
writer.writerow(rows[0].keys())
for row in rows:
writer.writerow(row.values())
s3 = S3()
response.csv_file_path = s3.upload(
file_location, database_connection.file_storage
)
if Settings().only_store_csv_files_locally:
response.csv_file_path = file_location
else:
s3 = S3()
response.csv_file_path = s3.upload(
file_location, database_connection.file_storage
)
response.sql_query_result = SQLQueryResult(columns=columns, rows=rows)


Expand Down
4 changes: 4 additions & 0 deletions docs/envars.rst
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ provided in the .env.example file with the default values.
S3_AWS_ACCESS_KEY_ID =
S3_AWS_SECRET_ACCESS_KEY =
ONLY_STORE_CSV_FILES_LOCALLY =
DH_ENGINE_TIMEOUT =
UPPER_LIMIT_QUERY_RETURN_ROWS =
Expand Down Expand Up @@ -65,3 +67,5 @@ provided in the .env.example file with the default values.
"S3_AWS_SECRET_ACCESS_KEY", "The key used to access credential files if saved to S3", "None", "No"
"DH_ENGINE_TIMEOUT", "This is used to set the max seconds the process will wait for the response to be generate. If the specified time limit is exceeded, it will trigger an exception", "None", "No"
"UPPER_LIMIT_QUERY_RETURN_ROWS", "The upper limit on number of rows returned from the query engine (equivalent to using LIMIT N in PostgreSQL/MySQL/SQlite).", "None", "No"
"ONLY_STORE_CSV_FILES_LOCALLY", "Set to True if you only want to save generated CSV files locally instead of in S3. Note that if stored locally they should be treated as ephemeral, i.e., they will disappear when the engine is restarted.", "None", "No"

0 comments on commit ee22f5a

Please sign in to comment.