From d8e7a13f88a29d8592c9a0da738da882dc870d04 Mon Sep 17 00:00:00 2001 From: Brendan Barnes Date: Sat, 18 Mar 2023 00:01:22 +0000 Subject: [PATCH 01/12] save --- .devcontainer/devcontainer.json | 37 +++++++++++ .gitattributes | 1 + .gitignore | 86 +++++++++++++++++++++++++ .pylintrc | 4 ++ pytest.ini | 3 + requirements.txt | 12 ++++ src/cc_sdk/__init__.py | 7 ++ src/cc_sdk/aws_config.py | 63 ++++++++++++++++++ src/cc_sdk/cc_store.py | 45 +++++++++++++ src/cc_sdk/data_source.py | 47 ++++++++++++++ src/cc_sdk/data_store.py | 60 ++++++++++++++++++ src/cc_sdk/payload.py | 74 ++++++++++++++++++++++ src/cc_sdk/store_type.py | 49 ++++++++++++++ src/cc_sdk/validators.py | 32 ++++++++++ tests/test_aws_config.py | 74 ++++++++++++++++++++++ tests/test_data_source.py | 46 ++++++++++++++ tests/test_data_store.py | 105 ++++++++++++++++++++++++++++++ tests/test_payload.py | 109 ++++++++++++++++++++++++++++++++ 18 files changed, 854 insertions(+) create mode 100644 .devcontainer/devcontainer.json create mode 100644 .gitattributes create mode 100644 .gitignore create mode 100644 .pylintrc create mode 100644 pytest.ini create mode 100644 requirements.txt create mode 100644 src/cc_sdk/__init__.py create mode 100644 src/cc_sdk/aws_config.py create mode 100644 src/cc_sdk/cc_store.py create mode 100644 src/cc_sdk/data_source.py create mode 100644 src/cc_sdk/data_store.py create mode 100644 src/cc_sdk/payload.py create mode 100644 src/cc_sdk/store_type.py create mode 100644 src/cc_sdk/validators.py create mode 100644 tests/test_aws_config.py create mode 100644 tests/test_data_source.py create mode 100644 tests/test_data_store.py create mode 100644 tests/test_payload.py diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json new file mode 100644 index 0000000..301d2f8 --- /dev/null +++ b/.devcontainer/devcontainer.json @@ -0,0 +1,37 @@ +// For format details, see https://aka.ms/devcontainer.json. For config options, see the +// README at: https://github.com/devcontainers/templates/tree/main/src/python +{ + "name": "Python 3", + // Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile + "image": "mcr.microsoft.com/devcontainers/python:0-3.11", + // Features to add to the dev container. More info: https://containers.dev/features. + // "features": {}, + // Use 'forwardPorts' to make a list of ports inside the container available locally. + // "forwardPorts": [], + // Use 'postCreateCommand' to run commands after the container is created. + "postCreateCommand": "sudo chown -R vscode ./ && git config --global --add safe.directory ./ && pip3 install --user -r requirements.txt", + // Configure tool-specific properties. + "customizations": { + "settings": { + "python.defaultInterpreterPath": "/usr/local/bin/python", + "python.linting.enabled": true, + "python.linting.pylintEnabled": true, + "python.formatting.provider": "black", + "python.formatting.blackPath": "/usr/local/py-utils/bin/black", + "python.linting.pylintPath": "/usr/local/py-utils/bin/pylint", + "editor.formatOnSave": true, + "python.languageServer": "Pylance", + "python.linting.lintOnSave": true, + "python.analysis.extraPaths": ["${workspaceFolder}/src"] + }, + "vscode": { + "extensions": [ + "ms-python.python", + "njpwerner.autodocstring", + "ms-python.pylint" + ] + } + } + // Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root. 
+ // "remoteUser": "root" +} \ No newline at end of file diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..94f480d --- /dev/null +++ b/.gitattributes @@ -0,0 +1 @@ +* text=auto eol=lf \ No newline at end of file diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..d1e8f85 --- /dev/null +++ b/.gitignore @@ -0,0 +1,86 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] + +# C extensions +*.so + +# Distribution / packaging +.Python +env/ +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +*.egg-info/ +.installed.cfg +*.egg + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover + +# Translations +*.mo +*.pot + +# Django stuff: +*.log + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# DotEnv configuration +.env + +# Database +*.db +*.rdb + +# Pycharm +.idea + +# Spyder +.spyproject/ + +# Jupyter NB Checkpoints +.ipynb_checkpoints/ + +# Mac OS-specific storage files +.DS_Store + +# vim +*.swp +*.swo + +# Mypy cache +.mypy_cache/ + +# MBI data folder (uncomment after first commit to keep large files out of version control) +/*data*/ \ No newline at end of file diff --git a/.pylintrc b/.pylintrc new file mode 100644 index 0000000..a319294 --- /dev/null +++ b/.pylintrc @@ -0,0 +1,4 @@ +[FORMAT] +max-line-length=120 +# Set the Python path +init-hook='import sys; sys.path.append("./src")' \ No newline at end of file diff --git a/pytest.ini b/pytest.ini new file mode 100644 index 0000000..1a40fa3 --- /dev/null +++ b/pytest.ini @@ -0,0 +1,3 @@ +[pytest] +pythonpath = src +testpaths = tests diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..1c9944b --- /dev/null +++ b/requirements.txt @@ -0,0 +1,12 @@ +attrs==22.2.0 +boto3==1.26.93 +botocore==1.29.93 +iniconfig==2.0.0 +jmespath==1.0.1 +packaging==23.0 +pluggy==1.0.0 +pytest==7.2.2 +python-dateutil==2.8.2 +s3transfer==0.6.0 +six==1.16.0 +urllib3==1.26.15 diff --git a/src/cc_sdk/__init__.py b/src/cc_sdk/__init__.py new file mode 100644 index 0000000..894409a --- /dev/null +++ b/src/cc_sdk/__init__.py @@ -0,0 +1,7 @@ +from .data_store import * +from .aws_config import * +from .data_source import * +from .store_type import * +from .payload import * + +__all__ = ["DataStore", "AWSConfig", "DataSource", "StoreType", "Payload"] diff --git a/src/cc_sdk/aws_config.py b/src/cc_sdk/aws_config.py new file mode 100644 index 0000000..1c9d37f --- /dev/null +++ b/src/cc_sdk/aws_config.py @@ -0,0 +1,63 @@ +from attr import define, field, asdict, validators +import json + + +@define(auto_attribs=True) +class AWSConfig: + """ + This class provides configuration settings for using an AWS S3 data store. + + Attributes: + - aws_config_name: str + The name of the AWS configuration (optional). + - aws_access_key_id : str + The AWS access key ID to use for accessing the data store (optional). + - aws_secret_access_key_id : str + The AWS secret access key ID to use for accessing the data store (optional). + - aws_region : str + The AWS region where the data store is located (optional). + - aws_bucket : str + The name of the AWS S3 bucket to use as the data store (optional). 
+ - aws_mock : bool + Whether to use a mock AWS S3 data store (optional, default is False). + - aws_endpoint : str + The endpoint URL for the AWS S3 data store (optional). + - aws_disable_ssl : bool + Whether to disable SSL when connecting to the AWS S3 data store (optional, default is False). + - aws_force_path_style : bool + Whether to use path-style addressing for the AWS S3 data store (optional, default is False). + + Methods: + - serialize(): Returns a JSON string representation of the attributes. + + Raises: + - ValueError: + If a non-serializable object is set for the session attribute. + - TypeError: + If the wrong type of object is set for an attribute. + """ + + aws_config_name: str = field(default="", validator=[validators.instance_of(str)]) + aws_access_key_id: str = field(default="", validator=[validators.instance_of(str)]) + aws_secret_access_key_id: str = field( + default="", validator=[validators.instance_of(str)] + ) + aws_region: str = field(default="", validator=[validators.instance_of(str)]) + aws_bucket: str = field(default="", validator=[validators.instance_of(str)]) + aws_mock: bool = field(default=False, validator=[validators.instance_of(bool)]) + aws_endpoint: str = field(default="", validator=[validators.instance_of(str)]) + aws_disable_ssl: bool = field( + default=False, validator=[validators.instance_of(bool)] + ) + aws_force_path_style: bool = field( + default=False, validator=[validators.instance_of(bool)] + ) + + def serialize(self) -> str: + """ + Serializes the AWSConfig object to a JSON string. + + Returns: + str: JSON string representation of the attributes. + """ + return json.dumps(asdict(self)) diff --git a/src/cc_sdk/cc_store.py b/src/cc_sdk/cc_store.py new file mode 100644 index 0000000..ffe8ebc --- /dev/null +++ b/src/cc_sdk/cc_store.py @@ -0,0 +1,45 @@ +from .payload import Payload +import abc + + +class CCStore(metaclass=abc.ABCMeta): + """A base class for implementing a data store. + + This class defines a set of abstract methods for storing and retrieving data. To use this class, you must create a + subclass and implement each of the abstract methods. + + Attributes: + None + + Methods: + - put_object(input): stores the given input in the store, returns true on success and false on failure + - pull_object(input): retrieves the input from the store, returns true on success and false on failure + - get_object(input): retrieves the object bytes from the store + - get_payload(): retrieves the payload from the store + - root_path(): retrieves the root path of the store + - handles_data_store_type(datastore_type): returns whether the given data store type is handled by this class + """ + + @abc.abstractmethod + def put_object(self, input) -> bool: + pass + + @abc.abstractmethod + def pull_object(self, input) -> bool: + pass + + @abc.abstractmethod + def get_object(self, input) -> bytes: + pass + + @abc.abstractmethod + def get_payload(self) -> Payload: + pass + + @abc.abstractmethod + def root_path(self) -> str: + pass + + @abc.abstractmethod + def handles_data_store_type(self, datastore_type) -> bool: + pass diff --git a/src/cc_sdk/data_source.py b/src/cc_sdk/data_source.py new file mode 100644 index 0000000..baa383c --- /dev/null +++ b/src/cc_sdk/data_source.py @@ -0,0 +1,47 @@ +from attr import define, field, asdict, validators +import json +from .validators import validate_string_list + + +@define(auto_attribs=True, frozen=True) +class DataSource: + """ + A class that represents a data source and its attributes. 
+ + Attributes: + - name : str + The name of the data source. readonly + - id : str + The ID of the data source. readonly + - store_name : str + The name of the data store used by this data source. readonly + - paths : List[str] + The paths to the data in this data source. readonly + + Methods: + - serialize(): Returns a JSON string representation of the attributes. + + Raises: + - ValueError: + If a non-serializable object is set for the session attribute. + - TypeError: + If the wrong type of object is set for an attribute. + - FrozenInstanceError: + If any attribute is written to. + """ + + name: str = field(validator=[validators.instance_of(str)]) + id: str = field(validator=[validators.instance_of(str)]) + store_name: str = field( + validator=[validators.instance_of(str)], + ) + paths: list[str] = field(validator=[validate_string_list]) + + def serialize(self): + """ + Serializes the class as a json string + + Returns: + - str: JSON string representation of the attributes + """ + return json.dumps(asdict(self)) diff --git a/src/cc_sdk/data_store.py b/src/cc_sdk/data_store.py new file mode 100644 index 0000000..c06831c --- /dev/null +++ b/src/cc_sdk/data_store.py @@ -0,0 +1,60 @@ +from attr import define, field, setters, asdict, validators +import json +from .validators import validate_serializable +from .store_type import StoreType, StoreTypeEncoder + +@define(auto_attribs=True) +class DataStore: + """ + A class that represents a data store and its attributes. + + Attributes: + - name : str + The name of the data store. readonly + - id : str + The ID of the data store. readonly + - parameters : dict[str, str] + The parameters of the data store represented as a dictionary. readonly + - store_type : StoreType + The type of the data store. readonly + - ds_profile : str + The profile of the data store. readonly + - session : any, optional + The session object of the data store. It must be JSON serializable. + + Methods: + - serialize(): Returns a JSON string representation of the attributes. + + Raises: + - ValueError: + If a non-serializable object is set for an attribute. + - TypeError: + If the wrong type of object is set for an attribute. + - AttributeError: + If any readonly attribute is written to. 
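+
+    Usage:
+        A minimal construction sketch; the values here are illustrative only:
+
+        ```
+        store = DataStore(
+            name="test",
+            id="123",
+            parameters={"param1": "value1"},
+            store_type=StoreType.S3,
+            ds_profile="test_profile",
+        )
+        json_str = store.serialize()
+        ```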
+ """ + + name: str = field( + on_setattr=setters.frozen, validator=[validators.instance_of(str)] + ) + id: str = field(on_setattr=setters.frozen, validator=[validators.instance_of(str)]) + parameters: dict[str, str] = field( + on_setattr=setters.frozen, + validator=[validators.instance_of(dict), validate_serializable], + ) + store_type: StoreType = field( + on_setattr=setters.frozen, validator=[validators.instance_of(StoreType)] + ) + ds_profile: str = field( + on_setattr=setters.frozen, validator=[validators.instance_of(str)] + ) + session: any = field(default=None, validator=[validate_serializable]) + + def serialize(self): + """ + Serializes the class as a json string + + Returns: + - str: JSON string representation of the attributes + """ + return json.dumps(asdict(self), cls=StoreTypeEncoder) diff --git a/src/cc_sdk/payload.py b/src/cc_sdk/payload.py new file mode 100644 index 0000000..7fdb311 --- /dev/null +++ b/src/cc_sdk/payload.py @@ -0,0 +1,74 @@ +from attr import define, field, setters, asdict, validators +import json +from .data_source import DataSource +from .data_store import DataStore +from .store_type import StoreTypeEncoder +from .validators import validate_serializable + + +def validate_stores(instance, attribute, value): + if not isinstance(value, list): + raise ValueError(f"stores must be a list of DataStores") + if not all(isinstance(store, DataStore) for store in value): + raise ValueError(f"stores must be a list of DataStores") + + +def validate_sources(instance, attribute, value): + if not isinstance(value, list): + raise ValueError(f"attribute must be a list of DataSources") + if not all(isinstance(ds, DataSource) for ds in value): + raise ValueError(f"attribute must be a list of DataSources") + + +@define(auto_attribs=True) +class Payload: + """ + A class that represents a payload for cloud compute. + + Attributes: + - attributes : dict[str, any] + A dictionary of attributes for the payload. readonly + - stores : list[DataStore] + A list of DataStores. + - inputs : list[DataSource] + The inputs for the payload. readonly + - inputs : list[DataSource] + The outputs for the payload. readonly + + Methods: + - serialize(): Returns a JSON string representation of the class. + + Raises: + - ValueError: + If a non-serializable object is set for an attribute. + - TypeError: + If the wrong type of object is set for an attribute. + - AttributeError: + If any readonly attribute is written to. + """ + + attributes: dict[str, any] = field( + on_setattr=setters.frozen, + validator=[validators.instance_of(dict), validate_serializable], + ) + stores: list[DataStore] = field(validator=[validate_stores]) + inputs: list[DataSource] = field( + on_setattr=setters.frozen, + validator=[validate_sources], + ) + outputs: list[DataSource] = field( + on_setattr=setters.frozen, + validator=[validate_sources], + ) + + def set_store(self, index: int, store: DataStore): + self.stores[index] = store + + def serialize(self): + """ + Serializes the class as a json string + + Returns: + - str: JSON string representation of the attributes + """ + return json.dumps(asdict(self), cls=StoreTypeEncoder) diff --git a/src/cc_sdk/store_type.py b/src/cc_sdk/store_type.py new file mode 100644 index 0000000..9a783be --- /dev/null +++ b/src/cc_sdk/store_type.py @@ -0,0 +1,49 @@ +from enum import Enum +import json + +class StoreType(Enum): + """ + The StoreType class is an enum representing different types of data stores. The following store types are available: + + S3: Amazon S3 data store + WS: ??? 
Need to ask Will + RDBMS: Relational database management system data store + EBS: Elastic Block Store data store + + Each store type has an associated integer value, with S3 having a value of 0, WS having a value of 1, RDBMS having + a value of 2, and EBS having a value of 3. This class can be used to ensure type safety when working with different + data stores. + + The enum values serialize to a string representation of the enum name, instead of the integer value, to improve + readability and prevent errors when deserializing. + """ + S3 = 0 + WS = 1 + RDBMS = 2 + EBS = 3 + +class StoreTypeEncoder(json.JSONEncoder): + """ + The StoreTypeEncoder is a custom JSON encoder that extends the default json.JSONEncoder class to handle the serialization of StoreType Enum values. + + It overrides the default() method of the JSONEncoder class to handle StoreType objects by returning their name attribute instead of the object itself. This ensures that StoreType objects are serialized to a JSON string that represents their name. + + Usage: + To use this encoder, pass it as the cls argument when calling json.dumps(), as shown below: + + ``` + import json + from .store_type import StoreTypeEncoder + + data = {"store_type": StoreType.S3} + json_string = json.dumps(data, cls=StoreTypeEncoder) + ``` + + Raises: + - TypeError: + If an object of an unsupported type is encountered. + """ + def default(self, obj): + if isinstance(obj, Enum): + return obj.name + return json.JSONEncoder.default(self, obj) \ No newline at end of file diff --git a/src/cc_sdk/validators.py b/src/cc_sdk/validators.py new file mode 100644 index 0000000..c242fe2 --- /dev/null +++ b/src/cc_sdk/validators.py @@ -0,0 +1,32 @@ +import json + + +def validate_serializable(instance, attribute, value): + """ + A validator that ensures an attribute is JSON serializable. + + Parameters: + ----------- + instance : class + The instance of the class. + attribute : str + The name of the attribute. + value : any + The value of the attribute. + + Raises: + ------- + ValueError: + If a non-serializable object is set for the attribute. 
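+
+    Usage:
+    ------
+    Attach as an attrs field validator; this mirrors how DataStore wires up
+    its session attribute:
+
+        session: any = field(default=None, validator=[validate_serializable])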
+ """ + try: + json.dumps(value) + except TypeError: + raise ValueError(f"Session attribute must be JSON serializable") + + +def validate_string_list(instance, attribute, value): + if not isinstance(value, list): + raise ValueError(f"paths must be a list of strings") + if not all(isinstance(path, str) for path in value): + raise ValueError(f"paths must be a list of strings") diff --git a/tests/test_aws_config.py b/tests/test_aws_config.py new file mode 100644 index 0000000..0c5e87f --- /dev/null +++ b/tests/test_aws_config.py @@ -0,0 +1,74 @@ +import pytest +from attr import asdict +from cc_sdk import AWSConfig + + +@pytest.fixture +def aws_config(): + return AWSConfig( + aws_config_name="test", + aws_access_key_id="my_access_key", + aws_secret_access_key_id="my_secret_key", + aws_region="us-west-2", + aws_bucket="my_bucket", + aws_mock=True, + aws_endpoint="https://my-endpoint.com", + aws_disable_ssl=True, + aws_force_path_style=True, + ) + + +def test_getters(aws_config): + assert aws_config.aws_config_name == "test" + assert aws_config.aws_access_key_id == "my_access_key" + assert aws_config.aws_secret_access_key_id == "my_secret_key" + assert aws_config.aws_region == "us-west-2" + assert aws_config.aws_bucket == "my_bucket" + assert aws_config.aws_mock is True + assert aws_config.aws_endpoint == "https://my-endpoint.com" + assert aws_config.aws_disable_ssl is True + assert aws_config.aws_force_path_style is True + + +def test_setters(aws_config): + aws_config.aws_config_name = "new_test" + assert aws_config.aws_config_name == "new_test" + + aws_config.aws_access_key_id = "new_access_key" + assert aws_config.aws_access_key_id == "new_access_key" + + aws_config.aws_secret_access_key_id = "new_secret_key" + assert aws_config.aws_secret_access_key_id == "new_secret_key" + + aws_config.aws_region = "us-east-1" + assert aws_config.aws_region == "us-east-1" + + aws_config.aws_bucket = "new_bucket" + assert aws_config.aws_bucket == "new_bucket" + + aws_config.aws_mock = False + assert aws_config.aws_mock is False + + aws_config.aws_endpoint = "https://new-endpoint.com" + assert aws_config.aws_endpoint == "https://new-endpoint.com" + + aws_config.aws_disable_ssl = False + assert aws_config.aws_disable_ssl is False + + aws_config.aws_force_path_style = False + assert aws_config.aws_force_path_style is False + + +def test_serialize(aws_config): + expected = { + "aws_config_name": "test", + "aws_access_key_id": "my_access_key", + "aws_secret_access_key_id": "my_secret_key", + "aws_region": "us-west-2", + "aws_bucket": "my_bucket", + "aws_mock": True, + "aws_endpoint": "https://my-endpoint.com", + "aws_disable_ssl": True, + "aws_force_path_style": True, + } + assert asdict(aws_config) == expected diff --git a/tests/test_data_source.py b/tests/test_data_source.py new file mode 100644 index 0000000..53e39a5 --- /dev/null +++ b/tests/test_data_source.py @@ -0,0 +1,46 @@ +import pytest +import json + +from attr.exceptions import FrozenInstanceError +from cc_sdk import DataSource + + +@pytest.fixture +def data_source(): + return DataSource( + name="test", id="123", store_name="test_store", paths=["path1", "path2"] + ) + + +def test_value_error(): + with pytest.raises(ValueError): + _ = DataSource( + name="test", id="123", store_name="test_store", paths=["path1", object()] + ) + + +def test_getters(data_source): + assert data_source.name == "test" + assert data_source.id == "123" + assert data_source.store_name == "test_store" + assert data_source.paths == ["path1", "path2"] + + +def 
test_setters(data_source): + with pytest.raises(FrozenInstanceError): + data_source.name = "new_test" + + with pytest.raises(FrozenInstanceError): + data_source.id = "456" + + with pytest.raises(FrozenInstanceError): + data_source.store_name = "new_test_store" + + with pytest.raises(FrozenInstanceError): + data_source.paths = ["new_path"] + + +def test_serialize(data_source): + expected_json = '{"name": "test", "id": "123", "store_name": "test_store", "paths": ["path1", "path2"]}' + assert data_source.serialize() == expected_json + assert json.loads(data_source.serialize()) == json.loads(expected_json) diff --git a/tests/test_data_store.py b/tests/test_data_store.py new file mode 100644 index 0000000..5edad50 --- /dev/null +++ b/tests/test_data_store.py @@ -0,0 +1,105 @@ +import pytest +from cc_sdk import DataStore, StoreType +import json + + +@pytest.fixture +def data_store(): + return DataStore( + name="test", + id="123", + parameters={"param1": "value1", "param2": "value2"}, + store_type=StoreType.S3, + ds_profile="test_profile", + ) + + +def test_type_errors(): + with pytest.raises(TypeError): + _ = DataStore( + name=object(), + id="123", + parameters={"param1": "value1", "param2": "value2"}, + store_type=StoreType.S3, + ds_profile="test_profile", + ) + with pytest.raises(TypeError): + _ = DataStore( + name="test", + id=object(), + parameters={"param1": "value1", "param2": "value2"}, + store_type=StoreType.S3, + ds_profile="test_profile", + ) + with pytest.raises(TypeError): + _ = DataStore( + name="test", + id="123", + parameters=object(), + store_type=StoreType.S3, + ds_profile="test_profile", + ) + with pytest.raises(TypeError): + _ = DataStore( + name="test", + id="123", + parameters={"param1": "value1", "param2": "value2"}, + store_type=object(), + ds_profile="test_profile", + ) + with pytest.raises(TypeError): + _ = DataStore( + name="test", + id="123", + parameters={"param1": "value1", "param2": "value2"}, + store_type=StoreType.S3, + ds_profile=object(), + ) + + +def test_getters(data_store): + assert data_store.name == "test" + assert data_store.id == "123" + assert data_store.parameters == {"param1": "value1", "param2": "value2"} + assert data_store.store_type == StoreType.S3 + assert data_store.ds_profile == "test_profile" + assert data_store.session is None + + +def test_setters(data_store): + with pytest.raises(AttributeError): + data_store.name = "new_test" + + with pytest.raises(AttributeError): + data_store.id = "456" + + with pytest.raises(AttributeError): + data_store.parameters = {"new_param1": "new_value1"} + + with pytest.raises(AttributeError): + data_store.store_type = StoreType.EBS + + with pytest.raises(AttributeError): + data_store.ds_profile = "new_test_profile" + + with pytest.raises(ValueError): + data_store.session = object() # non-serializable object + + data_store.session = {"key": "value"} # serializable object + assert data_store.session == {"key": "value"} + + +def test_serialize(data_store): + # Serialize the DataStore object + serialized = data_store.serialize() + + # Deserialize the JSON string back into a dictionary + deserialized = json.loads(serialized) + + # Check that the dictionary has the same attribute values as the original DataStore object + assert deserialized["name"] == data_store.name + assert deserialized["id"] == data_store.id + assert deserialized["parameters"] == data_store.parameters + assert deserialized["store_type"] == "S3" + assert deserialized["ds_profile"] == data_store.ds_profile + assert deserialized["session"] == 
data_store.session diff --git a/tests/test_payload.py b/tests/test_payload.py new file mode 100644 index 0000000..6608ea5 --- /dev/null +++ b/tests/test_payload.py @@ -0,0 +1,109 @@ +import json +import pytest +from cc_sdk import Payload, DataSource, DataStore, StoreType + + +@pytest.fixture +def payload(): + return Payload( + attributes={"attr1": "value1", "attr2": 2}, + stores=[ + DataStore( + name="store1", + id="store_id1", + parameters={"param1": "value1"}, + store_type=StoreType.S3, + ds_profile="profile1", + ), + DataStore( + name="store2", + id="store_id2", + parameters={"param2": "value2"}, + store_type=StoreType.S3, + ds_profile="profile2", + ), + ], + inputs=[ + DataSource( + name="input1", + id="input_id1", + store_name="store1", + paths=["/path/to/data1"], + ), + DataSource( + name="input2", + id="input_id2", + store_name="store2", + paths=["/path/to/data2"], + ), + ], + outputs=[ + DataSource( + name="output1", + id="output_id1", + store_name="store1", + paths=["/path/to/output1"], + ), + DataSource( + name="output2", + id="output_id2", + store_name="store2", + paths=["/path/to/output2"], + ), + ], + ) + + +def test_attributes_getter(payload): + assert payload.attributes == {"attr1": "value1", "attr2": 2} + + +def test_stores_getter(payload): + assert len(payload.stores) == 2 + assert payload.stores[0].name == "store1" + assert payload.stores[1].name == "store2" + + +def test_inputs_getter(payload): + assert len(payload.inputs) == 2 + assert payload.inputs[0].name == "input1" + assert payload.inputs[1].name == "input2" + + +def test_outputs_getter(payload): + assert len(payload.outputs) == 2 + assert payload.outputs[0].name == "output1" + assert payload.outputs[1].name == "output2" + + +def test_attributes_setter_frozen(payload): + with pytest.raises(AttributeError): + payload.attributes = {"attr1": "new_value1", "attr2": 3} + + +def test_inputs_setter_frozen(payload): + with pytest.raises(AttributeError): + payload.inputs[0].paths = ["/path/to/new_input"] + + +def test_outputs_setter_frozen(payload): + with pytest.raises(AttributeError): + payload.outputs[0].paths = ["/path/to/new_output"] + + +def test_stores_setter(payload): + store = DataStore( + name="new_store", + id="new_store_id", + parameters={"param": "value"}, + store_type=StoreType.S3, + ds_profile="profile", + ) + payload.set_store(0, store) + assert payload.stores[0].name == "new_store" + + +def test_serialize(payload): + serialized = payload.serialize() + expected = '{"attributes": {"attr1": "value1", "attr2": 2}, "stores": [{"name": "store1", "id": "store_id1", "parameters": {"param1": "value1"}, "store_type": "S3", "ds_profile": "profile1", "session": null}, {"name": "store2", "id": "store_id2", "parameters": {"param2": "value2"}, "store_type": "S3", "ds_profile": "profile2", "session": null}], "inputs": [{"name": "input1", "id": "input_id1", "store_name": "store1", "paths": ["/path/to/data1"]}, {"name": "input2", "id": "input_id2", "store_name": "store2", "paths": ["/path/to/data2"]}], "outputs": [{"name": "output1", "id": "output_id1", "store_name": "store1", "paths": ["/path/to/output1"]}, {"name": "output2", "id": "output_id2", "store_name": "store2", "paths": ["/path/to/output2"]}]}' + assert serialized == expected From d1b879c9d1a0665e5ae3e7332db89b493fe6f301 Mon Sep 17 00:00:00 2001 From: Brendan Barnes Date: Sun, 19 Mar 2023 00:31:34 +0000 Subject: [PATCH 02/12] save --- .devcontainer/devcontainer.json | 73 ++--- .gitignore | 7 +- .pylintrc | 3 + README.md | 33 ++- requirements.txt | 15 ++ 
src/cc_sdk/__init__.py | 39 ++- src/cc_sdk/aws_config.py | 6 +- src/cc_sdk/cc_store.py | 29 +- src/cc_sdk/cc_store_s3.py | 282 ++++++++++++++++++++ src/cc_sdk/config.py | 36 +++ src/cc_sdk/constants.py | 4 + src/cc_sdk/data_source.py | 16 +- src/cc_sdk/data_store.py | 29 +- src/cc_sdk/environment_variables.py | 17 ++ src/cc_sdk/error.py | 94 +++++++ src/cc_sdk/file_data_store.py | 44 +++ src/cc_sdk/get_object_input.py | 35 +++ src/cc_sdk/json_encoder.py | 33 +++ src/cc_sdk/logger.py | 81 ++++++ src/cc_sdk/message.py | 32 +++ src/cc_sdk/object_state.py | 22 ++ src/cc_sdk/payload.py | 80 ++++-- src/cc_sdk/pull_object_input.py | 35 +++ src/cc_sdk/put_object_input.py | 42 +++ src/cc_sdk/seed_set.py | 35 +++ src/cc_sdk/status.py | 80 ++++++ src/cc_sdk/store_type.py | 29 +- src/cc_sdk/validators.py | 58 +++- tests/test_aws_config.py | 17 +- tests/test_cc_store_s3.py | 400 ++++++++++++++++++++++++++++ tests/test_config.py | 55 ++++ tests/test_data_source.py | 7 - tests/test_data_store.py | 59 +--- tests/test_error.py | 141 ++++++++++ tests/test_get_object_input.py | 35 +++ tests/test_logger.py | 48 ++++ tests/test_message.py | 24 ++ tests/test_payload.py | 34 +-- tests/test_pull_object_input.py | 35 +++ tests/test_put_object_input.py | 44 +++ tests/test_seed_set.py | 27 ++ tests/test_status.py | 35 +++ 42 files changed, 2032 insertions(+), 218 deletions(-) create mode 100644 src/cc_sdk/cc_store_s3.py create mode 100644 src/cc_sdk/config.py create mode 100644 src/cc_sdk/constants.py create mode 100644 src/cc_sdk/environment_variables.py create mode 100644 src/cc_sdk/error.py create mode 100644 src/cc_sdk/file_data_store.py create mode 100644 src/cc_sdk/get_object_input.py create mode 100644 src/cc_sdk/json_encoder.py create mode 100644 src/cc_sdk/logger.py create mode 100644 src/cc_sdk/message.py create mode 100644 src/cc_sdk/object_state.py create mode 100644 src/cc_sdk/pull_object_input.py create mode 100644 src/cc_sdk/put_object_input.py create mode 100644 src/cc_sdk/seed_set.py create mode 100644 src/cc_sdk/status.py create mode 100644 tests/test_cc_store_s3.py create mode 100644 tests/test_config.py create mode 100644 tests/test_error.py create mode 100644 tests/test_get_object_input.py create mode 100644 tests/test_logger.py create mode 100644 tests/test_message.py create mode 100644 tests/test_pull_object_input.py create mode 100644 tests/test_put_object_input.py create mode 100644 tests/test_seed_set.py create mode 100644 tests/test_status.py diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 301d2f8..3d845b7 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -1,37 +1,44 @@ // For format details, see https://aka.ms/devcontainer.json. For config options, see the // README at: https://github.com/devcontainers/templates/tree/main/src/python { - "name": "Python 3", - // Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile - "image": "mcr.microsoft.com/devcontainers/python:0-3.11", - // Features to add to the dev container. More info: https://containers.dev/features. - // "features": {}, - // Use 'forwardPorts' to make a list of ports inside the container available locally. - // "forwardPorts": [], - // Use 'postCreateCommand' to run commands after the container is created. - "postCreateCommand": "sudo chown -R vscode ./ && git config --global --add safe.directory ./ && pip3 install --user -r requirements.txt", - // Configure tool-specific properties. 
-    "customizations": {
-        "settings": {
-            "python.defaultInterpreterPath": "/usr/local/bin/python",
-            "python.linting.enabled": true,
-            "python.linting.pylintEnabled": true,
-            "python.formatting.provider": "black",
-            "python.formatting.blackPath": "/usr/local/py-utils/bin/black",
-            "python.linting.pylintPath": "/usr/local/py-utils/bin/pylint",
-            "editor.formatOnSave": true,
-            "python.languageServer": "Pylance",
-            "python.linting.lintOnSave": true,
-            "python.analysis.extraPaths": ["${workspaceFolder}/src"]
-        },
-        "vscode": {
-            "extensions": [
-                "ms-python.python",
-                "njpwerner.autodocstring",
-                "ms-python.pylint"
-            ]
-        }
-    }
-    // Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root.
-    // "remoteUser": "root"
+    "name": "Python 3",
+    // Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile
+    "image": "mcr.microsoft.com/devcontainers/python:0-3.11",
+    // Features to add to the dev container. More info: https://containers.dev/features.
+    // "features": {},
+    // Use 'forwardPorts' to make a list of ports inside the container available locally.
+    // "forwardPorts": [],
+    // Use 'postCreateCommand' to run commands after the container is created.
+    "postCreateCommand": "sudo chown -R vscode ./ && git config --global --add safe.directory ./ && pip3 install --user -r requirements.txt",
+    // Configure tool-specific properties.
+    "customizations": {
+        "settings": {
+            "python.defaultInterpreterPath": "/usr/local/bin/python",
+            "python.linting.enabled": true,
+            "python.linting.pylintEnabled": true,
+            "editor.defaultFormatter": "ms-python.python",
+            "python.formatting.provider": "black",
+            "python.formatting.blackPath": "/usr/local/py-utils/bin/black",
+            "python.linting.pylintPath": "/usr/local/py-utils/bin/pylint",
+            "editor.formatOnSave": true,
+            "python.languageServer": "Pylance",
+            "python.linting.lintOnSave": true,
+            "python.analysis.extraPaths": [
+                "${workspaceFolder}/src"
+            ]
+        },
+        "vscode": {
+            "extensions": [
+                "ms-python.python",
+                "njpwerner.autodocstring",
+                "ms-python.pylint"
+            ]
+        }
+    },
+    "remoteUser": "vscode",
+    "containerUser": "vscode",
+    "workspaceMount": "",
+    "runArgs": [
+        "--volume=${localWorkspaceFolder}:/workspaces/${localWorkspaceFolderBasename}:Z"
+    ]
 }
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
index d1e8f85..c997132 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,6 +1,7 @@
 # Byte-compiled / optimized / DLL files
 __pycache__/
 *.py[cod]
+.pytest_cache/
 
 # C extensions
 *.so
@@ -66,6 +67,9 @@ target/
 # Pycharm
 .idea
 
+# vscode
+.vscode/
+
 # Spyder
 .spyproject/
 
@@ -81,6 +85,3 @@ target/
 
 # Mypy cache
 .mypy_cache/
-
-# MBI data folder (uncomment after first commit to keep large files out of version control)
-/*data*/
\ No newline at end of file
diff --git a/.pylintrc b/.pylintrc
index a319294..3db9cc7 100644
--- a/.pylintrc
+++ b/.pylintrc
@@ -1,4 +1,6 @@
 [FORMAT]
 max-line-length=120
+disable=missing-docstring,unused-argument
+min-public-methods=0
 # Set the Python path
 init-hook='import sys; sys.path.append("./src")'
\ No newline at end of file
diff --git a/README.md b/README.md
index e66fbc5..e21daf4 100644
--- a/README.md
+++ b/README.md
@@ -1 +1,32 @@
-# cc-python-sdk
\ No newline at end of file
+# cc-python-sdk
+The Python SDK for developing plugins for Cloud Compute
+
+## TODO / Questions
+1. We need to make the JSON serializers consistent across all SDKs. Should we use camelCase for all attributes since this is the JavaScript convention?
+
+2. Will we need to support using an S3 data store with files that cannot fit in memory? We could use multipart upload/downloads instead of reading entire files to memory.
+
+3. In the Java sdk, pull/put/get methods seem to be writing to predetermined paths, so why do we need source/dest paths in the input? In this sdk I've made them read and write to the supplied paths in the input parameter.
+
+4. Should `put_object()` use source and dest root paths instead of full paths? Currently we are using root paths (directories?) for pull and get
+
+5. Do we want to support directory paths with trailing slashes? It is not supported now.
+
+6. What about files with no extension? We need to remove the "." from paths in that case
+
+7. In writeInputStreamToDisk in the Java SDK, there is a bug if the file path contains the filename in any parent directory
+
+```Java
+    String[] fileparts = outputDestination.split("/");
+    String fileName = fileparts[fileparts.length-1];
+    String directory = outputDestination.replace(fileName,""); // <- dangerous if any parent directory contains the fileName
+
+    // replace with this
+    String outputDestination = "path/to/output/destination";
+    String directory = new File(outputDestination).getParent();
+
+    File dir = new File(directory);
+    if (!dir.exists()) {
+        dir.mkdirs();
+    }
+```
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index 1c9944b..0a82885 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,12 +1,27 @@
 attrs==22.2.0
 boto3==1.26.93
 botocore==1.29.93
+certifi==2022.12.7
+cffi==1.15.1
+charset-normalizer==3.1.0
+cryptography==39.0.2
+idna==3.4
 iniconfig==2.0.0
+Jinja2==3.1.2
 jmespath==1.0.1
+MarkupSafe==2.1.2
+moto==4.1.4
 packaging==23.0
 pluggy==1.0.0
+pycparser==2.21
 pytest==7.2.2
 python-dateutil==2.8.2
+PyYAML==6.0
+requests==2.28.2
+responses==0.23.1
 s3transfer==0.6.0
 six==1.16.0
+types-PyYAML==6.0.12.8
 urllib3==1.26.15
+Werkzeug==2.2.3
+xmltodict==0.13.0
diff --git a/src/cc_sdk/__init__.py b/src/cc_sdk/__init__.py
index 894409a..f3ac645 100644
--- a/src/cc_sdk/__init__.py
+++ b/src/cc_sdk/__init__.py
@@ -3,5 +3,42 @@
 from .data_source import *
 from .store_type import *
 from .payload import *
+from .get_object_input import *
+from .pull_object_input import *
+from .put_object_input import *
+from .config import *
+from . import constants
+from .
import environment_variables +from .message import Message +from .error import Error, ErrorLevel, ErrorLevelOptions +from .status import Status, StatusLevel +from .seed_set import SeedSet +from .cc_store import CCStore +from .file_data_store import FileDataStore +from .cc_store_s3 import CCStoreS3 +from .json_encoder import EnumEncoder -__all__ = ["DataStore", "AWSConfig", "DataSource", "StoreType", "Payload"] +__all__ = [ + "DataStore", + "AWSConfig", + "DataSource", + "StoreType", + "Payload", + "GetObjectInput", + "PullObjectInput", + "PutObjectInput", + "Config", + "constants", + "environment_variables", + "Message", + "Error", + "ErrorLevel", + "ErrorLevelOptions", + "Status", + "StatusLevel", + "SeedSet", + "CCStore", + "FileDataStore", + "CCStoreS3", + "EnumEncoder", +] diff --git a/src/cc_sdk/aws_config.py b/src/cc_sdk/aws_config.py index 1c9d37f..f537424 100644 --- a/src/cc_sdk/aws_config.py +++ b/src/cc_sdk/aws_config.py @@ -1,5 +1,5 @@ -from attr import define, field, asdict, validators import json +from attr import define, field, asdict, validators @define(auto_attribs=True) @@ -29,10 +29,8 @@ class AWSConfig: Methods: - serialize(): Returns a JSON string representation of the attributes. - + Raises: - - ValueError: - If a non-serializable object is set for the session attribute. - TypeError: If the wrong type of object is set for an attribute. """ diff --git a/src/cc_sdk/cc_store.py b/src/cc_sdk/cc_store.py index ffe8ebc..29f7af4 100644 --- a/src/cc_sdk/cc_store.py +++ b/src/cc_sdk/cc_store.py @@ -1,35 +1,42 @@ -from .payload import Payload import abc - +from .payload import Payload +from .store_type import StoreType +from .get_object_input import GetObjectInput +from .pull_object_input import PullObjectInput +from .put_object_input import PutObjectInput class CCStore(metaclass=abc.ABCMeta): """A base class for implementing a data store. - This class defines a set of abstract methods for storing and retrieving data. To use this class, you must create a - subclass and implement each of the abstract methods. + This class defines a set of abstract methods for storing and retrieving + data. To use this class, you must create a subclass and implement each of + the abstract methods. 
     Attributes:
         None
 
     Methods:
-    - put_object(input): stores the given input in the store, returns true on success and false on failure
-    - pull_object(input): retrieves the input from the store, returns true on success and false on failure
+    - put_object(input): stores the given input in the store, returns true
+      on success and false on failure
+    - pull_object(input): retrieves the input from the store, returns true
+      on success and false on failure
     - get_object(input): retrieves the object bytes from the store
     - get_payload(): retrieves the payload from the store
     - root_path(): retrieves the root path of the store
-    - handles_data_store_type(datastore_type): returns whether the given data store type is handled by this class
+    - handles_data_store_type(datastore_type): returns whether the given
+      data store type is handled by this class
     """
 
     @abc.abstractmethod
-    def put_object(self, input) -> bool:
+    def put_object(self, input: PutObjectInput) -> bool:
         pass
 
     @abc.abstractmethod
-    def pull_object(self, input) -> bool:
+    def pull_object(self, input: PullObjectInput) -> bool:
         pass
 
     @abc.abstractmethod
-    def get_object(self, input) -> bytes:
+    def get_object(self, input: GetObjectInput) -> bytes:
         pass
 
     @abc.abstractmethod
@@ -41,5 +48,5 @@ def root_path(self) -> str:
         pass
 
     @abc.abstractmethod
-    def handles_data_store_type(self, datastore_type) -> bool:
+    def handles_data_store_type(self, data_store_type: StoreType) -> bool:
         pass
diff --git a/src/cc_sdk/cc_store_s3.py b/src/cc_sdk/cc_store_s3.py
new file mode 100644
index 0000000..f37e300
--- /dev/null
+++ b/src/cc_sdk/cc_store_s3.py
@@ -0,0 +1,282 @@
+import io
+import os
+import boto3
+from botocore.client import Config
+from botocore.exceptions import ClientError
+from .cc_store import CCStore
+from .get_object_input import GetObjectInput
+from .pull_object_input import PullObjectInput
+from .put_object_input import PutObjectInput
+from .payload import Payload
+from .store_type import StoreType
+from . import environment_variables
+from .aws_config import AWSConfig
+from . import constants
+from .object_state import ObjectState
+
+
+class CCStoreS3(CCStore):
+    """An implementation of the abstract CCStore class for use with AWS S3 as the data store.
+    You must set the following required and optional environment variables to construct an object of this class:
+
+    Required:
+    - CC_AWS_ACCESS_KEY_ID: AWS credentials
+    - CC_AWS_SECRET_ACCESS_KEY: AWS credentials
+    - CC_AWS_DEFAULT_REGION: the region the bucket is in
+    - CC_AWS_S3_BUCKET: the bucket name to use
+    - CC_S3_ENDPOINT: the AWS S3 endpoint for the bucket
+    - CC_EVENT_NUMBER: CC event number to use
+    - CC_ROOT: The root prefix on S3 where the payload will be stored in
+      s3://<bucket>/<root>/<manifest_id>/payload
+
+    Optional:
+    - CC_S3_MOCK: True or False. If true, bucket will be mocked
+    - CC_S3_DISABLE_SSL: True or False. If true, bucket will not use SSL
+    - CC_S3_FORCE_PATH_STYLE: True or False. If true, bucket will force path style
+    """
+
+    def __init__(self):
+        self.local_root_path = ""
+        self.bucket = ""
+        self.root = ""
+        self.manifest_id = ""
+        self.store_type = StoreType.S3
+        self.aws_s3 = None
+        self.config = AWSConfig()
+        self._initialize()
+
+    def _initialize(self):
+        """Initializes the class using environment variables
+
+        Raises:
+            EnvironmentError: if a required env variable is not set
+        """
+        required_env_vars = {
+            "aws_access_key_id_env_key": environment_variables.CC_PROFILE
+            + "_"
+            + environment_variables.AWS_ACCESS_KEY_ID,
+            "aws_secret_access_key_id_env_key": environment_variables.CC_PROFILE
+            + "_"
+            + environment_variables.AWS_SECRET_ACCESS_KEY,
+            "aws_region_env_key": environment_variables.CC_PROFILE
+            + "_"
+            + environment_variables.AWS_DEFAULT_REGION,
+            "aws_bucket_env_key": environment_variables.CC_PROFILE
+            + "_"
+            + environment_variables.AWS_S3_BUCKET,
+            "aws_endpoint_env_key": environment_variables.CC_PROFILE
+            + "_"
+            + environment_variables.S3_ENDPOINT,
+            "manifest_id_env_key": environment_variables.CC_MANIFEST_ID,
+            "cc_root_env_key": environment_variables.CC_ROOT,
+        }
+        for _, value in required_env_vars.items():
+            if value not in os.environ:
+                raise EnvironmentError(f"{value} environment variable not set")
+
+        aws_access_key_id = str(
+            os.getenv(required_env_vars["aws_access_key_id_env_key"])
+        )
+        aws_secret_access_key_id = str(
+            os.getenv(required_env_vars["aws_secret_access_key_id_env_key"])
+        )
+        # automatically convert to standard format e.g. "us-east-1"
+        aws_region = (
+            str(os.getenv(required_env_vars["aws_region_env_key"]))
+            .lower()
+            .replace("_", "-")
+        )
+        aws_bucket = str(os.getenv(required_env_vars["aws_bucket_env_key"]))
+        aws_endpoint = str(os.getenv(required_env_vars["aws_endpoint_env_key"]))
+
+        optional_env_vars = {
+            "aws_mock_env_key": environment_variables.CC_PROFILE
+            + "_"
+            + environment_variables.S3_MOCK,
+            "aws_disable_ssl_env_key": environment_variables.CC_PROFILE
+            + "_"
+            + environment_variables.S3_DISABLE_SSL,
+            "aws_force_path_style_env_key": environment_variables.CC_PROFILE
+            + "_"
+            + environment_variables.S3_FORCE_PATH_STYLE,
+        }
+
+        # parse the optional boolean flags explicitly: bool() on the raw env
+        # string would treat any non-empty value, including "False", as True
+        def env_flag(key: str) -> bool:
+            return str(os.getenv(key, "")).strip().lower() in ("1", "true")
+
+        aws_mock = env_flag(optional_env_vars["aws_mock_env_key"])
+        aws_disable_ssl = env_flag(optional_env_vars["aws_disable_ssl_env_key"])
+        aws_force_path_style = env_flag(
+            optional_env_vars["aws_force_path_style_env_key"]
+        )
+
+        acfg = AWSConfig(
+            aws_access_key_id=aws_access_key_id,
+            aws_secret_access_key_id=aws_secret_access_key_id,
+            aws_region=aws_region,
+            aws_bucket=aws_bucket,
+            aws_endpoint=aws_endpoint,
+            aws_mock=aws_mock,
+            aws_disable_ssl=aws_disable_ssl,
+            aws_force_path_style=aws_force_path_style,
+        )
+        self.config = acfg
+
+        self.aws_s3 = self.create_s3_client(self.config)
+
+        self.store_type = StoreType.S3
+        self.manifest_id = str(os.getenv(required_env_vars["manifest_id_env_key"]))
+        self.local_root_path = constants.LOCAL_ROOT_PATH
+        self.bucket = self.config.aws_bucket
+        self.root = str(os.getenv(required_env_vars["cc_root_env_key"]))
+
+    @staticmethod
+    def create_s3_client(config: AWSConfig):
+        """Initialize the S3 client using the config settings. When mocked, all config settings are ignored
+
+        Args:
+            config (AWSConfig): the config settings used to create the s3 client
+
+        Returns:
+            The boto3 AWS S3 Client object
+        """
+        if config.aws_mock:
+            return boto3.client("s3")
+        if config.aws_force_path_style:
+            client_config = Config(
+                signature_version="s3v4", s3={"addressing_style": "path"}
+            )
+        else:
+            client_config = Config(signature_version="s3v4")
+        s3_client = boto3.client(
+            "s3",
+            aws_access_key_id=config.aws_access_key_id,
+            aws_secret_access_key=config.aws_secret_access_key_id,
+            region_name=config.aws_region,
+            endpoint_url=config.aws_endpoint,
+            use_ssl=not config.aws_disable_ssl,
+            verify=not config.aws_disable_ssl,
+            config=client_config,
+        )
+        return s3_client
+
+    def handles_data_store_type(self, data_store_type: StoreType) -> bool:
+        return self.store_type == data_store_type
+
+    def put_object(self, input: PutObjectInput) -> bool:
+        """Put an object on S3. Object can be in memory or on disk.
+
+        Args:
+            input (PutObjectInput): inputs
+
+        Raises:
+            FileNotFoundError: given file does not exist on disk
+            IOError: error reading file from disk
+
+        Returns:
+            bool: True if the put succeeded
+        """
+        match input.object_state:
+            case ObjectState.LocalDisk:
+                # read from local
+                try:
+                    with open(input.source_path, "rb") as f:
+                        data = f.read()
+                    self.upload_to_s3(input.dest_path, data)
+                except FileNotFoundError:
+                    # TODO
+                    raise
+                except IOError:
+                    # TODO
+                    raise
+                return True
+
+            case ObjectState.Memory:
+                self.upload_to_s3(input.dest_path, input.data)
+                return True
+            case _:
+                return False
+
+    def pull_object(self, input: PullObjectInput) -> bool:
+        """Pull an object from S3 to a local file path
+
+        Args:
+            input (PullObjectInput): inputs
+
+        Returns:
+            bool: True if the pull succeeded
+        """
+        remote_path = input.source_root_path + "/" + input.file_name
+        local_path = input.dest_root_path + "/" + input.file_name
+        if len(input.file_extension) > 0:
+            remote_path += "." + input.file_extension
+            local_path += "." + input.file_extension
+        try:
+            data = self.download_bytes_from_s3(remote_path)
+            self.write_input_stream_to_disk(io.BytesIO(data), local_path)
+        except Exception:
+            return False
+        return True
+
+    def get_object(self, input: GetObjectInput) -> bytes:
+        """Get an object from S3 to memory
+
+        Args:
+            input (GetObjectInput): inputs
+
+        Returns:
+            bytes: data from the get request
+        """
+        remote_path = input.source_root_path + "/" + input.file_name
+        if len(input.file_extension) > 0:
+            remote_path += "." + input.file_extension
+        try:
+            return self.download_bytes_from_s3(remote_path)
+        except ClientError as e:
+            raise Exception(str(e)) from e
+
+    def get_payload(self) -> Payload:
+        """Get the payload from S3. The payload is always at:
+        s3://<bucket>/<root>/<manifest_id>/payload
+
+        Returns:
+            Payload: the payload object
+        """
+        path = self.root + "/" + self.manifest_id + "/" + constants.PAYLOAD_FILE_NAME
+        try:
+            body = self.download_bytes_from_s3(path)
+            return self.read_json_model_payload_from_bytes(body)
+        except ClientError as e:
+            raise Exception(str(e)) from e
+
+    @staticmethod
+    def read_json_model_payload_from_bytes(data: bytes) -> Payload:
+        """Helper method to decode the JSON to a Payload object"""
+        try:
+            return Payload.from_json(data.decode("utf-8"))
+        except Exception as e:
+            raise e
+
+    def write_input_stream_to_disk(
+        self, input_stream: io.BytesIO, output_destination: str
+    ) -> None:
+        directory = os.path.dirname(output_destination)
+        if not os.path.exists(directory):
+            os.makedirs(directory)
+        bytes_data = input_stream.read()
+        with open(output_destination, "wb") as output_file:
+            output_file.write(bytes_data)
+
+    def upload_to_s3(self, object_key: str, file_bytes: bytes) -> None:
+        if self.aws_s3 is not None:
+            self.aws_s3.put_object(Bucket=self.bucket, Key=object_key, Body=file_bytes)
+        else:
+            raise RuntimeError("AWS config not set.")
+
+    def download_bytes_from_s3(self, object_key: str) -> bytes:
+        if self.aws_s3 is not None:
+            response = self.aws_s3.get_object(Bucket=self.bucket, Key=object_key)
+            file_bytes = response["Body"].read()
+            return file_bytes
+        else:
+            raise RuntimeError("AWS config not set.")
+
+    def root_path(self) -> str:
+        return self.bucket
diff --git a/src/cc_sdk/config.py b/src/cc_sdk/config.py
new file mode 100644
index 0000000..eec00f3
--- /dev/null
+++ b/src/cc_sdk/config.py
@@ -0,0 +1,36 @@
+import json
+from attrs import define, field, asdict
+from .aws_config import AWSConfig
+from .validators import validate_homogeneous_list
+
+
+@define(auto_attribs=True)
+class Config:
+    """
+    Stores the configurations for various data stores
+
+    Attributes:
+    - aws_configs : list[AWSConfig]
+        The configs for AWS
+
+    Raises:
+    - TypeError:
+        If the wrong type of object is set for an attribute.
+    """
+
+    aws_configs: list[AWSConfig] = field(
+        validator=[
+            lambda instance, attribute, value: validate_homogeneous_list(
+                instance, attribute, value, AWSConfig
+            )
+        ]
+    )
+
+    def serialize(self) -> str:
+        """
+        Serializes the class as a json string
+
+        Returns:
+        - str: JSON string representation of the attributes
+        """
+        return json.dumps(asdict(self, recurse=True))
diff --git a/src/cc_sdk/constants.py b/src/cc_sdk/constants.py
new file mode 100644
index 0000000..1160b4a
--- /dev/null
+++ b/src/cc_sdk/constants.py
@@ -0,0 +1,4 @@
+from typing import Final
+
+PAYLOAD_FILE_NAME: Final[str] = "payload"
+LOCAL_ROOT_PATH: Final[str] = "/data"
diff --git a/src/cc_sdk/data_source.py b/src/cc_sdk/data_source.py
index baa383c..4585bcf 100644
--- a/src/cc_sdk/data_source.py
+++ b/src/cc_sdk/data_source.py
@@ -1,6 +1,6 @@
-from attr import define, field, asdict, validators
 import json
-from .validators import validate_string_list
+from attr import define, field, asdict, validators
+from .validators import validate_homogeneous_list
 
 
 @define(auto_attribs=True, frozen=True)
@@ -23,7 +23,7 @@ class DataSource:
 
     Raises:
     - ValueError:
-        If a non-serializable object is set for the session attribute.
+        If a non-serializable object is set for the attribute.
     - TypeError:
         If the wrong type of object is set for an attribute.
- FrozenInstanceError: @@ -35,9 +35,15 @@ class DataSource: store_name: str = field( validator=[validators.instance_of(str)], ) - paths: list[str] = field(validator=[validate_string_list]) + paths: list[str] = field( + validator=[ + lambda instance, attribute, value: validate_homogeneous_list( + instance, attribute, value, str + ) + ] + ) - def serialize(self): + def serialize(self) -> str: """ Serializes the class as a json string diff --git a/src/cc_sdk/data_store.py b/src/cc_sdk/data_store.py index c06831c..6f780bc 100644 --- a/src/cc_sdk/data_store.py +++ b/src/cc_sdk/data_store.py @@ -1,9 +1,26 @@ -from attr import define, field, setters, asdict, validators import json +from typing import Any +from attr import define, field, setters, asdict, validators from .validators import validate_serializable -from .store_type import StoreType, StoreTypeEncoder +from .store_type import StoreType +from .json_encoder import EnumEncoder + + +def convert_store_type(cls, fields): + results = [] + for field in fields: + if field.converter is not None: + results.append(field) + continue + if field.type in {StoreType, "store_type"}: + converter = lambda s: StoreType.__members__[s] if isinstance(s, str) else s + else: + converter = None + results.append(field.evolve(converter=converter)) + return results + -@define(auto_attribs=True) +@define(auto_attribs=True, field_transformer=convert_store_type) class DataStore: """ A class that represents a data store and its attributes. @@ -48,13 +65,13 @@ class DataStore: ds_profile: str = field( on_setattr=setters.frozen, validator=[validators.instance_of(str)] ) - session: any = field(default=None, validator=[validate_serializable]) + session: Any = field(default=None, validator=[validate_serializable]) - def serialize(self): + def serialize(self) -> str: """ Serializes the class as a json string Returns: - str: JSON string representation of the attributes """ - return json.dumps(asdict(self), cls=StoreTypeEncoder) + return json.dumps(asdict(self), cls=EnumEncoder) diff --git a/src/cc_sdk/environment_variables.py b/src/cc_sdk/environment_variables.py new file mode 100644 index 0000000..1071776 --- /dev/null +++ b/src/cc_sdk/environment_variables.py @@ -0,0 +1,17 @@ +from typing import Final + +CC_MANIFEST_ID: Final[str] = "CC_EVENT_NUMBER" +CC_EVENT_NUMBER: Final[str] = "CC_EVENT_NUMBER" +CC_EVENT_ID: Final[str] = "CC_EVENT_ID" +CC_ROOT: Final[str] = "CC_ROOT" +CC_PLUGIN_DEFINITION: Final[str] = "CC_PLUGIN_DEFINITION" +CC_PROFILE: Final[str] = "CC" +CC_PAYLOAD_FORMATTED: Final[str] = "CC_PAYLOAD_FORMATTED" +AWS_ACCESS_KEY_ID: Final[str] = "AWS_ACCESS_KEY_ID" +AWS_SECRET_ACCESS_KEY: Final[str] = "AWS_SECRET_ACCESS_KEY" +AWS_DEFAULT_REGION: Final[str] = "AWS_DEFAULT_REGION" +AWS_S3_BUCKET: Final[str] = "AWS_S3_BUCKET" +S3_MOCK: Final[str] = "S3_MOCK" +S3_ENDPOINT: Final[str] = "S3_ENDPOINT" +S3_DISABLE_SSL: Final[str] = "S3_DISABLE_SSL" +S3_FORCE_PATH_STYLE: Final[str] = "S3_FORCE_PATH_STYLE" diff --git a/src/cc_sdk/error.py b/src/cc_sdk/error.py new file mode 100644 index 0000000..5975870 --- /dev/null +++ b/src/cc_sdk/error.py @@ -0,0 +1,94 @@ +from enum import Enum +import json +from typing import Final +from attrs import define, field, validators, asdict +from .json_encoder import EnumEncoder +from functools import total_ordering + + +@total_ordering +class ErrorLevel(Enum): + """ + The ErrorLevel class is an enum representing different levels of error. 
The
+    following error levels are available:
+
+    DEBUG: a debug statement
+    INFO: some information
+    WARN: a warning
+    ERROR: an error
+    FATAL: a fatal message
+    PANIC: a panicked state
+    DISABLED: no messages will be reported
+
+    The enum has an ordering from least severe to most severe
+
+    The enum values serialize to a string representation of the enum name,
+    instead of the integer value, to improve readability and prevent errors
+    when deserializing.
+    """
+
+    DEBUG = 0
+    INFO = 1
+    WARN = 2
+    ERROR = 3
+    FATAL = 4
+    PANIC = 5
+    DISABLED = 6
+
+    def __lt__(self, other):
+        if isinstance(other, self.__class__):
+            return self.value < other.value
+        return NotImplemented
+
+
+# a set of all the enum values in the ErrorLevel enum is namespaced by the
+# ErrorLevelOptions class
+class ErrorLevelOptions:
+    all_opts: Final[set[ErrorLevel]] = set(ErrorLevel)
+
+
+def convert_error_level(cls, fields):
+    results = []
+    for field in fields:
+        if field.converter is not None:
+            results.append(field)
+            continue
+        if field.type in {ErrorLevel, "error_level"}:
+            converter = lambda s: ErrorLevel.__members__[s] if isinstance(s, str) else s
+        else:
+            converter = None
+        results.append(field.evolve(converter=converter))
+    return results
+
+
+@define(auto_attribs=True, frozen=True, field_transformer=convert_error_level)
+class Error:
+    """
+    A class that represents an error for the logger.
+
+    Attributes:
+    - error : str
+        The error message as a string. readonly
+    - error_level : ErrorLevel
+        The error level of the error. readonly
+
+    Methods:
+    - serialize(): Returns a JSON string representation of the attributes.
+
+    Raises:
+    - TypeError:
+        If the wrong type of object is set for an attribute.
+    - FrozenInstanceError:
+        If any attribute is written to.
+    """
+
+    error: str = field(validator=[validators.instance_of(str)])
+    error_level: ErrorLevel = field(validator=[validators.instance_of(ErrorLevel)])
+
+    def serialize(self) -> str:
+        """
+        Serializes the class as a json string
+        Returns:
+        - str: JSON string representation of the attributes
+        """
+        return json.dumps(asdict(self), cls=EnumEncoder)
diff --git a/src/cc_sdk/file_data_store.py b/src/cc_sdk/file_data_store.py
new file mode 100644
index 0000000..8061aec
--- /dev/null
+++ b/src/cc_sdk/file_data_store.py
@@ -0,0 +1,44 @@
+import abc
+import io
+from typing import Type
+
+
+class FileDataStore(metaclass=abc.ABCMeta):
+    """A base class for implementing a file data store.
+
+    This class defines a set of abstract methods for modifying data in a file
+    store. To use this class, you must create a subclass and implement each of
+    the abstract methods.
+
+    Attributes:
+        None
+
+    Methods:
+    - copy(dest_store, src_path, dest_path): copies the specified file from
+      this file data store to another, returns true on success and false on
+      failure
+    - get(path): retrieves a file from the store. Returns a byte stream of
+      the file.
+    - put(data, path): puts data from a byte stream into a file in the
+      store, returns true on success and false on failure
+    - delete(path): deletes a file from the store, returns true on success
+      and false on failure.
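+
+    Usage:
+        A minimal local-filesystem sketch (`LocalFileDataStore` is a
+        hypothetical subclass shown only to illustrate the contract; it
+        assumes `os` is imported alongside `io`):
+
+        ```
+        class LocalFileDataStore(FileDataStore):
+            def copy(self, dest_store, src_path, dest_path):
+                # stream the source file into the destination store
+                return dest_store.put(self.get(src_path), dest_path)
+
+            def get(self, path):
+                with open(path, "rb") as file:
+                    return io.BytesIO(file.read())
+
+            def put(self, data, path):
+                with open(path, "wb") as file:
+                    file.write(data.read())
+                return True
+
+            def delete(self, path):
+                os.remove(path)
+                return True
+        ```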
+ """ + + @abc.abstractmethod + def copy( + self, dest_store: Type["FileDataStore"], src_path: str, dest_path: str + ) -> bool: + pass + + @abc.abstractmethod + def get(self, path: str) -> io.BytesIO: + pass + + @abc.abstractmethod + def put(self, data: io.BytesIO, path: str) -> bool: + pass + + @abc.abstractmethod + def delete(self, path: str) -> bool: + pass diff --git a/src/cc_sdk/get_object_input.py b/src/cc_sdk/get_object_input.py new file mode 100644 index 0000000..0c90942 --- /dev/null +++ b/src/cc_sdk/get_object_input.py @@ -0,0 +1,35 @@ +from attr import define, field, validators +from .store_type import StoreType + + +@define(auto_attribs=True, frozen=True) +class GetObjectInput: + """ + A class that represents an input to the CCStore.GetObject command. + This command will return bytes in memory. + + Attributes: + - file_name : str + The file name of the object to pull not including its extension. readonly + - file_extension : str + The extension of the file to pull. readonly + - source_store_type : StoreType + The type of data store the object will be pulled from + (does not include file name or extension and must not have a trailing slash). readonly + - source_root_path : string + The source path of the object on the data store + (does not include file name or extension and must not include a trailing slash). readonly + + Raises: + - TypeError: + If the wrong type of object is set for an attribute. + - FrozenInstanceError: + If any readonly attribute is written to. + """ + + file_name: str = field(validator=[validators.instance_of(str)]) + file_extension: str = field(validator=[validators.instance_of(str)]) + source_store_type: StoreType = field( + validator=[validators.instance_of(StoreType)], + ) + source_root_path: str = field(validator=validators.instance_of(str)) diff --git a/src/cc_sdk/json_encoder.py b/src/cc_sdk/json_encoder.py new file mode 100644 index 0000000..4af567c --- /dev/null +++ b/src/cc_sdk/json_encoder.py @@ -0,0 +1,33 @@ +import json +from enum import Enum + + +class EnumEncoder(json.JSONEncoder): + """ + The EnumEncoder is a custom JSON encoder that extends the default json.JSONEncoder class to handle the + serialization of Enum values. + + It overrides the default() method of the JSONEncoder class to handle Enum objects by returning their name + attribute instead of the object itself. This ensures that Enum objects are serialized to a JSON string that + represents their name. + + Usage: + To use this encoder, pass it as the cls argument when calling json.dumps(), as shown below: + + ``` + import json + from .json_encoder import EnumEncoder + + data = {"store_type": StoreType.S3} + json_string = json.dumps(data, cls=EnumEncoder) + ``` + + Raises: + - TypeError: + If an object of an unsupported type is encountered. + """ + + def default(self, obj): + if isinstance(obj, Enum): + return obj.name + return json.JSONEncoder.default(self, obj) diff --git a/src/cc_sdk/logger.py b/src/cc_sdk/logger.py new file mode 100644 index 0000000..a5e40bb --- /dev/null +++ b/src/cc_sdk/logger.py @@ -0,0 +1,81 @@ +from datetime import date +import sys +from attrs import define, field, validators +from .error import ErrorLevel +from .message import Message +from .error import Error +from .status import Status + + +@define(auto_attribs=True) +class Logger: + """ + A class that represents a logger for the sdk. + + Attributes: + - error_level : ErrorLevel + The current error level of the logger. private + - sender : str + The sender of the current log. 
+
+    Methods:
+    - set_error_level(level): sets the current error level for the logger to
+      use. Only errors at this level or more severe levels will be reported.
+    - log_message(message): logs a message from the sdk. Currently prints
+      to stdout
+    - log_error(error): logs an error from the sdk. Currently prints
+      to stderr
+    - report_status(report): logs a status object from the sdk. Currently prints
+      to stdout
+
+    Raises:
+    - TypeError:
+        If the wrong type of object is set for an attribute.
+    """
+
+    _error_level: ErrorLevel = field(validator=[validators.instance_of(ErrorLevel)])
+    _sender: str = field(validator=[validators.instance_of(str)])
+
+    def set_error_level(self, level: ErrorLevel) -> None:
+        self._error_level = level
+
+    def log_message(self, message: Message) -> None:
+        today = date.today()
+        formatted_date = today.strftime("%Y-%m-%d")
+        line = self._sender + ":" + formatted_date + "\n\t" + message.message + "\n"
+        sys.stdout.write(line)
+
+    def log_error(self, error: Error) -> None:
+        if (
+            error.error_level >= self._error_level
+            and self._error_level != ErrorLevel.DISABLED
+        ):
+            today = date.today()
+            formatted_date = today.strftime("%Y-%m-%d")
+            line = (
+                self._sender
+                + " issues a "
+                + error.error_level.name
+                + " error:"
+                + formatted_date
+                + "\n\t"
+                + error.error
+                + "\n"
+            )
+            sys.stderr.write(line)
+
+    def report_status(self, report: Status) -> None:
+        today = date.today()
+        formatted_date = today.strftime("%Y-%m-%d")
+        line = (
+            self._sender
+            + ":"
+            + report.status_level.name
+            + ":"
+            + formatted_date
+            + "\n\t"
+            + str(report.progress)
+            + " percent complete."
+            + "\n"
+        )
+        sys.stdout.write(line)
diff --git a/src/cc_sdk/message.py b/src/cc_sdk/message.py
new file mode 100644
index 0000000..45a3416
--- /dev/null
+++ b/src/cc_sdk/message.py
@@ -0,0 +1,32 @@
+import json
+from attrs import define, field, validators, asdict
+
+
+@define(auto_attribs=True, frozen=True)
+class Message:
+    """
+    A class that represents a message for the logger.
+
+    Attributes:
+    - message : str
+        The message as a string. readonly
+
+    Methods:
+    - serialize(): Returns a JSON string representation of the attributes.
+
+    Raises:
+    - TypeError:
+        If the wrong type of object is set for an attribute.
+    - FrozenInstanceError:
+        If any attribute is written to.
+    """
+
+    message: str = field(validator=[validators.instance_of(str)])
+
+    def serialize(self) -> str:
+        """
+        Serializes the class as a json string
+        Returns:
+        - str: JSON string representation of the attributes
+        """
+        return json.dumps(asdict(self))
diff --git a/src/cc_sdk/object_state.py b/src/cc_sdk/object_state.py
new file mode 100644
index 0000000..ae0326c
--- /dev/null
+++ b/src/cc_sdk/object_state.py
@@ -0,0 +1,22 @@
+from enum import Enum
+
+
+class ObjectState(Enum):
+    """
+    The ObjectState class is an enum representing the storage state of the object.
+    The following states are available:
+
+    Memory: Object stored in memory
+    LocalDisk: Object stored on local filesystem
+    RemoteDisk: TODO
+
+    Each state has an associated integer value.
+
+    The enum values serialize to a string representation of the enum name,
+    instead of the integer value, to improve readability and prevent errors
+    when deserializing.
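+
+    For example, serialized with the EnumEncoder added in this patch:
+
+    ```
+    import json
+    from cc_sdk.json_encoder import EnumEncoder
+
+    json.dumps({"state": ObjectState.Memory}, cls=EnumEncoder)
+    # -> '{"state": "Memory"}'
+    ```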
+ """ + + Memory = 0 + LocalDisk = 1 + # RemoteDisk = 2 diff --git a/src/cc_sdk/payload.py b/src/cc_sdk/payload.py index 7fdb311..f3a3035 100644 --- a/src/cc_sdk/payload.py +++ b/src/cc_sdk/payload.py @@ -1,23 +1,10 @@ -from attr import define, field, setters, asdict, validators import json +from typing import Any, Type +from attr import define, field, setters, asdict, validators from .data_source import DataSource from .data_store import DataStore -from .store_type import StoreTypeEncoder -from .validators import validate_serializable - - -def validate_stores(instance, attribute, value): - if not isinstance(value, list): - raise ValueError(f"stores must be a list of DataStores") - if not all(isinstance(store, DataStore) for store in value): - raise ValueError(f"stores must be a list of DataStores") - - -def validate_sources(instance, attribute, value): - if not isinstance(value, list): - raise ValueError(f"attribute must be a list of DataSources") - if not all(isinstance(ds, DataSource) for ds in value): - raise ValueError(f"attribute must be a list of DataSources") +from .json_encoder import EnumEncoder +from .validators import validate_homogeneous_list, validate_serializable @define(auto_attribs=True) @@ -26,7 +13,7 @@ class Payload: A class that represents a payload for cloud compute. Attributes: - - attributes : dict[str, any] + - attributes : dict[str, Any] A dictionary of attributes for the payload. readonly - stores : list[DataStore] A list of DataStores. @@ -47,28 +34,71 @@ class Payload: If any readonly attribute is written to. """ - attributes: dict[str, any] = field( + attributes: dict[str, Any] = field( on_setattr=setters.frozen, validator=[validators.instance_of(dict), validate_serializable], ) - stores: list[DataStore] = field(validator=[validate_stores]) + stores: list[DataStore] = field( + validator=[ + lambda instance, attribute, value: validate_homogeneous_list( + instance, attribute, value, DataStore + ) + ] + ) inputs: list[DataSource] = field( on_setattr=setters.frozen, - validator=[validate_sources], + validator=[ + lambda instance, attribute, value: validate_homogeneous_list( + instance, attribute, value, DataSource + ) + ], ) outputs: list[DataSource] = field( on_setattr=setters.frozen, - validator=[validate_sources], + validator=[ + lambda instance, attribute, value: validate_homogeneous_list( + instance, attribute, value, DataSource + ) + ], ) - def set_store(self, index: int, store: DataStore): + def set_store(self, index: int, store: DataStore) -> None: self.stores[index] = store - def serialize(self): + def serialize(self) -> str: """ Serializes the class as a json string Returns: - str: JSON string representation of the attributes """ - return json.dumps(asdict(self), cls=StoreTypeEncoder) + return json.dumps(asdict(self, recurse=True), cls=EnumEncoder) + + @staticmethod + def from_json(json_str: str): + """ + Converts a JSON string to a Payload object. + + Args: + json_str (str): The JSON string to convert. + + Returns: + Payload: The deserialized Payload object. + + Raises: + JSONDecodeError: If the JSON string cannot be decoded. 
+ + Example: + >>> json_str = '{"attributes": {"attr1": "value1", "attr2": 2}, "stores": [{"name": "store1", "id": "store_id1", "parameters": {"param1": "value1"}, "store_type": "S3", "ds_profile": "profile1", "session": null}], "inputs": [{"name": "input1", "id": "input_id1", "store_name": "store1", "paths": ["/path/to/data1"]}], "outputs": [{"name": "output1", "id": "output_id1", "store_name": "store1", "paths": ["/path/to/output1"]}]}' + >>> payload = Payload.from_json(json_str) + """ + json_dict = json.loads(json_str) + stores = [DataStore(**store) for store in json_dict["stores"]] + inputs = [DataSource(**input) for input in json_dict["inputs"]] + outputs = [DataSource(**output) for output in json_dict["outputs"]] + return Payload( + attributes=json_dict["attributes"], + stores=stores, + inputs=inputs, + outputs=outputs, + ) diff --git a/src/cc_sdk/pull_object_input.py b/src/cc_sdk/pull_object_input.py new file mode 100644 index 0000000..30289d7 --- /dev/null +++ b/src/cc_sdk/pull_object_input.py @@ -0,0 +1,35 @@ +from attr import define, field, validators +from .store_type import StoreType + + +@define(auto_attribs=True, frozen=True) +class PullObjectInput: + """ + A class that represents an input to the CCStore.PullObject command. + + Attributes: + - file_name : str + The file name of the object to pull not including its extension. readonly + - file_extension : str + The extension of the file to pull. readonly + - source_store_type : StoreType + The type of data store the object will be pulled from. readonly + - source_root_path : string + The source path of the object on the data store (does not include file name or extension and must not include a trailing slash). readonly + - dest_root_path : string + The destination path of the object (does not include file name or extension and must not include a trailing slash). readonly + + Raises: + - TypeError: + If the wrong type of object is set for an attribute. + - FrozenInstanceError: + If any readonly attribute is written to. + """ + + file_name: str = field(validator=[validators.instance_of(str)]) + file_extension: str = field(validator=[validators.instance_of(str)]) + source_store_type: StoreType = field( + validator=[validators.instance_of(StoreType)], + ) + source_root_path: str = field(validator=validators.instance_of(str)) + dest_root_path: str = field(validator=validators.instance_of(str)) diff --git a/src/cc_sdk/put_object_input.py b/src/cc_sdk/put_object_input.py new file mode 100644 index 0000000..d1db693 --- /dev/null +++ b/src/cc_sdk/put_object_input.py @@ -0,0 +1,42 @@ +from attr import define, field, validators +from .store_type import StoreType +from .object_state import ObjectState + + +@define(auto_attribs=True, frozen=True) +class PutObjectInput: + """ + A class that represents an input to the CCStore.PutObject command. + + Attributes: + - file_name : str + The file name of the object to put not including its extension. readonly + - file_extension : str + The extension of the file to put. readonly + - dest_store_type : StoreType + The type of data store the object will be put in. readonly + - object_state : ObjectState + The storage state of the object. readonly + - data : bytes + The raw bytes of the data. readonly + - source_path : string + The source path of the object (includes file name or extension). readonly + - dest_path : string + The destination path of the object on the data store (includes file name or extension). readonly + + Raises: + - TypeError: + If the wrong type of object is set for an attribute. 
+ - FrozenInstanceError: + If any readonly attribute is written to. + """ + + file_name: str = field(validator=[validators.instance_of(str)]) + file_extension: str = field(validator=[validators.instance_of(str)]) + dest_store_type: StoreType = field( + validator=[validators.instance_of(StoreType)], + ) + object_state: ObjectState = field(validator=validators.instance_of(ObjectState)) + data: bytes = field(validator=validators.instance_of(bytes)) + source_path: str = field(validator=validators.instance_of(str)) + dest_path: str = field(validator=validators.instance_of(str)) diff --git a/src/cc_sdk/seed_set.py b/src/cc_sdk/seed_set.py new file mode 100644 index 0000000..37a5835 --- /dev/null +++ b/src/cc_sdk/seed_set.py @@ -0,0 +1,35 @@ +import json +from attrs import define, field, validators, asdict + + +@define(auto_attribs=True, frozen=True) +class SeedSet: + """ + A class that represents a seed set for a computation. + + Attributes: + - event_seed : int + The seed as an integer. readonly + - realization_seed : int + The realized seed as an integer. readonly + + Methods: + - serialize(): Returns a JSON string representation of the attributes. + + Raises: + - TypeError: + If the wrong type of object is set for an attribute. + - FrozenInstanceError: + If any attribute is written to. + """ + + event_seed: int = field(validator=[validators.instance_of(int)]) + realization_seed: int = field(validator=[validators.instance_of(int)]) + + def serialize(self) -> str: + """ + Serializes the class as a json string + Returns: + - str: JSON string representation of the attributes + """ + return json.dumps(asdict(self)) diff --git a/src/cc_sdk/status.py b/src/cc_sdk/status.py new file mode 100644 index 0000000..3ae21ac --- /dev/null +++ b/src/cc_sdk/status.py @@ -0,0 +1,80 @@ +from enum import Enum +import json +from attrs import define, field, validators, asdict +from .json_encoder import EnumEncoder +from .validators import validate_range + + +class StatusLevel(Enum): + """ + The StatusLevel class is an enum representing different status levels of + a computation. The following status levels are available: + + Computing: currently computing + FAILED: failed to complete + SUCCEEDED: completed successfully + + The enum values serialize to a string representation of the enum name, + instead of the integer value, to improve readability and prevent errors + when deserializing. + """ + + COMPUTING = 0 + FAILED = 1 + SUCCEEDED = 2 + + +def convert_status_level(cls, fields): + results = [] + for field in fields: + if field.converter is not None: + results.append(field) + continue + if field.type in {StatusLevel, "status_level"}: + converter = ( + lambda s: StatusLevel.__members__[s] if isinstance(s, str) else s + ) + else: + converter = None + results.append(field.evolve(converter=converter)) + return results + + +@define(auto_attribs=True, frozen=True, field_transformer=convert_status_level) +class Status: + """ + A class that represents a status for a computation. + + Attributes: + - progress : int + The progress of the computation as a percent 0-100. readonly + - status : StatusLevel + The status level of the computation. readonly + + Methods: + - serialize(): Returns a JSON string representation of the attributes. + + Raises: + - TypeError: + If the wrong type of object is set for an attribute. + - FrozenInstanceError: + If any attribute is written to. 
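+
+    Example of the serialized form (produced by the serializer below):
+
+    ```
+    Status(50, StatusLevel.COMPUTING).serialize()
+    # -> '{"progress": 50, "status_level": "COMPUTING"}'
+    ```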
+ """ + + progress: int = field( + validator=[ + validators.instance_of(int), + lambda instance, attribute, value: validate_range( + instance, attribute, value, 0, 100 + ), + ] + ) + status_level: StatusLevel = field(validator=[validators.instance_of(StatusLevel)]) + + def serialize(self) -> str: + """ + Serializes the class as a json string + Returns: + - str: JSON string representation of the attributes + """ + return json.dumps(asdict(self), cls=EnumEncoder) diff --git a/src/cc_sdk/store_type.py b/src/cc_sdk/store_type.py index 9a783be..577a22d 100644 --- a/src/cc_sdk/store_type.py +++ b/src/cc_sdk/store_type.py @@ -1,5 +1,5 @@ from enum import Enum -import json + class StoreType(Enum): """ @@ -17,33 +17,8 @@ class StoreType(Enum): The enum values serialize to a string representation of the enum name, instead of the integer value, to improve readability and prevent errors when deserializing. """ + S3 = 0 WS = 1 RDBMS = 2 EBS = 3 - -class StoreTypeEncoder(json.JSONEncoder): - """ - The StoreTypeEncoder is a custom JSON encoder that extends the default json.JSONEncoder class to handle the serialization of StoreType Enum values. - - It overrides the default() method of the JSONEncoder class to handle StoreType objects by returning their name attribute instead of the object itself. This ensures that StoreType objects are serialized to a JSON string that represents their name. - - Usage: - To use this encoder, pass it as the cls argument when calling json.dumps(), as shown below: - - ``` - import json - from .store_type import StoreTypeEncoder - - data = {"store_type": StoreType.S3} - json_string = json.dumps(data, cls=StoreTypeEncoder) - ``` - - Raises: - - TypeError: - If an object of an unsupported type is encountered. - """ - def default(self, obj): - if isinstance(obj, Enum): - return obj.name - return json.JSONEncoder.default(self, obj) \ No newline at end of file diff --git a/src/cc_sdk/validators.py b/src/cc_sdk/validators.py index c242fe2..b6aab70 100644 --- a/src/cc_sdk/validators.py +++ b/src/cc_sdk/validators.py @@ -17,16 +17,62 @@ def validate_serializable(instance, attribute, value): Raises: ------- ValueError: - If a non-serializable object is set for the attribute. + If an incorrect value is set for the attribute. """ try: json.dumps(value) except TypeError: - raise ValueError(f"Session attribute must be JSON serializable") + raise ValueError(f"Attributes must be JSON serializable") -def validate_string_list(instance, attribute, value): +def validate_range(instance, attribute, value, lower_bound, upper_bound): + """ + A validator that ensures an value is great than or less than a set of + bounding variables. + + Parameters: + ----------- + instance : class + The instance of the class. + attribute : str + The name of the attribute. + value : any + The value of the attribute. + lower_bound : any + The lower bound on the value of the attribute. + upper_bound : any + The upper bound on the value of the attribute. + + Raises: + ------- + ValueError: + If an incorrect value is set for the attribute. + """ + if value > upper_bound or value < lower_bound: + raise ValueError("Value out of bounds") + + +def validate_homogeneous_list(instance, attribute, value, type): + """ + A validator that ensures an attribute is a list of strings + + Parameters: + ----------- + instance : class + The instance of the class. + attribute : str + The name of the attribute. + value : any + The value of the attribute. + type: Type + The type of the elements of the list. 
+ + Raises: + ------- + ValueError: + If an incorrect value is set for the attribute. + """ if not isinstance(value, list): - raise ValueError(f"paths must be a list of strings") - if not all(isinstance(path, str) for path in value): - raise ValueError(f"paths must be a list of strings") + raise ValueError(f"{str(attribute)} must be a list of {str(type)}") + if not all(isinstance(path, type) for path in value): + raise ValueError(f"{str(attribute)} must be a list of {str(type)}") diff --git a/tests/test_aws_config.py b/tests/test_aws_config.py index 0c5e87f..f965db5 100644 --- a/tests/test_aws_config.py +++ b/tests/test_aws_config.py @@ -1,5 +1,5 @@ +import json import pytest -from attr import asdict from cc_sdk import AWSConfig @@ -60,15 +60,6 @@ def test_setters(aws_config): def test_serialize(aws_config): - expected = { - "aws_config_name": "test", - "aws_access_key_id": "my_access_key", - "aws_secret_access_key_id": "my_secret_key", - "aws_region": "us-west-2", - "aws_bucket": "my_bucket", - "aws_mock": True, - "aws_endpoint": "https://my-endpoint.com", - "aws_disable_ssl": True, - "aws_force_path_style": True, - } - assert asdict(aws_config) == expected + expected_json = '{"aws_config_name": "test", "aws_access_key_id": "my_access_key", "aws_secret_access_key_id": "my_secret_key", "aws_region": "us-west-2", "aws_bucket": "my_bucket", "aws_mock": true, "aws_endpoint": "https://my-endpoint.com", "aws_disable_ssl": true, "aws_force_path_style": true}' + assert aws_config.serialize() == expected_json + assert json.loads(aws_config.serialize()) == json.loads(expected_json) diff --git a/tests/test_cc_store_s3.py b/tests/test_cc_store_s3.py new file mode 100644 index 0000000..3c7c696 --- /dev/null +++ b/tests/test_cc_store_s3.py @@ -0,0 +1,400 @@ +import json +import pytest +from cc_sdk import ( + CCStoreS3, + AWSConfig, + StoreType, + environment_variables, + constants, + PutObjectInput, + PullObjectInput, + GetObjectInput, + ObjectState, + Payload, + DataSource, + DataStore, +) +from unittest.mock import Mock +import tempfile +import os +import shutil +from moto import mock_s3 +import boto3 + + +def test_initialize(monkeypatch): + monkeypatch.setenv( + environment_variables.CC_PROFILE + + "_" + + environment_variables.AWS_ACCESS_KEY_ID, + "my_access_key", + ) + monkeypatch.setenv( + environment_variables.CC_PROFILE + + "_" + + environment_variables.AWS_SECRET_ACCESS_KEY, + "my_secret_key", + ) + monkeypatch.setenv( + environment_variables.CC_PROFILE + + "_" + + environment_variables.AWS_DEFAULT_REGION, + "us-west-2", + ) + monkeypatch.setenv( + environment_variables.CC_PROFILE + "_" + environment_variables.AWS_S3_BUCKET, + "my_bucket", + ) + monkeypatch.setenv( + environment_variables.CC_PROFILE + "_" + environment_variables.S3_ENDPOINT, + "http://localhost:9000", + ) + monkeypatch.setenv(environment_variables.CC_MANIFEST_ID, "my_manifest") + monkeypatch.setenv(environment_variables.CC_ROOT, "my_root") + monkeypatch.setenv( + environment_variables.CC_PROFILE + "_" + environment_variables.S3_MOCK, "True" + ) + + # Set up mock for create_s3_client + mock_create_s3_client = Mock() + monkeypatch.setattr(CCStoreS3, "create_s3_client", mock_create_s3_client) + store = CCStoreS3() + # Assert that create_s3_client was called with the correct arguments + mock_create_s3_client.assert_called_with(store.config) + # Assert that the instance variables were set correctly + assert isinstance(store.config, AWSConfig) + assert store.config.aws_access_key_id == "my_access_key" + assert 
store.config.aws_secret_access_key_id == "my_secret_key" + assert store.config.aws_region == "us-west-2" + assert store.config.aws_bucket == "my_bucket" + assert store.config.aws_endpoint == "http://localhost:9000" + assert store.manifest_id == "my_manifest" + assert store.local_root_path == constants.LOCAL_ROOT_PATH + assert store.bucket == "my_bucket" + assert store.root == "my_root" + assert store.store_type == StoreType.S3 + + +def test_initialize_missing_required_env_var(): + with pytest.raises(EnvironmentError): + CCStoreS3() + + +@pytest.fixture +def store(monkeypatch): + with mock_s3(): + monkeypatch.setenv( + environment_variables.CC_PROFILE + + "_" + + environment_variables.AWS_ACCESS_KEY_ID, + "my_access_key", + ) + monkeypatch.setenv( + environment_variables.CC_PROFILE + + "_" + + environment_variables.AWS_SECRET_ACCESS_KEY, + "my_secret_key", + ) + monkeypatch.setenv( + environment_variables.CC_PROFILE + + "_" + + environment_variables.AWS_DEFAULT_REGION, + "us-west-2", + ) + monkeypatch.setenv( + environment_variables.CC_PROFILE + + "_" + + environment_variables.AWS_S3_BUCKET, + "my_bucket", + ) + monkeypatch.setenv( + environment_variables.CC_PROFILE + "_" + environment_variables.S3_ENDPOINT, + "http://localhost:9000", + ) + monkeypatch.setenv(environment_variables.CC_MANIFEST_ID, "my_manifest") + monkeypatch.setenv(environment_variables.CC_ROOT, "/tmp") + monkeypatch.setenv( + environment_variables.CC_PROFILE + "_" + environment_variables.S3_MOCK, + "True", + ) + monkeypatch.setenv( + environment_variables.CC_PROFILE + + "_" + + environment_variables.S3_DISABLE_SSL, + "True", + ) + monkeypatch.setenv( + environment_variables.CC_PROFILE + + "_" + + environment_variables.S3_FORCE_PATH_STYLE, + "False", + ) + + # create a mock S3 client + s3 = boto3.client("s3") + # create a mock S3 bucket + s3.create_bucket(Bucket="my_bucket") + # create and return an instance of the Store class + store = CCStoreS3() + + yield store + response = s3.list_objects_v2(Bucket="my_bucket") + if "Contents" in response: + delete_keys = [{"Key": obj["Key"]} for obj in response["Contents"]] + s3.delete_objects(Bucket="my_bucket", Delete={"Objects": delete_keys}) + s3.delete_bucket(Bucket="my_bucket") + + +def test_handles_data_store_type(store): + assert store.handles_data_store_type(StoreType.S3) is True + + +def test_put_object_local_disk_file_not_found(store): + input_data = { + "file_name": "test_file", + "file_extension": "txt", + "dest_store_type": StoreType.S3, + "object_state": ObjectState.LocalDisk, + "data": bytes(), + "source_path": "/no/file/here", + "dest_path": "place/to/put/file", + } + with pytest.raises(FileNotFoundError): + store.put_object(PutObjectInput(**input_data)) + + +def test_put_object_local_disk_error_reading_file(store): + # Create a temporary file + with tempfile.NamedTemporaryFile(delete=False) as tmp_file: + tmp_file.write(b"Hello, world!") + tmp_file.flush() + # Set the file permissions to read-only + os.chmod(tmp_file.name, 0o400) + + input_data = { + "file_name": os.path.basename(tmp_file.name), + "file_extension": "", + "dest_store_type": StoreType.S3, + "object_state": ObjectState.LocalDisk, + "data": bytes(), + "source_path": os.path.dirname(tmp_file.name), + "dest_path": "place/to/put/file", + } + + # Check that an IOError is raised when `store.put_object` is called + with pytest.raises(IOError): + store.put_object(PutObjectInput(**input_data)) + + # Set the file permissions back to read-write + os.chmod(tmp_file.name, 0o600) + + # Clean up the temporary file + 
os.remove(tmp_file.name) + + +@pytest.fixture +def temp_dir(): + temp_dir = "/tmp/my_manifest" + if not os.path.exists(temp_dir): + os.makedirs(temp_dir) + yield temp_dir + shutil.rmtree(temp_dir) + + +def test_put_object_local_disk_success(store, temp_dir): + # Create a temporary file + with tempfile.NamedTemporaryFile(dir=temp_dir) as tmp_file: + tmp_file.write(b"Hello, world!") + tmp_file.flush() + object_key = "place/to/put/file/" + os.path.basename(tmp_file.name) + input_data = { + "file_name": os.path.basename(tmp_file.name), + "file_extension": "", + "dest_store_type": StoreType.S3, + "object_state": ObjectState.LocalDisk, + "data": bytes(), + "source_path": tmp_file.name, + "dest_path": object_key, + } + assert store.put_object(PutObjectInput(**input_data)) is True + s3 = CCStoreS3.create_s3_client(store.config) + objects = s3.list_objects_v2(Bucket="my_bucket", Prefix=object_key) + assert any( + obj["Key"] == object_key for obj in objects.get("Contents", []) + ), f"Object '{object_key}' does not exist in bucket '{store.config.bucket}'" + response = s3.get_object(Bucket="my_bucket", Key=object_key) + assert ( + response["Body"].read() == b"Hello, world!" + ), f"Object '{object_key}' in bucket '{store.config.bucket}' has unexpected contents" + + +def test_put_object_memory_success(store): + object_key = "place/to/put/file/" + "memory_put_test" + input_data = { + "file_name": "memory_put_test", + "file_extension": "", + "dest_store_type": StoreType.S3, + "object_state": ObjectState.Memory, + "data": b"Hello, world!", + "source_path": "memory", + "dest_path": object_key, + } + assert store.put_object(PutObjectInput(**input_data)) is True + s3 = CCStoreS3.create_s3_client(store.config) + objects = s3.list_objects_v2(Bucket="my_bucket", Prefix=object_key) + assert any( + obj["Key"] == object_key for obj in objects.get("Contents", []) + ), f"Object '{object_key}' does not exist in bucket '{store.config.bucket}'" + response = s3.get_object(Bucket="my_bucket", Key=object_key) + assert ( + response["Body"].read() == b"Hello, world!" + ), f"Object '{object_key}' in bucket '{store.config.bucket}' has unexpected contents" + + +def test_pull_object_success(store, temp_dir): + # put the object + object_key = "place/to/put/file/" + "memory_put_test" + input_data = { + "file_name": "memory_put_test", + "file_extension": "", + "dest_store_type": StoreType.S3, + "object_state": ObjectState.Memory, + "data": b"Hello, world!", + "source_path": "memory", + "dest_path": object_key, + } + assert store.put_object(PutObjectInput(**input_data)) is True + # pull the object + input_data = { + "file_name": "memory_put_test", + "file_extension": "", + "source_store_type": StoreType.S3, + "source_root_path": "place/to/put/file", + "dest_root_path": temp_dir, + } + assert store.pull_object(PullObjectInput(**input_data)) is True + pulled_filepath = os.path.join(temp_dir, "memory_put_test") + with open(pulled_filepath, "rb") as f: + contents = f.read() + assert ( + contents == b"Hello, world!" 
+ ), f"File at '{pulled_filepath}' has unexpected contents" + + +def test_pull_object_error(store, temp_dir): + # pull the object that doesn't exist + input_data = { + "file_name": "not_a_real_file", + "file_extension": "", + "source_store_type": StoreType.S3, + "source_root_path": "place/to/put/file", + "dest_root_path": temp_dir, + } + assert store.pull_object(PullObjectInput(**input_data)) is False + + +def test_get_object_success(store): + # put the object + object_key = "place/to/put/file/" + "memory_put_test" + input_data = { + "file_name": "memory_put_test", + "file_extension": "", + "dest_store_type": StoreType.S3, + "object_state": ObjectState.Memory, + "data": b"Hello, world!", + "source_path": "memory", + "dest_path": object_key, + } + assert store.put_object(PutObjectInput(**input_data)) is True + # pull the object + input_data = { + "file_name": "memory_put_test", + "file_extension": "", + "source_store_type": StoreType.S3, + "source_root_path": "place/to/put/file", + } + assert store.get_object(GetObjectInput(**input_data)) == b"Hello, world!" + + +def test_get_object_error(store): + with pytest.raises(Exception): + # get the object that doesn't exist + input_data = { + "file_name": "not_a_real_file", + "file_extension": "", + "source_store_type": StoreType.S3, + "source_root_path": "place/to/put/file", + } + _ = store.pull_object(PullObjectInput(**input_data)) + + +@pytest.fixture +def payload(): + return Payload( + attributes={"attr1": "value1", "attr2": 2}, + stores=[ + DataStore( + name="store1", + id="store_id1", + parameters={"param1": "value1"}, + store_type=StoreType.S3, + ds_profile="profile1", + ), + DataStore( + name="store2", + id="store_id2", + parameters={"param2": "value2"}, + store_type=StoreType.S3, + ds_profile="profile2", + ), + ], + inputs=[ + DataSource( + name="input1", + id="input_id1", + store_name="store1", + paths=["/path/to/data1"], + ), + DataSource( + name="input2", + id="input_id2", + store_name="store2", + paths=["/path/to/data2"], + ), + ], + outputs=[ + DataSource( + name="output1", + id="output_id1", + store_name="store1", + paths=["/path/to/output1"], + ), + DataSource( + name="output2", + id="output_id2", + store_name="store2", + paths=["/path/to/output2"], + ), + ], + ) + + +def test_read_json_model_payload_from_bytes(payload): + payload_bytes = b'{"attributes": {"attr1": "value1", "attr2": 2}, "stores": [{"name": "store1", "id": "store_id1", "parameters": {"param1": "value1"}, "store_type": "S3", "ds_profile": "profile1", "session": null}, {"name": "store2", "id": "store_id2", "parameters": {"param2": "value2"}, "store_type": "S3", "ds_profile": "profile2", "session": null}], "inputs": [{"name": "input1", "id": "input_id1", "store_name": "store1", "paths": ["/path/to/data1"]}, {"name": "input2", "id": "input_id2", "store_name": "store2", "paths": ["/path/to/data2"]}], "outputs": [{"name": "output1", "id": "output_id1", "store_name": "store1", "paths": ["/path/to/output1"]}, {"name": "output2", "id": "output_id2", "store_name": "store2", "paths": ["/path/to/output2"]}]}' + assert payload == CCStoreS3.read_json_model_payload_from_bytes(payload_bytes) + + +def test_get_payload(payload, store): + # Create a temporary file for the payload and put on S3 + path = store.root + "/" + store.manifest_id + "/" + constants.PAYLOAD_FILE_NAME + input_data = { + "file_name": constants.PAYLOAD_FILE_NAME, + "file_extension": "", + "dest_store_type": StoreType.S3, + "object_state": ObjectState.Memory, + "data": payload.serialize().encode(), + "source_path": 
"memory", + "dest_path": path, + } + assert store.put_object(PutObjectInput(**input_data)) is True + assert store.get_payload() == payload diff --git a/tests/test_config.py b/tests/test_config.py new file mode 100644 index 0000000..69d123f --- /dev/null +++ b/tests/test_config.py @@ -0,0 +1,55 @@ +import json +from pytest import fixture +from cc_sdk import AWSConfig, Config + + +@fixture +def config(): + aws_config = AWSConfig( + aws_config_name="test", + aws_access_key_id="my_access_key", + aws_secret_access_key_id="my_secret_key", + aws_region="us-west-2", + aws_bucket="my_bucket", + aws_mock=True, + aws_endpoint="https://my-endpoint.com", + aws_disable_ssl=True, + aws_force_path_style=True, + ) + return Config(aws_configs=[aws_config]) + + +def test_getters(config): + aws_config = AWSConfig( + aws_config_name="test", + aws_access_key_id="my_access_key", + aws_secret_access_key_id="my_secret_key", + aws_region="us-west-2", + aws_bucket="my_bucket", + aws_mock=True, + aws_endpoint="https://my-endpoint.com", + aws_disable_ssl=True, + aws_force_path_style=True, + ) + assert config.aws_configs == [aws_config] + + +def test_setters(config): + aws_config = AWSConfig( + aws_config_name="test", + aws_access_key_id="my_access_key", + aws_secret_access_key_id="my_secret_key", + aws_region="us-west-2", + aws_bucket="my_bucket", + aws_mock=True, + aws_endpoint="https://my-endpoint.com", + aws_disable_ssl=True, + aws_force_path_style=True, + ) + config.aws_configs = [aws_config] + + +def test_serialize(config): + expected_json = '{"aws_configs": [{"aws_config_name": "test", "aws_access_key_id": "my_access_key", "aws_secret_access_key_id": "my_secret_key", "aws_region": "us-west-2", "aws_bucket": "my_bucket", "aws_mock": true, "aws_endpoint": "https://my-endpoint.com", "aws_disable_ssl": true, "aws_force_path_style": true}]}' + assert config.serialize() == expected_json + assert json.loads(config.serialize()) == json.loads(expected_json) diff --git a/tests/test_data_source.py b/tests/test_data_source.py index 53e39a5..d0a1655 100644 --- a/tests/test_data_source.py +++ b/tests/test_data_source.py @@ -12,13 +12,6 @@ def data_source(): ) -def test_value_error(): - with pytest.raises(ValueError): - _ = DataSource( - name="test", id="123", store_name="test_store", paths=["path1", object()] - ) - - def test_getters(data_source): assert data_source.name == "test" assert data_source.id == "123" diff --git a/tests/test_data_store.py b/tests/test_data_store.py index 5edad50..afdac9a 100644 --- a/tests/test_data_store.py +++ b/tests/test_data_store.py @@ -14,49 +14,6 @@ def data_store(): ) -def test_type_errors(): - with pytest.raises(TypeError): - _ = DataStore( - name=object(), - id="123", - parameters={"param1": "value1", "param2": "value2"}, - store_type=StoreType.S3, - ds_profile="test_profile", - ) - with pytest.raises(TypeError): - _ = DataStore( - name="test", - id=object(), - parameters={"param1": "value1", "param2": "value2"}, - store_type=StoreType.S3, - ds_profile="test_profile", - ) - with pytest.raises(TypeError): - _ = DataStore( - name="test", - id="123", - parameters=object(), - store_type=StoreType.S3, - ds_profile="test_profile", - ) - with pytest.raises(TypeError): - _ = DataStore( - name="test", - id="123", - parameters={"param1": "value1", "param2": "value2"}, - store_type=object(), - ds_profile="test_profile", - ) - with pytest.raises(TypeError): - _ = DataStore( - name="test", - id="123", - parameters={"param1": "value1", "param2": "value2"}, - store_type=StoreType.S3, - 
ds_profile=object(), - ) - - def test_getters(data_store): assert data_store.name == "test" assert data_store.id == "123" @@ -90,16 +47,6 @@ def test_setters(data_store): def test_serialize(data_store): - # Serialize the DataStore object - serialized = data_store.serialize() - - # Deserialize the JSON string back into a dictionary - deserialized = json.loads(serialized) - - # Check that the dictionary has the same attribute values as the original DataStore object - assert deserialized["name"] == data_store.name - assert deserialized["id"] == data_store.id - assert deserialized["parameters"] == data_store.parameters - assert deserialized["store_type"] == "S3" - assert deserialized["ds_profile"] == data_store.ds_profile - assert deserialized["session"] == data_store.session + expected_json = '{"name": "test", "id": "123", "parameters": {"param1": "value1", "param2": "value2"}, "store_type": "S3", "ds_profile": "test_profile", "session": null}' + assert data_store.serialize() == expected_json + assert json.loads(data_store.serialize()) == json.loads(expected_json) diff --git a/tests/test_error.py b/tests/test_error.py new file mode 100644 index 0000000..3cfead7 --- /dev/null +++ b/tests/test_error.py @@ -0,0 +1,141 @@ +from cc_sdk.error import Error, ErrorLevel +import json +import pytest +from attr.exceptions import FrozenInstanceError + + +@pytest.fixture +def error(): + return Error("test error", ErrorLevel.DEBUG) + + +def test_getters(error): + assert error.error == "test error" + assert error.error_level == ErrorLevel.DEBUG + + +def test_setters(error): + with pytest.raises(FrozenInstanceError): + error.error = "new error" + + with pytest.raises(FrozenInstanceError): + error.error_level = ErrorLevel.INFO + + +def test_serialize(error): + expected_json = json.dumps({"error": "test error", "error_level": "DEBUG"}) + assert expected_json == error.serialize() + assert json.loads(error.serialize()) == json.loads(expected_json) + +def test_error_level_comparison(): + debug = ErrorLevel.DEBUG + info = ErrorLevel.INFO + warn = ErrorLevel.WARN + error = ErrorLevel.ERROR + fatal = ErrorLevel.FATAL + panic = ErrorLevel.PANIC + disabled = ErrorLevel.DISABLED + + # Test less than + assert debug < info + assert info < warn + assert warn < error + assert error < fatal + assert fatal < panic + assert panic < disabled + + # Test less than or equal + assert debug <= debug + assert debug <= info + assert info <= info + assert info <= warn + assert warn <= warn + assert warn <= error + assert error <= error + assert error <= fatal + assert fatal <= fatal + assert fatal <= panic + assert panic <= panic + assert panic <= disabled + assert disabled <= disabled + + # Test greater than + assert info > debug + assert warn > info + assert error > warn + assert fatal > error + assert panic > fatal + assert disabled > panic + + # Test greater than or equal + assert debug >= debug + assert info >= debug + assert info >= info + assert warn >= info + assert warn >= warn + assert error >= warn + assert error >= error + assert fatal >= error + assert fatal >= fatal + assert panic >= fatal + assert panic >= panic + assert disabled >= disabled + + # Test equality + assert debug == debug + assert info == info + assert warn == warn + assert error == error + assert fatal == fatal + assert panic == panic + assert disabled == disabled + + # Test inequality + assert debug != info + assert debug != warn + assert debug != error + assert debug != fatal + assert debug != panic + assert debug != disabled + + assert info != debug + 
assert info != warn + assert info != error + assert info != fatal + assert info != panic + assert info != disabled + + assert warn != debug + assert warn != info + assert warn != error + assert warn != fatal + assert warn != panic + assert warn != disabled + + assert error != debug + assert error != info + assert error != warn + assert error != fatal + assert error != panic + assert error != disabled + + assert fatal != debug + assert fatal != info + assert fatal != warn + assert fatal != error + assert fatal != panic + assert fatal != disabled + + assert panic != debug + assert panic != info + assert panic != warn + assert panic != error + assert panic != fatal + assert panic != disabled + + assert disabled != debug + assert disabled != info + assert disabled != warn + assert disabled != error + assert disabled != fatal + assert disabled != panic diff --git a/tests/test_get_object_input.py b/tests/test_get_object_input.py new file mode 100644 index 0000000..9f18454 --- /dev/null +++ b/tests/test_get_object_input.py @@ -0,0 +1,35 @@ +from attr.exceptions import FrozenInstanceError +from pytest import fixture, raises +from cc_sdk.get_object_input import GetObjectInput +from cc_sdk.store_type import StoreType + + +@fixture +def input_obj(): + # Fixture to create a GetObjectInput object with valid arguments + return GetObjectInput( + file_name="test_file", + file_extension=".txt", + source_store_type=StoreType.S3, + source_root_path="/path/to/object", + ) + + +def test_getters(input_obj): + # Test the getters for a GetObjectInput object + assert input_obj.file_name == "test_file" + assert input_obj.file_extension == ".txt" + assert input_obj.source_store_type == StoreType.S3 + assert input_obj.source_root_path == "/path/to/object" + + +def test_setters(input_obj): + # Test that readonly attributes raise a FrozenInstanceError when written to + with raises(FrozenInstanceError): + input_obj.file_name = "new_name" + with raises(FrozenInstanceError): + input_obj.file_extension = ".md" + with raises(FrozenInstanceError): + input_obj.source_store_type = StoreType.S3 + with raises(FrozenInstanceError): + input_obj.source_root_path = "/new/path/to/object" diff --git a/tests/test_logger.py b/tests/test_logger.py new file mode 100644 index 0000000..65ee237 --- /dev/null +++ b/tests/test_logger.py @@ -0,0 +1,48 @@ +import sys +from datetime import date +from io import StringIO +from cc_sdk.logger import Logger +from cc_sdk.message import Message +from cc_sdk.error import Error, ErrorLevel +from cc_sdk.status import Status, StatusLevel + + +def test_logger_set_error_level(): + logger = Logger(ErrorLevel.INFO, "test_sender") + logger.set_error_level(ErrorLevel.WARN) + assert logger._error_level == ErrorLevel.WARN + + +def test_logger_log_message(): + logger = Logger(ErrorLevel.INFO, "test_sender") + message = Message("test message") + expected_output = ( + f"test_sender:{date.today().strftime('%Y-%m-%d')}\n\ttest message\n" + ) + with StringIO() as output: + sys.stdout = output + logger.log_message(message) + assert output.getvalue() == expected_output + sys.stdout = sys.__stdout__ + + +def test_logger_log_error(): + logger = Logger(ErrorLevel.INFO, "test_sender") + error = Error("test error", ErrorLevel.ERROR) + expected_output = f"test_sender issues a ERROR error:{date.today().strftime('%Y-%m-%d')}\n\ttest error\n" + with StringIO() as output: + sys.stderr = output + logger.log_error(error) + assert output.getvalue() == expected_output + sys.stderr = sys.__stderr__ + + +def test_logger_report_status(): + 
logger = Logger(ErrorLevel.INFO, "test_sender") + status = Status(50, StatusLevel.COMPUTING) + expected_output = f"test_sender:COMPUTING:{date.today().strftime('%Y-%m-%d')}\n\t50 percent complete.\n" + with StringIO() as output: + sys.stdout = output + logger.report_status(status) + assert output.getvalue() == expected_output + sys.stdout = sys.__stdout__ diff --git a/tests/test_message.py b/tests/test_message.py new file mode 100644 index 0000000..086c3d1 --- /dev/null +++ b/tests/test_message.py @@ -0,0 +1,24 @@ +from cc_sdk.message import Message +import json +import pytest +from attr.exceptions import FrozenInstanceError + + +@pytest.fixture +def message(): + return Message("test message") + + +def test_getters(message): + assert message.message == "test message" + + +def test_setters(message): + with pytest.raises(FrozenInstanceError): + message.message = "new message" + + +def test_serialize(message): + expected_json = json.dumps({"message": "test message"}) + assert expected_json == message.serialize() + assert json.loads(message.serialize()) == json.loads(expected_json) diff --git a/tests/test_payload.py b/tests/test_payload.py index 6608ea5..93eb05c 100644 --- a/tests/test_payload.py +++ b/tests/test_payload.py @@ -54,39 +54,28 @@ def payload(): ) -def test_attributes_getter(payload): +def test_getters(payload): + # attributes assert payload.attributes == {"attr1": "value1", "attr2": 2} - - -def test_stores_getter(payload): + # stores assert len(payload.stores) == 2 assert payload.stores[0].name == "store1" assert payload.stores[1].name == "store2" - - -def test_inputs_getter(payload): + # inputs assert len(payload.inputs) == 2 assert payload.inputs[0].name == "input1" assert payload.inputs[1].name == "input2" - - -def test_outputs_getter(payload): + # outputs assert len(payload.outputs) == 2 assert payload.outputs[0].name == "output1" assert payload.outputs[1].name == "output2" -def test_attributes_setter_frozen(payload): +def test_setter_frozen(payload): with pytest.raises(AttributeError): payload.attributes = {"attr1": "new_value1", "attr2": 3} - - -def test_inputs_setter_frozen(payload): with pytest.raises(AttributeError): payload.inputs[0].paths = ["/path/to/new_input"] - - -def test_outputs_setter_frozen(payload): with pytest.raises(AttributeError): payload.outputs[0].paths = ["/path/to/new_output"] @@ -104,6 +93,11 @@ def test_stores_setter(payload): def test_serialize(payload): - serialized = payload.serialize() - expected = '{"attributes": {"attr1": "value1", "attr2": 2}, "stores": [{"name": "store1", "id": "store_id1", "parameters": {"param1": "value1"}, "store_type": "S3", "ds_profile": "profile1", "session": null}, {"name": "store2", "id": "store_id2", "parameters": {"param2": "value2"}, "store_type": "S3", "ds_profile": "profile2", "session": null}], "inputs": [{"name": "input1", "id": "input_id1", "store_name": "store1", "paths": ["/path/to/data1"]}, {"name": "input2", "id": "input_id2", "store_name": "store2", "paths": ["/path/to/data2"]}], "outputs": [{"name": "output1", "id": "output_id1", "store_name": "store1", "paths": ["/path/to/output1"]}, {"name": "output2", "id": "output_id2", "store_name": "store2", "paths": ["/path/to/output2"]}]}' - assert serialized == expected + expected_json = '{"attributes": {"attr1": "value1", "attr2": 2}, "stores": [{"name": "store1", "id": "store_id1", "parameters": {"param1": "value1"}, "store_type": "S3", "ds_profile": "profile1", "session": null}, {"name": "store2", "id": "store_id2", "parameters": {"param2": "value2"}, 
"store_type": "S3", "ds_profile": "profile2", "session": null}], "inputs": [{"name": "input1", "id": "input_id1", "store_name": "store1", "paths": ["/path/to/data1"]}, {"name": "input2", "id": "input_id2", "store_name": "store2", "paths": ["/path/to/data2"]}], "outputs": [{"name": "output1", "id": "output_id1", "store_name": "store1", "paths": ["/path/to/output1"]}, {"name": "output2", "id": "output_id2", "store_name": "store2", "paths": ["/path/to/output2"]}]}' + assert payload.serialize() == expected_json + assert json.loads(payload.serialize()) == json.loads(expected_json) + + +def test_from_json(payload): + payload_str = '{"attributes": {"attr1": "value1", "attr2": 2}, "stores": [{"name": "store1", "id": "store_id1", "parameters": {"param1": "value1"}, "store_type": "S3", "ds_profile": "profile1", "session": null}, {"name": "store2", "id": "store_id2", "parameters": {"param2": "value2"}, "store_type": "S3", "ds_profile": "profile2", "session": null}], "inputs": [{"name": "input1", "id": "input_id1", "store_name": "store1", "paths": ["/path/to/data1"]}, {"name": "input2", "id": "input_id2", "store_name": "store2", "paths": ["/path/to/data2"]}], "outputs": [{"name": "output1", "id": "output_id1", "store_name": "store1", "paths": ["/path/to/output1"]}, {"name": "output2", "id": "output_id2", "store_name": "store2", "paths": ["/path/to/output2"]}]}' + assert payload == Payload.from_json(payload_str) diff --git a/tests/test_pull_object_input.py b/tests/test_pull_object_input.py new file mode 100644 index 0000000..021eae1 --- /dev/null +++ b/tests/test_pull_object_input.py @@ -0,0 +1,35 @@ +import pytest +from cc_sdk.pull_object_input import PullObjectInput +from cc_sdk.store_type import StoreType + + +@pytest.fixture +def pull_object_input(): + return PullObjectInput( + file_name="test_file", + file_extension=".txt", + source_store_type=StoreType.S3, + source_root_path="/path/to/source", + dest_root_path="/path/to/dest", + ) + + +def test_getters(pull_object_input): + assert pull_object_input.file_name == "test_file" + assert pull_object_input.file_extension == ".txt" + assert pull_object_input.source_store_type == StoreType.S3 + assert pull_object_input.source_root_path == "/path/to/source" + assert pull_object_input.dest_root_path == "/path/to/dest" + + +def test_setters(pull_object_input): + with pytest.raises(AttributeError): + pull_object_input.file_name = "new_file" + with pytest.raises(AttributeError): + pull_object_input.file_extension = ".csv" + with pytest.raises(AttributeError): + pull_object_input.source_store_type = StoreType.EBS + with pytest.raises(AttributeError): + pull_object_input.source_root_path = "/new/source/path" + with pytest.raises(AttributeError): + pull_object_input.dest_root_path = "/new/dest/path" diff --git a/tests/test_put_object_input.py b/tests/test_put_object_input.py new file mode 100644 index 0000000..66f1bb9 --- /dev/null +++ b/tests/test_put_object_input.py @@ -0,0 +1,44 @@ +import pytest +from cc_sdk.put_object_input import PutObjectInput +from cc_sdk.store_type import StoreType +from cc_sdk.object_state import ObjectState + + +@pytest.fixture +def put_object_input(): + return PutObjectInput( + file_name="test", + file_extension="txt", + dest_store_type=StoreType.S3, + object_state=ObjectState.LocalDisk, + data=b"test data", + source_path="/path/to/source", + dest_path="/path/to/destination", + ) + + +def test_getters(put_object_input): + assert put_object_input.file_name == "test" + assert put_object_input.file_extension == "txt" + assert 
put_object_input.dest_store_type == StoreType.S3 + assert put_object_input.object_state == ObjectState.LocalDisk + assert put_object_input.data == b"test data" + assert put_object_input.source_path == "/path/to/source" + assert put_object_input.dest_path == "/path/to/destination" + + +def test_setters(put_object_input): + with pytest.raises(AttributeError): + put_object_input.file_name = "new_name" + with pytest.raises(AttributeError): + put_object_input.file_extension = "new_ext" + with pytest.raises(AttributeError): + put_object_input.dest_store_type = StoreType.EBS + with pytest.raises(AttributeError): + put_object_input.object_state = ObjectState.Memory + with pytest.raises(AttributeError): + put_object_input.data = b"new data" + with pytest.raises(AttributeError): + put_object_input.source_path = "/new/source/path" + with pytest.raises(AttributeError): + put_object_input.dest_path = "/new/destination/path" diff --git a/tests/test_seed_set.py b/tests/test_seed_set.py new file mode 100644 index 0000000..180718b --- /dev/null +++ b/tests/test_seed_set.py @@ -0,0 +1,27 @@ +from cc_sdk.seed_set import SeedSet +import json +import pytest +from attr.exceptions import FrozenInstanceError + + +@pytest.fixture +def seed_set(): + return SeedSet(123, 321) + + +def test_getters(seed_set): + assert seed_set.event_seed == 123 + assert seed_set.realization_seed == 321 + + +def test_setters(seed_set): + with pytest.raises(FrozenInstanceError): + seed_set.event_seed = 456 + with pytest.raises(FrozenInstanceError): + seed_set.realization_seed = 654 + + +def test_serialize(seed_set): + expected_json = json.dumps({"event_seed": 123, "realization_seed": 321}) + assert expected_json == seed_set.serialize() + assert json.loads(seed_set.serialize()) == json.loads(expected_json) diff --git a/tests/test_status.py b/tests/test_status.py new file mode 100644 index 0000000..37f7adb --- /dev/null +++ b/tests/test_status.py @@ -0,0 +1,35 @@ +from cc_sdk.status import Status, StatusLevel +import json +import pytest +from attr.exceptions import FrozenInstanceError + + +@pytest.fixture +def status(): + return Status(0, StatusLevel.COMPUTING) + + +def test_getters(status): + assert status.progress == 0 + assert status.status_level == StatusLevel.COMPUTING + + +def test_setters(status): + with pytest.raises(FrozenInstanceError): + status.progress = 1 + + with pytest.raises(FrozenInstanceError): + status.status_level = StatusLevel.FAILED + + +def test_bounds(): + with pytest.raises(ValueError): + _ = Status(-1, StatusLevel.COMPUTING) + with pytest.raises(ValueError): + _ = Status(101, StatusLevel.COMPUTING) + + +def test_serialize(status): + expected_json = json.dumps({"progress": 0, "status_level": "COMPUTING"}) + assert expected_json == status.serialize() + assert json.loads(status.serialize()) == json.loads(expected_json) From 8eb1ad0ffe66aeb9cb60993491b6ce3bc514509e Mon Sep 17 00:00:00 2001 From: Brendan Barnes Date: Sun, 19 Mar 2023 21:37:28 +0000 Subject: [PATCH 03/12] save --- .github/workflows/lint_and_test.yml | 41 ++++ .pylintrc | 3 +- README.md | 32 +-- src/cc_sdk/__init__.py | 4 + src/cc_sdk/aws_config.py | 4 +- src/cc_sdk/cc_store.py | 6 +- src/cc_sdk/cc_store_s3.py | 240 ++++++++++++++--------- src/cc_sdk/data_store.py | 22 ++- src/cc_sdk/error.py | 16 +- src/cc_sdk/file_data_store_s3.py | 75 +++++++ src/cc_sdk/get_object_input.py | 4 +- src/cc_sdk/json_encoder.py | 8 +- src/cc_sdk/object_state.py | 4 +- src/cc_sdk/payload.py | 19 +- src/cc_sdk/plugin_manager.py | 290 ++++++++++++++++++++++++++++ 
src/cc_sdk/pull_object_input.py | 4 +- src/cc_sdk/put_object_input.py | 17 +- src/cc_sdk/status.py | 12 +- src/cc_sdk/validators.py | 18 +- tests/test_aws_config.py | 8 +- tests/test_cc_store_s3.py | 140 +++++++------- tests/test_config.py | 9 +- tests/test_data_source.py | 5 +- tests/test_data_store.py | 15 +- tests/test_error.py | 6 +- tests/test_file_data_store_s3.py | 90 +++++++++ tests/test_get_object_input.py | 2 + tests/test_logger.py | 3 + tests/test_message.py | 4 +- tests/test_payload.py | 22 ++- tests/test_plugin_manager.py | 261 +++++++++++++++++++++++++ tests/test_pull_object_input.py | 2 + tests/test_put_object_input.py | 20 +- tests/test_seed_set.py | 4 +- tests/test_status.py | 4 +- 35 files changed, 1137 insertions(+), 277 deletions(-) create mode 100644 .github/workflows/lint_and_test.yml create mode 100644 src/cc_sdk/file_data_store_s3.py create mode 100644 src/cc_sdk/plugin_manager.py create mode 100644 tests/test_file_data_store_s3.py create mode 100644 tests/test_plugin_manager.py diff --git a/.github/workflows/lint_and_test.yml b/.github/workflows/lint_and_test.yml new file mode 100644 index 0000000..36683d5 --- /dev/null +++ b/.github/workflows/lint_and_test.yml @@ -0,0 +1,41 @@ +name: Lint and Test + +on: + push: + branches: [ main ] + pull_request: + branches: [ main ] + +jobs: + build: + + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.11"] + + steps: + - uses: actions/checkout@v3 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt + + - name: Run pytest and pylint + run: | + pytest + pylint --fail-under=9 src/cc_sdk/*.py + + - name: Stop the build if there are any linting errors or test failures + run: | + if [ $? -eq 0 ]; then + echo "All tests and linting passed." + else + echo "There are test failures or linting errors. Aborting the build." >&2 + exit 1 + fi diff --git a/.pylintrc b/.pylintrc index 3db9cc7..2fbd3dc 100644 --- a/.pylintrc +++ b/.pylintrc @@ -1,7 +1,6 @@ [FORMAT] max-line-length=120 -disable = missing-docstring +disable = missing-docstring, unnecessary-lambda-assignment min-public-methods=0 -disable=unused-argument # Set the Python path init-hook='import sys; sys.path.append("./src")' \ No newline at end of file diff --git a/README.md b/README.md index e21daf4..a5b4290 100644 --- a/README.md +++ b/README.md @@ -1,32 +1,2 @@ # cc-python-sdk -The Python SDK for developing plugins for Cloud Compute - -## TODO / Questions -1. We need to make the JSON serialzers consistent across all SDKs. Should we use camelCase for all attributes since this is the JavaScript convention? - -2. Will we need to support using an S3 data store with files that cannot fit in memory? We could use multipart upload/downloads instead of reading entire files to memory. - -3. In the Java sdk, pull/put/get methods seem to be writing to predetermined paths, why do we need source/dest paths in the input? In this sdk I've made them read and write to the supplied paths in the input parameter. - -4. Should `put_object()` use source and dest root paths instead of full paths? Currently we are using root paths (directories?) for pull and get - -5. Do we want to support directory paths with trailing slashes? It is not supported now. - -6. What about files with no extension? We need to remove the "." from paths in that case - -7. 
In writeInputStreamToDisk in the Java SDK, there is a bug if the filepath containes the filename in any parent directory - -```Java - String[] fileparts = outputDestination.split("/"); - String fileName = fileparts[fileparts.length-1]; - String directory = outputDestination.replace(fileName,""); // <- dangerous if any parent directory contains the fileName - - // replace with this - String outputDestination = "path/to/output/destination"; - String directory = new File(outputDestination).getParent(); - - File dir = new File(directory); - if (!dir.exists()) { - dir.mkdirs(); - } -``` \ No newline at end of file +The Python SDK for developing plugins for Cloud Compute \ No newline at end of file diff --git a/src/cc_sdk/__init__.py b/src/cc_sdk/__init__.py index f3ac645..8eb4bf7 100644 --- a/src/cc_sdk/__init__.py +++ b/src/cc_sdk/__init__.py @@ -17,6 +17,8 @@ from .file_data_store import FileDataStore from .cc_store_s3 import CCStoreS3 from .json_encoder import EnumEncoder +from .file_data_store_s3 import FileDataStoreS3 +from .plugin_manager import PluginManager __all__ = [ "DataStore", @@ -41,4 +43,6 @@ "FileDataStore", "CCStoreS3", "EnumEncoder", + "FileDataStoreS3", + "PluginManager", ] diff --git a/src/cc_sdk/aws_config.py b/src/cc_sdk/aws_config.py index f537424..8b4290a 100644 --- a/src/cc_sdk/aws_config.py +++ b/src/cc_sdk/aws_config.py @@ -43,7 +43,9 @@ class AWSConfig: aws_region: str = field(default="", validator=[validators.instance_of(str)]) aws_bucket: str = field(default="", validator=[validators.instance_of(str)]) aws_mock: bool = field(default=False, validator=[validators.instance_of(bool)]) - aws_endpoint: str = field(default="", validator=[validators.instance_of(str)]) + aws_endpoint: str | None = field( + default=None, validator=[validators.instance_of(str | None)] + ) aws_disable_ssl: bool = field( default=False, validator=[validators.instance_of(bool)] ) diff --git a/src/cc_sdk/cc_store.py b/src/cc_sdk/cc_store.py index 29f7af4..3d7852a 100644 --- a/src/cc_sdk/cc_store.py +++ b/src/cc_sdk/cc_store.py @@ -28,15 +28,15 @@ class CCStore(metaclass=abc.ABCMeta): """ @abc.abstractmethod - def put_object(self, input: PutObjectInput) -> bool: + def put_object(self, put_input: PutObjectInput) -> bool: pass @abc.abstractmethod - def pull_object(self, input: PullObjectInput) -> bool: + def pull_object(self, pull_input: PullObjectInput) -> bool: pass @abc.abstractmethod - def get_object(self, input: GetObjectInput) -> bytes: + def get_object(self, get_input: GetObjectInput) -> bytes: pass @abc.abstractmethod diff --git a/src/cc_sdk/cc_store_s3.py b/src/cc_sdk/cc_store_s3.py index f37e300..456a927 100644 --- a/src/cc_sdk/cc_store_s3.py +++ b/src/cc_sdk/cc_store_s3.py @@ -1,5 +1,8 @@ +import io +import os import boto3 from botocore.exceptions import ClientError +from botocore.client import Config from .cc_store import CCStore from .get_object_input import GetObjectInput from .pull_object_input import PullObjectInput @@ -9,11 +12,7 @@ from . import environment_variables from .aws_config import AWSConfig from . 
import constants
-from botocore.client import Config
 from .object_state import ObjectState
-import io
-import os
-import boto3


@@ -27,7 +26,8 @@ class CCStoreS3(CCStore):
    - CC_AWS_S3_BUCKET: the bucket name to use
    - CC_S3_ENDPOINT: the AWS S3 endpoint for the bucket
    - CC_EVENT_NUMBER: CC event number to use
-    - CC_ROOT: The root prefix on S3 where the payload will be stored in s3://<bucket>/<root>/<manifest_id>/payload
+    - CC_ROOT: The root prefix on S3 where the payload will be stored in:
+        s3://<bucket>/<root>/<manifest_id>/payload

     Optional:
     - CC_S3_MOCK: True or False. If true, bucket will be mocked
@@ -51,24 +51,43 @@ def _initialize(self):
         Raises:
             EnvironmentError: if a required env variable is not set
         """
+        self.config = self.create_aws_config_from_env()
+
+        self.aws_s3 = self.create_s3_client(self.config)
+
+        self.store_type = StoreType.S3
+        manifest_id = os.getenv(environment_variables.CC_MANIFEST_ID)
+        if manifest_id is None:
+            raise EnvironmentError(
+                f"{environment_variables.CC_MANIFEST_ID} environment variable not set"
+            )
+        self.manifest_id = manifest_id
+        self.local_root_path = constants.LOCAL_ROOT_PATH
+        self.bucket = self.config.aws_bucket
+        root = os.getenv(environment_variables.CC_ROOT)
+        if root is None:
+            raise EnvironmentError(
+                f"{environment_variables.CC_ROOT} environment variable not set"
+            )
+        self.root = root
+
+    @staticmethod
+    def create_aws_config_from_env(
+        env_prefix=environment_variables.CC_PROFILE,
+    ) -> AWSConfig:
         required_env_vars = {
-            "aws_access_key_id_env_key": environment_variables.CC_PROFILE
+            "aws_access_key_id_env_key": env_prefix
             + "_"
             + environment_variables.AWS_ACCESS_KEY_ID,
-            "aws_secret_access_key_id_env_key": environment_variables.CC_PROFILE
+            "aws_secret_access_key_id_env_key": env_prefix
             + "_"
             + environment_variables.AWS_SECRET_ACCESS_KEY,
-            "aws_region_env_key": environment_variables.CC_PROFILE
+            "aws_region_env_key": env_prefix
             + "_"
             + environment_variables.AWS_DEFAULT_REGION,
-            "aws_bucket_env_key": environment_variables.CC_PROFILE
+            "aws_bucket_env_key": env_prefix
             + "_"
             + environment_variables.AWS_S3_BUCKET,
-            "aws_endpoint_env_key": environment_variables.CC_PROFILE
-            + "_"
-            + environment_variables.S3_ENDPOINT,
-            "manifest_id_env_key": environment_variables.CC_MANIFEST_ID,
-            "cc_root_env_key": environment_variables.CC_ROOT,
         }
         for _, value in required_env_vars.items():
             if value not in os.environ:
@@ -87,49 +106,52 @@ def _initialize(self):
             .replace("_", "-")
         )
         aws_bucket = str(os.getenv(required_env_vars["aws_bucket_env_key"]))
-        aws_endpoint = str(os.getenv(required_env_vars["aws_endpoint_env_key"]))

         optional_env_vars = {
-            "aws_mock_env_key": environment_variables.CC_PROFILE
+            "aws_endpoint_env_key": env_prefix
             + "_"
-            + environment_variables.S3_MOCK,
-            "aws_disable_ssl_env_key": environment_variables.CC_PROFILE
+            + environment_variables.S3_ENDPOINT,
+            "aws_mock_env_key": env_prefix + "_" + environment_variables.S3_MOCK,
+            "aws_disable_ssl_env_key": env_prefix
             + "_"
             + environment_variables.S3_DISABLE_SSL,
-            "aws_force_path_style_env_key": environment_variables.CC_PROFILE
+            "aws_force_path_style_env_key": env_prefix
             + "_"
             + environment_variables.S3_FORCE_PATH_STYLE,
         }
         aws_mock = bool(os.getenv(optional_env_vars["aws_mock_env_key"]))
+        # endpoint override if mocking is used, may be None in which case default is used
+        aws_endpoint = os.getenv(optional_env_vars["aws_endpoint_env_key"])
+        # disable SSL if mocking is used
         aws_disable_ssl = bool(os.getenv(optional_env_vars["aws_disable_ssl_env_key"]))
+        # force path style if mocking is used
         aws_force_path_style = bool(
             os.getenv(optional_env_vars["aws_force_path_style_env_key"])
         )
-
-        acfg = AWSConfig(
-            aws_access_key_id=aws_access_key_id,
-            aws_secret_access_key_id=aws_secret_access_key_id,
-            aws_region=aws_region,
-            aws_bucket=aws_bucket,
-            aws_endpoint=aws_endpoint,
-            aws_mock=aws_mock,
-            aws_disable_ssl=aws_disable_ssl,
-            aws_force_path_style=aws_force_path_style,
-        )
-        self.config = acfg
-
-        self.aws_s3 = self.create_s3_client(self.config)
-
-        self.store_type = StoreType.S3
-        self.manifest_id = str(os.getenv(required_env_vars["manifest_id_env_key"]))
-        self.local_root_path = constants.LOCAL_ROOT_PATH
-        self.bucket = self.config.aws_bucket
-        self.root = str(os.getenv(required_env_vars["cc_root_env_key"]))
+        if aws_mock:
+            acfg = AWSConfig(
+                aws_access_key_id=aws_access_key_id,
+                aws_secret_access_key_id=aws_secret_access_key_id,
+                aws_region=aws_region,
+                aws_bucket=aws_bucket,
+                aws_endpoint=aws_endpoint,
+                aws_mock=aws_mock,
+                aws_disable_ssl=aws_disable_ssl,
+                aws_force_path_style=aws_force_path_style,
+            )
+        else:
+            acfg = AWSConfig(
+                aws_access_key_id=aws_access_key_id,
+                aws_secret_access_key_id=aws_secret_access_key_id,
+                aws_region=aws_region,
+                aws_bucket=aws_bucket,
+            )
+        return acfg
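As a quick orientation, the sketch below shows how a caller might exercise `create_aws_config_from_env`. The literal environment variable names are assumptions for illustration (the real string constants live in the `environment_variables` module, which this patch does not show); the call and the `AWSConfig` fields come from the code above.

```python
# Hedged sketch: assumes environment_variables.CC_PROFILE == "CC" and that the
# AWS_* constants resolve to the suffixes shown here.
import os

from cc_sdk import CCStoreS3

os.environ["CC_AWS_ACCESS_KEY_ID"] = "my_access_key"
os.environ["CC_AWS_SECRET_ACCESS_KEY"] = "my_secret_key"
os.environ["CC_AWS_DEFAULT_REGION"] = "us-west-2"
os.environ["CC_AWS_S3_BUCKET"] = "my_bucket"

config = CCStoreS3.create_aws_config_from_env()  # env_prefix defaults to CC_PROFILE
assert config.aws_bucket == "my_bucket"
# With no <prefix>_S3_MOCK variable set, aws_mock is False and the mock-only
# settings (endpoint, disable-SSL, path style) keep their AWSConfig defaults.
```

One caveat worth noting: `bool(os.getenv(...))` treats any non-empty string, including `"False"`, as truthy, so the mock flags are effectively enabled by presence rather than by value.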
    @staticmethod
    def create_s3_client(config: AWSConfig):
        """Initialize the S3 client using the config settings. When mocked, optional config settings are used

        Args:
            config (AWSConfig): the config settings used to create the s3 client
@@ -138,33 +160,38 @@ def create_s3_client(config: AWSConfig):
            The boto3 AWS S3 Client object
        """
        if config.aws_mock:
-            return boto3.client("s3")
-        if config.aws_force_path_style:
-            client_config = Config(
-                signature_version="s3v4", s3={"addressing_style": "path"}
+            if config.aws_force_path_style:
+                client_config = Config(
+                    signature_version="s3v4", s3={"addressing_style": "path"}
+                )
+            else:
+                client_config = Config(signature_version="s3v4")
+            return boto3.client(
+                "s3",
+                aws_access_key_id=config.aws_access_key_id,
+                aws_secret_access_key=config.aws_secret_access_key_id,
+                region_name=config.aws_region,
+                endpoint_url=config.aws_endpoint,
+                use_ssl=not config.aws_disable_ssl,
+                verify=not config.aws_disable_ssl,
+                config=client_config,
             )
-        else:
-            client_config = Config(signature_version="s3v4")
         s3_client = boto3.client(
             "s3",
             aws_access_key_id=config.aws_access_key_id,
             aws_secret_access_key=config.aws_secret_access_key_id,
             region_name=config.aws_region,
-            endpoint_url=config.aws_endpoint,
-            use_ssl=not config.aws_disable_ssl,
-            verify=not config.aws_disable_ssl,
-            config=client_config,
         )
         return s3_client

     def handles_data_store_type(self, data_store_type: StoreType) -> bool:
         return self.store_type == data_store_type

-    def put_object(self, input: PutObjectInput) -> bool:
+    def put_object(self, put_input: PutObjectInput) -> bool:
         """Put an object on S3. Object can be in memory or on disk.

         Args:
-            input (PutObjectInput): inputs
+            put_input (PutObjectInput): inputs

         Raises:
             FileNotFoundError: given file does not exist on disk
@@ -173,64 +200,83 @@ def put_object(self, input: PutObjectInput) -> bool:
         Returns:
             bool: True if the put is successful
         """
+        # use S3 file path separator convention
+        remote_path = os.path.join(
+            put_input.dest_root_path, put_input.file_name
+        ).replace("\\", "/")
+        # use local system file path separator convention
+        local_path = os.path.join(put_input.source_root_path, put_input.file_name)
+        if len(put_input.file_extension) > 0:
+            # add extensions if used
+            remote_path += "." + put_input.file_extension
+            local_path += "." + put_input.file_extension
-        match input.object_state:
-            case ObjectState.LocalDisk:
+        match put_input.object_state:
+            case ObjectState.LOCAL_DISK:
                 # read from local
                 try:
-                    with open(input.source_path, "rb") as f:
-                        data = f.read()
-                    self.upload_to_s3(input.dest_path, data)
-                except FileNotFoundError:
-                    # TODO
-                    raise FileNotFoundError
-                except IOError:
-                    # TODO
-                    raise IOError
+                    with open(local_path, "rb") as the_file:
+                        data = the_file.read()
+                    self.upload_to_s3(remote_path, data)
+                except FileNotFoundError as exc:
+                    raise FileNotFoundError from exc
+                except IOError as exc:
+                    raise IOError from exc
                 return True
-            case ObjectState.Memory:
-                self.upload_to_s3(input.dest_path, input.data)
+            case ObjectState.MEMORY:
+                self.upload_to_s3(remote_path, put_input.data)
                 return True
             case _:
                 return False

-    def pull_object(self, input: PullObjectInput) -> bool:
+    def pull_object(self, pull_input: PullObjectInput) -> bool:
         """Pull an object from S3 to a local file path

         Args:
-            input (PullObjectInput): inputs
+            pull_input (PullObjectInput): inputs

         Returns:
             bool: True if the pull is successful
         """
-        remote_path = input.source_root_path + "/" + input.file_name
-        local_path = input.dest_root_path + "/" + input.file_name
-        if len(input.file_extension) > 0:
-            remote_path += "." + input.file_extension
-            local_path += "." + input.file_extension
+        # use S3 file path separator convention
+        remote_path = os.path.join(
+            pull_input.source_root_path, pull_input.file_name
+        ).replace("\\", "/")
+        # use local system file path separator convention
+        local_path = os.path.join(pull_input.dest_root_path, pull_input.file_name)
+        if len(pull_input.file_extension) > 0:
+            # add extensions if used
+            remote_path += "." + pull_input.file_extension
+            local_path += "." + pull_input.file_extension
         try:
             data = self.download_bytes_from_s3(remote_path)
             self.write_input_stream_to_disk(io.BytesIO(data), local_path)
-        except Exception:
+        except ClientError:
+            return False
+        except IOError:
             return False
         return True

-    def get_object(self, input: GetObjectInput) -> bytes:
+    def get_object(self, get_input: GetObjectInput) -> bytes:
         """Get an object from S3 to memory

         Args:
-            input (GetObjectInput): inputs
+            get_input (GetObjectInput): inputs

         Returns:
             bytes: data from the get request
         """
+        # use S3 file path separator convention
+        remote_path = os.path.join(
+            get_input.source_root_path, get_input.file_name
+        ).replace("\\", "/")
+        if len(get_input.file_extension) > 0:
+            # add extensions if used
+            remote_path += "." + get_input.file_extension
         try:
             return self.download_bytes_from_s3(remote_path)
-        except ClientError as e:
-            raise Exception(str(e))
+        except ClientError as exc:
+            raise exc
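The three object operations compose as in the hedged round-trip sketch below, which assumes the `CC_*` environment variables above are set (the test suite does this against a moto-mocked bucket). With `dest_root_path="models/run1"`, `file_name="results"`, and `file_extension="csv"`, the remote key becomes `models/run1/results.csv`.

```python
# Illustrative only: CCStoreS3() raises EnvironmentError unless the required
# CC_* variables (credentials, bucket, CC_MANIFEST_ID, CC_ROOT) are set.
from cc_sdk import CCStoreS3, GetObjectInput, ObjectState, PutObjectInput, StoreType

store = CCStoreS3()
store.put_object(
    PutObjectInput(
        file_name="results",
        file_extension="csv",
        dest_store_type=StoreType.S3,
        object_state=ObjectState.MEMORY,  # data supplied in memory, not from disk
        data=b"a,b\n1,2\n",
        dest_root_path="models/run1",
    )
)
data = store.get_object(
    GetObjectInput(
        file_name="results",
        file_extension="csv",
        source_store_type=StoreType.S3,
        source_root_path="models/run1",
    )
)
assert data == b"a,b\n1,2\n"
```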
    def get_payload(self) -> Payload:
        """Get the payload from S3. The payload is always at:
@@ -239,20 +285,41 @@ def get_payload(self) -> Payload:
         Returns:
             Payload: the payload object
         """
-        path = self.root + "/" + self.manifest_id + "/" + constants.PAYLOAD_FILE_NAME
+        # use S3 file path separator convention
+        path = os.path.join(
+            self.root, self.manifest_id, constants.PAYLOAD_FILE_NAME
+        ).replace("\\", "/")
         try:
             body = self.download_bytes_from_s3(path)
             return self.read_json_model_payload_from_bytes(body)
-        except ClientError as e:
-            raise Exception(str(e))
+        except ClientError as exc:
+            raise exc
+
+    def set_payload(self, payload: Payload) -> bool:
+        """Set the payload on S3. The payload is always at:
+        s3://<bucket>/<root>/<manifest_id>/payload
+
+        This is for use in cloud compute, not for use inside a plugin.
+
+        Returns:
+            bool: True if the payload was written successfully
+        """
+        # use S3 file path separator convention
+        path = os.path.join(
+            self.root, self.manifest_id, constants.PAYLOAD_FILE_NAME
+        ).replace("\\", "/")
+        try:
+            self.upload_to_s3(path, payload.serialize().encode())
+            return True
+        except ClientError:
+            return False

     @staticmethod
     def read_json_model_payload_from_bytes(data: bytes) -> Payload:
         """Helper method to decode the JSON to a Payload object"""
         try:
             return Payload.from_json(data.decode("utf-8"))
-        except Exception as e:
-            raise e
+        except Exception as exc:
+            raise exc

     def write_input_stream_to_disk(
         self, input_stream: io.BytesIO, output_destination: str
@@ -275,8 +342,7 @@ def download_bytes_from_s3(self, object_key: str) -> bytes:
             response = self.aws_s3.get_object(Bucket=self.bucket, Key=object_key)
             file_bytes = response["Body"].read()
             return file_bytes
-        else:
-            raise RuntimeError("AWS config not set.")
+        raise RuntimeError("AWS config not set.")

     def root_path(self) -> str:
         return self.bucket
diff --git a/src/cc_sdk/data_store.py b/src/cc_sdk/data_store.py
index 6f780bc..2098379 100644
--- a/src/cc_sdk/data_store.py
+++ b/src/cc_sdk/data_store.py
@@ -1,22 +1,22 @@
 import json
 from typing import Any
-from attr import define, field, setters, asdict, validators
+from attr import define, field, setters, asdict, validators, filters, fields
 from .validators import validate_serializable
 from .store_type import StoreType
 from .json_encoder import EnumEncoder


-def convert_store_type(cls, fields):
+def convert_store_type(_, all_fields):
     results = []
-    for field in fields:
-        if field.converter is not None:
-            results.append(field)
+    for the_field in all_fields:
+        if the_field.converter is not None:
+            results.append(the_field)
             continue
-        if field.type in {StoreType, "store_type"}:
+        if the_field.type in {StoreType, "store_type"}:
             converter = lambda s: StoreType.__members__[s] if isinstance(s, str) else s
         else:
             converter = None
-        results.append(field.evolve(converter=converter))
+        results.append(the_field.evolve(converter=converter))
     return results
@@ -65,7 +65,7 @@ class DataStore:
     ds_profile: str = field(
         on_setattr=setters.frozen, validator=[validators.instance_of(str)]
     )
-    session: Any = field(default=None, validator=[validate_serializable])
+    session: Any = field(default=None)

     def serialize(self) -> str:
         """
@@ -74,4 +74,8 @@ def serialize(self) -> str:
         Returns:
         - str: JSON string representation of the attributes
         """
-        return json.dumps(asdict(self), cls=EnumEncoder)
+        # do not serialize the session object
+        return json.dumps(
+            asdict(self, filter=filters.exclude(fields(DataStore).session)),
+            cls=EnumEncoder,
+        )
diff --git a/src/cc_sdk/error.py b/src/cc_sdk/error.py
index 5975870..1b90d12 100644
--- a/src/cc_sdk/error.py
+++ b/src/cc_sdk/error.py
@@ -1,9 +1,9 @@
 from enum import Enum
 import json
 from typing import Final
+from functools import total_ordering
 from attrs import define, field, validators, asdict
 from .json_encoder import EnumEncoder
-from functools import total_ordering


 @total_ordering
@@ -44,20 +44,20 @@ def __lt__(self, other):
 # a set of all the enum values in the ErrorLevel enum is namespaced by the
 # ErrorLevelOptions class
 class ErrorLevelOptions:
-    all_opts: Final[set[ErrorLevel]] = set(ErrorLevel)
+    ALL_OPS: Final[set[ErrorLevel]] = set(ErrorLevel)


-def convert_error_level(cls, fields):
+def convert_error_level(_, fields):
     results = []
-    for field in fields:
-        if field.converter is not None:
-            results.append(field)
+    for the_field in fields:
+        if the_field.converter is not None:
+            results.append(the_field)
             continue
-        if field.type in {ErrorLevel, "error_level"}:
+        if the_field.type in {ErrorLevel, "error_level"}:
             converter = lambda s: ErrorLevel.__members__[s] if isinstance(s, str) else s
         else:
             converter = None
-        results.append(field.evolve(converter=converter))
+        results.append(the_field.evolve(converter=converter))
     return results
diff --git a/src/cc_sdk/file_data_store_s3.py b/src/cc_sdk/file_data_store_s3.py
new file mode 100644
index 0000000..55da3a3
--- /dev/null
+++ b/src/cc_sdk/file_data_store_s3.py
@@ -0,0 +1,75 @@
+import io
+import os
+from .file_data_store import FileDataStore
+from .store_type import StoreType
+from .aws_config import AWSConfig
+from .data_store import DataStore
+from .cc_store_s3 import CCStoreS3
+
+
+class FileDataStoreS3(FileDataStore):
+    S3_ROOT = "root"
+
+    def __init__(self, data_store: DataStore):
+        self.bucket = ""
+        self.post_fix = ""
+        self.store_type = StoreType.S3
+        self.aws_s3 = None
+        self.config = AWSConfig
+        self._initialize(data_store)
+
+    def _initialize(self, data_store: DataStore):
+        """Initializes the class using environment variables
+
+        Raises:
+            EnvironmentError: if a required env variable is not set
+        """
+        self.config = CCStoreS3.create_aws_config_from_env(
+            env_prefix=data_store.ds_profile
+        )
+
+        self.aws_s3 = CCStoreS3.create_s3_client(self.config)
+
+        self.store_type = StoreType.S3
+        self.bucket = self.config.aws_bucket
+        try:
+            # TODO post_fix is used as a prefix, should we rename?
+            self.post_fix = data_store.parameters[self.S3_ROOT]
+        except KeyError:
+            # TODO, throw error?
+            print("Missing S3 Root Parameter. Cannot create the store.")
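For context, a hedged sketch of how this class is constructed from a `DataStore`, mirroring the fixture in `tests/test_file_data_store_s3.py` later in this patch. The names are invented; the `"root"` parameter becomes the key prefix (`post_fix`), and the `<ds_profile>_*` environment variables must carry the AWS settings.

```python
# Illustrative only: assumes the MODEL_* environment variables are set and the
# bucket is reachable (the tests use moto's mock_s3 for this).
import io

from cc_sdk import DataStore, FileDataStoreS3, StoreType

store_def = DataStore(
    name="model_data",                 # hypothetical store name
    id="store-001",
    parameters={"root": "data/run1"},  # required: becomes the S3 key prefix
    store_type=StoreType.S3,
    ds_profile="MODEL",                # env vars read as MODEL_AWS_ACCESS_KEY_ID, ...
)
fds = FileDataStoreS3(store_def)
fds.put(io.BytesIO(b"hello"), "inputs/params.json")  # key: data/run1/inputs/params.json
print(fds.get("inputs/params.json").getvalue())      # b"hello"
```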
Cannot create the store.") + + def get_object(self, path: str): + return self.download_bytes_from_s3(path) + + def download_bytes_from_s3(self, object_key: str) -> bytes: + # standard file separators, replace \ with / + key = os.path.join(self.post_fix, object_key).replace("\\", "/") + if self.aws_s3 is not None: + response = self.aws_s3.get_object(Bucket=self.bucket, Key=key) + file_bytes = response["Body"].read() + return file_bytes + raise RuntimeError("AWS config not set.") + + def upload_to_s3(self, object_key: str, file_bytes: bytes) -> bool: + if self.aws_s3 is not None: + self.aws_s3.put_object(Bucket=self.bucket, Key=object_key, Body=file_bytes) + return True + return False + + def copy(self, dest_store: FileDataStore, src_path: str, dest_path: str) -> bool: + data = self.get_object(src_path) + return dest_store.put(io.BytesIO(data), dest_path) + + def get(self, path: str) -> io.BytesIO: + return io.BytesIO(self.get_object(path)) + + def put(self, data: io.BytesIO, path: str) -> bool: + return self.upload_to_s3(self.post_fix + "/" + path, data.getvalue()) + + def delete(self, path: str) -> bool: + key = self.post_fix + "/" + path + if self.aws_s3 is not None: + self.aws_s3.delete_object(Bucket=self.bucket, Key=key) + return True + return False diff --git a/src/cc_sdk/get_object_input.py b/src/cc_sdk/get_object_input.py index 0c90942..c564575 100644 --- a/src/cc_sdk/get_object_input.py +++ b/src/cc_sdk/get_object_input.py @@ -15,10 +15,10 @@ class GetObjectInput: The extension of the file to pull. readonly - source_store_type : StoreType The type of data store the object will be pulled from - (does not include file name or extension and must not have a trailing slash). readonly + (does not include file name or extension). readonly - source_root_path : string The source path of the object on the data store - (does not include file name or extension and must not include a trailing slash). readonly + (does not include file name or extension). readonly Raises: - TypeError: diff --git a/src/cc_sdk/json_encoder.py b/src/cc_sdk/json_encoder.py index 4af567c..2415dbe 100644 --- a/src/cc_sdk/json_encoder.py +++ b/src/cc_sdk/json_encoder.py @@ -27,7 +27,7 @@ class EnumEncoder(json.JSONEncoder): If an object of an unsupported type is encountered. """ - def default(self, obj): - if isinstance(obj, Enum): - return obj.name - return json.JSONEncoder.default(self, obj) + def default(self, o): + if isinstance(o, Enum): + return o.name + return json.JSONEncoder.default(self, o) diff --git a/src/cc_sdk/object_state.py b/src/cc_sdk/object_state.py index ae0326c..77600a0 100644 --- a/src/cc_sdk/object_state.py +++ b/src/cc_sdk/object_state.py @@ -17,6 +17,6 @@ class ObjectState(Enum): when deserializing. 
""" - Memory = 0 - LocalDisk = 1 + MEMORY = 0 + LOCAL_DISK = 1 # RemoteDisk = 2 diff --git a/src/cc_sdk/payload.py b/src/cc_sdk/payload.py index f3a3035..9154922 100644 --- a/src/cc_sdk/payload.py +++ b/src/cc_sdk/payload.py @@ -1,6 +1,6 @@ import json -from typing import Any, Type -from attr import define, field, setters, asdict, validators +from typing import Any +from attr import define, field, setters, asdict, validators, filters, fields from .data_source import DataSource from .data_store import DataStore from .json_encoder import EnumEncoder @@ -63,6 +63,8 @@ class Payload: ) def set_store(self, index: int, store: DataStore) -> None: + # assignment op does work, pylint just doesn't know it + # pylint: disable=unsupported-assignment-operation self.stores[index] = store def serialize(self) -> str: @@ -72,7 +74,14 @@ def serialize(self) -> str: Returns: - str: JSON string representation of the attributes """ - return json.dumps(asdict(self, recurse=True), cls=EnumEncoder) + # TODO, should we serialize to camelCase for attribute names? + # do not serialize DataStore.session + return json.dumps( + asdict( + self, recurse=True, filter=filters.exclude(fields(DataStore).session) + ), + cls=EnumEncoder, + ) @staticmethod def from_json(json_str: str): @@ -88,10 +97,8 @@ def from_json(json_str: str): Raises: JSONDecodeError: If the JSON string cannot be decoded. - Example: - >>> json_str = '{"attributes": {"attr1": "value1", "attr2": 2}, "stores": [{"name": "store1", "id": "store_id1", "parameters": {"param1": "value1"}, "store_type": "S3", "ds_profile": "profile1", "session": null}], "inputs": [{"name": "input1", "id": "input_id1", "store_name": "store1", "paths": ["/path/to/data1"]}], "outputs": [{"name": "output1", "id": "output_id1", "store_name": "store1", "paths": ["/path/to/output1"]}]}' - >>> payload = Payload.from_json(json_str) """ + # TODO, should we expect camelCase for attribute names? json_dict = json.loads(json_str) stores = [DataStore(**store) for store in json_dict["stores"]] inputs = [DataSource(**input) for input in json_dict["inputs"]] diff --git a/src/cc_sdk/plugin_manager.py b/src/cc_sdk/plugin_manager.py new file mode 100644 index 0000000..7288935 --- /dev/null +++ b/src/cc_sdk/plugin_manager.py @@ -0,0 +1,290 @@ +import re +import os +import io +from botocore.exceptions import ClientError +from .cc_store_s3 import CCStoreS3 +from .payload import Payload +from .logger import Logger +from .error import ErrorLevel +from .store_type import StoreType +from .file_data_store_s3 import FileDataStoreS3 +from .file_data_store import FileDataStore +from . import environment_variables +from .data_store import DataStore +from .data_source import DataSource +from .message import Message +from .error import Error +from .status import Status + + +class PluginManager: + """ + The PluginManager class manages plugins and their associated data sources and data stores. It loads and processes + a payload that defines the input and output data sources and data stores to be used by a plugin. It also provides + methods to retrieve files from and store files in the data stores. + + + Methods: + get_payload(self) -> Payload: + Returns the payload object associated with the current plugin. + + get_file_store(self, store_name: str) -> FileDataStore: + Finds the data store with the given name and returns its file data store session object. + + get_store(self, store_name: str) -> DataStore: + Finds the data store with the given name and returns the data store object. 
+ + get_input_data_source(self, name: str) -> DataSource: + Finds the input data source with the given name and returns its data source object. + + get_output_data_source(self, name: str) -> DataSource: + Finds the output data source with the given name and returns its data source object. + + get_input_data_sources(self) -> list[DataSource]: + Returns a list of all input data sources in the payload. + + get_output_data_sources(self) -> list[DataSource]: + Returns a list of all output data sources in the payload. + + get_file(self, data_source: DataSource, path_index: int) -> bytes | None: + Returns the content of a file associated with the specified data source and path index. + + put_file(self, data: bytes, data_source: DataSource, path_index: int) -> bool: + Stores the given data in the file associated with the specified data source and path index. + + file_writer(self, input_stream: io.BytesIO, dest_data_source: DataSource, dest_path_index: int) -> bool: + Stores data from the given input stream in the file associated with the specified data source and path index. + + file_reader(self, data_source: DataSource, path_index: int) -> io.BytesIO: + Returns a stream object that can be used to read the contents of the file associated with the specified data + source and path index. + + file_reader_by_name(self, data_source_name: str, path_index: int) -> io.BytesIO: + Returns a stream object that can be used to read the contents of the file associated with the data source with + the specified name and path index. + + set_log_level(self, level: ErrorLevel) -> None: + Sets the logging level for the current instance. + + log_message(self, message: Message) -> None: + Logs the given message object. + + log_error(self, error: Error) -> None: + Logs the given error object. + + report_progress(self, report: Status) -> None: + Logs a progress report. + + event_number(self) -> int: + Returns the event number associated with the current instance. + + find_data_source(self, name: str, data_sources: list[DataSource]) -> DataSource | None: + Finds and returns the data source object with the given name from the specified list of data sources. + + find_data_store(self, name: str) -> DataStore | None: + Finds and returns the data store object with the given name from the payload stores. + """ + + def __init__(self): + self._pattern = re.compile(r"(?<=\{).+?(?=\})") + sender = os.getenv(environment_variables.CC_PLUGIN_DEFINITION) + if sender is None: + raise EnvironmentError( + f"{environment_variables.CC_PLUGIN_DEFINITION} environment variable not set" + ) + self._logger = Logger(ErrorLevel.DEBUG, sender) + self._cc_store = CCStoreS3() + try: + self._payload: Payload = self._cc_store.get_payload() + # pylint can't determine stores type + # pylint: disable=not-an-iterable + for store in self._payload.stores: + match store.store_type: + case StoreType.S3: + # store is a reference so this updates the payload object + store.session = FileDataStoreS3(store) + case StoreType.WS: + # TODO + pass + case StoreType.RDBMS: + # TODO + pass + case _: + raise RuntimeError("Payload contains invalid StoreType.") + except EnvironmentError as exc: + raise exc + except Exception as exc: + raise RuntimeError( + f"Could not acquire payload file. ERROR: {str(exc)}" + ) from exc + + def _substitute_path_variables(self) -> None: + """ + Substitute placeholders in all input and output paths of the payload. 
+
+        For each input and output path in the payload, this function calls the
+        `_substitute_data_source_path` method to substitute any placeholders
+        found in the path with their corresponding values.
+
+        This function modifies the input and output paths in-place.
+
+        Returns:
+            None
+        """
+        # pylint can't determine types from Payload
+        # pylint: disable=unsubscriptable-object
+        for i, input_obj in enumerate(self._payload.inputs):
+            for j, path in enumerate(input_obj.paths):
+                self._payload.inputs[i].paths[j] = self._substitute_data_source_path(
+                    path
+                )
+
+        for i, output_obj in enumerate(self._payload.outputs):
+            for j, path in enumerate(output_obj.paths):
+                self._payload.outputs[i].paths[j] = self._substitute_data_source_path(
+                    path
+                )
+
+    def _substitute_data_source_path(self, path) -> str:
+        """
+        Substitute placeholders in a data source path with their corresponding values.
+
+        Args:
+            path (str): A string containing placeholders to substitute.
+
+        Returns:
+            str: The `path` string with all placeholders substituted with their values.
+
+        Raises:
+            RuntimeError: If a placeholder refers to a missing attribute in the payload's `attributes` dictionary.
+        """
+        the_match = self._pattern.search(path)
+        while the_match:
+            result = the_match.group()
+            parts = result.split("::", 1)
+            prefix = parts[0]
+            if prefix == "ENV":
+                val = os.getenv(parts[1])
+                path = path.replace("{" + result + "}", val)
+            elif prefix == "ATTR":
+                try:
+                    # pylint can't determine attributes type
+                    # pylint: disable=unsubscriptable-object
+                    valattr = str(self._payload.attributes[parts[1]])
+                    path = path.replace("{" + result + "}", valattr)
+                except KeyError as exc:
+                    raise RuntimeError(
+                        f"Payload attributes has no key {parts[1]}."
+                    ) from exc
+            the_match = self._pattern.search(path)
+        return path
+
+    def get_payload(self) -> Payload:
+        return self._payload
+
+    def get_file_store(self, store_name: str) -> FileDataStore:
+        data_store = self.find_data_store(store_name)
+        if data_store is None:
+            raise RuntimeError(f"DataStore with name '{store_name}' was not found.")
+        if isinstance(data_store.session, FileDataStore):
+            return data_store.session
+        raise RuntimeError("DataStore session object is invalid.")
+
+    def get_store(self, store_name: str) -> DataStore:
+        data_store = self.find_data_store(store_name)
+        if data_store is None:
+            raise RuntimeError(f"DataStore with name '{store_name}' was not found")
+        return data_store
+
+    def get_input_data_source(self, name: str) -> DataSource:
+        data_source = self.find_data_source(name, self.get_input_data_sources())
+        if data_source is None:
+            raise RuntimeError(f"Input DataSource with name '{name}' was not found")
+        return data_source
+
+    def get_output_data_source(self, name: str) -> DataSource:
+        data_source = self.find_data_source(name, self.get_output_data_sources())
+        if data_source is None:
+            raise RuntimeError(f"Output DataSource with name '{name}' was not found")
+        return data_source
+
+    def get_input_data_sources(self) -> list[DataSource]:
+        return self._payload.inputs
+
+    def get_output_data_sources(self) -> list[DataSource]:
+        return self._payload.outputs
+
+    def get_file(self, data_source: DataSource, path_index: int) -> bytes | None:
+        store = self.get_file_store(data_source.store_name)
+        try:
+            reader = store.get(data_source.paths[path_index])
+            data = reader.getvalue()
+            return data
+        except ClientError:
+            return None
+        except IndexError:
+            return None
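The placeholder grammar handled above is small enough to restate standalone. The sketch below mirrors `_substitute_data_source_path` outside the class (invented values, no payload or S3 required): `{ENV::NAME}` tokens resolve from `os.environ`, `{ATTR::name}` tokens from the payload's `attributes` dictionary, and each token is replaced together with its surrounding braces.

```python
# Standalone restatement of the substitution loop above; values are invented.
import os
import re

pattern = re.compile(r"(?<=\{).+?(?=\})")
attributes = {"event_number": 7}     # stands in for payload.attributes
os.environ["WATERSHED"] = "trinity"

path = "/data/{ENV::WATERSHED}/event_{ATTR::event_number}/flow.csv"
match = pattern.search(path)
while match:
    token = match.group()                      # e.g. "ENV::WATERSHED"
    prefix, key = token.split("::", 1)
    value = os.getenv(key) if prefix == "ENV" else str(attributes[key])
    path = path.replace("{" + token + "}", value)
    match = pattern.search(path)

print(path)  # /data/trinity/event_7/flow.csv
```

Note that the real method raises `RuntimeError` for a missing `ATTR` key but, as written, would pass `None` to `str.replace` when a referenced `ENV` variable is unset; callers should ensure those variables exist.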
+
+    def put_file(self, data: bytes, data_source: DataSource, path_index: int) -> bool:
+        store = self.get_file_store(data_source.store_name)
+        return store.put(io.BytesIO(data), data_source.paths[path_index])
+
+    def file_writer(
+        self,
+        input_stream: io.BytesIO,
+        dest_data_source: DataSource,
+        dest_path_index: int,
+    ) -> bool:
+        store = self.get_file_store(dest_data_source.store_name)
+        return store.put(input_stream, dest_data_source.paths[dest_path_index])
+
+    def file_reader(self, data_source: DataSource, path_index: int) -> io.BytesIO:
+        store = self.get_file_store(data_source.store_name)
+        return store.get(data_source.paths[path_index])
+
+    def file_reader_by_name(self, data_source_name: str, path_index: int) -> io.BytesIO:
+        data_source = self.find_data_source(
+            data_source_name, self.get_input_data_sources()
+        )
+        if data_source is None:
+            raise RuntimeError(
+                f"Input DataSource with name: '{data_source_name}' not found."
+            )
+        return self.file_reader(data_source, path_index)
+
+    def set_log_level(self, level: ErrorLevel) -> None:
+        self._logger.set_error_level(level)
+
+    def log_message(self, message: Message) -> None:
+        self._logger.log_message(message)
+
+    def log_error(self, error: Error) -> None:
+        self._logger.log_error(error)
+
+    def report_progress(self, report: Status) -> None:
+        self._logger.report_status(report)
+
+    def event_number(self) -> int:
+        val = os.getenv(environment_variables.CC_EVENT_NUMBER)
+        if val is None:
+            raise EnvironmentError(
+                f"{environment_variables.CC_EVENT_NUMBER} environment variable not set"
+            )
+        event_number = int(val)
+        return event_number
+
+    def find_data_source(
+        self, name: str, data_sources: list[DataSource]
+    ) -> DataSource | None:
+        for data_source in data_sources:
+            if data_source.name.lower() == name.lower():
+                return data_source
+        return None
+
+    def find_data_store(self, name: str) -> DataStore | None:
+        # pylint can't determine attributes type
+        # pylint: disable=not-an-iterable
+        for data_store in self._payload.stores:
+            if data_store.name.lower() == name.lower():
+                return data_store
+        return None
diff --git a/src/cc_sdk/pull_object_input.py b/src/cc_sdk/pull_object_input.py
index 30289d7..19a3884 100644
--- a/src/cc_sdk/pull_object_input.py
+++ b/src/cc_sdk/pull_object_input.py
@@ -15,9 +15,9 @@ class PullObjectInput:
     - source_store_type : StoreType
         The type of data store the object will be pulled from. readonly
     - source_root_path : string
-        The source path of the object on the data store (does not include file name or extension and must not include a trailing slash). readonly
+        The source directory path of the object on the data store (does not include file name or extension). readonly
     - dest_root_path : string
-        The destination path of the object (does not include file name or extension and must not include a trailing slash). readonly
+        The destination directory path of the object (does not include file name or extension). readonly

     Raises:
     - TypeError:
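Complementing the docstring above, a hedged sketch of a pull: the source key is assembled from `source_root_path` + `file_name` (+ extension) and the result is written under `dest_root_path` on the local disk. Paths and names are invented.

```python
# Illustrative only: assumes an initialized CCStoreS3 (CC_* variables set).
from cc_sdk import CCStoreS3, PullObjectInput, StoreType

store = CCStoreS3()
ok = store.pull_object(
    PullObjectInput(
        file_name="flows",
        file_extension="csv",
        source_store_type=StoreType.S3,
        source_root_path="results",   # remote: results/flows.csv
        dest_root_path="/tmp/run1",   # local: /tmp/run1/flows.csv
    )
)
print("pulled" if ok else "pull failed")  # pull_object returns False on error
```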
diff --git a/src/cc_sdk/put_object_input.py b/src/cc_sdk/put_object_input.py
index d1db693..3fd0693 100644
--- a/src/cc_sdk/put_object_input.py
+++ b/src/cc_sdk/put_object_input.py
@@ -18,11 +18,12 @@ class PutObjectInput:
     - object_state : ObjectState
         The storage state of the object. readonly
     - data : bytes
-        The raw bytes of the data. readonly
-    - source_path : string
-        The source path of the object (includes file name or extension). readonly
-    - dest_path : string
-        The destination path of the object on the data store (includes file name or extension). readonly
+        The raw bytes of the data. readonly. optional, but required for in memory files.
+    - source_root_path : string
+        The source directory path of the object (does not include file name or extension). readonly.
+        optional, but required for local disk files.
+    - dest_root_path : string
+        The destination directory path of the object on the data store (does not include file name or extension). readonly

     Raises:
     - TypeError:
@@ -37,6 +38,6 @@ class PutObjectInput:
         validator=[validators.instance_of(StoreType)],
     )
     object_state: ObjectState = field(validator=validators.instance_of(ObjectState))
-    data: bytes = field(validator=validators.instance_of(bytes))
-    source_path: str = field(validator=validators.instance_of(str))
-    dest_path: str = field(validator=validators.instance_of(str))
+    dest_root_path: str = field(validator=validators.instance_of(str))
+    source_root_path: str = field(default="", validator=validators.instance_of(str))
+    data: bytes = field(default=bytes(), validator=validators.instance_of(bytes))
diff --git a/src/cc_sdk/status.py b/src/cc_sdk/status.py
index 3ae21ac..d50526b 100644
--- a/src/cc_sdk/status.py
+++ b/src/cc_sdk/status.py
@@ -24,19 +24,19 @@ class StatusLevel(Enum):
     SUCCEEDED = 2


-def convert_status_level(cls, fields):
+def convert_status_level(_, fields):
     results = []
-    for field in fields:
-        if field.converter is not None:
-            results.append(field)
+    for the_field in fields:
+        if the_field.converter is not None:
+            results.append(the_field)
             continue
-        if field.type in {StatusLevel, "status_level"}:
+        if the_field.type in {StatusLevel, "status_level"}:
             converter = (
                 lambda s: StatusLevel.__members__[s] if isinstance(s, str) else s
             )
         else:
             converter = None
-        results.append(field.evolve(converter=converter))
+        results.append(the_field.evolve(converter=converter))
     return results
diff --git a/src/cc_sdk/validators.py b/src/cc_sdk/validators.py
index b6aab70..d493401 100644
--- a/src/cc_sdk/validators.py
+++ b/src/cc_sdk/validators.py
@@ -1,7 +1,7 @@
 import json


-def validate_serializable(instance, attribute, value):
+def validate_serializable(_instance, _attribute, value):
     """
     A validator that ensures an attribute is JSON serializable.

@@ -21,11 +21,11 @@ def validate_serializable(instance, attribute, value):
     """
     try:
         json.dumps(value)
-    except TypeError:
-        raise ValueError(f"Attributes must be JSON serializable")
+    except TypeError as exc:
+        raise ValueError("Attributes must be JSON serializable") from exc


-def validate_range(instance, attribute, value, lower_bound, upper_bound):
+def validate_range(_instance, _attribute, value, lower_bound, upper_bound):
     """
     A validator that ensures a value falls within the given lower and upper bounds.

@@ -52,7 +52,7 @@ def validate_range(instance, attribute, value, lower_bound, upper_bound):
         raise ValueError("Value out of bounds")


-def validate_homogeneous_list(instance, attribute, value, type):
+def validate_homogeneous_list(_instance, attribute, value, the_type):
     """
     A validator that ensures an attribute is a homogeneous list of a given type

     Parameters
     ----------
     instance : object
         The instance of the class.
     attribute : str
         The name of the attribute.
     value : any
         The value of the attribute.
-    type: Type
+    the_type: Type
         The type of the elements of the list.

     Raises:
     -------
     ValueError:
         If an incorrect value is set for the attribute.
     """
     if not isinstance(value, list):
-        raise ValueError(f"{str(attribute)} must be a list of {str(type)}")
-    if not all(isinstance(path, type) for path in value):
-        raise ValueError(f"{str(attribute)} must be a list of {str(type)}")
+        raise ValueError(f"{str(attribute)} must be a list of {str(the_type)}")
+    if not all(isinstance(path, the_type) for path in value):
+        raise ValueError(f"{str(attribute)} must be a list of {str(the_type)}")
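Because attrs validators receive `(instance, attribute, value)`, the extra bound arguments of `validate_range` have to be partially applied, typically with a lambda. A hedged sketch with invented names (the `Status` class appears to bound `progress` to 0-100 the same way, judging by its tests):

```python
# Illustrative only: shows the validators above wired into an attrs class.
from attr import define, field

from cc_sdk.validators import validate_range, validate_serializable

@define
class Progress:
    # keep percent within [0, 100]; validate_range raises ValueError otherwise
    percent: int = field(
        validator=[lambda inst, attr, val: validate_range(inst, attr, val, 0, 100)]
    )
    # metadata must survive a json.dumps round trip
    metadata: dict = field(factory=dict, validator=[validate_serializable])

Progress(percent=50, metadata={"stage": "routing"})  # ok
Progress(percent=101, metadata={})                   # raises ValueError
```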
""" if not isinstance(value, list): - raise ValueError(f"{str(attribute)} must be a list of {str(type)}") - if not all(isinstance(path, type) for path in value): - raise ValueError(f"{str(attribute)} must be a list of {str(type)}") + raise ValueError(f"{str(attribute)} must be a list of {str(the_type)}") + if not all(isinstance(path, the_type) for path in value): + raise ValueError(f"{str(attribute)} must be a list of {str(the_type)}") diff --git a/tests/test_aws_config.py b/tests/test_aws_config.py index f965db5..88a11c4 100644 --- a/tests/test_aws_config.py +++ b/tests/test_aws_config.py @@ -2,6 +2,8 @@ import pytest from cc_sdk import AWSConfig +# pylint: disable=redefined-outer-name + @pytest.fixture def aws_config(): @@ -60,6 +62,8 @@ def test_setters(aws_config): def test_serialize(aws_config): - expected_json = '{"aws_config_name": "test", "aws_access_key_id": "my_access_key", "aws_secret_access_key_id": "my_secret_key", "aws_region": "us-west-2", "aws_bucket": "my_bucket", "aws_mock": true, "aws_endpoint": "https://my-endpoint.com", "aws_disable_ssl": true, "aws_force_path_style": true}' - assert aws_config.serialize() == expected_json + expected_json = '{"aws_config_name": "test", "aws_access_key_id": "my_access_key", "aws_secret_access_key_id": \ + "my_secret_key", "aws_region": "us-west-2", "aws_bucket": "my_bucket", "aws_mock": true, "aws_endpoint": \ + "https://my-endpoint.com", "aws_disable_ssl": true, "aws_force_path_style": true}' + assert aws_config.serialize() == json.dumps(json.loads(expected_json)) assert json.loads(aws_config.serialize()) == json.loads(expected_json) diff --git a/tests/test_cc_store_s3.py b/tests/test_cc_store_s3.py index 3c7c696..d2dde28 100644 --- a/tests/test_cc_store_s3.py +++ b/tests/test_cc_store_s3.py @@ -1,4 +1,10 @@ -import json +from unittest.mock import Mock +import tempfile +import os +import shutil +from moto import mock_s3 +import boto3 +from botocore.exceptions import ClientError import pytest from cc_sdk import ( CCStoreS3, @@ -14,12 +20,8 @@ DataSource, DataStore, ) -from unittest.mock import Mock -import tempfile -import os -import shutil -from moto import mock_s3 -import boto3 + +# pylint: disable=redefined-outer-name def test_initialize(monkeypatch): @@ -45,10 +47,6 @@ def test_initialize(monkeypatch): environment_variables.CC_PROFILE + "_" + environment_variables.AWS_S3_BUCKET, "my_bucket", ) - monkeypatch.setenv( - environment_variables.CC_PROFILE + "_" + environment_variables.S3_ENDPOINT, - "http://localhost:9000", - ) monkeypatch.setenv(environment_variables.CC_MANIFEST_ID, "my_manifest") monkeypatch.setenv(environment_variables.CC_ROOT, "my_root") monkeypatch.setenv( @@ -67,7 +65,7 @@ def test_initialize(monkeypatch): assert store.config.aws_secret_access_key_id == "my_secret_key" assert store.config.aws_region == "us-west-2" assert store.config.aws_bucket == "my_bucket" - assert store.config.aws_endpoint == "http://localhost:9000" + assert store.config.aws_endpoint is None assert store.manifest_id == "my_manifest" assert store.local_root_path == constants.LOCAL_ROOT_PATH assert store.bucket == "my_bucket" @@ -107,10 +105,6 @@ def store(monkeypatch): + environment_variables.AWS_S3_BUCKET, "my_bucket", ) - monkeypatch.setenv( - environment_variables.CC_PROFILE + "_" + environment_variables.S3_ENDPOINT, - "http://localhost:9000", - ) monkeypatch.setenv(environment_variables.CC_MANIFEST_ID, "my_manifest") monkeypatch.setenv(environment_variables.CC_ROOT, "/tmp") monkeypatch.setenv( @@ -131,18 +125,20 @@ def store(monkeypatch): ) # 
create a mock S3 client - s3 = boto3.client("s3") + s3_client = boto3.client("s3") # create a mock S3 bucket - s3.create_bucket(Bucket="my_bucket") + s3_client.create_bucket(Bucket="my_bucket") # create and return an instance of the Store class store = CCStoreS3() yield store - response = s3.list_objects_v2(Bucket="my_bucket") + response = s3_client.list_objects_v2(Bucket="my_bucket") if "Contents" in response: delete_keys = [{"Key": obj["Key"]} for obj in response["Contents"]] - s3.delete_objects(Bucket="my_bucket", Delete={"Objects": delete_keys}) - s3.delete_bucket(Bucket="my_bucket") + s3_client.delete_objects( + Bucket="my_bucket", Delete={"Objects": delete_keys} + ) + s3_client.delete_bucket(Bucket="my_bucket") def test_handles_data_store_type(store): @@ -154,10 +150,10 @@ def test_put_object_local_disk_file_not_found(store): "file_name": "test_file", "file_extension": "txt", "dest_store_type": StoreType.S3, - "object_state": ObjectState.LocalDisk, + "object_state": ObjectState.LOCAL_DISK, "data": bytes(), - "source_path": "/no/file/here", - "dest_path": "place/to/put/file", + "source_root_path": "/no/file/here", + "dest_root_path": "place/to/put/file", } with pytest.raises(FileNotFoundError): store.put_object(PutObjectInput(**input_data)) @@ -168,17 +164,17 @@ def test_put_object_local_disk_error_reading_file(store): with tempfile.NamedTemporaryFile(delete=False) as tmp_file: tmp_file.write(b"Hello, world!") tmp_file.flush() - # Set the file permissions to read-only - os.chmod(tmp_file.name, 0o400) + # Set the file permissions to not readable + os.chmod(tmp_file.name, ~(0o400)) input_data = { "file_name": os.path.basename(tmp_file.name), "file_extension": "", "dest_store_type": StoreType.S3, - "object_state": ObjectState.LocalDisk, + "object_state": ObjectState.LOCAL_DISK, "data": bytes(), - "source_path": os.path.dirname(tmp_file.name), - "dest_path": "place/to/put/file", + "source_root_path": os.path.dirname(tmp_file.name), + "dest_root_path": "place/to/put/file", } # Check that an IOError is raised when `store.put_object` is called @@ -211,41 +207,42 @@ def test_put_object_local_disk_success(store, temp_dir): "file_name": os.path.basename(tmp_file.name), "file_extension": "", "dest_store_type": StoreType.S3, - "object_state": ObjectState.LocalDisk, + "object_state": ObjectState.LOCAL_DISK, "data": bytes(), - "source_path": tmp_file.name, - "dest_path": object_key, + "source_root_path": os.path.dirname(tmp_file.name), + "dest_root_path": "place/to/put/file", } assert store.put_object(PutObjectInput(**input_data)) is True - s3 = CCStoreS3.create_s3_client(store.config) - objects = s3.list_objects_v2(Bucket="my_bucket", Prefix=object_key) + s3_client = CCStoreS3.create_s3_client(store.config) + objects = s3_client.list_objects_v2(Bucket="my_bucket", Prefix=object_key) assert any( obj["Key"] == object_key for obj in objects.get("Contents", []) ), f"Object '{object_key}' does not exist in bucket '{store.config.bucket}'" - response = s3.get_object(Bucket="my_bucket", Key=object_key) + response = s3_client.get_object(Bucket="my_bucket", Key=object_key) assert ( response["Body"].read() == b"Hello, world!" 
), f"Object '{object_key}' in bucket '{store.config.bucket}' has unexpected contents" def test_put_object_memory_success(store): - object_key = "place/to/put/file/" + "memory_put_test" + dest_dir = "place/to/put/file" input_data = { "file_name": "memory_put_test", "file_extension": "", "dest_store_type": StoreType.S3, - "object_state": ObjectState.Memory, + "object_state": ObjectState.MEMORY, "data": b"Hello, world!", - "source_path": "memory", - "dest_path": object_key, + "source_root_path": "", + "dest_root_path": dest_dir, } + object_key = dest_dir + "/" + "memory_put_test" assert store.put_object(PutObjectInput(**input_data)) is True - s3 = CCStoreS3.create_s3_client(store.config) - objects = s3.list_objects_v2(Bucket="my_bucket", Prefix=object_key) + s3_client = CCStoreS3.create_s3_client(store.config) + objects = s3_client.list_objects_v2(Bucket="my_bucket", Prefix=object_key) assert any( obj["Key"] == object_key for obj in objects.get("Contents", []) ), f"Object '{object_key}' does not exist in bucket '{store.config.bucket}'" - response = s3.get_object(Bucket="my_bucket", Key=object_key) + response = s3_client.get_object(Bucket="my_bucket", Key=object_key) assert ( response["Body"].read() == b"Hello, world!" ), f"Object '{object_key}' in bucket '{store.config.bucket}' has unexpected contents" @@ -253,15 +250,15 @@ def test_put_object_memory_success(store): def test_pull_object_success(store, temp_dir): # put the object - object_key = "place/to/put/file/" + "memory_put_test" + dest_dir = "place/to/put/file" input_data = { "file_name": "memory_put_test", "file_extension": "", "dest_store_type": StoreType.S3, - "object_state": ObjectState.Memory, + "object_state": ObjectState.MEMORY, "data": b"Hello, world!", - "source_path": "memory", - "dest_path": object_key, + "source_root_path": "", + "dest_root_path": dest_dir, } assert store.put_object(PutObjectInput(**input_data)) is True # pull the object @@ -274,8 +271,8 @@ def test_pull_object_success(store, temp_dir): } assert store.pull_object(PullObjectInput(**input_data)) is True pulled_filepath = os.path.join(temp_dir, "memory_put_test") - with open(pulled_filepath, "rb") as f: - contents = f.read() + with open(pulled_filepath, "rb") as the_file: + contents = the_file.read() assert ( contents == b"Hello, world!" 
), f"File at '{pulled_filepath}' has unexpected contents" @@ -295,15 +292,15 @@ def test_pull_object_error(store, temp_dir): def test_get_object_success(store): # put the object - object_key = "place/to/put/file/" + "memory_put_test" + dest_dir = "place/to/put/file" input_data = { "file_name": "memory_put_test", "file_extension": "", "dest_store_type": StoreType.S3, - "object_state": ObjectState.Memory, + "object_state": ObjectState.MEMORY, "data": b"Hello, world!", - "source_path": "memory", - "dest_path": object_key, + "source_root_path": "", + "dest_root_path": dest_dir, } assert store.put_object(PutObjectInput(**input_data)) is True # pull the object @@ -317,15 +314,15 @@ def test_get_object_success(store): def test_get_object_error(store): - with pytest.raises(Exception): - # get the object that doesn't exist - input_data = { - "file_name": "not_a_real_file", - "file_extension": "", - "source_store_type": StoreType.S3, - "source_root_path": "place/to/put/file", - } - _ = store.pull_object(PullObjectInput(**input_data)) + # pull the object that doesn't exist + input_data = { + "file_name": "not_a_real_file", + "file_extension": "", + "source_store_type": StoreType.S3, + "source_root_path": "no/file/here", + } + with pytest.raises(ClientError): + store.get_object(GetObjectInput(**input_data)) @pytest.fixture @@ -380,21 +377,32 @@ def payload(): def test_read_json_model_payload_from_bytes(payload): - payload_bytes = b'{"attributes": {"attr1": "value1", "attr2": 2}, "stores": [{"name": "store1", "id": "store_id1", "parameters": {"param1": "value1"}, "store_type": "S3", "ds_profile": "profile1", "session": null}, {"name": "store2", "id": "store_id2", "parameters": {"param2": "value2"}, "store_type": "S3", "ds_profile": "profile2", "session": null}], "inputs": [{"name": "input1", "id": "input_id1", "store_name": "store1", "paths": ["/path/to/data1"]}, {"name": "input2", "id": "input_id2", "store_name": "store2", "paths": ["/path/to/data2"]}], "outputs": [{"name": "output1", "id": "output_id1", "store_name": "store1", "paths": ["/path/to/output1"]}, {"name": "output2", "id": "output_id2", "store_name": "store2", "paths": ["/path/to/output2"]}]}' + payload_bytes = b'{"attributes": {"attr1": "value1", "attr2": 2}, "stores": [{"name": "store1", "id": "store_id1", \ + "parameters": {"param1": "value1"}, "store_type": "S3", "ds_profile": "profile1", "session": null}, \ + {"name": "store2", "id": "store_id2", "parameters": {"param2": "value2"}, "store_type": "S3", \ + "ds_profile": "profile2", "session": null}], "inputs": [{"name": "input1", "id": "input_id1", \ + "store_name": "store1", "paths": ["/path/to/data1"]}, {"name": "input2", "id": "input_id2", \ + "store_name": "store2", "paths": ["/path/to/data2"]}], "outputs": [{"name": "output1", "id": \ + "output_id1", "store_name": "store1", "paths": ["/path/to/output1"]}, {"name": "output2", \ + "id": "output_id2", "store_name": "store2", "paths": ["/path/to/output2"]}]}' assert payload == CCStoreS3.read_json_model_payload_from_bytes(payload_bytes) +def test_set_payload(payload, store): + assert store.set_payload(payload) is True + + def test_get_payload(payload, store): # Create a temporary file for the payload and put on S3 - path = store.root + "/" + store.manifest_id + "/" + constants.PAYLOAD_FILE_NAME + path = store.root + "/" + store.manifest_id input_data = { "file_name": constants.PAYLOAD_FILE_NAME, "file_extension": "", "dest_store_type": StoreType.S3, - "object_state": ObjectState.Memory, + "object_state": ObjectState.MEMORY, "data": 
payload.serialize().encode(), - "source_path": "memory", - "dest_path": path, + "source_root_path": "", + "dest_root_path": path, } assert store.put_object(PutObjectInput(**input_data)) is True assert store.get_payload() == payload diff --git a/tests/test_config.py b/tests/test_config.py index 69d123f..6439b7b 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -2,6 +2,8 @@ from pytest import fixture from cc_sdk import AWSConfig, Config +# pylint: disable=redefined-outer-name + @fixture def config(): @@ -50,6 +52,9 @@ def test_setters(config): def test_serialize(config): - expected_json = '{"aws_configs": [{"aws_config_name": "test", "aws_access_key_id": "my_access_key", "aws_secret_access_key_id": "my_secret_key", "aws_region": "us-west-2", "aws_bucket": "my_bucket", "aws_mock": true, "aws_endpoint": "https://my-endpoint.com", "aws_disable_ssl": true, "aws_force_path_style": true}]}' - assert config.serialize() == expected_json + expected_json = '{"aws_configs": [{"aws_config_name": "test", "aws_access_key_id": "my_access_key", \ + "aws_secret_access_key_id": "my_secret_key", "aws_region": "us-west-2", "aws_bucket": "my_bucket", \ + "aws_mock": true, "aws_endpoint": "https://my-endpoint.com", "aws_disable_ssl": true, \ + "aws_force_path_style": true}]}' + assert config.serialize() == json.dumps(json.loads(expected_json)) assert json.loads(config.serialize()) == json.loads(expected_json) diff --git a/tests/test_data_source.py b/tests/test_data_source.py index d0a1655..e6762ef 100644 --- a/tests/test_data_source.py +++ b/tests/test_data_source.py @@ -1,9 +1,10 @@ -import pytest import json - +import pytest from attr.exceptions import FrozenInstanceError from cc_sdk import DataSource +# pylint: disable=redefined-outer-name + @pytest.fixture def data_source(): diff --git a/tests/test_data_store.py b/tests/test_data_store.py index afdac9a..f5af4bd 100644 --- a/tests/test_data_store.py +++ b/tests/test_data_store.py @@ -1,6 +1,8 @@ +import json import pytest from cc_sdk import DataStore, StoreType -import json + +# pylint: disable=redefined-outer-name @pytest.fixture @@ -39,14 +41,9 @@ def test_setters(data_store): with pytest.raises(AttributeError): data_store.ds_profile = "new_test_profile" - with pytest.raises(ValueError): - data_store.session = object() # non-serializable object - - data_store.session = {"key": "value"} # serializable object - assert data_store.session == {"key": "value"} - def test_serialize(data_store): - expected_json = '{"name": "test", "id": "123", "parameters": {"param1": "value1", "param2": "value2"}, "store_type": "S3", "ds_profile": "test_profile", "session": null}' - assert data_store.serialize() == expected_json + expected_json = '{"name": "test", "id": "123", "parameters": {"param1": "value1", "param2": "value2"}, \ + "store_type": "S3", "ds_profile": "test_profile"}' + assert data_store.serialize() == json.dumps(json.loads(expected_json)) assert json.loads(data_store.serialize()) == json.loads(expected_json) diff --git a/tests/test_error.py b/tests/test_error.py index 3cfead7..a20585f 100644 --- a/tests/test_error.py +++ b/tests/test_error.py @@ -1,7 +1,9 @@ -from cc_sdk.error import Error, ErrorLevel import json import pytest from attr.exceptions import FrozenInstanceError +from cc_sdk.error import Error, ErrorLevel + +# pylint: disable=redefined-outer-name @pytest.fixture @@ -27,7 +29,9 @@ def test_serialize(error): assert expected_json == error.serialize() assert json.loads(error.serialize()) == json.loads(expected_json) + def 
test_error_level_comparison(): + # pylint: disable=too-many-statements, comparison-with-itself debug = ErrorLevel.DEBUG info = ErrorLevel.INFO warn = ErrorLevel.WARN diff --git a/tests/test_file_data_store_s3.py b/tests/test_file_data_store_s3.py new file mode 100644 index 0000000..63bb936 --- /dev/null +++ b/tests/test_file_data_store_s3.py @@ -0,0 +1,90 @@ +import io +import pytest +from moto import mock_s3 +import boto3 +from cc_sdk import ( + FileDataStoreS3, + StoreType, + environment_variables, + DataStore, +) + +# pylint: disable=redefined-outer-name + + +@pytest.fixture +def file_data_store(monkeypatch): + with mock_s3(): + data_store = DataStore( + name="testname", + id="testid", + parameters={"root": "testroot"}, + store_type=StoreType.S3, + ds_profile="testprofile", + ) + monkeypatch.setenv( + "testprofile" + "_" + environment_variables.AWS_ACCESS_KEY_ID, + "my_access_key", + ) + monkeypatch.setenv( + "testprofile" + "_" + environment_variables.AWS_SECRET_ACCESS_KEY, + "my_secret_key", + ) + monkeypatch.setenv( + "testprofile" + "_" + environment_variables.AWS_DEFAULT_REGION, + "us-west-2", + ) + monkeypatch.setenv( + "testprofile" + "_" + environment_variables.AWS_S3_BUCKET, + "my_bucket", + ) + monkeypatch.setenv( + "testprofile" + "_" + environment_variables.S3_MOCK, + "True", + ) + monkeypatch.setenv( + "testprofile" + "_" + environment_variables.S3_DISABLE_SSL, + "True", + ) + monkeypatch.setenv( + "testprofile" + "_" + environment_variables.S3_FORCE_PATH_STYLE, + "False", + ) + + # create a mock S3 client + s3_client = boto3.client("s3") + # create a mock S3 bucket + s3_client.create_bucket(Bucket="my_bucket") + + file_data_store = FileDataStoreS3(data_store) + + yield file_data_store + response = s3_client.list_objects_v2(Bucket="my_bucket") + if "Contents" in response: + delete_keys = [{"Key": obj["Key"]} for obj in response["Contents"]] + s3_client.delete_objects( + Bucket="my_bucket", Delete={"Objects": delete_keys} + ) + s3_client.delete_bucket(Bucket="my_bucket") + + +def test_put(file_data_store): + assert file_data_store.put(io.BytesIO(b"Hello"), "test") is True + + +def test_get(file_data_store): + file_data_store.put(io.BytesIO(b"Hello"), "test") + assert file_data_store.get("test").getvalue() == b"Hello" + + +def test_copy(file_data_store): + file_data_store.put(io.BytesIO(b"Hello"), "test") + file_data_store.copy(file_data_store, "test", "testcopy") + assert file_data_store.get("testcopy").getvalue() == b"Hello" + + +def test_delete(file_data_store): + assert file_data_store.put(io.BytesIO(b"Hello"), "test") is True + file_data_store.delete("test") + with pytest.raises(Exception): + file_data_store.get("test") diff --git a/tests/test_get_object_input.py b/tests/test_get_object_input.py index 9f18454..c92c3bc 100644 --- a/tests/test_get_object_input.py +++ b/tests/test_get_object_input.py @@ -3,6 +3,8 @@ from cc_sdk.get_object_input import GetObjectInput from cc_sdk.store_type import StoreType +# pylint: disable=redefined-outer-name + @fixture def input_obj(): diff --git a/tests/test_logger.py b/tests/test_logger.py index 65ee237..d80bf9b 100644 --- a/tests/test_logger.py +++ b/tests/test_logger.py @@ -6,8 +6,11 @@ from cc_sdk.error import Error, ErrorLevel from cc_sdk.status import Status, StatusLevel +# pylint: disable=redefined-outer-name + def test_logger_set_error_level(): + # pylint: disable=protected-access logger = Logger(ErrorLevel.INFO, "test_sender") logger.set_error_level(ErrorLevel.WARN) assert logger._error_level == ErrorLevel.WARN diff --git 
a/tests/test_message.py b/tests/test_message.py index 086c3d1..09eb3a9 100644 --- a/tests/test_message.py +++ b/tests/test_message.py @@ -1,7 +1,9 @@ -from cc_sdk.message import Message import json import pytest from attr.exceptions import FrozenInstanceError +from cc_sdk.message import Message + +# pylint: disable=redefined-outer-name @pytest.fixture diff --git a/tests/test_payload.py b/tests/test_payload.py index 93eb05c..b47e02f 100644 --- a/tests/test_payload.py +++ b/tests/test_payload.py @@ -2,6 +2,8 @@ import pytest from cc_sdk import Payload, DataSource, DataStore, StoreType +# pylint: disable=redefined-outer-name + @pytest.fixture def payload(): @@ -93,11 +95,25 @@ def test_stores_setter(payload): def test_serialize(payload): - expected_json = '{"attributes": {"attr1": "value1", "attr2": 2}, "stores": [{"name": "store1", "id": "store_id1", "parameters": {"param1": "value1"}, "store_type": "S3", "ds_profile": "profile1", "session": null}, {"name": "store2", "id": "store_id2", "parameters": {"param2": "value2"}, "store_type": "S3", "ds_profile": "profile2", "session": null}], "inputs": [{"name": "input1", "id": "input_id1", "store_name": "store1", "paths": ["/path/to/data1"]}, {"name": "input2", "id": "input_id2", "store_name": "store2", "paths": ["/path/to/data2"]}], "outputs": [{"name": "output1", "id": "output_id1", "store_name": "store1", "paths": ["/path/to/output1"]}, {"name": "output2", "id": "output_id2", "store_name": "store2", "paths": ["/path/to/output2"]}]}' - assert payload.serialize() == expected_json + expected_json = '{"attributes": {"attr1": "value1", "attr2": 2}, "stores": [{"name": "store1", "id": "store_id1", \ + "parameters": {"param1": "value1"}, "store_type": "S3", "ds_profile": "profile1"}, {"name": \ + "store2", "id": "store_id2", "parameters": {"param2": "value2"}, "store_type": "S3", "ds_profile": \ + "profile2"}], "inputs": [{"name": "input1", "id": "input_id1", "store_name": \ + "store1", "paths": ["/path/to/data1"]}, {"name": "input2", "id": "input_id2", "store_name": \ + "store2", "paths": ["/path/to/data2"]}], "outputs": [{"name": "output1", "id": "output_id1", \ + "store_name": "store1", "paths": ["/path/to/output1"]}, {"name": "output2", "id": \ + "output_id2", "store_name": "store2", "paths": ["/path/to/output2"]}]}' + assert payload.serialize() == json.dumps(json.loads(expected_json)) assert json.loads(payload.serialize()) == json.loads(expected_json) def test_from_json(payload): - payload_str = '{"attributes": {"attr1": "value1", "attr2": 2}, "stores": [{"name": "store1", "id": "store_id1", "parameters": {"param1": "value1"}, "store_type": "S3", "ds_profile": "profile1", "session": null}, {"name": "store2", "id": "store_id2", "parameters": {"param2": "value2"}, "store_type": "S3", "ds_profile": "profile2", "session": null}], "inputs": [{"name": "input1", "id": "input_id1", "store_name": "store1", "paths": ["/path/to/data1"]}, {"name": "input2", "id": "input_id2", "store_name": "store2", "paths": ["/path/to/data2"]}], "outputs": [{"name": "output1", "id": "output_id1", "store_name": "store1", "paths": ["/path/to/output1"]}, {"name": "output2", "id": "output_id2", "store_name": "store2", "paths": ["/path/to/output2"]}]}' + payload_str = '{"attributes": {"attr1": "value1", "attr2": 2}, "stores": [{"name": "store1", "id": "store_id1", \ + "parameters": {"param1": "value1"}, "store_type": "S3", "ds_profile": "profile1", "session": null}, \ + {"name": "store2", "id": "store_id2", "parameters": {"param2": "value2"}, "store_type": "S3", \ + "ds_profile": 
"profile2", "session": null}], "inputs": [{"name": "input1", "id": "input_id1", \ + "store_name": "store1", "paths": ["/path/to/data1"]}, {"name": "input2", "id": "input_id2", \ + "store_name": "store2", "paths": ["/path/to/data2"]}], "outputs": [{"name": "output1", \ + "id": "output_id1", "store_name": "store1", "paths": ["/path/to/output1"]}, {"name": \ + "output2", "id": "output_id2", "store_name": "store2", "paths": ["/path/to/output2"]}]}' assert payload == Payload.from_json(payload_str) diff --git a/tests/test_plugin_manager.py b/tests/test_plugin_manager.py new file mode 100644 index 0000000..6effa7f --- /dev/null +++ b/tests/test_plugin_manager.py @@ -0,0 +1,261 @@ +import io +from unittest import mock +import pytest +import boto3 +from moto import mock_s3 +from cc_sdk import ( + PluginManager, + DataSource, + DataStore, + Error, + Message, + Payload, + StoreType, + environment_variables, + CCStoreS3, + FileDataStoreS3, + PutObjectInput, + ObjectState, +) + +# pylint: disable=redefined-outer-name + + +@pytest.fixture +def payload(): + return Payload( + attributes={"attr1": "value1", "attr2": 2}, + stores=[ + DataStore( + name="store1", + id="store_id1", + parameters={"param1": "value1", "root": "store1_root"}, + store_type=StoreType.S3, + ds_profile="profile1", + ), + DataStore( + name="store2", + id="store_id2", + parameters={"param2": "value2", "root": "store2_root"}, + store_type=StoreType.S3, + ds_profile="profile2", + ), + ], + inputs=[ + DataSource( + name="input1", + id="input_id1", + store_name="store1", + paths=["path/to/data1"], + ), + DataSource( + name="input2", + id="input_id2", + store_name="store2", + paths=["path/to/data2"], + ), + ], + outputs=[ + DataSource( + name="output1", + id="output_id1", + store_name="store1", + paths=["path/to/output1"], + ), + DataSource( + name="output2", + id="output_id2", + store_name="store2", + paths=["path/to/output2"], + ), + ], + ) + + +@pytest.fixture +def plugin_manager(payload, monkeypatch): + # CCStore Env vars + monkeypatch.setenv( + environment_variables.CC_PROFILE + + "_" + + environment_variables.AWS_ACCESS_KEY_ID, + "my_access_key", + ) + monkeypatch.setenv( + environment_variables.CC_PROFILE + + "_" + + environment_variables.AWS_SECRET_ACCESS_KEY, + "my_secret_key", + ) + monkeypatch.setenv( + environment_variables.CC_PROFILE + + "_" + + environment_variables.AWS_DEFAULT_REGION, + "us-west-2", + ) + monkeypatch.setenv( + environment_variables.CC_PROFILE + "_" + environment_variables.AWS_S3_BUCKET, + "my_bucket", + ) + monkeypatch.setenv(environment_variables.CC_MANIFEST_ID, "my_manifest") + monkeypatch.setenv(environment_variables.CC_ROOT, "my_root") + monkeypatch.setenv( + environment_variables.CC_PROFILE + "_" + environment_variables.S3_MOCK, "True" + ) + ## plugin env vars + monkeypatch.setenv(environment_variables.CC_PLUGIN_DEFINITION, "test_plugin") + monkeypatch.setenv( + environment_variables.CC_EVENT_NUMBER, + "000", + ) + ## profile1 env vars + monkeypatch.setenv( + "profile1" + "_" + environment_variables.AWS_ACCESS_KEY_ID, + "my_access_key", + ) + monkeypatch.setenv( + "profile1" + "_" + environment_variables.AWS_SECRET_ACCESS_KEY, + "my_secret_key", + ) + monkeypatch.setenv( + "profile1" + "_" + environment_variables.AWS_DEFAULT_REGION, + "us-west-2", + ) + monkeypatch.setenv( + "profile1" + "_" + environment_variables.AWS_S3_BUCKET, + "my_bucket", + ) + monkeypatch.setenv("profile1" + "_" + environment_variables.S3_MOCK, "True") + ## profile2 env vars + monkeypatch.setenv( + "profile2" + "_" + 
environment_variables.AWS_ACCESS_KEY_ID, + "my_access_key", + ) + monkeypatch.setenv( + "profile2" + "_" + environment_variables.AWS_SECRET_ACCESS_KEY, + "my_secret_key", + ) + monkeypatch.setenv( + "profile2" + "_" + environment_variables.AWS_DEFAULT_REGION, + "us-west-2", + ) + monkeypatch.setenv( + "profile2" + "_" + environment_variables.AWS_S3_BUCKET, + "my_bucket", + ) + monkeypatch.setenv("profile2" + "_" + environment_variables.S3_MOCK, "True") + with mock_s3(): + # create a mock S3 client + s3_client = boto3.client("s3") + # create a mock S3 bucket + s3_client.create_bucket(Bucket="my_bucket") + # upload the payload to the bucket + store = CCStoreS3() + store.set_payload(payload) + data1_input = PutObjectInput( + file_name="data1", + file_extension="", + dest_store_type=StoreType.S3, + object_state=ObjectState.MEMORY, + data=b"test data 1", + source_root_path="", + dest_root_path="store1_root/path/to", + ) + data2_input = PutObjectInput( + file_name="data2", + file_extension="", + dest_store_type=StoreType.S3, + object_state=ObjectState.MEMORY, + data=b"test data 2", + source_root_path="", + dest_root_path="store2_root/path/to", + ) + store.put_object(data1_input) + store.put_object(data2_input) + yield PluginManager() + # cleanup mock s3 bucket + response = s3_client.list_objects_v2(Bucket="my_bucket") + if "Contents" in response: + delete_keys = [{"Key": obj["Key"]} for obj in response["Contents"]] + s3_client.delete_objects( + Bucket="my_bucket", Delete={"Objects": delete_keys} + ) + s3_client.delete_bucket(Bucket="my_bucket") + + +def test_get_payload(plugin_manager, payload): + test_payload = plugin_manager.get_payload() + assert test_payload.attributes == payload.attributes + assert test_payload.inputs == payload.inputs + assert test_payload.outputs == payload.outputs + + +def test_get_file_store(plugin_manager): + assert isinstance(plugin_manager.get_file_store("store1"), FileDataStoreS3) + + +def test_get_store(plugin_manager): + assert plugin_manager.get_store("store2").name == "store2" + + +def test_get_input_data_source(plugin_manager): + assert plugin_manager.get_input_data_source("input1").name == "input1" + + +def test_get_output_data_source(plugin_manager): + assert plugin_manager.get_output_data_source("output1").name == "output1" + + +def test_get_input_data_sources(plugin_manager): + assert plugin_manager.get_input_data_sources()[0].name == "input1" + + +def test_get_output_data_sources(plugin_manager): + assert plugin_manager.get_output_data_sources()[0].name == "output1" + + +def test_get_file(plugin_manager): + data_source = plugin_manager.get_input_data_source("input1") + assert plugin_manager.get_file(data_source, 0) == b"test data 1" + assert plugin_manager.get_file(data_source, 1) is None + + +def test_put_file(plugin_manager): + data_source = plugin_manager.get_output_data_source("output1") + assert plugin_manager.put_file(b"output data 1", data_source, 0) is True + + +def test_file_writer(plugin_manager): + data_source = plugin_manager.get_output_data_source("output2") + assert ( + plugin_manager.file_writer(io.BytesIO(b"output data 2"), data_source, 0) is True + ) + + +def test_file_reader(plugin_manager): + data_source = plugin_manager.get_output_data_source("output2") + plugin_manager.file_writer(io.BytesIO(b"output data 2"), data_source, 0) + assert plugin_manager.file_reader(data_source, 0).getvalue() == b"output data 2" + + +def test_file_reader_by_name(plugin_manager): + data_source = plugin_manager.get_input_data_source("input1") + 
plugin_manager.file_writer(io.BytesIO(b"input data 1"), data_source, 0) + assert plugin_manager.file_reader_by_name("input1", 0).getvalue() == b"input data 1" + + +def test_event_number(plugin_manager): + assert isinstance(plugin_manager.event_number(), int) + + +def test_find_data_source(plugin_manager): + assert ( + plugin_manager.find_data_source( + "input1", plugin_manager.get_input_data_sources() + ).name + == "input1" + ) + + +def test_find_data_store(plugin_manager): + assert plugin_manager.find_data_store("store1").name == "store1" diff --git a/tests/test_pull_object_input.py b/tests/test_pull_object_input.py index 021eae1..68f6448 100644 --- a/tests/test_pull_object_input.py +++ b/tests/test_pull_object_input.py @@ -2,6 +2,8 @@ from cc_sdk.pull_object_input import PullObjectInput from cc_sdk.store_type import StoreType +# pylint: disable=redefined-outer-name + @pytest.fixture def pull_object_input(): diff --git a/tests/test_put_object_input.py b/tests/test_put_object_input.py index 66f1bb9..db36b92 100644 --- a/tests/test_put_object_input.py +++ b/tests/test_put_object_input.py @@ -3,6 +3,8 @@ from cc_sdk.store_type import StoreType from cc_sdk.object_state import ObjectState +# pylint: disable=redefined-outer-name + @pytest.fixture def put_object_input(): @@ -10,10 +12,10 @@ def put_object_input(): file_name="test", file_extension="txt", dest_store_type=StoreType.S3, - object_state=ObjectState.LocalDisk, + object_state=ObjectState.LOCAL_DISK, data=b"test data", - source_path="/path/to/source", - dest_path="/path/to/destination", + source_root_path="/path/to/source", + dest_root_path="/path/to/destination", ) @@ -21,10 +23,10 @@ def test_getters(put_object_input): assert put_object_input.file_name == "test" assert put_object_input.file_extension == "txt" assert put_object_input.dest_store_type == StoreType.S3 - assert put_object_input.object_state == ObjectState.LocalDisk + assert put_object_input.object_state == ObjectState.LOCAL_DISK assert put_object_input.data == b"test data" - assert put_object_input.source_path == "/path/to/source" - assert put_object_input.dest_path == "/path/to/destination" + assert put_object_input.source_root_path == "/path/to/source" + assert put_object_input.dest_root_path == "/path/to/destination" def test_setters(put_object_input): @@ -35,10 +37,10 @@ def test_setters(put_object_input): with pytest.raises(AttributeError): put_object_input.dest_store_type = StoreType.EBS with pytest.raises(AttributeError): - put_object_input.object_state = ObjectState.Memory + put_object_input.object_state = ObjectState.MEMORY with pytest.raises(AttributeError): put_object_input.data = b"new data" with pytest.raises(AttributeError): - put_object_input.source_path = "/new/source/path" + put_object_input.source_root_path = "/new/source/path" with pytest.raises(AttributeError): - put_object_input.dest_path = "/new/destination/path" + put_object_input.dest_root_path = "/new/destination/path" diff --git a/tests/test_seed_set.py b/tests/test_seed_set.py index 180718b..5a2b986 100644 --- a/tests/test_seed_set.py +++ b/tests/test_seed_set.py @@ -1,7 +1,9 @@ -from cc_sdk.seed_set import SeedSet import json import pytest from attr.exceptions import FrozenInstanceError +from cc_sdk.seed_set import SeedSet + +# pylint: disable=redefined-outer-name @pytest.fixture diff --git a/tests/test_status.py b/tests/test_status.py index 37f7adb..9d97fa1 100644 --- a/tests/test_status.py +++ b/tests/test_status.py @@ -1,7 +1,9 @@ -from cc_sdk.status import Status, StatusLevel import json 
import pytest from attr.exceptions import FrozenInstanceError +from cc_sdk.status import Status, StatusLevel + +# pylint: disable=redefined-outer-name @pytest.fixture From 4072f315b736d7611ee91396b0928b236573d25a Mon Sep 17 00:00:00 2001 From: Brendan Barnes Date: Sun, 19 Mar 2023 22:25:35 +0000 Subject: [PATCH 04/12] save --- src/cc_sdk/cc_store_s3.py | 2 +- src/cc_sdk/file_data_store_s3.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/cc_sdk/cc_store_s3.py b/src/cc_sdk/cc_store_s3.py index 456a927..77e74db 100644 --- a/src/cc_sdk/cc_store_s3.py +++ b/src/cc_sdk/cc_store_s3.py @@ -24,13 +24,13 @@ class CCStoreS3(CCStore): - CC_AWS_SECRET_ACCESS_KEY: AWS credentials - CC_AWS_DEFAULT_REGION: the region the bucket is in - CC_AWS_S3_BUCKET: the bucket name to use - - CC_S3_ENDPOINT: the AWS S3 endpoint for the bucket - CC_EVENT_NUMBER: CC event number to use - CC_ROOT: The root prefix on S3 where the payload will be stored in: s3://///payload Optional: - CC_S3_MOCK: True or False. If true, bucket will be mocked + - CC_S3_ENDPOINT: the AWS S3 endpoint for the bucket - CC_S3_DISABLE_SSL: True or False. If true, bucket will not use SSL - CC_S3_FORCE_PATH_STYLE: True or False. If true, bucket will force path style """ diff --git a/src/cc_sdk/file_data_store_s3.py b/src/cc_sdk/file_data_store_s3.py index 55da3a3..a5d26fc 100644 --- a/src/cc_sdk/file_data_store_s3.py +++ b/src/cc_sdk/file_data_store_s3.py @@ -68,7 +68,8 @@ def put(self, data: io.BytesIO, path: str) -> bool: return self.upload_to_s3(self.post_fix + "/" + path, data.getvalue()) def delete(self, path: str) -> bool: - key = self.post_fix + "/" + path + # standard file separators, replace \ with / + key = os.path.join(self.post_fix, path).replace("\\", "/") if self.aws_s3 is not None: self.aws_s3.delete_object(Bucket=self.bucket, Key=key) return True From ac84d9d560ca30df847fd0dee124f1d3f6801f59 Mon Sep 17 00:00:00 2001 From: Brendan Barnes Date: Sun, 19 Mar 2023 22:38:17 +0000 Subject: [PATCH 05/12] fix workflow syntax error --- .github/workflows/lint_and_test.yml | 48 ++++++++++++++--------------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/.github/workflows/lint_and_test.yml b/.github/workflows/lint_and_test.yml index 36683d5..f8ab610 100644 --- a/.github/workflows/lint_and_test.yml +++ b/.github/workflows/lint_and_test.yml @@ -15,27 +15,27 @@ jobs: python-version: ["3.11"] steps: - - uses: actions/checkout@v3 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 - with: - python-version: ${{ matrix.python-version }} - - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install -r requirements.txt - - - name: Run pytest and pylint - run: | - pytest - pylint --fail-under=9 src/cc_sdk/*.py - - - name: Stop the build if there are any linting errors or test failures - run: | - if [ $? -eq 0 ]; then - echo "All tests and linting passed." - else - echo "There are test failures or linting errors. Aborting the build." >&2 - exit 1 - fi + - uses: actions/checkout@v3 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt + + - name: Run pytest and pylint + run: | + pytest + pylint --fail-under=9 src/cc_sdk/*.py + + - name: Stop the build if there are any linting errors or test failures + run: | + if [ $? 
-eq 0 ]; then + echo "All tests and linting passed." + else + echo "There are test failures or linting errors. Aborting the build." >&2 + exit 1 + fi From 810f4d542f7c3bc1e29959c2615d720c8e82606c Mon Sep 17 00:00:00 2001 From: Brendan Barnes Date: Sun, 19 Mar 2023 22:39:48 +0000 Subject: [PATCH 06/12] fix workflow file syntax error --- .github/workflows/lint_and_test.yml | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/.github/workflows/lint_and_test.yml b/.github/workflows/lint_and_test.yml index f8ab610..2005a45 100644 --- a/.github/workflows/lint_and_test.yml +++ b/.github/workflows/lint_and_test.yml @@ -16,11 +16,11 @@ jobs: steps: - uses: actions/checkout@v3 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 - with: - python-version: ${{ matrix.python-version }} - + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies run: | python -m pip install --upgrade pip @@ -39,3 +39,4 @@ jobs: echo "There are test failures or linting errors. Aborting the build." >&2 exit 1 fi + \ No newline at end of file From 6ee8a98de986a92cc3473dba8cb7e29c1ee97790 Mon Sep 17 00:00:00 2001 From: Brendan Barnes Date: Sun, 19 Mar 2023 22:41:09 +0000 Subject: [PATCH 07/12] fix workflow file --- .github/workflows/lint_and_test.yml | 7 +++---- requirements.txt | 9 +++++++++ 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/.github/workflows/lint_and_test.yml b/.github/workflows/lint_and_test.yml index 2005a45..c61f021 100644 --- a/.github/workflows/lint_and_test.yml +++ b/.github/workflows/lint_and_test.yml @@ -12,7 +12,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ["3.11"] + python-version: ["3.10", "3.11"] steps: - uses: actions/checkout@v3 @@ -25,12 +25,12 @@ jobs: run: | python -m pip install --upgrade pip pip install -r requirements.txt - + - name: Run pytest and pylint run: | pytest pylint --fail-under=9 src/cc_sdk/*.py - + - name: Stop the build if there are any linting errors or test failures run: | if [ $? -eq 0 ]; then @@ -39,4 +39,3 @@ jobs: echo "There are test failures or linting errors. Aborting the build." 
>&2 exit 1 fi - \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 0a82885..962ddf3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ +astroid==2.15.0 attrs==22.2.0 boto3==1.26.93 botocore==1.29.93 @@ -5,15 +6,21 @@ certifi==2022.12.7 cffi==1.15.1 charset-normalizer==3.1.0 cryptography==39.0.2 +dill==0.3.6 idna==3.4 iniconfig==2.0.0 +isort==5.12.0 Jinja2==3.1.2 jmespath==1.0.1 +lazy-object-proxy==1.9.0 MarkupSafe==2.1.2 +mccabe==0.7.0 moto==4.1.4 packaging==23.0 +platformdirs==3.1.1 pluggy==1.0.0 pycparser==2.21 +pylint==2.17.0 pytest==7.2.2 python-dateutil==2.8.2 PyYAML==6.0 @@ -21,7 +28,9 @@ requests==2.28.2 responses==0.23.1 s3transfer==0.6.0 six==1.16.0 +tomlkit==0.11.6 types-PyYAML==6.0.12.8 urllib3==1.26.15 Werkzeug==2.2.3 +wrapt==1.15.0 xmltodict==0.13.0 From 83d3eecaad135d5e143810971a599c40b3f83071 Mon Sep 17 00:00:00 2001 From: Brendan Barnes Date: Tue, 4 Apr 2023 12:20:09 +0000 Subject: [PATCH 08/12] convert StoreType enum to string --- src/cc_sdk/store_type.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/cc_sdk/store_type.py b/src/cc_sdk/store_type.py index 577a22d..830abbc 100644 --- a/src/cc_sdk/store_type.py +++ b/src/cc_sdk/store_type.py @@ -18,7 +18,7 @@ class StoreType(Enum): readability and prevent errors when deserializing. """ - S3 = 0 - WS = 1 - RDBMS = 2 - EBS = 3 + S3 = "S3" + WS = "WS" + RDBMS = "RDBMS" + EBS = "EBS" From 97d0d3dd7652714622e2ae3498c64e7e34e71118 Mon Sep 17 00:00:00 2001 From: Brendan Barnes Date: Tue, 4 Apr 2023 12:31:20 +0000 Subject: [PATCH 09/12] change ObjectState enum to string --- src/cc_sdk/object_state.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/cc_sdk/object_state.py b/src/cc_sdk/object_state.py index 77600a0..eef36a5 100644 --- a/src/cc_sdk/object_state.py +++ b/src/cc_sdk/object_state.py @@ -17,6 +17,6 @@ class ObjectState(Enum): when deserializing. """ - MEMORY = 0 - LOCAL_DISK = 1 - # RemoteDisk = 2 + MEMORY = "MEMORY" + LOCAL_DISK = "LOCAL_DISK" + # REMOTE_DISK = "REMOTE_DISK" From 65102c9bc04d4abc41d29ac5d6e7cd507c7e9366 Mon Sep 17 00:00:00 2001 From: Brendan Barnes Date: Tue, 4 Apr 2023 12:35:23 +0000 Subject: [PATCH 10/12] change StatusLevel enum to string --- src/cc_sdk/status.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/cc_sdk/status.py b/src/cc_sdk/status.py index d50526b..c87319a 100644 --- a/src/cc_sdk/status.py +++ b/src/cc_sdk/status.py @@ -19,9 +19,9 @@ class StatusLevel(Enum): when deserializing. 
""" - COMPUTING = 0 - FAILED = 1 - SUCCEEDED = 2 + COMPUTING = "COMPUTING" + FAILED = "FAILED" + SUCCEEDED = "SUCCEEDED" def convert_status_level(_, fields): From 8fd1f6eae849f63c887731ad0b4e0bd0dbb1541d Mon Sep 17 00:00:00 2001 From: Brendan Barnes Date: Tue, 4 Apr 2023 12:36:48 +0000 Subject: [PATCH 11/12] update private method names to use leading "_" --- src/cc_sdk/cc_store_s3.py | 24 ++++++++++++------------ src/cc_sdk/file_data_store_s3.py | 15 ++++++++------- src/cc_sdk/payload.py | 2 +- src/cc_sdk/plugin_manager.py | 20 +++++++------------- tests/test_cc_store_s3.py | 2 +- tests/test_plugin_manager.py | 4 ++-- 6 files changed, 31 insertions(+), 36 deletions(-) diff --git a/src/cc_sdk/cc_store_s3.py b/src/cc_sdk/cc_store_s3.py index 77e74db..7227bfc 100644 --- a/src/cc_sdk/cc_store_s3.py +++ b/src/cc_sdk/cc_store_s3.py @@ -216,7 +216,7 @@ def put_object(self, put_input: PutObjectInput) -> bool: try: with open(local_path, "rb") as the_file: data = the_file.read() - self.upload_to_s3(remote_path, data) + self._upload_to_s3(remote_path, data) except FileNotFoundError as exc: raise FileNotFoundError from exc except IOError as exc: @@ -224,7 +224,7 @@ def put_object(self, put_input: PutObjectInput) -> bool: return True case ObjectState.MEMORY: - self.upload_to_s3(remote_path, put_input.data) + self._upload_to_s3(remote_path, put_input.data) return True case _: return False @@ -249,8 +249,8 @@ def pull_object(self, pull_input: PullObjectInput) -> bool: remote_path += "." + pull_input.file_extension local_path += "." + pull_input.file_extension try: - data = self.download_bytes_from_s3(remote_path) - self.write_input_stream_to_disk(io.BytesIO(data), local_path) + data = self._download_bytes_from_s3(remote_path) + self._write_input_stream_to_disk(io.BytesIO(data), local_path) except ClientError: return False except IOError: @@ -274,7 +274,7 @@ def get_object(self, get_input: GetObjectInput) -> bytes: # add extensions if used remote_path += "." 
+ get_input.file_extension try: - return self.download_bytes_from_s3(remote_path) + return self._download_bytes_from_s3(remote_path) except ClientError as exc: raise exc @@ -290,8 +290,8 @@ def get_payload(self) -> Payload: self.root, self.manifest_id, constants.PAYLOAD_FILE_NAME ).replace("\\", "/") try: - body = self.download_bytes_from_s3(path) - return self.read_json_model_payload_from_bytes(body) + body = self._download_bytes_from_s3(path) + return self._read_json_model_payload_from_bytes(body) except ClientError as exc: raise exc @@ -308,20 +308,20 @@ def set_payload(self, payload: Payload) -> bool: self.root, self.manifest_id, constants.PAYLOAD_FILE_NAME ).replace("\\", "/") try: - self.upload_to_s3(path, payload.serialize().encode()) + self._upload_to_s3(path, payload.serialize().encode()) return True except ClientError: return False @staticmethod - def read_json_model_payload_from_bytes(data: bytes) -> Payload: + def _read_json_model_payload_from_bytes(data: bytes) -> Payload: """Helper method to decode the JSON to a Payload object""" try: return Payload.from_json(data.decode("utf-8")) except Exception as exc: raise exc - def write_input_stream_to_disk( + def _write_input_stream_to_disk( self, input_stream: io.BytesIO, output_destination: str ) -> None: directory = os.path.dirname(output_destination) @@ -331,13 +331,13 @@ def write_input_stream_to_disk( with open(output_destination, "wb") as output_file: output_file.write(bytes_data) - def upload_to_s3(self, object_key: str, file_bytes: bytes) -> None: + def _upload_to_s3(self, object_key: str, file_bytes: bytes) -> None: if self.aws_s3 is not None: self.aws_s3.put_object(Bucket=self.bucket, Key=object_key, Body=file_bytes) else: raise RuntimeError("AWS config not set.") - def download_bytes_from_s3(self, object_key: str) -> bytes: + def _download_bytes_from_s3(self, object_key: str) -> bytes: if self.aws_s3 is not None: response = self.aws_s3.get_object(Bucket=self.bucket, Key=object_key) file_bytes = response["Body"].read() diff --git a/src/cc_sdk/file_data_store_s3.py b/src/cc_sdk/file_data_store_s3.py index a5d26fc..01cc54e 100644 --- a/src/cc_sdk/file_data_store_s3.py +++ b/src/cc_sdk/file_data_store_s3.py @@ -39,10 +39,11 @@ def _initialize(self, data_store: DataStore): # TODO, throw error? print("Missing S3 Root Parameter. 
Cannot create the store.") - def get_object(self, path: str): - return self.download_bytes_from_s3(path) + def _get_object(self, path: str): + """Alias for _download_bytes_from_s3""" + return self._download_bytes_from_s3(path) - def download_bytes_from_s3(self, object_key: str) -> bytes: + def _download_bytes_from_s3(self, object_key: str) -> bytes: # standard file separators, replace \ with / key = os.path.join(self.post_fix, object_key).replace("\\", "/") if self.aws_s3 is not None: @@ -51,21 +52,21 @@ def download_bytes_from_s3(self, object_key: str) -> bytes: return file_bytes raise RuntimeError("AWS config not set.") - def upload_to_s3(self, object_key: str, file_bytes: bytes) -> bool: + def _upload_to_s3(self, object_key: str, file_bytes: bytes) -> bool: if self.aws_s3 is not None: self.aws_s3.put_object(Bucket=self.bucket, Key=object_key, Body=file_bytes) return True return False def copy(self, dest_store: FileDataStore, src_path: str, dest_path: str) -> bool: - data = self.get_object(src_path) + data = self._get_object(src_path) return dest_store.put(io.BytesIO(data), dest_path) def get(self, path: str) -> io.BytesIO: - return io.BytesIO(self.get_object(path)) + return io.BytesIO(self._get_object(path)) def put(self, data: io.BytesIO, path: str) -> bool: - return self.upload_to_s3(self.post_fix + "/" + path, data.getvalue()) + return self._upload_to_s3(self.post_fix + "/" + path, data.getvalue()) def delete(self, path: str) -> bool: # standard file separators, replace \ with / diff --git a/src/cc_sdk/payload.py b/src/cc_sdk/payload.py index 9154922..34307bc 100644 --- a/src/cc_sdk/payload.py +++ b/src/cc_sdk/payload.py @@ -98,7 +98,7 @@ def from_json(json_str: str): JSONDecodeError: If the JSON string cannot be decoded. """ - # TODO, should we expect camelCase for attribute names? + # TODO should we expect camelCase for attribute names? json_dict = json.loads(json_str) stores = [DataStore(**store) for store in json_dict["stores"]] inputs = [DataSource(**input) for input in json_dict["inputs"]] diff --git a/src/cc_sdk/plugin_manager.py b/src/cc_sdk/plugin_manager.py index 7288935..1ff6a4e 100644 --- a/src/cc_sdk/plugin_manager.py +++ b/src/cc_sdk/plugin_manager.py @@ -77,12 +77,6 @@ class PluginManager: event_number(self) -> int: Returns the event number associated with the current instance. - - find_data_source(self, name: str, data_sources: list[DataSource]) -> DataSource | None: - Finds and returns the data source object with the given name from the specified list of data sources. - - find_data_store(self, name: str) -> DataStore | None: - Finds and returns the data store object with the given name from the payload stores. 
""" def __init__(self): @@ -183,7 +177,7 @@ def get_payload(self) -> Payload: return self._payload def get_file_store(self, store_name: str) -> FileDataStore: - data_store = self.find_data_store(store_name) + data_store = self._find_data_store(store_name) if data_store is None: raise RuntimeError(f"DataStore with name '{store_name}' was not found.") if isinstance(data_store.session, FileDataStore): @@ -191,19 +185,19 @@ def get_file_store(self, store_name: str) -> FileDataStore: raise RuntimeError("DataStore session object is invalid.") def get_store(self, store_name: str) -> DataStore: - data_store = self.find_data_store(store_name) + data_store = self._find_data_store(store_name) if data_store is None: raise RuntimeError(f"DataStore with name '{store_name}' was not found") return data_store def get_input_data_source(self, name: str) -> DataSource: - data_source = self.find_data_source(name, self.get_input_data_sources()) + data_source = self._find_data_source(name, self.get_input_data_sources()) if data_source is None: raise RuntimeError(f"Input DataSource with name '{name}' was not found") return data_source def get_output_data_source(self, name: str) -> DataSource: - data_source = self.find_data_source(name, self.get_output_data_sources()) + data_source = self._find_data_source(name, self.get_output_data_sources()) if data_source is None: raise RuntimeError(f"Output DataSource with name '{name}' was not found") return data_source @@ -243,7 +237,7 @@ def file_reader(self, data_source: DataSource, path_index: int) -> io.BytesIO: return store.get(data_source.paths[path_index]) def file_reader_by_name(self, data_source_name: str, path_index: int) -> io.BytesIO: - data_source = self.find_data_source( + data_source = self._find_data_source( data_source_name, self.get_input_data_sources() ) if data_source is None: @@ -273,7 +267,7 @@ def event_number(self) -> int: event_number = int(val) return event_number - def find_data_source( + def _find_data_source( self, name: str, data_sources: list[DataSource] ) -> DataSource | None: for data_source in data_sources: @@ -281,7 +275,7 @@ def find_data_source( return data_source return None - def find_data_store(self, name: str) -> DataStore | None: + def _find_data_store(self, name: str) -> DataStore | None: # pylint can't determine attributes type # pylint: disable=not-an-iterable for data_store in self._payload.stores: diff --git a/tests/test_cc_store_s3.py b/tests/test_cc_store_s3.py index d2dde28..74af98e 100644 --- a/tests/test_cc_store_s3.py +++ b/tests/test_cc_store_s3.py @@ -385,7 +385,7 @@ def test_read_json_model_payload_from_bytes(payload): "store_name": "store2", "paths": ["/path/to/data2"]}], "outputs": [{"name": "output1", "id": \ "output_id1", "store_name": "store1", "paths": ["/path/to/output1"]}, {"name": "output2", \ "id": "output_id2", "store_name": "store2", "paths": ["/path/to/output2"]}]}' - assert payload == CCStoreS3.read_json_model_payload_from_bytes(payload_bytes) + assert payload == CCStoreS3._read_json_model_payload_from_bytes(payload_bytes) def test_set_payload(payload, store): diff --git a/tests/test_plugin_manager.py b/tests/test_plugin_manager.py index 6effa7f..5a616e6 100644 --- a/tests/test_plugin_manager.py +++ b/tests/test_plugin_manager.py @@ -250,7 +250,7 @@ def test_event_number(plugin_manager): def test_find_data_source(plugin_manager): assert ( - plugin_manager.find_data_source( + plugin_manager._find_data_source( "input1", plugin_manager.get_input_data_sources() ).name == "input1" @@ -258,4 +258,4 @@ def 
test_find_data_source(plugin_manager): def test_find_data_store(plugin_manager): - assert plugin_manager.find_data_store("store1").name == "store1" + assert plugin_manager._find_data_store("store1").name == "store1" From 11ee2bd8399b086f4f6dbea5a8a8dd392591eaa3 Mon Sep 17 00:00:00 2001 From: Brendan Barnes Date: Thu, 18 May 2023 15:37:10 +0000 Subject: [PATCH 12/12] raise error for unimplemented store types --- src/cc_sdk/plugin_manager.py | 9 ++- tests/test_plugin_manager.py | 139 +++++++++++++++++++++++++++++++++++ 2 files changed, 146 insertions(+), 2 deletions(-) diff --git a/src/cc_sdk/plugin_manager.py b/src/cc_sdk/plugin_manager.py index 1ff6a4e..fc16fe7 100644 --- a/src/cc_sdk/plugin_manager.py +++ b/src/cc_sdk/plugin_manager.py @@ -99,14 +99,19 @@ def __init__(self): store.session = FileDataStoreS3(store) case StoreType.WS: # TODO - pass + raise NotImplementedError("Payload StoreType 'WS' not implemented") case StoreType.RDBMS: # TODO - pass + raise NotImplementedError("Payload StoreType 'RDBMS' not implemented") + case StoreType.EBS: + # TODO + raise NotImplementedError("Payload StoreType 'EBS' not implemented") case _: raise RuntimeError("Payload contains invalid StoreType.") except EnvironmentError as exc: raise exc + except NotImplementedError as exc: + raise exc except Exception as exc: raise RuntimeError( f"Could not acquire payload file. ERROR: {str(exc)}" diff --git a/tests/test_plugin_manager.py b/tests/test_plugin_manager.py index 5a616e6..7e206f7 100644 --- a/tests/test_plugin_manager.py +++ b/tests/test_plugin_manager.py @@ -259,3 +259,142 @@ def test_find_data_source(plugin_manager): def test_find_data_store(plugin_manager): assert plugin_manager._find_data_store("store1").name == "store1" + + +def test_unimplemented_store_types(monkeypatch): + # CCStore Env vars + monkeypatch.setenv( + environment_variables.CC_PROFILE + + "_" + + environment_variables.AWS_ACCESS_KEY_ID, + "my_access_key", + ) + monkeypatch.setenv( + environment_variables.CC_PROFILE + + "_" + + environment_variables.AWS_SECRET_ACCESS_KEY, + "my_secret_key", + ) + monkeypatch.setenv( + environment_variables.CC_PROFILE + + "_" + + environment_variables.AWS_DEFAULT_REGION, + "us-west-2", + ) + monkeypatch.setenv( + environment_variables.CC_PROFILE + "_" + environment_variables.AWS_S3_BUCKET, + "my_bucket", + ) + monkeypatch.setenv(environment_variables.CC_MANIFEST_ID, "my_manifest") + monkeypatch.setenv(environment_variables.CC_ROOT, "my_root") + monkeypatch.setenv( + environment_variables.CC_PROFILE + "_" + environment_variables.S3_MOCK, "True" + ) + ## plugin env vars + monkeypatch.setenv(environment_variables.CC_PLUGIN_DEFINITION, "test_plugin") + monkeypatch.setenv( + environment_variables.CC_EVENT_NUMBER, + "000", + ) + ## profile1 env vars + monkeypatch.setenv( + "profile1" + "_" + environment_variables.AWS_ACCESS_KEY_ID, + "my_access_key", + ) + monkeypatch.setenv( + "profile1" + "_" + environment_variables.AWS_SECRET_ACCESS_KEY, + "my_secret_key", + ) + monkeypatch.setenv( + "profile1" + "_" + environment_variables.AWS_DEFAULT_REGION, + "us-west-2", + ) + monkeypatch.setenv( + "profile1" + "_" + environment_variables.AWS_S3_BUCKET, + "my_bucket", + ) + monkeypatch.setenv("profile1" + "_" + environment_variables.S3_MOCK, "True") + ## profile2 env vars + monkeypatch.setenv( + "profile2" + "_" + environment_variables.AWS_ACCESS_KEY_ID, + "my_access_key", + ) + monkeypatch.setenv( + "profile2" + "_" + environment_variables.AWS_SECRET_ACCESS_KEY, + "my_secret_key", + ) + monkeypatch.setenv( + 
"profile2" + "_" + environment_variables.AWS_DEFAULT_REGION, + "us-west-2", + ) + monkeypatch.setenv( + "profile2" + "_" + environment_variables.AWS_S3_BUCKET, + "my_bucket", + ) + monkeypatch.setenv("profile2" + "_" + environment_variables.S3_MOCK, "True") + with mock_s3(): + # create a mock S3 client + s3_client = boto3.client("s3") + # create a mock S3 bucket + s3_client.create_bucket(Bucket="my_bucket") + # upload the payload to the bucket + store = CCStoreS3() + # test WS store payload + store.set_payload( Payload( + attributes={}, + stores=[ + DataStore( + name="store1", + id="store_id1", + parameters={"param1": "value1", "root": "store1_root"}, + store_type=StoreType.WS, + ds_profile="profile1", + ) + ], + inputs=[], + outputs=[], + )) + with pytest.raises(NotImplementedError): + PluginManager() + # test RDBMS store payload + store.set_payload( Payload( + attributes={}, + stores=[ + DataStore( + name="store1", + id="store_id1", + parameters={"param1": "value1", "root": "store1_root"}, + store_type=StoreType.RDBMS, + ds_profile="profile1", + ) + ], + inputs=[], + outputs=[], + )) + with pytest.raises(NotImplementedError): + PluginManager() + # test EBS store payload + store.set_payload( Payload( + attributes={}, + stores=[ + DataStore( + name="store1", + id="store_id1", + parameters={"param1": "value1", "root": "store1_root"}, + store_type=StoreType.EBS, + ds_profile="profile1", + ) + ], + inputs=[], + outputs=[], + )) + with pytest.raises(NotImplementedError): + PluginManager() + # cleanup mock s3 bucket + response = s3_client.list_objects_v2(Bucket="my_bucket") + if "Contents" in response: + delete_keys = [{"Key": obj["Key"]} for obj in response["Contents"]] + s3_client.delete_objects( + Bucket="my_bucket", Delete={"Objects": delete_keys} + ) + s3_client.delete_bucket(Bucket="my_bucket")