From 27608913d5db08bcb2182b8b784d68667166d4ae Mon Sep 17 00:00:00 2001 From: Alexey Prutskov Date: Tue, 16 Nov 2021 14:01:44 +0300 Subject: [PATCH] FIX-#26: Reduce memory consumption in `Python` backend Signed-off-by: Alexey Prutskov --- docs/developer/architecture.rst | 3 +- .../unidist/core/backends/python/core/api.rst | 22 +++-- .../backends/python/core/object_store.rst | 17 ---- unidist/core/backends/common/data_id.py | 5 +- unidist/core/backends/python/backend.py | 8 +- unidist/core/backends/python/core/__init__.py | 4 +- unidist/core/backends/python/core/api.py | 52 ++++++------ .../core/backends/python/core/object_store.py | 85 ------------------- unidist/core/backends/python/utils.py | 18 ---- unidist/core/base/utils.py | 2 - 10 files changed, 46 insertions(+), 170 deletions(-) delete mode 100644 docs/flow/unidist/core/backends/python/core/object_store.rst delete mode 100644 unidist/core/backends/python/core/object_store.py delete mode 100644 unidist/core/backends/python/utils.py diff --git a/docs/developer/architecture.rst b/docs/developer/architecture.rst index 0e714f313..a47c440bf 100644 --- a/docs/developer/architecture.rst +++ b/docs/developer/architecture.rst @@ -80,8 +80,7 @@ details just pick module you are interested in. │ │ └─── :doc:`remote_function ` │ ├───python │ | ├───core - │ │ │ ├─── :doc:`api ` - │ │ │ └─── :doc:`object_store ` + │ │ │ └─── :doc:`api ` │ │ ├─── :doc:`actor ` │ │ ├─── :doc:`backend ` │ │ └─── :doc:`remote_function ` diff --git a/docs/flow/unidist/core/backends/python/core/api.rst b/docs/flow/unidist/core/backends/python/core/api.rst index a8a8c1862..50771d41f 100644 --- a/docs/flow/unidist/core/backends/python/core/api.rst +++ b/docs/flow/unidist/core/backends/python/core/api.rst @@ -8,25 +8,23 @@ Python High-level API """"""""""""""""""""" -Python API module provides high-level functions for initialization of the backend, -for working with object storage and submitting tasks. 
+Python API module provides high-level functions for wrapping/unwrapping objects and +submitting tasks. API === -Function :py:func:`~unidist.core.backends.python.core.api.init` creates an instance of singleton +Functions :py:func:`~unidist.core.backends.python.core.api.init` creates an instance of singleton class :py:class:`~unidist.core.backends.python.core.object_store.ObjectStore`. -.. autofunction:: unidist.core.backends.python.core.api.init +Functions :py:func:`~unidist.core.backends.python.core.api.unwrap` and +:py:func:`~unidist.core.backends.python.core.api.wrap` are responsible for +unwrapping/wrapping, respectively, objects from/in :py:class:`~unidist.core.backends.common.data_id.DataID`. -Functions :py:func:`~unidist.core.backends.python.core.api.get` and -:py:func:`~unidist.core.backends.python.core.api.put` are responsible for -read/write, respectively, objects from/to :py:class:`~unidist.core.backends.python.core.object_store.ObjectStore`. +.. autofunction:: unidist.core.backends.python.core.api.unwrap +.. autofunction:: unidist.core.backends.python.core.api.wrap -.. autofunction:: unidist.core.backends.python.core.api.get -.. autofunction:: unidist.core.backends.python.core.api.put - -:py:func:`~unidist.core.backends.python.core.api.submit` executes a task, which result will be put into -:py:class:`~unidist.core.backends.python.core.object_store.ObjectStore`. +:py:func:`~unidist.core.backends.python.core.api.submit` executes a task, whose result will be wrapped +in :py:class:`~unidist.core.backends.common.data_id.DataID`-(s). .. autofunction:: unidist.core.backends.python.core.api.submit diff --git a/docs/flow/unidist/core/backends/python/core/object_store.rst b/docs/flow/unidist/core/backends/python/core/object_store.rst deleted file mode 100644 index f706c4ba0..000000000 --- a/docs/flow/unidist/core/backends/python/core/object_store.rst +++ /dev/null @@ -1,17 +0,0 @@ -.. 
- Copyright (C) 2021 Modin authors - - SPDX-License-Identifier: Apache-2.0 - -:orphan: - -Object Storage -"""""""""""""" - -Python :py:class:`~unidist.core.backends.python.core.object_store.ObjectStore` stores -data in the `standard python dict`_. - -.. autoclass:: unidist.core.backends.python.core.object_store.ObjectStore - :members: - -.. _`standard python dict`: https://docs.python.org/3/tutorial/datastructures.html#dictionaries diff --git a/unidist/core/backends/common/data_id.py b/unidist/core/backends/common/data_id.py index 20c7e3875..114babd07 100644 --- a/unidist/core/backends/common/data_id.py +++ b/unidist/core/backends/common/data_id.py @@ -8,7 +8,10 @@ class DataID: - """Class that holds unique identifier. + """ + Class that holds unique identifier. + + In the case of Python backend this class holds an original object. Parameters ---------- diff --git a/unidist/core/backends/python/backend.py b/unidist/core/backends/python/backend.py index 5f94a3274..576de5c1e 100644 --- a/unidist/core/backends/python/backend.py +++ b/unidist/core/backends/python/backend.py @@ -61,7 +61,7 @@ def make_actor(cls, num_cpus, resources): @staticmethod def get(data_ids): """ - Get an object or a list of objects from object store. + Get an object or a list of objects from ``DataID``-(s). Parameters ---------- @@ -73,12 +73,12 @@ def get(data_ids): object A Python object or a list of Python objects. """ - return py.get(data_ids) + return py.unwrap(data_ids) @staticmethod def put(data): """ - Put `data` into object store. + Put `data` into ``DataID``. Parameters ---------- @@ -90,7 +90,7 @@ def put(data): unidist.core.backends.common.data_id.DataID ``DataID`` matching to data. 
""" - return py.put(data) + return py.wrap(data) @staticmethod def wait(data_ids, num_returns=1): diff --git a/unidist/core/backends/python/core/__init__.py b/unidist/core/backends/python/core/__init__.py index 3bb0b8fe9..b7a06dd7b 100644 --- a/unidist/core/backends/python/core/__init__.py +++ b/unidist/core/backends/python/core/__init__.py @@ -4,6 +4,6 @@ """Python backend core functionality.""" -from .api import put, get, submit, init +from .api import wrap, unwrap, submit -__all__ = ["put", "get", "submit", "init"] +__all__ = ["wrap", "unwrap", "submit"] diff --git a/unidist/core/backends/python/core/api.py b/unidist/core/backends/python/core/api.py index 8d358efd2..19537a111 100644 --- a/unidist/core/backends/python/core/api.py +++ b/unidist/core/backends/python/core/api.py @@ -6,52 +6,52 @@ from unidist.core.backends.common.data_id import DataID -from unidist.core.backends.python.core.object_store import ObjectStore -def init(): +def wrap(data): """ - Initialize an object storage. - - Notes - ----- - Run initialization of singleton object ``unidist.core.backends.python.core.object_store.ObjectStore``. - """ - ObjectStore.get_instance() - - -def put(data): - """ - Put data into object storage. + Wrap data in ``DataID``. Parameters ---------- data : object - Data to be put. + Data to be wrapped. Returns ------- unidist.core.backends.common.data_id.DataID - An ID of object in object storage. """ - return ObjectStore.get_instance().put(data) + return DataID(data) -def get(data_ids): +def unwrap(data_ids): """ - Get object(s) associated with `data_ids` from the object storage. + Unwrap object(s) from `data_ids`. Parameters ---------- data_ids : unidist.core.backends.common.data_id.DataID or list - ID(s) to object(s) to get data from. + ID(s) of object(s) to be unwrapped. Returns ------- object A Python object. 
""" - return ObjectStore.get_instance().get(data_ids) + is_list = isinstance(data_ids, list) + if not is_list: + data_ids = [data_ids] + if not all(isinstance(data_id, DataID) for data_id in data_ids): + raise ValueError("`data_ids` must either be a data ID or a list of data IDs.") + + def check_exception(value): + if isinstance(value, Exception): + raise value + return value + + values = [check_exception(data_id._id) for data_id in data_ids] + + return values if is_list else values[0] def submit(func, *args, num_returns=1, **kwargs): @@ -78,13 +78,11 @@ def submit(func, *args, num_returns=1, **kwargs): * if `num_returns > 1`, list of ``DataID``-s will be returned. * if `num_returns == 0`, ``None`` will be returned. """ - obj_store = ObjectStore.get_instance() - materialized_args = [ - obj_store.get(arg) if isinstance(arg, DataID) else arg for arg in args + unwrap(arg) if isinstance(arg, DataID) else arg for arg in args ] materialized_kwargs = { - key: obj_store.get(value) if isinstance(value, DataID) else value + key: unwrap(value) if isinstance(value, DataID) else value for key, value in kwargs.items() } @@ -96,8 +94,8 @@ def submit(func, *args, num_returns=1, **kwargs): if num_returns == 0: data_ids = None elif num_returns > 1: - data_ids = [obj_store.put(result[idx]) for idx in range(num_returns)] + data_ids = [DataID(result[idx]) for idx in range(num_returns)] else: - data_ids = obj_store.put(result) + data_ids = DataID(result) return data_ids diff --git a/unidist/core/backends/python/core/object_store.py b/unidist/core/backends/python/core/object_store.py deleted file mode 100644 index 181de1aa3..000000000 --- a/unidist/core/backends/python/core/object_store.py +++ /dev/null @@ -1,85 +0,0 @@ -# Copyright (C) 2021 Modin authors -# -# SPDX-License-Identifier: Apache-2.0 - -"""Object storage related functionality.""" - -from unidist.core.backends.common.data_id import DataID - - -class ObjectStore: - """Class that stores objects and provides access to these.""" 
- - __instance = None - - def __init__(self): - if ObjectStore.__instance is None: - self.store = dict() - - def __repr__(self): - return f"Object store: {self.store}" - - @classmethod - def get_instance(cls): - """ - Get instance of ``ObjectStore``. - - Returns - ------- - unidist.core.backends.python.core.object_store.ObjectStore - """ - if cls.__instance is None: - cls.__instance = ObjectStore() - return cls.__instance - - def put(self, data, data_id=None): - """ - Put `data` to internal dictionary. - - Parameters - ---------- - data : object - Data to be put. - data_id : unidist.core.backends.common.data_id.DataID, optional - An ID of data. If it isn't provided, will be created automatically. - - Returns - ------- - unidist.core.backends.common.data_id.DataID - An ID of object in internal dictionary. - """ - data_id = DataID() if data_id is None else data_id - - self.store[data_id] = data - return data_id - - def get(self, data_ids): - """ - Get object(s) associated with `data_ids` from the internal dictionary. - - Parameters - ---------- - data_ids : unidist.core.backends.common.data_id.DataID or list - ID(s) of object(s) to get data from. - - Returns - ------- - object - A Python object. - """ - is_list = isinstance(data_ids, list) - if not is_list: - data_ids = [data_ids] - if not all(isinstance(data_id, DataID) for data_id in data_ids): - raise ValueError( - "`data_ids` must either be a data ID or a list of data IDs." 
- ) - - def check_exception(value): - if isinstance(value, Exception): - raise value - return value - - values = [check_exception(self.store[data_id]) for data_id in data_ids] - - return values if is_list else values[0] diff --git a/unidist/core/backends/python/utils.py b/unidist/core/backends/python/utils.py deleted file mode 100644 index 2d93e2ce8..000000000 --- a/unidist/core/backends/python/utils.py +++ /dev/null @@ -1,18 +0,0 @@ -# Copyright (C) 2021 Modin authors -# -# SPDX-License-Identifier: Apache-2.0 - -"""Utilities used to initialize Python execution backend.""" - - -def initialize_python(): - """ - Initialize the Python execution backend. - - Notes - ----- - All execution will happen sequentially. - """ - from unidist.core.backends.python.core import init - - init() diff --git a/unidist/core/base/utils.py b/unidist/core/base/utils.py index d1a5ec307..434722a55 100644 --- a/unidist/core/base/utils.py +++ b/unidist/core/base/utils.py @@ -45,9 +45,7 @@ def init_backend(): backend_cls = MultiProcessingBackend() elif backend_name == "Python": from unidist.core.backends.python.backend import PythonBackend - from unidist.core.backends.python.utils import initialize_python - initialize_python() backend_cls = PythonBackend() elif backend_name == "MPI": from unidist.core.backends.mpi.backend import MPIBackend