From 440d4fa40e59b2f84a7acfb62f4fbf51f93afbb8 Mon Sep 17 00:00:00 2001 From: Felipe Date: Sun, 4 Feb 2024 12:01:24 -0800 Subject: [PATCH 1/3] Fix bug --- rdt/transformers/categorical.py | 3 ++- .../transformers/test_categorical.py | 23 +++++++++++++++++++ 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/rdt/transformers/categorical.py b/rdt/transformers/categorical.py index 13d4e038f..16ae6ff08 100644 --- a/rdt/transformers/categorical.py +++ b/rdt/transformers/categorical.py @@ -6,6 +6,7 @@ import numpy as np import pandas as pd from scipy.stats import norm +from pandas.api.types import is_numeric_dtype from rdt.errors import TransformerInputError from rdt.transformers.base import BaseTransformer @@ -599,7 +600,7 @@ def _fit(self, data): self._indexer = list(range(self._num_dummies)) self.dummies = self._uniques.copy() - if not np.issubdtype(data.dtype, np.number): + if not np.issubdtype(data.dtype.type, np.number): self._dummy_encoded = True if self._dummy_na: diff --git a/tests/integration/transformers/test_categorical.py b/tests/integration/transformers/test_categorical.py index 571b857fc..da032b27e 100644 --- a/tests/integration/transformers/test_categorical.py +++ b/tests/integration/transformers/test_categorical.py @@ -463,6 +463,29 @@ def test_one_hot_doesnt_warn(tmp_path): ohe_loaded.transform(data) +def test_one_hot_categoricals(): + """Ensure OneHotEncoder works on categorical data. GH#751""" + # Setup + test_data = pd.DataFrame(data={ + 'A': ['Yes', 'No', 'Yes', 'Maybe', 'No'] + }) + test_data['A'] = test_data['A'].astype('category') + transformer = OneHotEncoder() + + # Run + transformed_data = transformer.fit_transform(test_data, column='A') + + # Assert + pd.testing.assert_frame_equal( + transformed_data, + pd.DataFrame({ + 'A.value0': [1, 0, 1, 0, 0], + 'A.value1': [0, 1, 0, 0, 1], + 'A.value2': [0, 0, 0, 1, 0], + }) + ) + + def test_label_numerical_2d_array(): """Ensure LabelEncoder works on numerical + nan only columns.""" From 9fda88002495161e7683d1beab70c86acab31ae5 Mon Sep 17 00:00:00 2001 From: Felipe Date: Thu, 4 Apr 2024 09:33:29 -0700 Subject: [PATCH 2/3] Add py12 support --- .github/workflows/integration.yml | 2 +- .github/workflows/minimum.yml | 2 +- .github/workflows/readme.yml | 2 +- .github/workflows/unit.yml | 2 +- INSTALL.md | 2 +- pyproject.toml | 32 +++++++++++++++++++++++++------ tox.ini | 2 +- 7 files changed, 32 insertions(+), 12 deletions(-) diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index 113824900..6d7bb0965 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -10,7 +10,7 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - python-version: ['3.8', '3.9', '3.10', '3.11'] + python-version: ['3.8', '3.9', '3.10', '3.11', '3.12'] os: [ubuntu-latest, macos-latest, windows-latest] steps: - uses: actions/checkout@v1 diff --git a/.github/workflows/minimum.yml b/.github/workflows/minimum.yml index 3ea44860e..54efa7dc9 100644 --- a/.github/workflows/minimum.yml +++ b/.github/workflows/minimum.yml @@ -10,7 +10,7 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - python-version: ['3.8', '3.9', '3.10', '3.11'] + python-version: ['3.8', '3.9', '3.10', '3.11', '3.12'] os: [ubuntu-latest, macos-latest, windows-latest] steps: - uses: actions/checkout@v1 diff --git a/.github/workflows/readme.yml b/.github/workflows/readme.yml index 77be43452..5f3b34960 100644 --- a/.github/workflows/readme.yml +++ b/.github/workflows/readme.yml @@ -10,7 +10,7 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - python-version: ['3.8', '3.9', '3.10', '3.11'] + python-version: ['3.8', '3.9', '3.10', '3.11', '3.12'] os: [ubuntu-latest, macos-latest] # skip windows bc rundoc fails steps: - uses: actions/checkout@v1 diff --git a/.github/workflows/unit.yml b/.github/workflows/unit.yml index db025c6f3..ce24f310c 100644 --- a/.github/workflows/unit.yml +++ b/.github/workflows/unit.yml @@ -10,7 +10,7 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - python-version: ['3.8', '3.9', '3.10', '3.11'] + python-version: ['3.8', '3.9', '3.10', '3.11', '3.12'] os: [ubuntu-latest, macos-latest, windows-latest] steps: - uses: actions/checkout@v1 diff --git a/INSTALL.md b/INSTALL.md index e938efae6..5862ec86c 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -3,7 +3,7 @@ ## Requirements **RDT** has been developed and tested on -[Python 3.8, 3.9, 3.10 and 3.11](https://www.python.org/downloads/) +[Python 3.8, 3.9, 3.10, 3.11 and 3.12](https://www.python.org/downloads/) Also, although it is not strictly required, the usage of a [virtualenv]( https://virtualenv.pypa.io/en/latest/) is highly recommended in order to avoid diff --git a/pyproject.toml b/pyproject.toml index 368599371..282dadeb3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,25 +12,46 @@ classifiers = [ 'Programming Language :: Python :: 3.9', 'Programming Language :: Python :: 3.10', 'Programming Language :: Python :: 3.11', + 'Programming Language :: Python :: 3.12', 'Topic :: Scientific/Engineering :: Artificial Intelligence', ] keywords = ['machine learning', 'synthetic data generation', 'benchmark', 'generative models'] dynamic = ['version'] license = { text = 'BSL-1.1' } -requires-python = '>=3.8,<3.12' +requires-python = '>=3.8,<3.13' readme = 'README.md' dependencies = [ +<<<<<<< HEAD "numpy>=1.20.0;python_version<'3.10'", - "numpy>=1.23.3;python_version>='3.10'", + "numpy>=1.23.3;python_version>='3.10' and python_version<'3.12'", + "numpy>=1.26.0;python_version>='3.12'", "pandas>=1.1.3;python_version<'3.10'", "pandas>=1.3.4;python_version>='3.10' and python_version<'3.11'", "pandas>=1.5.0;python_version>='3.11'", "scipy>=1.5.4;python_version<'3.10'", - "scipy>=1.9.2;python_version>='3.10'", + "scipy>=1.9.2;python_version>='3.10' and python_version<'3.12'", + "scipy>=1.12.0;python_version>='3.12'", "scikit-learn>=0.24;python_version<'3.10'", "scikit-learn>=1.1.0;python_version>='3.10' and python_version<'3.11'", - "scikit-learn>=1.1.3;python_version>='3.11'", + "scikit-learn>=1.1.3;python_version>='3.11' and python_version<'3.12'", + "scikit-learn>=1.3.0;python_version>='3.12'", 'Faker>=17', +======= + "numpy>=1.20.0,<2;python_version<'3.10'", + "numpy>=1.23.3,<2;python_version>='3.10' and python_version<'3.12'", + "numpy>=1.26.0,<2;python_version>='3.12'", + "pandas>=1.1.3;python_version<'3.10'", + "pandas>=1.3.4;python_version>='3.10' and python_version<'3.11'", + "pandas>=1.5.0;python_version>='3.11'", + "scipy>=1.5.4,<2;python_version<'3.10'", + "scipy>=1.9.2,<2;python_version>='3.10' and python_version<'3.12'", + "scipy>=1.12.0,<2;python_version>='3.12'", + "scikit-learn>=0.24,<2;python_version<'3.10'", + "scikit-learn>=1.1.0;python_version>='3.10' and python_version<'3.11'", + "scikit-learn>=1.1.3,<2;python_version>='3.11' and python_version<'3.12'", + "scikit-learn>=1.3.0,<2;python_version>='3.12'", + 'Faker>=17,<20', +>>>>>>> a1624cd (Add py12 support) ] [project.urls] @@ -63,7 +84,7 @@ dev = [ 'build>=1.0.0,<2', 'bump-my-version>=0.18.3,<1', 'pip>=9.0.1', - 'watchdog>=0.8.3,<0.11', + 'watchdog>=1.0.1,<5', # style check 'pycodestyle<2.8.0,>=2.7.0', @@ -133,7 +154,6 @@ namespaces = false '*.png', '*.gif' ] -'tests' = ['*'] [tool.setuptools.exclude-package-data] '*' = [ diff --git a/tox.ini b/tox.ini index ff8ce0536..245932a90 100644 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,5 @@ [tox] -envlist = py39-lint, py39-performance, py3{8,9,10,11}-{readme,unit,integration,minimum} +envlist = py39-lint, py39-performance, py3{8,9,10,11,12}-{readme,unit,integration,minimum} [testenv] skipsdist = false From 4c212395fb5dcb85eb2b7ce4f0aa907fb9dc00da Mon Sep 17 00:00:00 2001 From: Felipe Date: Thu, 4 Apr 2024 09:33:44 -0700 Subject: [PATCH 3/3] Add py12 support --- .github/workflows/integration.yml | 2 +- .github/workflows/lint.yml | 2 +- .github/workflows/minimum.yml | 2 +- .github/workflows/readme.yml | 2 +- .github/workflows/unit.yml | 2 +- pyproject.toml | 19 +---------- rdt/__init__.py | 10 ++++-- rdt/transformers/categorical.py | 1 - .../transformers/test_categorical.py | 1 + tests/unit/test___init__.py | 32 +++++++++++++++---- 10 files changed, 40 insertions(+), 33 deletions(-) diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index 6d7bb0965..a7ad18f34 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -13,7 +13,7 @@ jobs: python-version: ['3.8', '3.9', '3.10', '3.11', '3.12'] os: [ubuntu-latest, macos-latest, windows-latest] steps: - - uses: actions/checkout@v1 + - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v2 with: diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index fd795accc..84bf0f1dd 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -9,7 +9,7 @@ jobs: lint: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v1 + - uses: actions/checkout@v4 - name: Set up Python 3.9 uses: actions/setup-python@v2 with: diff --git a/.github/workflows/minimum.yml b/.github/workflows/minimum.yml index 54efa7dc9..1d0d9ecc1 100644 --- a/.github/workflows/minimum.yml +++ b/.github/workflows/minimum.yml @@ -13,7 +13,7 @@ jobs: python-version: ['3.8', '3.9', '3.10', '3.11', '3.12'] os: [ubuntu-latest, macos-latest, windows-latest] steps: - - uses: actions/checkout@v1 + - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v2 with: diff --git a/.github/workflows/readme.yml b/.github/workflows/readme.yml index 5f3b34960..dcc4389fe 100644 --- a/.github/workflows/readme.yml +++ b/.github/workflows/readme.yml @@ -13,7 +13,7 @@ jobs: python-version: ['3.8', '3.9', '3.10', '3.11', '3.12'] os: [ubuntu-latest, macos-latest] # skip windows bc rundoc fails steps: - - uses: actions/checkout@v1 + - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v2 with: diff --git a/.github/workflows/unit.yml b/.github/workflows/unit.yml index ce24f310c..b031ebe64 100644 --- a/.github/workflows/unit.yml +++ b/.github/workflows/unit.yml @@ -13,7 +13,7 @@ jobs: python-version: ['3.8', '3.9', '3.10', '3.11', '3.12'] os: [ubuntu-latest, macos-latest, windows-latest] steps: - - uses: actions/checkout@v1 + - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v2 with: diff --git a/pyproject.toml b/pyproject.toml index 282dadeb3..b9b73562d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,7 +21,6 @@ license = { text = 'BSL-1.1' } requires-python = '>=3.8,<3.13' readme = 'README.md' dependencies = [ -<<<<<<< HEAD "numpy>=1.20.0;python_version<'3.10'", "numpy>=1.23.3;python_version>='3.10' and python_version<'3.12'", "numpy>=1.26.0;python_version>='3.12'", @@ -36,22 +35,6 @@ dependencies = [ "scikit-learn>=1.1.3;python_version>='3.11' and python_version<'3.12'", "scikit-learn>=1.3.0;python_version>='3.12'", 'Faker>=17', -======= - "numpy>=1.20.0,<2;python_version<'3.10'", - "numpy>=1.23.3,<2;python_version>='3.10' and python_version<'3.12'", - "numpy>=1.26.0,<2;python_version>='3.12'", - "pandas>=1.1.3;python_version<'3.10'", - "pandas>=1.3.4;python_version>='3.10' and python_version<'3.11'", - "pandas>=1.5.0;python_version>='3.11'", - "scipy>=1.5.4,<2;python_version<'3.10'", - "scipy>=1.9.2,<2;python_version>='3.10' and python_version<'3.12'", - "scipy>=1.12.0,<2;python_version>='3.12'", - "scikit-learn>=0.24,<2;python_version<'3.10'", - "scikit-learn>=1.1.0;python_version>='3.10' and python_version<'3.11'", - "scikit-learn>=1.1.3,<2;python_version>='3.11' and python_version<'3.12'", - "scikit-learn>=1.3.0,<2;python_version>='3.12'", - 'Faker>=17,<20', ->>>>>>> a1624cd (Add py12 support) ] [project.urls] @@ -65,7 +48,7 @@ dependencies = [ rdt = { main = 'rdt.cli.__main__:main' } [project.optional-dependencies] -copulas = ['copulas>=0.10.0',] +copulas = ['copulas@git+https://git@github.com/sdv-dev/copulas.git',] test = [ 'rdt[copulas]', diff --git a/rdt/__init__.py b/rdt/__init__.py index 06b92dbc1..c0e3dbfc3 100644 --- a/rdt/__init__.py +++ b/rdt/__init__.py @@ -10,12 +10,12 @@ import sys import warnings +from importlib.metadata import entry_points from operator import attrgetter from types import ModuleType import numpy as np import pandas as pd -from pkg_resources import iter_entry_points from rdt import transformers from rdt.hyper_transformer import HyperTransformer @@ -149,7 +149,13 @@ def _find_addons(): from top_module.addon_module import x """ group = 'rdt_modules' - for entry_point in iter_entry_points(group=group): + try: + eps = entry_points(group=group) # pylint: disable=E1123 + except TypeError: + # Load-time selection requires Python >= 3.10 or importlib_metadata >= 3.6 + eps = entry_points().get(group, []) + + for entry_point in eps: try: addon = entry_point.load() except Exception: # pylint: disable=broad-exception-caught diff --git a/rdt/transformers/categorical.py b/rdt/transformers/categorical.py index 16ae6ff08..e077f5be2 100644 --- a/rdt/transformers/categorical.py +++ b/rdt/transformers/categorical.py @@ -6,7 +6,6 @@ import numpy as np import pandas as pd from scipy.stats import norm -from pandas.api.types import is_numeric_dtype from rdt.errors import TransformerInputError from rdt.transformers.base import BaseTransformer diff --git a/tests/integration/transformers/test_categorical.py b/tests/integration/transformers/test_categorical.py index 5ae02b662..907df4851 100644 --- a/tests/integration/transformers/test_categorical.py +++ b/tests/integration/transformers/test_categorical.py @@ -474,6 +474,7 @@ def test_one_hot_categoricals(): # Run transformed_data = transformer.fit_transform(test_data, column='A') + # Assert pd.testing.assert_frame_equal( transformed_data, diff --git a/tests/unit/test___init__.py b/tests/unit/test___init__.py index db0f8b2db..cd027b665 100644 --- a/tests/unit/test___init__.py +++ b/tests/unit/test___init__.py @@ -56,7 +56,7 @@ def test_get_demo_many_rows(): pd.testing.assert_frame_equal(demo, expected) -@patch.object(rdt, 'iter_entry_points') +@patch.object(rdt, 'entry_points') def test__find_addons_module(entry_points_mock, mock_rdt): """Test loading an add-on.""" # Setup @@ -75,7 +75,25 @@ def test__find_addons_module(entry_points_mock, mock_rdt): assert sys.modules['rdt.submodule.entry_name'] == add_on_mock -@patch.object(rdt, 'iter_entry_points') +@patch.object(rdt, 'entry_points') +def test__find_addons_type_error(entry_points_mock): + """Test it when entry_points raises a TypeError (happens for py38, py39).""" + # Setup + def side_effect(arg=None): + if arg == 'rdt_modules': + raise TypeError() + return {arg: []} + + entry_points_mock.side_effect = side_effect + + # Run + _find_addons() + + # Assert + entry_points_mock.assert_called_with() + + +@patch.object(rdt, 'entry_points') def test__find_addons_object(entry_points_mock, mock_rdt): """Test loading an add-on.""" # Setup @@ -93,7 +111,7 @@ def test__find_addons_object(entry_points_mock, mock_rdt): @patch('warnings.warn') -@patch('rdt.iter_entry_points') +@patch('rdt.entry_points') def test__find_addons_bad_addon(entry_points_mock, warning_mock): """Test failing to load an add-on generates a warning.""" # Setup @@ -116,7 +134,7 @@ def entry_point_error(): @patch('warnings.warn') -@patch('rdt.iter_entry_points') +@patch('rdt.entry_points') def test__find_addons_wrong_base(entry_points_mock, warning_mock): """Test incorrect add-on name generates a warning.""" # Setup @@ -137,7 +155,7 @@ def test__find_addons_wrong_base(entry_points_mock, warning_mock): @patch('warnings.warn') -@patch('rdt.iter_entry_points') +@patch('rdt.entry_points') def test__find_addons_missing_submodule(entry_points_mock, warning_mock): """Test incorrect add-on name generates a warning.""" # Setup @@ -158,7 +176,7 @@ def test__find_addons_missing_submodule(entry_points_mock, warning_mock): @patch('warnings.warn') -@patch('rdt.iter_entry_points') +@patch('rdt.entry_points') def test__find_addons_module_and_object(entry_points_mock, warning_mock): """Test incorrect add-on name generates a warning.""" # Setup @@ -179,7 +197,7 @@ def test__find_addons_module_and_object(entry_points_mock, warning_mock): @patch('warnings.warn') -@patch.object(rdt, 'iter_entry_points') +@patch.object(rdt, 'entry_points') def test__find_addons_missing_object(entry_points_mock, warning_mock, mock_rdt): """Test incorrect add-on name generates a warning.""" # Setup