From 03b96e9b0714eb9c90e4262651c9d15a98665316 Mon Sep 17 00:00:00 2001
From: Frances Hartwell
Date: Thu, 28 Mar 2024 12:26:59 -0400
Subject: [PATCH] Providing locales to AnonymizedFaker with a function that uses the BaseProvider crashes (#776)

---
 rdt/transformers/pii/anonymizer.py             |  2 +-
 .../transformers/pii/test_anonymizer.py        | 18 ++++++++++++++++++
 2 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/rdt/transformers/pii/anonymizer.py b/rdt/transformers/pii/anonymizer.py
index 0e2eced9..177bd718 100644
--- a/rdt/transformers/pii/anonymizer.py
+++ b/rdt/transformers/pii/anonymizer.py
@@ -138,7 +138,7 @@ def __init__(self, provider_name=None, function_name=None, function_kwargs=None,
         self._faker_random_seed = None
         self.locales = locales
         self.faker = faker.Faker(self.locales)
-        if self.locales:
+        if self.provider_name != 'BaseProvider' and self.locales:
             self._check_locales()
 
         if missing_value_generation not in ['random', None]:
diff --git a/tests/integration/transformers/pii/test_anonymizer.py b/tests/integration/transformers/pii/test_anonymizer.py
index c9b51078..66244f68 100644
--- a/tests/integration/transformers/pii/test_anonymizer.py
+++ b/tests/integration/transformers/pii/test_anonymizer.py
@@ -27,6 +27,24 @@ def test_default_settings(self):
         pd.testing.assert_frame_equal(transformed, expected_transformed)
         assert len(reverse_transform['username']) == 5
 
+    def test_default_settings_with_locales(self):
+        """End to end test with the default settings and locales of the ``AnonymizedFaker``."""
+        data = pd.DataFrame({
+            'id': [1, 2, 3, 4, 5],
+            'username': ['a', 'b', 'c', 'd', 'e']
+        })
+
+        instance = AnonymizedFaker(locales=['en_US', 'en_CA', 'es_ES'])
+        transformed = instance.fit_transform(data, 'username')
+
+        reverse_transform = instance.reverse_transform(transformed)
+        expected_transformed = pd.DataFrame({
+            'id': [1, 2, 3, 4, 5]
+        })
+
+        pd.testing.assert_frame_equal(transformed, expected_transformed)
+        assert len(reverse_transform['username']) == 5
+
     def test_get_supported_sdtypes(self):
         """Test that the correct supported sdtypes are returned."""
         # Run