Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement fit, transform of DisparateImpactRemover #546

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 43 additions & 0 deletions aif360/algorithms/preprocessing/disparate_impact_remover.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,49 @@ def __init__(self, repair_level=1.0, sensitive_attribute=''):

self.sensitive_attribute = sensitive_attribute

def fit(self, dataset):
    """Choose the sensitive attribute this remover will operate on.

    Args:
        dataset (BinaryLabelDataset): Dataset whose
            ``protected_attribute_names`` supplies the default attribute.

    Returns:
        DisparateImpactRemover: This same instance.

    Note:
        A sensitive attribute that is already set is left untouched; an
        empty one is replaced by the dataset's first protected attribute
        name.
    """
    if self.sensitive_attribute:
        # Already configured (e.g. via the constructor): nothing to do.
        return self

    first_protected = dataset.protected_attribute_names[0]
    self.sensitive_attribute = first_protected
    return self

def transform(self, dataset):
    """Repair the dataset's features with respect to the sensitive attribute.

    Args:
        dataset (BinaryLabelDataset): Dataset whose features are repaired.

    Returns:
        BinaryLabelDataset: The object returned by ``dataset.copy()``, with
        its ``features`` replaced by the repairer's output as a ``float64``
        array. The column of the sensitive attribute is reset to the
        values stored in ``protected_attributes``.

    Note:
        If ``sensitive_attribute`` is still empty (``transform`` called
        without a prior ``fit``), the dataset's first protected attribute
        is used for this call only; the instance is not modified. The
        repair itself is delegated to ``self.Repairer``.
    """
    # Resolve the attribute locally so that an unfitted remover does not
    # fail with an opaque "'' is not in list" lookup error, and so that
    # transform never mutates the estimator's state.
    sensitive_attribute = (self.sensitive_attribute
                           or dataset.protected_attribute_names[0])

    features = dataset.features.tolist()
    index = dataset.feature_names.index(sensitive_attribute)
    repairer = self.Repairer(features, index, self.repair_level, False)

    transformed_features = repairer.repair(features)
    transformed_dataset = dataset.copy()
    transformed_dataset.features = np.array(transformed_features,
                                            dtype=np.float64)

    # Ensure the protected attribute column remains unchanged: overwrite it
    # with the values kept in protected_attributes.
    protected_index = transformed_dataset.protected_attribute_names.index(
        sensitive_attribute)
    transformed_dataset.features[:, index] = (
        transformed_dataset.protected_attributes[:, protected_index])

    return transformed_dataset

def fit_transform(self, dataset):
"""Run a repairer on the non-protected features and return the
transformed dataset.
Expand Down
72 changes: 70 additions & 2 deletions tests/test_disparate_impact_remover.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
import numpy as np

from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC as SVM
from sklearn.preprocessing import MinMaxScaler

from aif360.algorithms.preprocessing import DisparateImpactRemover
Expand Down Expand Up @@ -78,3 +76,73 @@ def test_adult():

assert after > before
assert abs(1 - after) <= 0.2


def test_fit_transform_no_repair():
    """fit_transform with repair_level=0.0 must leave the features as is."""
    sensitive = 'sex'
    dataset = AdultDataset(
        protected_attribute_names=[sensitive],
        privileged_classes=[['Male']],
        categorical_features=[],
        features_to_keep=['age', 'education-num'],
    )

    remover = DisparateImpactRemover(repair_level=0.0)
    repaired = remover.fit_transform(dataset)

    # No repair requested, so both feature matrices must match exactly.
    unchanged = np.array_equal(dataset.features, repaired.features)
    assert unchanged, "Transformed dataset should be the same as original."


def test_fit_transform_full_repair():
    """fit_transform with repair_level=1.0 must change the features."""
    sensitive = 'sex'
    dataset = AdultDataset(
        protected_attribute_names=[sensitive],
        privileged_classes=[['Male']],
        categorical_features=[],
        features_to_keep=['age', 'education-num'],
    )

    remover = DisparateImpactRemover(repair_level=1.0)
    repaired = remover.fit_transform(dataset)

    # Full repair requested, so the feature matrices must not be identical.
    identical = np.array_equal(dataset.features, repaired.features)
    assert not identical, "Transformed dataset should differ from original."


def test_transform_after_fit():
    """Calling fit() and then transform() must change the features."""
    sensitive = 'sex'
    dataset = AdultDataset(
        protected_attribute_names=[sensitive],
        privileged_classes=[['Male']],
        categorical_features=[],
        features_to_keep=['age', 'education-num'],
    )
    remover = DisparateImpactRemover(repair_level=1.0)

    # Two-step path: fit first, then transform the same dataset.
    remover.fit(dataset)
    repaired = remover.transform(dataset)

    identical = np.array_equal(dataset.features, repaired.features)
    assert not identical, "Transformed dataset should differ from original."


def test_fit_transform_equivalence():
    """Check that fit() followed by transform() matches fit_transform().

    Each path runs on its own freshly constructed remover, so the
    comparison cannot be satisfied by state that an earlier call may have
    left on a shared instance.
    """
    protected = 'sex'
    ad = AdultDataset(protected_attribute_names=[protected],
                      privileged_classes=[['Male']], categorical_features=[],
                      features_to_keep=['age', 'education-num'])

    # One-step path on its own instance.
    di_one_step = DisparateImpactRemover(repair_level=1.0)
    ad_repd_fit_transform = di_one_step.fit_transform(ad)

    # Two-step path on a second, independent instance with the same settings.
    di_two_step = DisparateImpactRemover(repair_level=1.0)
    di_two_step.fit(ad)
    ad_repd_fit_then_transform = di_two_step.transform(ad)

    # Assert that the two results are equal
    assert np.array_equal(ad_repd_fit_transform.features,
                          ad_repd_fit_then_transform.features), (
        "Results from fit + transform should be equal to fit_transform."
    )