Bug fixes for the FACTS method (#533)
* bugfix: drop_above arg used hard-coded feature names; removed it

* FACTS hotfix: drop_infeasible set to False

This part of the code is problematic due to its use of hard-coded feature
names; the required functionality should be achieved in some other way.

* FACTS bugfix: feature weights were not passed properly

* removed obsolete use of drop_above argument

* FACTS: added a test for the user-facing API

* FACTS_bias_scan: test improvement

Previously, the test case only had inf costs. Consequently, the
exact values of the feature weights were not actually tested properly.
phantom-duck authored Jul 5, 2024
1 parent 7c4f172 commit e011686
Showing 4 changed files with 179 additions and 17 deletions.
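
The central fix is in the FACTS.fit hunk below: the params object already used for the rule-selection call is now also forwarded to calc_costs, so the user-supplied feature_weights reach the cost computation instead of being dropped on the way. As a worked example taken from the new tests, with a uniform feature weight of 10 the action c: 30 → 3 is expected to cost 27 × 10 = 270 and a: 19 → 21 to cost 2 × 10 = 20, i.e. the expected values are consistent with an absolute-change-times-weight costing.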
aif360/sklearn/detectors/facts/__init__.py (3 changes: 2 additions & 1 deletion)
@@ -352,6 +352,7 @@ def fit(self, X: DataFrame, verbose: bool = True):
model=self.clf,
sensitive_attribute=self.prot_attr,
freqitem_minsupp=self.freq_itemset_min_supp,
+ drop_infeasible=False,
feats_not_allowed_to_change=list(feats_not_allowed_to_change),
verbose=verbose,
)
@@ -368,7 +369,7 @@ def fit(self, X: DataFrame, verbose: bool = True):
params=params,
verbose=verbose,
)
- self.rules_by_if = calc_costs(rules_by_if)
+ self.rules_by_if = calc_costs(rules_by_if, params=params)

self.dataset = X.copy(deep=True)

aif360/sklearn/detectors/facts/misc.py (14 changes: 1 addition & 13 deletions)
@@ -6,7 +6,7 @@
from pandas import DataFrame

from .parameters import *
- from .predicate import Predicate, recIsValid, featureChangePred, drop_two_above
+ from .predicate import Predicate, recIsValid, featureChangePred
from .frequent_itemsets import run_fpgrowth, preprocessDataset, fpgrowth_out_to_predicate_list
from .metrics import (
incorrectRecoursesIfThen,
@@ -182,7 +182,6 @@ def valid_ifthens(
freqitem_minsupp: float = 0.01,
missing_subgroup_val: str = "N/A",
drop_infeasible: bool = True,
- drop_above: bool = True,
feats_not_allowed_to_change: List[str] = [],
verbose: bool = True,
) -> List[Tuple[Predicate, Predicate, Dict[str, float], Dict[str, float]]]:
@@ -196,7 +195,6 @@
freqitem_minsupp (float): Minimum support threshold for frequent itemset mining.
missing_subgroup_val (str): Value indicating missing or unknown subgroup.
drop_infeasible (bool): Whether to drop infeasible if-then rules.
- drop_above (bool): Whether to drop if-then rules with feature changes above a certain threshold.
feats_not_allowed_to_change (list[str]): optionally, the user can provide some features which are not allowed to change at all (e.g. sex).
verbose (bool): whether to print intermediate messages and progress bar. Defaults to True.
@@ -281,16 +279,6 @@ def valid_ifthens(
)
)

- # keep ifs that have change on features of max value 2
- if drop_above == True:
- age = [val.left for val in X.age.unique()]
- age.sort()
- ifthens = [
- (ifs, then, cov)
- for ifs, then, cov in ifthens
- if drop_two_above(ifs, then, age)
- ]
-
# Calculate correctness percentages
if verbose:
print("Computing percentages of individuals flipped by each action independently.", flush=True)
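
With the hard-coded age filter removed, callers who need a feature to stay fixed can use the documented feats_not_allowed_to_change argument of the public FACTS API instead (a blanket restriction, not the removed two-bin threshold). A minimal sketch under that assumption; the MockModel and the choice of "age" are illustrative, and the remaining constructor arguments mirror the new test added below:

import numpy as np
import pandas as pd

from aif360.sklearn.detectors import FACTS

class MockModel:
    # toy stand-in for a fitted classifier: favorable outcome when "a" > 20
    def predict(self, X: pd.DataFrame) -> np.ndarray:
        return (X["a"] > 20).astype(int).to_numpy()

# Keep "age" fixed: no candidate recourse rule may change it.
detector = FACTS(
    clf=MockModel(),
    prot_attr="sex",
    categorical_features=["sex", "age"],
    freq_itemset_min_supp=0.5,
    feature_weights={"a": 10, "b": 10, "c": 10, "d": 10, "sex": 10, "age": 10},
    feats_not_allowed_to_change=["age"],
)
# detector.fit(X, verbose=False)  # X: a DataFrame with the columns above, e.g. the one in the new test below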
tests/sklearn/facts/test_init.py (new file: 176 additions & 0 deletions)
@@ -0,0 +1,176 @@
import numpy as np
import pandas as pd

import pytest

from aif360.sklearn.detectors import FACTS, FACTS_bias_scan

from aif360.sklearn.detectors.facts.predicate import Predicate
from aif360.sklearn.detectors.facts.parameters import ParameterProxy, feature_change_builder

def test_FACTS():
class MockModel:
def predict(self, X: pd.DataFrame) -> np.ndarray:
ret = []
for i, r in X.iterrows():
if r["a"] > 20:
ret.append(1)
elif r["c"] < 15:
ret.append(1)
else:
ret.append(0)
return np.array(ret)

X = pd.DataFrame(
[
[21, 2, 3, 4, "Female", pd.Interval(60, 70)],
[21, 13, 3, 19, "Male", pd.Interval(60, 70)],
[25, 2, 7, 4, "Female", pd.Interval(60, 70)],
[21, 2, 3, 4, "Male", pd.Interval(60, 70)],
[1, 2, 3, 4, "Male", pd.Interval(20, 30)],
[1, 20, 30, 40, "Male", pd.Interval(40, 50)],
[19, 2, 30, 43, "Male", pd.Interval(30, 40)],
[19, 13, 30, 4, "Male", pd.Interval(10, 20)],
[1, 2, 30, 4, "Female", pd.Interval(20, 30)],
[19, 20, 30, 40, "Female", pd.Interval(40, 50)],
[19, 2, 30, 4, "Female", pd.Interval(30, 40)],
],
columns=["a", "b", "c", "d", "sex", "age"]
)
model = MockModel()

detector = FACTS(
clf=model,
prot_attr="sex",
categorical_features=["sex", "age"],
freq_itemset_min_supp=0.5,
feature_weights={f: 10 for f in X.columns},
feats_not_allowed_to_change=[],
)
detector.fit(X, verbose=False)
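# Expected structure: if-clause -> {protected group: (coverage of the if-clause
# among the group's negatively predicted individuals,
# [(then-clause, correctness, cost), ...])}.
# With every feature weighted 10, the expected costs below are consistent with
# |change| * weight, e.g. a: 19 -> 21 gives 2 * 10 = 20 and c: 30 -> 3 gives
# 27 * 10 = 270.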

expected_ifthens = {
Predicate.from_dict({"a": 19}): {
"Male": (2/3, [
(Predicate.from_dict({"a": 21}), 1., 20.)
]),
"Female": (2/3, [
(Predicate.from_dict({"a": 21}), 1., 20.)
])
},
Predicate.from_dict({"c": 30}): {
"Male": (1., [
(Predicate.from_dict({"c": 3}), 1., 270.)
]),
"Female": (1., [
(Predicate.from_dict({"c": 3}), 1., 270.)
])
},
Predicate.from_dict({"a": 19, "c": 30}): {
"Male": (2/3, [
(Predicate.from_dict({"a": 21, "c": 3}), 1., 290.)
]),
"Female": (2/3, [
(Predicate.from_dict({"a": 21, "c": 3}), 1., 290.)
])
},
}

assert set(expected_ifthens.keys()) == set(detector.rules_by_if)
for ifclause, all_thens in expected_ifthens.items():
assert detector.rules_by_if[ifclause] == all_thens

def test_FACTS_bias_scan():
class MockModel:
def predict(self, X: pd.DataFrame) -> np.ndarray:
ret = []
for i, r in X.iterrows():
if r["sex"] == "Female" and r["d"] < 15:
if r["c"] < 5:
ret.append(1)
else:
ret.append(0)
elif r["a"] > 20:
ret.append(1)
elif r["c"] < 15:
ret.append(1)
else:
ret.append(0)
return np.array(ret)

X = pd.DataFrame(
[
[21, 2, 3, 20, "Female", pd.Interval(60, 70)],
[21, 13, 3, 19, "Male", pd.Interval(60, 70)],
[25, 2, 7, 21, "Female", pd.Interval(60, 70)],
[21, 2, 3, 4, "Male", pd.Interval(60, 70)],
[1, 2, 7, 4, "Male", pd.Interval(20, 30)],
[1, 2, 7, 40, "Female", pd.Interval(20, 30)],
[1, 20, 30, 40, "Male", pd.Interval(40, 50)],
[19, 2, 30, 43, "Male", pd.Interval(30, 40)],
[19, 13, 30, 4, "Male", pd.Interval(10, 20)],
[1, 2, 30, 4, "Female", pd.Interval(20, 30)],
[19, 20, 30, 7, "Female", pd.Interval(40, 50)],
[19, 2, 30, 4, "Female", pd.Interval(30, 40)],
],
columns=["a", "b", "c", "d", "sex", "age"]
)
model = MockModel()

most_biased_subgroups = FACTS_bias_scan(
X=X,
clf=model,
prot_attr="sex",
metric="equal-cost-of-effectiveness",
categorical_features=["sex", "age"],
freq_itemset_min_supp=0.5,
feature_weights={f: 10 for f in X.columns},
feats_not_allowed_to_change=[],
viewpoint="macro",
sort_strategy="max-cost-diff-decr",
top_count=3,
phi=0.5,
verbose=False,
print_recourse_report=False,
)

# just so we can see them here
expected_ifthens = {
Predicate.from_dict({"a": 19}): {
"Male": (2/3, [
(Predicate.from_dict({"a": 21}), 1., 20.)
]),
"Female": (2/3, [
(Predicate.from_dict({"a": 21}), 0., 20.)
])
},
Predicate.from_dict({"c": 30}): {
"Male": (1., [
(Predicate.from_dict({"c": 7}), 1., 230.),
(Predicate.from_dict({"c": 3}), 1., 270.),
]),
"Female": (1., [
(Predicate.from_dict({"c": 7}), 0., 230.),
(Predicate.from_dict({"c": 3}), 1., 270.),
])
},
Predicate.from_dict({"a": 19, "c": 30}): {
"Male": (2/3, [
(Predicate.from_dict({"a": 21, "c": 3}), 1., 290.)
]),
"Female": (2/3, [
(Predicate.from_dict({"a": 21, "c": 3}), 1., 290.)
])
},
}
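# With metric="equal-cost-of-effectiveness" and phi=0.5, the value reported for
# each subgroup is consistent with the difference between the minimum costs at
# which the two groups reach correctness >= phi:
#   {"a": 19}: the Female group never reaches 0.5, hence inf;
#   {"c": 30}: Female needs cost 270 vs. 230 for Male, hence 40;
#   {"a": 19, "c": 30}: both groups reach it at cost 290, hence 0.
# Unlike the previous version of this test, the finite, non-zero values also
# exercise the user-supplied feature weights.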
expected_most_biased_subgroups = [
({"a": 19}, float("inf")),
({"c": 30}, 40.),
({"a": 19, "c": 30}, 0.),
]

assert len(most_biased_subgroups) == len(expected_most_biased_subgroups)
for g in expected_most_biased_subgroups:
assert g in most_biased_subgroups
for g in most_biased_subgroups:
assert g in expected_most_biased_subgroups
tests/sklearn/facts/test_misc.py (3 changes: 0 additions & 3 deletions)
@@ -1,5 +1,3 @@
- from pprint import pprint
-
import numpy as np
import pandas as pd

@@ -54,7 +52,6 @@ def test_rule_generation() -> None:
sensitive_attribute="sex",
freqitem_minsupp=0.5,
drop_infeasible=False,
- drop_above=True
)
ifthens = rules2rulesbyif(ifthens)

