diff --git a/CHANGELOG.md b/CHANGELOG.md index b177e141..4ab32c69 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,8 @@ - adds the `upset_plot` function to the `plot` module to visualize the interactions of higher-order [#290](https://github.com/mmschlk/shapiq/issues/290) - adds support for IsoForest models to explainer and tree explainer [#278](https://github.com/mmschlk/shapiq/issues/278) - adds support for sub-selection of players in the interaction values data class [#276](https://github.com/mmschlk/shapiq/issues/276) which allows retrieving interaction values for a subset of players +- refactors game theory computations such as `ExactComputer`, `MoebiusConverter`, and `core` (among others) into the new `game_theory` module to make them more modular and flexible [#258](https://github.com/mmschlk/shapiq/issues/258) +- improves the quality of the tests by adding many more semantic tests for the different interaction indices and computations [#285](https://github.com/mmschlk/shapiq/pull/285) ### v1.1.1 (2024-11-13) diff --git a/requirements.txt b/requirements.txt index ab184d65..dd72302b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -16,6 +16,8 @@ tqdm==4.67.1 torch==2.5.1 torchvision==0.20.1 transformers==4.46.3 +tensorflow==2.18.0 +tf-keras==2.18.0 xgboost==2.1.3 numpy==1.26.4 requests==2.32.3 diff --git a/shapiq/__init__.py b/shapiq/__init__.py index 38d4e3fc..649cac94 100644 --- a/shapiq/__init__.py +++ b/shapiq/__init__.py @@ -38,12 +38,12 @@ # dataset functions from .datasets import load_adult_census, load_bike_sharing, load_california_housing -# exact computer classes -from .exact import ExactComputer - # explainer classes from .explainer import Explainer, TabularExplainer, TreeExplainer +# exact computer classes +from .game_theory.exact import ExactComputer + # game classes # imputer classes from .games import BaselineImputer, ConditionalImputer, Game, MarginalImputer diff --git a/shapiq/approximator/_base.py b/shapiq/approximator/_base.py index 
c6782402..0122c72e 100644 --- a/shapiq/approximator/_base.py +++ b/shapiq/approximator/_base.py @@ -6,15 +6,15 @@ import numpy as np -from shapiq.approximator.sampling import CoalitionSampler -from shapiq.indices import ( +from ..approximator.sampling import CoalitionSampler +from ..game_theory.indices import ( AVAILABLE_INDICES_FOR_APPROXIMATION, get_computation_index, is_empty_value_the_baseline, is_index_aggregated, ) -from shapiq.interaction_values import InteractionValues -from shapiq.utils.sets import generate_interaction_lookup +from ..interaction_values import InteractionValues +from ..utils.sets import generate_interaction_lookup __all__ = [ "Approximator", @@ -318,7 +318,7 @@ def aggregate_interaction_values( Returns: The aggregated interaction values. """ - from ..aggregation import aggregate_interaction_values + from shapiq.game_theory.aggregation import aggregate_interaction_values if player_set is not None: raise NotImplementedError( @@ -339,6 +339,6 @@ def aggregate_to_one_dimension( Returns: tuple[np.ndarray, np.ndarray]: The positive and negative aggregated values. 
""" - from ..aggregation import aggregate_to_one_dimension + from shapiq.game_theory.aggregation import aggregate_to_one_dimension return aggregate_to_one_dimension(interaction_values) diff --git a/shapiq/approximator/marginals/owen.py b/shapiq/approximator/marginals/owen.py index 318ae7a8..91106163 100644 --- a/shapiq/approximator/marginals/owen.py +++ b/shapiq/approximator/marginals/owen.py @@ -6,8 +6,8 @@ import numpy as np -from shapiq.approximator._base import Approximator -from shapiq.interaction_values import InteractionValues +from ...interaction_values import InteractionValues +from .._base import Approximator class OwenSamplingSV(Approximator): diff --git a/shapiq/approximator/marginals/stratified.py b/shapiq/approximator/marginals/stratified.py index 01ad4e0f..0eb19a95 100644 --- a/shapiq/approximator/marginals/stratified.py +++ b/shapiq/approximator/marginals/stratified.py @@ -6,8 +6,8 @@ import numpy as np -from shapiq.approximator._base import Approximator -from shapiq.interaction_values import InteractionValues +from ...interaction_values import InteractionValues +from .._base import Approximator class StratifiedSamplingSV(Approximator): diff --git a/shapiq/approximator/montecarlo/_base.py b/shapiq/approximator/montecarlo/_base.py index 29854e4c..87a952b0 100644 --- a/shapiq/approximator/montecarlo/_base.py +++ b/shapiq/approximator/montecarlo/_base.py @@ -5,10 +5,10 @@ import numpy as np from scipy.special import binom, factorial -from shapiq.approximator._base import Approximator -from shapiq.indices import AVAILABLE_INDICES_MONTE_CARLO -from shapiq.interaction_values import InteractionValues -from shapiq.utils.sets import powerset +from ...game_theory.indices import AVAILABLE_INDICES_MONTE_CARLO +from ...interaction_values import InteractionValues +from ...utils.sets import powerset +from .._base import Approximator class MonteCarlo(Approximator): diff --git a/shapiq/approximator/permutation/stii.py b/shapiq/approximator/permutation/stii.py index 
d4568642..1d4eef13 100644 --- a/shapiq/approximator/permutation/stii.py +++ b/shapiq/approximator/permutation/stii.py @@ -6,9 +6,9 @@ import numpy as np import scipy as sp -from shapiq.approximator._base import Approximator -from shapiq.interaction_values import InteractionValues -from shapiq.utils import get_explicit_subsets, powerset +from ...interaction_values import InteractionValues +from ...utils import get_explicit_subsets, powerset +from .._base import Approximator class PermutationSamplingSTII(Approximator): diff --git a/shapiq/approximator/permutation/sv.py b/shapiq/approximator/permutation/sv.py index 5c429aaf..44346a06 100644 --- a/shapiq/approximator/permutation/sv.py +++ b/shapiq/approximator/permutation/sv.py @@ -6,8 +6,8 @@ import numpy as np -from shapiq.approximator._base import Approximator -from shapiq.interaction_values import InteractionValues +from ...interaction_values import InteractionValues +from .._base import Approximator class PermutationSamplingSV(Approximator): diff --git a/shapiq/approximator/regression/_base.py b/shapiq/approximator/regression/_base.py index 0246072f..06c3be25 100644 --- a/shapiq/approximator/regression/_base.py +++ b/shapiq/approximator/regression/_base.py @@ -7,10 +7,10 @@ import numpy as np from scipy.special import bernoulli, binom -from shapiq.approximator._base import Approximator -from shapiq.indices import AVAILABLE_INDICES_REGRESSION -from shapiq.interaction_values import InteractionValues -from shapiq.utils.sets import powerset +from ...game_theory.indices import AVAILABLE_INDICES_REGRESSION +from ...interaction_values import InteractionValues +from ...utils.sets import powerset +from .._base import Approximator class Regression(Approximator): diff --git a/shapiq/approximator/sampling.py b/shapiq/approximator/sampling.py index a572544e..c205463b 100644 --- a/shapiq/approximator/sampling.py +++ b/shapiq/approximator/sampling.py @@ -7,7 +7,7 @@ import numpy as np from scipy.special import binom -from 
shapiq.utils.sets import powerset +from ..utils.sets import powerset class CoalitionSampler: diff --git a/shapiq/explainer/_base.py b/shapiq/explainer/_base.py index ce509989..3bc9442c 100644 --- a/shapiq/explainer/_base.py +++ b/shapiq/explainer/_base.py @@ -4,8 +4,8 @@ import numpy as np -from shapiq.explainer.utils import get_explainers, get_predict_function_and_model_type, print_class -from shapiq.interaction_values import InteractionValues +from ..explainer.utils import get_explainers, get_predict_function_and_model_type, print_class +from ..interaction_values import InteractionValues class Explainer: diff --git a/shapiq/explainer/tabular.py b/shapiq/explainer/tabular.py index 7700c067..b958ad82 100644 --- a/shapiq/explainer/tabular.py +++ b/shapiq/explainer/tabular.py @@ -5,7 +5,7 @@ import numpy as np -from shapiq.approximator import ( +from ..approximator import ( SHAPIQ, SVARMIQ, InconsistentKernelSHAPIQ, @@ -17,9 +17,9 @@ RegressionFSII, UnbiasedKernelSHAP, ) -from shapiq.approximator._base import Approximator -from shapiq.explainer._base import Explainer -from shapiq.interaction_values import InteractionValues +from ..approximator._base import Approximator +from ..explainer._base import Explainer +from ..interaction_values import InteractionValues APPROXIMATOR_CONFIGURATIONS = { "regression": { diff --git a/shapiq/explainer/tree/conversion/lightgbm.py b/shapiq/explainer/tree/conversion/lightgbm.py index f08d4160..4a2bfde2 100644 --- a/shapiq/explainer/tree/conversion/lightgbm.py +++ b/shapiq/explainer/tree/conversion/lightgbm.py @@ -5,8 +5,7 @@ import pandas as pd -from shapiq.utils.types import Model - +from ....utils.types import Model from ..base import TreeModel diff --git a/shapiq/explainer/tree/conversion/sklearn.py b/shapiq/explainer/tree/conversion/sklearn.py index c11d9914..b0dcd896 100644 --- a/shapiq/explainer/tree/conversion/sklearn.py +++ b/shapiq/explainer/tree/conversion/sklearn.py @@ -4,11 +4,9 @@ from typing import Optional import numpy 
as np -from sklearn.ensemble._iforest import _average_path_length - -from shapiq.utils import safe_isinstance -from shapiq.utils.types import Model +from ....utils import safe_isinstance +from ....utils.types import Model from ..base import TreeModel @@ -77,11 +75,20 @@ def convert_sklearn_tree( ) -def average_path_length(isolation_forest): +def average_path_length(isolation_forest: Model) -> float: + """Compute the average path length of the isolation forest. + + Args: + isolation_forest: The isolation forest model. + + Returns: + The average path length of the isolation forest. + """ + from sklearn.ensemble._iforest import _average_path_length + max_samples = isolation_forest._max_samples - average_path_length = _average_path_length( - [max_samples] - ) # NOTE: _average_path_length func is equivalent to equation 1 in Isolation Forest paper Lui2008 + # NOTE: _average_path_length func is equivalent to equation 1 in Isolation Forest paper Liu2008 + average_path_length = _average_path_length([max_samples]) return average_path_length @@ -99,7 +106,6 @@ def convert_sklearn_isolation_forest( scaling = 1.0 / len(tree_model.estimators_) return [ - # convert_isolation_tree_shap_isotree(tree, features, scaling=scaling) convert_isolation_tree(tree, features, scaling=scaling) for tree, features in zip(tree_model.estimators_, tree_model.estimators_features_) ] @@ -107,25 +113,20 @@ def convert_sklearn_isolation_forest( def convert_isolation_tree( tree_model: Model, - tree_features, - class_label: Optional[int] = None, + tree_features: np.ndarray, scaling: float = 1.0, - average_path_length: float = 1.0, # TODO fix default value ) -> TreeModel: """Convert a scikit-learn decision tree to the format used by shapiq. Args: tree_model: The scikit-learn decision tree model to convert. - class_label: The class label of the model to explain. Only used for classification models. - Defaults to ``1``. + tree_features: The features used in the tree. 
scaling: The scaling factor for the tree values. Returns: The converted decision tree model. """ output_type = "raw" - tree_values = tree_model.tree_.value.copy() - tree_values = tree_values.flatten() features_updated, values_updated = isotree_value_traversal( tree_model.tree_, tree_features, normalize=False, scaling=1.0 ) @@ -145,8 +146,24 @@ def convert_isolation_tree( def isotree_value_traversal( - tree, tree_features, normalize=False, scaling=1.0, data=None, data_missing=None -): + tree: Model, + tree_features: np.ndarray, + normalize: bool = False, + scaling: float = 1.0, +) -> tuple[np.ndarray, np.ndarray]: + """Traverse the tree and calculate the average path length for each node. + + Args: + tree: The tree to traverse. + tree_features: The features used in the tree. + normalize: Whether to normalize the values. + scaling: The scaling factor for the values. + + Returns: + The updated features and values. + """ + from sklearn.ensemble._iforest import _average_path_length + features = tree.feature.copy() corrected_values = tree.value.copy() if safe_isinstance(tree, "sklearn.tree._tree.Tree"): diff --git a/shapiq/explainer/tree/conversion/xgboost.py b/shapiq/explainer/tree/conversion/xgboost.py index 0e29aea8..dbf425e3 100644 --- a/shapiq/explainer/tree/conversion/xgboost.py +++ b/shapiq/explainer/tree/conversion/xgboost.py @@ -7,8 +7,7 @@ import numpy as np import pandas as pd -from shapiq.utils.types import Model - +from ....utils.types import Model from ..base import TreeModel diff --git a/shapiq/explainer/tree/explainer.py b/shapiq/explainer/tree/explainer.py index 47cf9bc9..7e172f26 100644 --- a/shapiq/explainer/tree/explainer.py +++ b/shapiq/explainer/tree/explainer.py @@ -7,9 +7,8 @@ import numpy as np -from shapiq.explainer._base import Explainer -from shapiq.interaction_values import InteractionValues - +from ...interaction_values import InteractionValues +from .._base import Explainer from .treeshapiq import TreeModel, TreeSHAPIQ from .validation 
import validate_tree_model @@ -77,6 +76,8 @@ def __init__( self.baseline_value = self._compute_baseline_value() def explain(self, x: np.ndarray) -> InteractionValues: + if len(x.shape) != 1: + raise TypeError("explain expects a single instance, not a batch.") # run treeshapiq for all trees interaction_values: list[InteractionValues] = [] for explainer in self._treeshapiq_explainers: diff --git a/shapiq/explainer/tree/treeshapiq.py b/shapiq/explainer/tree/treeshapiq.py index 07d7bfc2..56827456 100644 --- a/shapiq/explainer/tree/treeshapiq.py +++ b/shapiq/explainer/tree/treeshapiq.py @@ -7,8 +7,8 @@ import numpy as np import scipy as sp -from ...aggregation import aggregate_interaction_values -from ...indices import get_computation_index +from ...game_theory.aggregation import aggregate_interaction_values +from ...game_theory.indices import get_computation_index from ...interaction_values import InteractionValues from ...utils.sets import generate_interaction_lookup, powerset from .base import EdgeTree, TreeModel diff --git a/shapiq/explainer/tree/validation.py b/shapiq/explainer/tree/validation.py index cb7bff4a..79e7f170 100644 --- a/shapiq/explainer/tree/validation.py +++ b/shapiq/explainer/tree/validation.py @@ -29,6 +29,7 @@ "lightgbm.sklearn.LGBMRegressor", "lightgbm.sklearn.LGBMClassifier", "lightgbm.basic.Booster", + # xboost? 
} diff --git a/shapiq/game_theory/__init__.py b/shapiq/game_theory/__init__.py new file mode 100644 index 00000000..62be0865 --- /dev/null +++ b/shapiq/game_theory/__init__.py @@ -0,0 +1,30 @@ +"""Game-theoretic computations: exact values, aggregation, core, indices, and Moebius conversion. +""" + +from .aggregation import aggregate_interaction_values +from .core import egalitarian_least_core +from .exact import ExactComputer, get_bernoulli_weights +from .indices import ( + ALL_AVAILABLE_CONCEPTS, + get_computation_index, + index_generalizes_bv, + index_generalizes_sv, + is_empty_value_the_baseline, + is_index_aggregated, +) +from .moebius_converter import MoebiusConverter + +__all__ = [ + "ExactComputer", + "aggregate_interaction_values", + "get_bernoulli_weights", + "ALL_AVAILABLE_CONCEPTS", + "index_generalizes_sv", + "index_generalizes_bv", + "get_computation_index", + "is_index_aggregated", + "is_empty_value_the_baseline", + "egalitarian_least_core", + "MoebiusConverter", +] +# todo complete list diff --git a/shapiq/aggregation.py b/shapiq/game_theory/aggregation.py similarity index 98% rename from shapiq/aggregation.py rename to shapiq/game_theory/aggregation.py index 3441652e..f4037fbb 100644 --- a/shapiq/aggregation.py +++ b/shapiq/game_theory/aggregation.py @@ -7,8 +7,8 @@ import numpy as np import scipy as sp -from .interaction_values import InteractionValues -from .utils.sets import powerset +from ..interaction_values import InteractionValues +from ..utils.sets import powerset def _change_index(index: str) -> str: diff --git a/shapiq/core.py b/shapiq/game_theory/core.py similarity index 98% rename from shapiq/core.py rename to shapiq/game_theory/core.py index 5191520b..bebbbe74 100644 --- a/shapiq/core.py +++ b/shapiq/game_theory/core.py @@ -7,8 +7,8 @@ import numpy as np from scipy.optimize import LinearConstraint, minimize -from shapiq.interaction_values import InteractionValues -from shapiq.utils.sets import powerset +from ..interaction_values import InteractionValues +from ..utils.sets 
import powerset __all__ = ["egalitarian_least_core"] diff --git a/shapiq/exact.py b/shapiq/game_theory/exact.py similarity index 95% rename from shapiq/exact.py rename to shapiq/game_theory/exact.py index e1430cba..85e8b9c5 100644 --- a/shapiq/exact.py +++ b/shapiq/game_theory/exact.py @@ -2,21 +2,21 @@ like interaction indices or generalized values.""" import copy -from typing import Callable, Union +from typing import Callable, Optional, Union import numpy as np from scipy.special import bernoulli, binom -from shapiq.indices import ALL_AVAILABLE_CONCEPTS -from shapiq.interaction_values import InteractionValues -from shapiq.utils import powerset +from ..interaction_values import InteractionValues +from ..utils import powerset +from .indices import ALL_AVAILABLE_CONCEPTS __all__ = ["ExactComputer", "get_bernoulli_weights"] class ExactComputer: """Computes exact Shapley Interactions for specified game by evaluating the powerset of all - :math:`2^n` coalitions. + :math:`2^n` coalitions. The ExactComputer class computes a variety of game theoretic concepts like interaction indices or generalized values. Currently, the following indices and values are supported: @@ -27,6 +27,7 @@ class ExactComputer: n_players: The number of players in the game. game_fun: A callable game that takes a binary matrix of shape ``(n_coalitions, n_players)`` and returns a numpy array of shape ``(n_coalitions,)`` containing the game values. + evaluate_game: whether to compute the values at init (if True) or first call (False) Attributes: n: The number of players. 
@@ -40,6 +41,7 @@ def __init__( self, n_players: int, game_fun: Callable[[np.ndarray], np.ndarray[float]], + evaluate_game: bool = False, ) -> None: # set parameter attributes self.n: int = n_players @@ -52,12 +54,15 @@ def __init__( self._n_interactions: np.ndarray = self.get_n_interactions(self.n) self._computed: dict[tuple[str, int], InteractionValues] = {} # will store all computations self._elc_stability_subsidy: float = -1 + self._game_is_computed: bool = False - # evaluate the game on the powerset - computed_game = self.compute_game_values(game_fun) - self.baseline_value: float = computed_game[0] - self.game_values: np.ndarray[float] = computed_game[1] - self.coalition_lookup: dict[tuple[int], int] = computed_game[2] + self._baseline_value: Optional[float] = None + self._game_values: Optional[np.ndarray] = None + self._coalition_lookup: Optional[dict[tuple[int], int]] = None + + if evaluate_game: + # evaluate the game on the powerset + self._evaluate_game() # setup callable mapping from index to computation self._index_mapping: dict[str, Callable[[str, int], InteractionValues]] = { @@ -124,9 +129,32 @@ def __call__(self, index: str, order: int = None) -> InteractionValues: else: raise ValueError(f"Index {index} not supported.") - def compute_game_values( - self, game_fun: Callable[[np.ndarray], np.ndarray[float]] - ) -> tuple[float, np.ndarray[float], dict[tuple[int], int]]: + @property + def baseline_value(self) -> float: + if not self._game_is_computed: + self._evaluate_game() + return self._baseline_value + + @property + def coalition_lookup(self) -> dict[tuple[int], int]: + if not self._game_is_computed: + self._evaluate_game() + return self._coalition_lookup + + @property + def game_values(self) -> np.ndarray[float]: + if not self._game_is_computed: + self._evaluate_game() + return self._game_values + + def _evaluate_game(self): + computed_game = self.compute_game_values() + self._baseline_value = computed_game[0] + self._game_values = computed_game[1] 
+ self._coalition_lookup = computed_game[2] + self._game_is_computed = True + + def compute_game_values(self) -> tuple[float, np.ndarray[float], dict[tuple[int], int]]: """Evaluates the game on the powerset of all coalitions. Args: @@ -141,8 +169,9 @@ def compute_game_values( for i, T in enumerate(powerset(self._grand_coalition_set, min_size=0, max_size=self.n)): coalition_lookup[T] = i # set lookup for the coalition coalition_matrix[i, T] = True # one-hot-encode the coalition - game_values = game_fun(coalition_matrix) # compute the game values + game_values = self.game_fun(coalition_matrix) # compute the game values baseline_value = float(game_values[0]) # set the baseline value + coalition_lookup = coalition_lookup return baseline_value, game_values, coalition_lookup def moebius_transform(self, *args, **kwargs) -> InteractionValues: @@ -158,6 +187,7 @@ def moebius_transform(self, *args, **kwargs) -> InteractionValues: return self._computed[("Moebius", self.n)] except KeyError: # if not computed yet, just continue pass + # compute the Moebius transform moebius_transform = np.zeros(2**self.n) coalition_lookup = {} @@ -836,7 +866,7 @@ def probabilistic_value(self, index: str, *args, **kwargs) -> InteractionValues: def compute_egalitarian_least_core(self, *args, **kwargs): - from shapiq.core import egalitarian_least_core + from shapiq.game_theory.core import egalitarian_least_core order = 1 @@ -856,7 +886,7 @@ def compute_egalitarian_least_core(self, *args, **kwargs): def get_bernoulli_weights(order: int) -> np.ndarray: """Returns the bernoulli weights in the k-additive approximation via SII, e.g. used in - kADD-SHAP. + kADD-SHAP. 
Args: order: The highest order of interactions diff --git a/shapiq/indices.py b/shapiq/game_theory/indices.py similarity index 100% rename from shapiq/indices.py rename to shapiq/game_theory/indices.py diff --git a/shapiq/moebius_converter.py b/shapiq/game_theory/moebius_converter.py similarity index 99% rename from shapiq/moebius_converter.py rename to shapiq/game_theory/moebius_converter.py index bacc42a3..f93c6e08 100644 --- a/shapiq/moebius_converter.py +++ b/shapiq/game_theory/moebius_converter.py @@ -7,8 +7,8 @@ import numpy as np from scipy.special import binom -from .interaction_values import InteractionValues -from .utils.sets import powerset +from ..interaction_values import InteractionValues +from ..utils.sets import powerset class MoebiusConverter: diff --git a/shapiq/games/base.py b/shapiq/games/base.py index 5677f59f..d274b64c 100644 --- a/shapiq/games/base.py +++ b/shapiq/games/base.py @@ -9,8 +9,8 @@ import numpy as np from tqdm.auto import tqdm -from shapiq.interaction_values import InteractionValues -from shapiq.utils import powerset, transform_array_to_coalitions, transform_coalitions_to_array +from ..interaction_values import InteractionValues +from ..utils import powerset, transform_array_to_coalitions, transform_coalitions_to_array class Game(ABC): @@ -478,7 +478,7 @@ def exact_values(self, index: str, order: int) -> InteractionValues: Returns: InteractionValues: The exact interaction values. 
""" - from ..exact import ExactComputer + from shapiq.game_theory.exact import ExactComputer # raise warning if the game is not precomputed and n_players > 16 if not self.precomputed and self.n_players > 16: diff --git a/shapiq/games/benchmark/synthetic/soum.py b/shapiq/games/benchmark/synthetic/soum.py index 4789354c..6f393af2 100644 --- a/shapiq/games/benchmark/synthetic/soum.py +++ b/shapiq/games/benchmark/synthetic/soum.py @@ -94,7 +94,7 @@ def __init__( normalize: bool = False, verbose: bool = False, ): - from ....moebius_converter import MoebiusConverter + from shapiq.game_theory.moebius_converter import MoebiusConverter self._rng = np.random.default_rng(random_state) @@ -160,7 +160,7 @@ def exact_values(self, index: str, order: int) -> InteractionValues: Returns: The exact values for the given index and order. """ - from ....moebius_converter import MoebiusConverter + from shapiq.game_theory.moebius_converter import MoebiusConverter if self.converter is None: self.converter = MoebiusConverter(self.moebius_coefficients) diff --git a/shapiq/games/imputer/baseline_imputer.py b/shapiq/games/imputer/baseline_imputer.py index b23c41b4..1ee632e8 100644 --- a/shapiq/games/imputer/baseline_imputer.py +++ b/shapiq/games/imputer/baseline_imputer.py @@ -5,7 +5,7 @@ import numpy as np -from shapiq.games.imputer.base import Imputer +from .base import Imputer class BaselineImputer(Imputer): diff --git a/shapiq/games/imputer/conditional_imputer.py b/shapiq/games/imputer/conditional_imputer.py index b8f42c6b..5dad0690 100644 --- a/shapiq/games/imputer/conditional_imputer.py +++ b/shapiq/games/imputer/conditional_imputer.py @@ -5,9 +5,9 @@ import numpy as np -from shapiq.approximator.sampling import CoalitionSampler -from shapiq.games.imputer.base import Imputer -from shapiq.utils.modules import check_import_module +from ...approximator.sampling import CoalitionSampler +from ...utils.modules import check_import_module +from .base import Imputer class 
ConditionalImputer(Imputer): diff --git a/shapiq/games/imputer/marginal_imputer.py b/shapiq/games/imputer/marginal_imputer.py index be13354f..eeabcaca 100644 --- a/shapiq/games/imputer/marginal_imputer.py +++ b/shapiq/games/imputer/marginal_imputer.py @@ -5,7 +5,7 @@ import numpy as np -from shapiq.games.imputer.base import Imputer +from .base import Imputer _too_large_sample_size_warning = ( "The sample size is larger than the number of data points in the background set. " diff --git a/shapiq/interaction_values.py b/shapiq/interaction_values.py index cd290b40..ae98d8fa 100644 --- a/shapiq/interaction_values.py +++ b/shapiq/interaction_values.py @@ -11,8 +11,12 @@ import matplotlib.pyplot as plt import numpy as np -from shapiq.indices import ALL_AVAILABLE_INDICES, index_generalizes_bv, index_generalizes_sv -from shapiq.utils.sets import count_interactions, generate_interaction_lookup, powerset +from .game_theory.indices import ( + ALL_AVAILABLE_INDICES, + index_generalizes_bv, + index_generalizes_sv, +) +from .utils.sets import count_interactions, generate_interaction_lookup, powerset @dataclass diff --git a/shapiq/plot/force.py b/shapiq/plot/force.py index a6ee8ff3..9907282a 100644 --- a/shapiq/plot/force.py +++ b/shapiq/plot/force.py @@ -5,9 +5,8 @@ import matplotlib.pyplot as plt import numpy as np -from shapiq.interaction_values import InteractionValues -from shapiq.utils.modules import check_import_module - +from ..interaction_values import InteractionValues +from ..utils.modules import check_import_module from .utils import get_interaction_values_and_feature_names __all__ = ["force_plot"] diff --git a/shapiq/plot/network.py b/shapiq/plot/network.py index c0e2c246..9edff911 100644 --- a/shapiq/plot/network.py +++ b/shapiq/plot/network.py @@ -9,9 +9,8 @@ import numpy as np from PIL import Image -from shapiq.interaction_values import InteractionValues -from shapiq.utils import powerset - +from ..interaction_values import InteractionValues +from ..utils import 
powerset from ._config import BLUE, LINES, NEUTRAL, RED, get_color __all__ = ["network_plot"] diff --git a/shapiq/plot/stacked_bar.py b/shapiq/plot/stacked_bar.py index 08ca7140..8466df1c 100644 --- a/shapiq/plot/stacked_bar.py +++ b/shapiq/plot/stacked_bar.py @@ -28,7 +28,7 @@ def stacked_bar_plot( This stacked bar plot can be used to visualize the amount of interaction between the features for a given instance. The n-SII values are plotted as stacked bars with positive and negative parts stacked on top of each other. The colors represent the order of the n-SII values. For a - detailed explanation of this plot, refer to `Bordt and von Luxburg (2023) `_. + detailed explanation of this plot, refer to `Bordt and von Luxburg (2023) `_. An example of the plot is shown below. diff --git a/shapiq/plot/utils.py b/shapiq/plot/utils.py index b573047f..d5c6a117 100644 --- a/shapiq/plot/utils.py +++ b/shapiq/plot/utils.py @@ -6,8 +6,8 @@ import numpy as np -from shapiq.interaction_values import InteractionValues -from shapiq.utils import powerset +from ..interaction_values import InteractionValues +from ..utils import powerset __all__ = ["get_interaction_values_and_feature_names", "abbreviate_feature_names"] diff --git a/shapiq/plot/watefall.py b/shapiq/plot/watefall.py index e89b82f0..0af4b941 100644 --- a/shapiq/plot/watefall.py +++ b/shapiq/plot/watefall.py @@ -5,9 +5,8 @@ import matplotlib.pyplot as plt import numpy as np -from shapiq.interaction_values import InteractionValues -from shapiq.utils.modules import check_import_module - +from ..interaction_values import InteractionValues +from ..utils.modules import check_import_module from .utils import get_interaction_values_and_feature_names __all__ = ["waterfall_plot"] diff --git a/tests/conftest.py b/tests/conftest.py index 582ed526..7da6bfca 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -13,11 +13,13 @@ from sklearn.linear_model import LinearRegression, LogisticRegression from sklearn.tree import 
DecisionTreeClassifier, DecisionTreeRegressor +NR_FEATURES = 7 + @pytest.fixture def dt_reg_model() -> DecisionTreeRegressor: """Return a simple decision tree model.""" - X, y = make_regression(n_samples=100, n_features=7, random_state=42) + X, y = make_regression(n_samples=100, n_features=NR_FEATURES, random_state=42) model = DecisionTreeRegressor(random_state=42, max_depth=3) model.fit(X, y) return model @@ -28,7 +30,7 @@ def dt_clf_model() -> DecisionTreeClassifier: """Return a simple decision tree model.""" X, y = make_classification( n_samples=100, - n_features=7, + n_features=NR_FEATURES, random_state=42, n_classes=3, n_informative=7, @@ -71,7 +73,7 @@ def lightgbm_reg_model(): """Return a simple lightgbm regression model.""" from lightgbm import LGBMRegressor - X, y = make_regression(n_samples=100, n_features=7, random_state=42) + X, y = make_regression(n_samples=100, n_features=NR_FEATURES, random_state=42) model = LGBMRegressor(random_state=42, n_estimators=3) model.fit(X, y) return model @@ -84,7 +86,7 @@ def lightgbm_clf_model(): X, y = make_classification( n_samples=100, - n_features=7, + n_features=NR_FEATURES, random_state=42, n_classes=3, n_informative=7, @@ -103,7 +105,7 @@ def dt_clf_model_tree_model(): X, y = make_classification( n_samples=100, - n_features=7, + n_features=NR_FEATURES, random_state=42, n_classes=3, n_informative=7, @@ -127,10 +129,10 @@ def rf_reg_model() -> RandomForestRegressor: @pytest.fixture def rf_clf_model() -> RandomForestClassifier: - """Return a simple random forest model.""" + """Return a simple (classification) random forest model.""" X, y = make_classification( n_samples=100, - n_features=7, + n_features=NR_FEATURES, random_state=42, n_classes=3, n_informative=7, @@ -184,7 +186,7 @@ def rf_clf_binary_model() -> RandomForestClassifier: """Return a simple random forest model.""" X, y = make_classification( n_samples=100, - n_features=7, + n_features=NR_FEATURES, random_state=42, n_classes=2, n_informative=7, @@ -203,7 
+205,7 @@ def xgb_clf_model(): X, y = make_classification( n_samples=100, - n_features=7, + n_features=NR_FEATURES, random_state=42, n_classes=3, n_informative=7, @@ -255,7 +257,7 @@ def background_clf_data() -> np.ndarray: """Return a simple background dataset.""" X, _ = make_classification( n_samples=100, - n_features=7, + n_features=NR_FEATURES, random_state=42, n_classes=3, n_informative=7, @@ -268,7 +270,7 @@ def background_clf_data() -> np.ndarray: @pytest.fixture def background_reg_dataset() -> tuple[np.ndarray, np.ndarray]: """Return a simple background dataset.""" - X, y = make_regression(n_samples=100, n_features=7, random_state=42) + X, y = make_regression(n_samples=100, n_features=NR_FEATURES, random_state=42) return X, y @@ -277,7 +279,7 @@ def background_clf_dataset() -> tuple[np.ndarray, np.ndarray]: """Return a simple background dataset.""" X, y = make_classification( n_samples=100, - n_features=7, + n_features=NR_FEATURES, random_state=42, n_classes=3, n_informative=7, @@ -292,7 +294,7 @@ def background_clf_dataset_binary() -> tuple[np.ndarray, np.ndarray]: """Return a simple background dataset.""" X, y = make_classification( n_samples=100, - n_features=7, + n_features=NR_FEATURES, random_state=42, n_classes=2, n_informative=7, @@ -352,3 +354,111 @@ def interaction_values_list(): ) iv_list.append(iv) return iv_list + + +@pytest.fixture +def xgboost_regressor(background_reg_dataset): + """Return a xgb regression model""" + import xgboost as xgb + + X, y = background_reg_dataset + model = xgb.XGBRegressor(n_estimators=10, max_depth=1) + model.fit(X, y, verbose=False) + return model + + +@pytest.fixture +def xgboost_booster(background_reg_dataset): + """Return a xgb booster""" + import xgboost as xgb + + X, y = background_reg_dataset + dtrain = xgb.DMatrix(X, label=y) + params = {"1": 1, "2": 2, "3": 3, "4": 4, "5": 5, "6": 6, "7": 7} + booster = xgb.train(params=params, dtrain=dtrain, num_boost_round=0) + return booster + + +@pytest.fixture +def 
lightgbm_basic(background_reg_dataset): + """Return a lgm basic booster""" + import lightgbm as lgb + + X, y = background_reg_dataset + train_data = lgb.Dataset(X, label=y) + model = lgb.train(params={}, train_set=train_data, num_boost_round=1) + return model + + +@pytest.fixture +def sequential_model_1_class(): + """Return a keras nn with output dimension 1""" + return _sequential_model(1) + + +@pytest.fixture +def sequential_model_2_classes(): + """Return a keras nn with output dimension 2""" + return _sequential_model(2) + + +@pytest.fixture +def sequential_model_3_classes(): + """Return a keras nn with output dimension 3""" + return _sequential_model(3) + + +def _sequential_model(output_shape_nr): + """Return a keras nn with specified output dimension""" + import keras + + model = keras.Sequential( + [ + keras.layers.Input(shape=(NR_FEATURES,)), + keras.layers.Dense(2, activation="relu", name="layer1"), + keras.layers.Dense(output_shape_nr, name="layer2"), + ] + ) + model.compile(optimizer="adam", loss="mse") + X, y = make_regression(n_samples=100, n_features=NR_FEATURES, random_state=42) + model.fit(X, y, epochs=0, batch_size=32) + return model + + +class CustomModel: + def __init__(self, data: tuple[np.ndarray, np.ndarray]): + self.data = data + + def __call__(self, *args, **kwargs): + return self.data[1] + + +@pytest.fixture +def custom_model(background_reg_dataset) -> CustomModel: + """Return a callable mock custom model""" + return CustomModel(background_reg_dataset) + + +TABULAR_MODEL_FIXTURES = [ + ("custom_model", "custom_model"), + ("lr_reg_model", "sklearn.linear_model.LinearRegression"), + ("sequential_model_1_class", "tensorflow.python.keras.engine.sequential.Sequential"), + ("sequential_model_2_classes", "keras.src.models.sequential.Sequential"), + ("sequential_model_3_classes", "keras.engine.sequential.Sequential"), + ("lr_clf_model", "sklearn.linear_model.LogisticRegression"), + ("torch_clf_model", "torch.nn.modules.container.Sequential"), + 
("torch_reg_model", "torch.nn.modules.container.Sequential"), +] + +TREE_MODEL_FIXTURES = [ + ("lightgbm_reg_model", "lightgbm.sklearn.LGBMRegressor"), + ("xgboost_regressor", "xgboost.sklearn.XGBRegressor"), + ("xgboost_booster", "xgboost.core.Booster"), + ("lightgbm_basic", "lightgbm.basic.Booster"), + ("rf_reg_model", "sklearn.ensemble.RandomForestRegressor"), + ("rf_clf_model", "sklearn.ensemble.RandomForestClassifier"), + ("dt_clf_model", "sklearn.tree.DecisionTreeClassifier"), + ("dt_reg_model", "sklearn.tree.DecisionTreeRegressor"), +] + +ALL_MODEL_FIXTURES = TABULAR_MODEL_FIXTURES + TREE_MODEL_FIXTURES diff --git a/tests/requirements/requirements.txt b/tests/requirements/requirements.txt index e690dcfb..303ab789 100644 --- a/tests/requirements/requirements.txt +++ b/tests/requirements/requirements.txt @@ -20,3 +20,5 @@ xgboost==2.1.1 numpy==1.26.4 requests==2.32.3 lightgbm==4.5.0 +tf-keras==2.18.0 +tensorflow==2.18.0 diff --git a/tests/test_configuration.py b/tests/test_configuration.py index b5a714ca..5cf3aa31 100644 --- a/tests/test_configuration.py +++ b/tests/test_configuration.py @@ -1,6 +1,6 @@ """Tests the indices configuration module.""" -from shapiq.indices import ALL_AVAILABLE_CONCEPTS +from shapiq.game_theory.indices import ALL_AVAILABLE_CONCEPTS def test_configuration(): diff --git a/tests/test_exact_computer.py b/tests/test_exact_computer.py deleted file mode 100644 index df5bda13..00000000 --- a/tests/test_exact_computer.py +++ /dev/null @@ -1,148 +0,0 @@ -"""This test module tests the ExactComputer class.""" - -import numpy as np -import pytest - -from shapiq.exact import ExactComputer -from shapiq.games.benchmark.synthetic.soum import SOUM -from shapiq.moebius_converter import MoebiusConverter - - -def test_exact_computer_on_soum(): - for i in range(20): - n = np.random.randint(low=2, high=10) - order = np.random.randint(low=1, high=min(n, 5)) - n_basis_games = np.random.randint(low=1, high=100) - soum = SOUM(n, n_basis_games=n_basis_games) 
- - predicted_value = soum(np.ones(n))[0] - - # Compute via exactComputer - exact_computer = ExactComputer(n_players=n, game_fun=soum) - - # Compute via sparse Möbius representation - moebius_converter = MoebiusConverter(soum.moebius_coefficients) - - moebius_transform = exact_computer.moebius_transform() - # Assert equality with ground truth Möbius coefficients from SOUM - assert np.sum((moebius_transform - soum.moebius_coefficients).values ** 2) < 10e-7 - - # Compare ground truth via MoebiusConvert with exact computation of ExactComputer - shapley_interactions_gt = {} - shapley_interactions_exact = {} - for index in ["STII", "k-SII", "FSII"]: - shapley_interactions_gt[index] = moebius_converter.moebius_to_shapley_interaction( - index=index, order=order - ) - shapley_interactions_exact[index] = exact_computer.shapley_interaction( - index=index, order=order - ) - # Check equality with ground truth calculations from SOUM - assert ( - np.sum( - (shapley_interactions_exact[index] - shapley_interactions_gt[index]).values ** 2 - ) - < 10e-7 - ) - - index = "JointSV" - shapley_generalized_values = exact_computer.shapley_generalized_value( - order=order, index=index - ) - # Assert efficiency - assert (np.sum(shapley_generalized_values.values) - predicted_value) ** 2 < 10e-7 - - index = "kADD-SHAP" - shapley_interactions_exact[index] = exact_computer.shapley_interaction( - index=index, order=order - ) - - base_interaction_indices = ["SII", "BII", "CHII", "Co-Moebius"] - base_interactions = {} - for base_index in base_interaction_indices: - base_interactions[base_index] = exact_computer.shapley_base_interaction( - order=order, index=base_index - ) - - base_gv_indices = ["SGV", "BGV", "CHGV", "IGV", "EGV"] - base_gv = {} - for base_gv_index in base_gv_indices: - base_gv[base_gv_index] = exact_computer.base_generalized_value( - order=order, index=base_gv_index - ) - - probabilistic_values_indices = ["SV", "BV"] - probabilistic_values = {} - for pv_index in 
probabilistic_values_indices: - probabilistic_values[pv_index] = exact_computer.probabilistic_value(index=pv_index) - - # Assert efficiency for SV - assert (np.sum(probabilistic_values["SV"].values) - predicted_value) ** 2 < 10e-7 - - -@pytest.mark.parametrize( - "index, order", - [("ELC", 1)], -) -def test_exact_elc_computer_call(index, order): - """Tests the call function for the ExactComputer.""" - n = 5 - soum = SOUM(n, n_basis_games=10, normalize=True) - exact_computer = ExactComputer(n_players=n, game_fun=soum) - interaction_values = exact_computer(index=index, order=order) - if order is None: - order = n - assert interaction_values is not None # should return something - assert interaction_values.max_order == order # order should be the same - assert interaction_values.min_order == 1 # ELC only has singleton values - assert interaction_values.index == index # index should be the same - assert interaction_values.baseline_value == 0 # ELC needs baseline_value zero - assert interaction_values.estimated is False # nothing should be estimated - assert interaction_values.values is not None # values should be computed - assert exact_computer._elc_stability_subsidy is not None # ELC should have stored subsidy - - -@pytest.mark.parametrize( - "index, order", - [ - ("SV", 1), - ("BV", 1), - ("SII", 2), - ("BII", 2), - ("CHII", 2), - ("Co-Moebius", 2), - ("SGV", 2), - ("BGV", 2), - ("CHGV", 2), - ("EGV", 2), - ("IGV", 2), - ("STII", 2), - ("k-SII", 2), - ("FSII", 2), - ("JointSV", 2), - ("kADD-SHAP", 2), - ("SII", None), - ], -) -def test_exact_computer_call(index, order): - """Tests the call function for the ExactComputer.""" - n = 5 - soum = SOUM(n, n_basis_games=10) - exact_computer = ExactComputer(n_players=n, game_fun=soum) - interaction_values = exact_computer(index=index, order=order) - if order is None: - order = n - assert interaction_values is not None # should return something - assert interaction_values.max_order == order # order should be the same - assert 
interaction_values.index == index # index should be the same - assert interaction_values.estimated is False # nothing should be estimated - assert interaction_values.values is not None # values should be computed - - -def test_basic_functions(): - """Tests the basic functions of the ExactComputer.""" - n = 5 - soum = SOUM(n, n_basis_games=10) - exact_computer = ExactComputer(n_players=n, game_fun=soum) - isinstance(repr(exact_computer), str) - isinstance(str(exact_computer), str) diff --git a/tests/tests_explainer/test_explainer_tabular.py b/tests/tests_explainer/test_explainer_tabular.py index 5e39c39b..829eb586 100644 --- a/tests/tests_explainer/test_explainer_tabular.py +++ b/tests/tests_explainer/test_explainer_tabular.py @@ -1,4 +1,4 @@ -"""This test module contains all tests regarding the interaciton explainer for the shapiq package.""" +"""This test module contains all tests regarding the interaction explainer for the shapiq package.""" import numpy as np import pytest diff --git a/tests/tests_explainer/test_explainer_utils.py b/tests/tests_explainer/test_explainer_utils.py new file mode 100644 index 00000000..c83920e3 --- /dev/null +++ b/tests/tests_explainer/test_explainer_utils.py @@ -0,0 +1,107 @@ +import inspect +from typing import Any +from unittest.mock import Mock + +import numpy as np +import pytest + +from shapiq.explainer.tree.validation import SUPPORTED_MODELS +from shapiq.explainer.utils import get_predict_function_and_model_type +from tests.conftest import TABULAR_MODEL_FIXTURES, TREE_MODEL_FIXTURES + + +@pytest.mark.external_libraries +@pytest.mark.parametrize("model_name, label", TABULAR_MODEL_FIXTURES) +def test_tabular_get_predict_function_and_model_type( + model_name, label, background_reg_dataset, request +): + model = request.getfixturevalue(model_name) + X, y = background_reg_dataset + + predict_function, model_type = get_predict_function_and_model_type(model, label) + assert model_type == "tabular" + assert predict_function(model, 
X).ndim == 1 + + if label == "custom_model": + assert np.all(predict_function(model, X) == y) + + if label == "sklearn.linear_model.LinearRegression": + assert np.all(predict_function(model, X) == model.predict(X)) + + +@pytest.mark.external_libraries +@pytest.mark.parametrize("model_fixture, model_class", TREE_MODEL_FIXTURES) +def test_tree_get_predict_function_and_model_type( + model_fixture, model_class, background_reg_dataset, request +): + model = request.getfixturevalue(model_fixture) + X, y = background_reg_dataset + + predict_function, model_type = get_predict_function_and_model_type(model, model_class) + assert predict_function(model, X).ndim == 1 + assert model_type == "tree" + + if model_class == "sklearn.tree.DecisionTreeRegressor": + assert np.all(predict_function(model, X) == model.predict(X)) + + +def test_all_supported_tree_models_recognized(): + model = Mock() + for label in SUPPORTED_MODELS: + predict_function, model_type = get_predict_function_and_model_type(model, label) + assert model_type == "tree" + + +class ModelWithFalseCall: + def __call__(self, string: str, double: float): + pass + + +class NonCallableModel: + pass + + +def test_exceptions_get_predict_function_and_model_type(background_reg_data): + # neither call nor predict functions + model_without_call = NonCallableModel() + with pytest.raises(TypeError): + _, _ = get_predict_function_and_model_type(model_without_call, "non_sense_model") + + +def test_class_index(): + def _model(x: np.ndarray): + return np.array([[1, 2, 3, 4], [1, 2, 3, 4]]) + + for i in range(0, 4): + pred_fun, label = get_predict_function_and_model_type(_model, "custom_model", i) + return_value = pred_fun(_model, np.array([[11, 22, 33, 44], [11, 22, 33, 44]])) + assert return_value[0] == i + 1 + + +@pytest.mark.skip("not possible to implement right now") +def test_class_index_errors(): + def _model(x: np.ndarray): + return np.array([[1, 2, 3, 4], [1, 2, 3, 4]]) + + # out of bounds + with pytest.raises(TypeError): + 
_, _ = get_predict_function_and_model_type(_model, "non_sense_model", 4) + # out of bounds + with pytest.raises(TypeError): + _, _ = get_predict_function_and_model_type(_model, "non_sense_model", -5) + + +def _valid_sig(param: inspect.Parameter): + return ( + param.annotation == np.ndarray + or param.annotation == inspect._empty + or param.annotation == Any + ) + + +def callable_check(): # todo useful addition? + # call with false signature + model_with_false_call = ModelWithFalseCall() + call_signature = inspect.signature(model_with_false_call) + if not any([_valid_sig(param) for param in call_signature.parameters.values()]): + raise TypeError diff --git a/tests/tests_explainer/tests_tree_explainer/test_tree_explainer.py b/tests/tests_explainer/tests_tree_explainer/test_tree_explainer.py index cb07807c..1df79fee 100644 --- a/tests/tests_explainer/tests_tree_explainer/test_tree_explainer.py +++ b/tests/tests_explainer/tests_tree_explainer/test_tree_explainer.py @@ -58,7 +58,7 @@ def test_decision_tree_regression(dt_reg_model, background_reg_data): assert prediction == pytest.approx(sum_of_values) -def test_random_forrest_regression(rf_reg_model, background_reg_data): +def test_random_forest_regression(rf_reg_model, background_reg_data): """Test TreeExplainer with a simple decision tree regressor.""" explainer = TreeExplainer(model=rf_reg_model, max_order=2, min_order=1) @@ -79,7 +79,7 @@ def test_random_forrest_regression(rf_reg_model, background_reg_data): assert prediction == pytest.approx(sum_of_values) -def test_random_forrest_classification(rf_clf_model, background_clf_data): +def test_random_forest_classification(rf_clf_model, background_clf_data): """Test TreeExplainer with a simple decision tree regressor.""" class_label = 0 explainer = TreeExplainer( diff --git a/tests/tests_explainer/tests_tree_explainer/test_tree_explainer_conversion.py b/tests/tests_explainer/tests_tree_explainer/test_tree_explainer_conversion.py index cfb1b07b..c3448bd6 100644 --- 
a/tests/tests_explainer/tests_tree_explainer/test_tree_explainer_conversion.py +++ b/tests/tests_explainer/tests_tree_explainer/test_tree_explainer_conversion.py @@ -1,8 +1,12 @@ """This test module collects all tests for the conversions of the supported tree models for the TreeExplainer class.""" +import random + import numpy as np +import pytest +from shapiq import TreeExplainer from shapiq.explainer.tree.base import TreeModel from shapiq.explainer.tree.conversion.edges import create_edge_tree from shapiq.explainer.tree.conversion.sklearn import ( @@ -10,7 +14,10 @@ convert_sklearn_isolation_forest, convert_sklearn_tree, ) +from shapiq.explainer.tree.validation import SUPPORTED_MODELS +from shapiq.explainer.utils import get_predict_function_and_model_type from shapiq.utils import safe_isinstance +from tests.conftest import TREE_MODEL_FIXTURES def test_tree_model_init(): @@ -138,3 +145,25 @@ def test_sklearn_if_conversion(if_clf_model): assert isinstance(tree_model, list) assert safe_isinstance(tree_model[0], tree_model_class_path_str) assert tree_model[0].empty_prediction is not None + + +@pytest.mark.external_libraries +@pytest.mark.parametrize("model_fixture, model_class", TREE_MODEL_FIXTURES) +def test_conversion_predict_identity(model_fixture, model_class, background_reg_data, request): + if model_class not in SUPPORTED_MODELS: + pytest.skip( + f"skipped test, {model_class} not in the supported models for the tree explainer." 
+ ) + else: + model = request.getfixturevalue(model_fixture) + predict_function, _ = get_predict_function_and_model_type(model, model_class) + original_pred = predict_function(model, background_reg_data) + tree_explainer = TreeExplainer(model=model, max_order=1, min_order=1) + for _ in range(1, 150): + index = random.randint(0, 99) + sv = tree_explainer.explain(background_reg_data[index]) + prediction = sum(sv.values) + if sv[()] == 0: + prediction += sv.baseline_value + tolerance = 1e-5 + assert abs(prediction - original_pred[index]) <= tolerance diff --git a/tests/tests_explainer/tests_tree_explainer/test_tree_explainer_validate.py b/tests/tests_explainer/tests_tree_explainer/test_tree_explainer_validate.py index cc453044..0c9ff46e 100644 --- a/tests/tests_explainer/tests_tree_explainer/test_tree_explainer_validate.py +++ b/tests/tests_explainer/tests_tree_explainer/test_tree_explainer_validate.py @@ -4,7 +4,8 @@ import pytest from shapiq import safe_isinstance -from shapiq.explainer.tree.validation import validate_tree_model +from shapiq.explainer.tree.validation import SUPPORTED_MODELS, validate_tree_model +from tests.conftest import TREE_MODEL_FIXTURES def test_validate_model(dt_clf_model, dt_reg_model, rf_reg_model, rf_clf_model, if_clf_model): @@ -30,3 +31,18 @@ def test_validate_model(dt_clf_model, dt_reg_model, rf_reg_model, rf_clf_model, # test the unsupported model with pytest.raises(TypeError): validate_tree_model("unsupported_model") + + +@pytest.mark.external_libraries +@pytest.mark.parametrize("model_fixture, model_class", TREE_MODEL_FIXTURES) +def test_validate_model_fixtures(model_fixture, model_class, background_reg_data, request): + if model_class not in SUPPORTED_MODELS: + return + else: + model = request.getfixturevalue(model_fixture) + class_path_str = ["shapiq.explainer.tree.base.TreeModel"] + tree_model = validate_tree_model(model) + if type(tree_model) is not list: + tree_model = [tree_model] + for tree in tree_model: + assert 
safe_isinstance(tree, class_path_str) diff --git a/tests/tests_game_theory/__init__.py b/tests/tests_game_theory/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/test_aggregation.py b/tests/tests_game_theory/test_aggregation.py similarity index 97% rename from tests/test_aggregation.py rename to tests/tests_game_theory/test_aggregation.py index 38e81e84..e0ad8006 100644 --- a/tests/test_aggregation.py +++ b/tests/tests_game_theory/test_aggregation.py @@ -3,11 +3,11 @@ import numpy as np import pytest -from shapiq.aggregation import aggregate_to_one_dimension from shapiq.approximator import ( SHAPIQ, PermutationSamplingSII, ) +from shapiq.game_theory.aggregation import aggregate_to_one_dimension from shapiq.games.benchmark import DummyGame diff --git a/tests/test_core.py b/tests/tests_game_theory/test_core.py similarity index 99% rename from tests/test_core.py rename to tests/tests_game_theory/test_core.py index af48134b..84d9e6f5 100644 --- a/tests/test_core.py +++ b/tests/tests_game_theory/test_core.py @@ -4,7 +4,7 @@ import pytest import shapiq -from shapiq.core import egalitarian_least_core +from shapiq.game_theory.core import egalitarian_least_core from shapiq.games.benchmark.synthetic.soum import SOUM from shapiq.utils import powerset diff --git a/tests/tests_game_theory/test_exact_computer.py b/tests/tests_game_theory/test_exact_computer.py new file mode 100644 index 00000000..ddce2acf --- /dev/null +++ b/tests/tests_game_theory/test_exact_computer.py @@ -0,0 +1,455 @@ +"""This test module tests the ExactComputer class.""" + +import numpy as np +import pytest + +from shapiq import powerset +from shapiq.game_theory.exact import ExactComputer +from shapiq.game_theory.moebius_converter import MoebiusConverter +from shapiq.games.benchmark.synthetic.soum import SOUM + + +def test_exact_computer_on_soum(): + for i in range(20): + n = np.random.randint(low=2, high=10) + order = np.random.randint(low=1, high=min(n, 5)) + n_basis_games = 
np.random.randint(low=1, high=100) + soum = SOUM(n, n_basis_games=n_basis_games) + + predicted_value = soum(np.ones(n))[0] + + # Compute via exactComputer + exact_computer = ExactComputer(n_players=n, game_fun=soum) + + # Compute via sparse Möbius representation + moebius_converter = MoebiusConverter(soum.moebius_coefficients) + + moebius_transform = exact_computer.moebius_transform() + # Assert equality with ground truth Möbius coefficients from SOUM + assert np.sum((moebius_transform - soum.moebius_coefficients).values ** 2) < 10e-7 + + # Compare ground truth via MoebiusConvert with exact computation of ExactComputer + shapley_interactions_gt = {} + shapley_interactions_exact = {} + for index in ["STII", "k-SII", "FSII"]: + shapley_interactions_gt[index] = moebius_converter.moebius_to_shapley_interaction( + index=index, order=order + ) + shapley_interactions_exact[index] = exact_computer.shapley_interaction( + index=index, order=order + ) + # Check equality with ground truth calculations from SOUM + assert ( + np.sum( + (shapley_interactions_exact[index] - shapley_interactions_gt[index]).values ** 2 + ) + < 10e-7 + ) + + index = "JointSV" + shapley_generalized_values = exact_computer.shapley_generalized_value( + order=order, index=index + ) + # Assert efficiency + assert (np.sum(shapley_generalized_values.values) - predicted_value) ** 2 < 10e-7 + + index = "kADD-SHAP" + shapley_interactions_exact[index] = exact_computer.shapley_interaction( + index=index, order=order + ) + + base_interaction_indices = ["SII", "BII", "CHII", "Co-Moebius"] + base_interactions = {} + for base_index in base_interaction_indices: + base_interactions[base_index] = exact_computer.shapley_base_interaction( + order=order, index=base_index + ) + + base_gv_indices = ["SGV", "BGV", "CHGV", "IGV", "EGV"] + base_gv = {} + for base_gv_index in base_gv_indices: + base_gv[base_gv_index] = exact_computer.base_generalized_value( + order=order, index=base_gv_index + ) + + probabilistic_values_indices 
= ["SV", "BV"] + probabilistic_values = {} + for pv_index in probabilistic_values_indices: + probabilistic_values[pv_index] = exact_computer.probabilistic_value(index=pv_index) + + # Assert efficiency for SV + assert (np.sum(probabilistic_values["SV"].values) - predicted_value) ** 2 < 10e-7 + + +@pytest.mark.parametrize( + "index, order", + [("ELC", 1)], +) +def test_exact_elc_computer_call(index, order): + """Tests the call function for the ExactComputer.""" + n = 5 + soum = SOUM(n, n_basis_games=10, normalize=True) + exact_computer = ExactComputer(n_players=n, game_fun=soum) + interaction_values = exact_computer(index=index, order=order) + if order is None: + order = n + assert interaction_values is not None # should return something + assert interaction_values.max_order == order # order should be the same + assert interaction_values.min_order == 1 # ELC only has singleton values + assert interaction_values.index == index # index should be the same + assert interaction_values.baseline_value == 0 # ELC needs baseline_value zero + assert interaction_values.estimated is False # nothing should be estimated + assert interaction_values.values is not None # values should be computed + assert exact_computer._elc_stability_subsidy is not None # ELC should have stored subsidy + + +@pytest.mark.parametrize( + "index, order", + [ + ("SV", 1), + ("BV", 1), + ("SII", 2), + ("BII", 2), + ("CHII", 2), + ("Co-Moebius", 2), + ("SGV", 2), + ("BGV", 2), + ("CHGV", 2), + ("EGV", 2), + ("IGV", 2), + ("STII", 2), + ("k-SII", 2), + ("FSII", 2), + ("FBII", 2), + ("JointSV", 2), + ("kADD-SHAP", 2), + ("SII", None), + ], +) +def test_exact_computer_call(index, order): + """Tests the call function for the ExactComputer.""" + n = 5 + soum = SOUM(n, n_basis_games=10) + exact_computer = ExactComputer(n_players=n, game_fun=soum) + interaction_values = exact_computer(index=index, order=order) + if order is None: + order = n + assert interaction_values is not None # should return something + 
assert interaction_values.max_order == order # order should be the same + assert interaction_values.index == index # index should be the same + assert interaction_values.estimated is False # nothing should be estimated + assert interaction_values.values is not None # values should be computed + + +def test_basic_functions(): + """Tests the basic functions of the ExactComputer.""" + n = 5 + soum = SOUM(n, n_basis_games=10) + exact_computer = ExactComputer(n_players=n, game_fun=soum) + isinstance(repr(exact_computer), str) + isinstance(str(exact_computer), str) + + +def test_lazy_computation(): + """Tests if the lazy computation (calling without params) works.""" + n = 5 + soum = SOUM(n, n_basis_games=10) + exact_computer = ExactComputer(n_players=n, game_fun=soum) + isinstance(repr(exact_computer), str) + isinstance(str(exact_computer), str) + sv = exact_computer("SV", 1) + assert sv.index == "SV" + assert sv.max_order == 1 + + +@pytest.fixture +def original_game(): + """This fixture returns a game function with interactions.""" + + def _game_fun(X: np.ndarray): + x_as_float = np.zeros_like(X, dtype=float) + x_as_float[X] = 1 + fist_order_coefficients = [0, 0.2, -0.1, -0.9, 0] + second_order_coefficients = np.asarray( + [ + [0, 0.4, 0, 0, 0], # interaction btw 0, 1; 1, 3 and 2, 4 + [0, 0, 0, 0.3, 0], + [0, 0, 0, 0, 1], + [0, 0, 0, 0, 0], + [0, 0, 0, 0, 0], + ] + ) + + def _interaction(arr: np.ndarray): # dtype bool + outer = np.outer(arr, arr) + interaction_array = second_order_coefficients.copy() + interaction_array[~outer] = 0 + return np.sum(interaction_array) + + value = np.sum(fist_order_coefficients * x_as_float, axis=1) + interaction_addition = np.apply_along_axis(_interaction, axis=1, arr=X) + return value + interaction_addition + + return _game_fun + + +# (fails for [CHII-2] bc empty set is nan) +@pytest.mark.parametrize( + "index, order", + [ + ("SV", 1), + ("BV", 1), + ("SII", 2), + ("BII", 2), + # ("CHII", 2), # TODO: fix this + ("Co-Moebius", 2), + 
("SGV", 2), + ("BGV", 2), + ("CHGV", 2), + ("EGV", 2), + ("IGV", 2), + ("STII", 2), + ("k-SII", 2), + ("FSII", 2), + ("FBII", 2), + ("JointSV", 2), + ("kADD-SHAP", 2), + ("SII", None), + ], +) +def test_permutation_symmetry(index, order, original_game): + """This test checks that the values are invariant under permutations of the players.""" + n = 5 + if order is None: + order = n + permutation = (4, 1, 3, 2, 0) # order = 1, its own inverse + + def permutation_game(X: np.ndarray): + return original_game(X[:, permutation]) + + exact_computer = ExactComputer(n_players=n, game_fun=original_game) + interaction_values = exact_computer(index=index, order=order) + + perm_exact_computer = ExactComputer(n_players=n, game_fun=permutation_game) + perm_interaction_values = perm_exact_computer(index=index, order=order) + + # permutation does not matter + for coalition, value in interaction_values.dict_values.items(): + perm_coalition = tuple(sorted([permutation[player] for player in coalition])) + assert (value - perm_interaction_values[perm_coalition]) < 10e-7 + + +@pytest.mark.parametrize( + "index, order", + [ + ("SV", 1), + ("BV", 1), + ("SII", 2), + ("BII", 2), + ("CHII", 2), + ("Co-Moebius", 2), + ("SGV", 2), + ("BGV", 2), + ("CHGV", 2), + ("EGV", 2), + ("IGV", 2), + ("STII", 2), + ("k-SII", 2), + ("FSII", 2), + ("FBII", 2), + ("JointSV", 2), + ("kADD-SHAP", 2), + ("SII", None), + ], +) +def test_player_symmetry(index, order): + """This test checks that the players with the same attribution get the same value.""" + n = 5 + if order is None: + order = n + + def _game_fun(X: np.ndarray): + x_as_float = np.zeros_like(X, dtype=float) + x_as_float[X] = 1 + fist_order_coefficients = [0.4, 0.2, -0.1, -0.9, 0.4] + second_order_coefficients = np.asarray( + [ + [0, 0.4, 0.1, 0, 0], # interaction btw 0, 1; 0, 2; 1, 4; 2, 4 + [0, 0, 0, 0, 0.4], + [0, 0, 0, 0, 0.1], + [0, 0, 0, 0, 0], + [0, 0, 0, 0, 0], + ] + ) + + def _interaction(arr: np.ndarray): # dtype bool + outer = 
np.outer(arr, arr) + interaction_array = second_order_coefficients.copy() + interaction_array[~outer] = 0 + return np.sum(interaction_array) + + value = np.sum(fist_order_coefficients * x_as_float, axis=1) + interaction_addition = np.apply_along_axis(_interaction, axis=1, arr=X) + return value + interaction_addition + + exact_computer = ExactComputer(n_players=n, game_fun=_game_fun) + interaction_values = exact_computer(index=index, order=order) + + # symmetry of players with same attribution + for coalition in powerset(range(n - 2)): + coalition_with_first = (0,) + tuple([player + 1 for player in coalition]) + coalition_with_last = tuple([player + 1 for player in coalition]) + (4,) + # print(f"{interaction_values[coalition_with_first]} for {coalition_with_first}") + assert ( + interaction_values[coalition_with_first] - interaction_values[coalition_with_last] + ) < 10e-7 + + +@pytest.mark.parametrize( + "index, order", + [ + ("SV", 1), + ("BV", 1), + ("SII", 2), + ("BII", 2), + ("CHII", 2), + ("Co-Moebius", 2), + ("STII", 2), + ("k-SII", 2), + ("FSII", 2), + ("FBII", 2), + ("kADD-SHAP", 2), + ("SII", None), + ], +) +def test_null_player(index, order): + """This test checks that the null players don't get any attribution in the values.""" + n = 5 + if order is None: + order = n + + # game with 0, 4 as null players, has interactions + def _game_fun(X: np.ndarray): + x_as_float = np.zeros_like(X, dtype=float) + x_as_float[X] = 1 + fist_order_coefficients = [0, 0.2, -0.1, -0.9, 0] + second_order_coefficients = np.asarray( + [ + [0, 0, 0, 0, 0], + [0, 0, 0, 0.3, 0], + [0, 0, 0, 0.4, 0], + [0, 0, 0, 0, 0], + [0, 0, 0, 0, 0], + ] + ) + + def _interaction(arr: np.ndarray): # dtype bool + outer = np.outer(arr, arr) + interaction_array = second_order_coefficients.copy() + interaction_array[~outer] = 0 + return np.sum(interaction_array) + + value = np.sum(fist_order_coefficients * x_as_float, axis=1) + interaction_addition = np.apply_along_axis(_interaction, axis=1, arr=X) + 
return value + interaction_addition + + exact_computer = ExactComputer(n_players=n, game_fun=_game_fun) + interaction_values = exact_computer(index=index, order=order) + + # no attribution for coalitions which include the null players. + for coalition in powerset(range(n - 2)): + coalition_with_first = (0,) + tuple([player + 1 for player in coalition]) + coalition_with_last = tuple([player + 1 for player in coalition]) + (4,) + # print(f"{interaction_values[coalition_with_first]} for {coalition_with_first}") + assert interaction_values[coalition_with_first] < 10e-7 + assert interaction_values[coalition_with_last] < 10e-7 + + +@pytest.mark.parametrize( + "index, order", + [ + ("SV", 1), + ("BV", 1), + ("SII", 2), + ("BII", 2), + ("CHII", 2), + ("Co-Moebius", 2), + ("STII", 2), + ("k-SII", 2), + ("FSII", 2), + ("FBII", 2), + ("kADD-SHAP", 2), + ("SII", None), + ], +) +def test_no_artefact_interaction(index, order): + """This test checks that the interactions are zero for the game without interactions.""" + n = 5 + if order is None: + order = n + + # game without interactions + def _game_fun(X: np.ndarray): + x_as_float = np.zeros_like(X, dtype=float) + x_as_float[X] = 1 + fist_order_coefficients = [0, 0.2, -0.1, -0.9, 0] + return np.sum(fist_order_coefficients * x_as_float, axis=1) + + exact_computer = ExactComputer(n_players=n, game_fun=_game_fun) + interaction_values = exact_computer(index=index, order=order) + + for coalition, value in interaction_values.dict_values.items(): + if len(coalition) > 1: + assert value < 10e-7 + + +@pytest.mark.parametrize( + "index, order", + [ + ("SGV", 2), + ("BGV", 2), + ("CHGV", 2), + ("EGV", 2), + ("IGV", 2), + ("JointSV", 2), + ], +) +def test_generalized_null_player(index, order): + """This test checks that the null players don't get any attribution in the generalized values""" + # implicit in above test for the rest of the indices + n = 5 + if order is None: + order = n + + # game with 0, 4 as null players, has interactions + 
def _game_fun(X: np.ndarray): + x_as_float = np.zeros_like(X, dtype=float) + x_as_float[X] = 1 + fist_order_coefficients = [0, 0.2, -0.1, -0.9, 0] + second_order_coefficients = np.asarray( + [ + [0, 0, 0, 0, 0], + [0, 0, 0, 0.3, 0], + [0, 0, 0, 0.4, 0], + [0, 0, 0, 0, 0], + [0, 0, 0, 0, 0], + ] + ) + + def _interaction(arr: np.ndarray): # dtype bool + outer = np.outer(arr, arr) + interaction_array = second_order_coefficients.copy() + interaction_array[~outer] = 0 + return np.sum(interaction_array) + + value = np.sum(fist_order_coefficients * x_as_float, axis=1) + interaction_addition = np.apply_along_axis(_interaction, axis=1, arr=X) + return value + interaction_addition + + exact_computer = ExactComputer(n_players=n, game_fun=_game_fun) + interaction_values = exact_computer(index=index, order=order) + + # no attribution for coalitions consisting of the null players. + assert interaction_values[(0, 4)] < 10e-7 + assert interaction_values[(0,)] < 10e-7 + assert interaction_values[(4,)] < 10e-7 diff --git a/tests/test_moebius_converter.py b/tests/tests_game_theory/test_moebius_converter.py similarity index 95% rename from tests/test_moebius_converter.py rename to tests/tests_game_theory/test_moebius_converter.py index 9f819028..d1fa169f 100644 --- a/tests/test_moebius_converter.py +++ b/tests/tests_game_theory/test_moebius_converter.py @@ -2,8 +2,8 @@ import numpy as np +from shapiq.game_theory.moebius_converter import MoebiusConverter from shapiq.games.benchmark.synthetic.soum import SOUM -from shapiq.moebius_converter import MoebiusConverter def test_soum_moebius_conversion(): diff --git a/tests/tests_games/test_treeshapiq_xai.py b/tests/tests_games/test_treeshapiq_xai.py index d9b6fe6d..fd1986e0 100644 --- a/tests/tests_games/test_treeshapiq_xai.py +++ b/tests/tests_games/test_treeshapiq_xai.py @@ -4,7 +4,7 @@ import pytest from shapiq.approximator.montecarlo import SHAPIQ -from shapiq.exact import ExactComputer +from shapiq.game_theory.exact import ExactComputer 
from shapiq.games import Game from shapiq.games.benchmark import ( AdultCensusTreeSHAPIQXAI,