added references to approximators

mmschlk · Jan 13, 2025 · 0cb56da · 0cb56da
1 parent f5e8c21
commit 0cb56da
Show file tree

Hide file tree

Showing 10 changed files with 194 additions and 111 deletions.
diff --git a/shapiq/approximator/marginals/stratified.py b/shapiq/approximator/marginals/stratified.py
@@ -1,6 +1,4 @@
-"""This module contains the Stratified Sampling approximation method for the Shapley value
-by Maleki et al. (2013). It estimates the Shapley values by sampling random marginal contributions
-grouped by size."""
+"""This module contains the Stratified Sampling approximation method for the Shapley values."""
 
 from typing import Callable, Optional
 
@@ -11,19 +9,23 @@
 
 
 class StratifiedSamplingSV(Approximator):
-    """The Stratified Sampling algorithm estimates the Shapley values (SV) by sampling random
+    """The Stratified Sampling algorithm for estimating the Shapley values.
+
+    The Stratified Sampling algorithm estimates the Shapley values (SV) by sampling random
     marginal contributions for each player and each coalition size. The marginal contributions are
     grouped into strata by size. The strata are aggregated for each player after sampling to obtain
-    the final estimate. For more information, see `Maleki et al. (2009) <http://arxiv.org/abs/1306.4265>`_.
+    the final estimate. For more information, see Maleki et al. (2013) [1]_.
 
     Args:
         n: The number of players.
         random_state: The random state to use for the permutation sampling. Defaults to ``None``.
 
-    Attributes:
-        n: The number of players.
-        _grand_coalition_array: The array of players (starting from ``0`` to ``n``).
-        iteration_cost: The cost of a single iteration of the approximator.
+    See Also:
+        - :class:`~shapiq.approximator.montecarlo.svarmiq.SVARM`: The SVARM approximator
+        - :class:`~shapiq.approximator.montecarlo.svarmiq.SVARMIQ`: The SVARMIQ approximator
+
+    References:
+        .. [1] Maleki, S., Tran-Thanh, L., Hines, G., Rahwan, T., and Rogers, A. (2013). Bounding the Estimation Error of Sampling-based Shapley Value Approximation With/Without Stratifying. url: http://arxiv.org/abs/1306.4265
     """
 
     def __init__(
@@ -42,7 +44,6 @@ def approximate(
         Args:
             budget: The number of game evaluations for approximation
             game: The game function as a callable that takes a set of players and returns the value.
-            batch_size: The size of the batch. If ``None``, the batch size is set to ``1``. Defaults to ``5``.
 
         Returns:
             The estimated interaction values.
@@ -68,7 +69,6 @@ def approximate(
                     if ((size == 0 or size == self.n - 1) and used_budget < budget) or (
                         size in range(1, self.n - 1) and used_budget + 2 <= budget
                     ):
-                        marginal_con = 0
                         # if coalition size is 0 or n-1, empty or grand coalition value can be reused
                         if size == 0:
                             coalition = np.zeros(self.n, dtype=bool)
@@ -80,11 +80,13 @@ def approximate(
                             coalition[player] = False
                             marginal_con = full_value - game(coalition)[0]
                             used_budget += 1
-                        # otherwise both coalitions that make up the marginal contribution have to eb evaluated
+                        # otherwise both coalitions that make up the marginal contribution have
+                        # to be evaluated
                         else:
                             available_players = list(self._grand_coalition_set)
                             available_players.remove(player)
-                            # draw a subset of the player set without player of size stratum uniformly at random
+                            # draw a subset of the player set without player of size stratum
+                            # uniformly at random
                             coalition_list = list(
                                 self._rng.choice(available_players, size, replace=False)
                             )
@@ -98,7 +100,8 @@ def approximate(
                         strata[player][size] += marginal_con
                         counts[player][size] += 1
 
-        # aggregate the stratum estimates: divide each stratum sum by its sample number, sum up the means, divide by the number of valid stratum estimates
+        # aggregate the stratum estimates: divide each stratum sum by its sample number, sum up
+        # the means, divide by the number of valid stratum estimates
         strata = np.divide(strata, counts, out=strata, where=counts != 0)
         result = np.sum(strata, axis=1)
         non_zeros = np.count_nonzero(counts, axis=1)

diff --git a/shapiq/approximator/montecarlo/shapiq.py b/shapiq/approximator/montecarlo/shapiq.py
@@ -8,9 +8,12 @@
 
 
 class SHAPIQ(MonteCarlo):
-    """SHAP-IQ approximator uses standard form of Shapley interactions.
-    This is the default method from MonteCarlo approximator with no stratification.
-    For details, refer to `Fumagalli et al. (2023) <https://doi.org/10.48550/arXiv.2303.01179>`_.
+    """SHAP-IQ approximator for estimating Shapley interactions.
+
+    The SHAP-IQ estimator [1]_ is a MonteCarlo approximation algorithm that estimates Shapley
+    interactions. It is the default method from MonteCarlo approximator with no stratification.
+    For details, see the original paper by Fumagalli et al. (2023) [1]_. SHAP-IQ can be seen as
+    a generalization of the Unbiased KernelSHAP method [2]_ for any-order Shapley interactions.
 
     Args:
         n: The number of players.
@@ -24,13 +27,24 @@ class SHAPIQ(MonteCarlo):
             to ``False``.
         random_state: The random state of the estimator. Defaults to ``None``.
 
-    Attributes:
-        n: The number of players.
-        N: The set of players (starting from ``0`` to ``n - 1``).
-        max_order: The interaction order of the approximation.
-        min_order: The minimum order of the approximation. For the regression estimator, min_order
-            is equal to ``1``.
-        iteration_cost: The cost of a single iteration of the regression SII.
+    Examples:
+        >>> from shapiq.games.benchmark import DummyGame
+        >>> from shapiq import SHAPIQ
+        >>> game = DummyGame(n=5, interaction=(1, 2))
+        >>> approximator = SHAPIQ(game.n_players, max_order=2, index="k-SII")
+        >>> approximator.approximate(budget=20, game=game)
+        InteractionValues(
+            index=k-SII, order=2, estimated=True, estimation_budget=20
+        )
+
+    See Also:
+        - :class:`~shapiq.approximator.montecarlo.shapiq.UnbiasedKernelSHAP`: The Unbiased
+            KernelSHAP approximator.
+
+    References:
+        .. [1] Fumagalli, F., Muschalik, M., Kolpaczki, P., Hüllermeier, E., (2023). SHAP-IQ: Unified Approximation of any-order Shapley Interactions. In Thirty-seventh Conference on Neural Information Processing Systems. url: https://openreview.net/forum?id=IEMLNF4gK4
+
+        .. [2] Covert, I., and Lee, S.-I. (2021). Improving KernelSHAP: Practical Shapley Value Estimation via Linear Regression. In Proceedings of The 24th International Conference on Artificial Intelligence and Statistics, PMLR 130:3457-3465. url: https://proceedings.mlr.press/v130/covert21a.html
     """
 
     def __init__(
@@ -59,10 +73,10 @@ def __init__(
 class UnbiasedKernelSHAP(SHAPIQ):
     """The Unbiased KernelSHAP approximator for estimating the Shapley value (SV).
 
-    The Unbiased KernelSHAP estimator is a variant of the KernelSHAP estimator (though deeply
-    different). Unbiased KernelSHAP was proposed by `Covert and Lee (2021) <https://doi.org/10.48550/arXiv.2012.01536>`_
-    as an unbiased version of KernelSHAP. `Fumagalli et al. (2023) <https://doi.org/10.48550/arXiv.2303.01179>`_,
-    shown that Unbiased KernelSHAP is a more specific variant of the ShapIQ approximation method (Theorem 4.5).
+    The Unbiased KernelSHAP estimator [1]_ is a variant of the KernelSHAP estimator (though deeply
+    different). Unbiased KernelSHAP was proposed by Covert and Lee (2021) [1]_ as an unbiased
+    version of KernelSHAP. In Fumagalli et al. (2023) [2]_ it was shown that Unbiased KernelSHAP is
+    a more specific variant of the SHAP-IQ approximation method (Theorem 4.5).
 
     Args:
         n: The number of players.
@@ -78,9 +92,9 @@ class UnbiasedKernelSHAP(SHAPIQ):
         >>> from shapiq.approximator import UnbiasedKernelSHAP
         >>> game = DummyGame(n=5, interaction=(1, 2))
         >>> approximator = UnbiasedKernelSHAP(n=5)
-        >>> approximator.approximate(budget=100, game=game)
+        >>> approximator.approximate(budget=20, game=game)
         InteractionValues(
-            index=SV, order=1, estimated=False, estimation_budget=32,
+            index=SV, order=1, estimated=True, estimation_budget=20,
             values={
                 (0,): 0.2,
                 (1,): 0.7,
@@ -89,6 +103,14 @@ class UnbiasedKernelSHAP(SHAPIQ):
                 (4,): 0.2,
             }
         )
+
+    See Also:
+        - :class:`~shapiq.approximator.montecarlo.shapiq.SHAPIQ`: The SHAPIQ approximator.
+
+    References:
+        .. [1] Covert, I., and Lee, S.-I. (2021). Improving KernelSHAP: Practical Shapley Value Estimation via Linear Regression. In Proceedings of The 24th International Conference on Artificial Intelligence and Statistics, PMLR 130:3457-3465. url: https://proceedings.mlr.press/v130/covert21a.html
+
+        .. [2] Fumagalli, F., Muschalik, M., Kolpaczki, P., Hüllermeier, E., (2023). SHAP-IQ: Unified Approximation of any-order Shapley Interactions. In Thirty-seventh Conference on Neural Information Processing Systems. url: https://openreview.net/forum?id=IEMLNF4gK4
     """
 
     def __init__(

diff --git a/shapiq/approximator/montecarlo/svarmiq.py b/shapiq/approximator/montecarlo/svarmiq.py
@@ -6,9 +6,12 @@
 
 
 class SVARMIQ(MonteCarlo):
-    """SVARM-IQ approximator uses standard form of Shapley interactions.
-    SVARM-IQ utilizes MonteCarlo approximation with both stratification strategies.
-    For details, refer to `Kolpaczki et al. (2024) <https://doi.org/10.48550/arXiv.2401.13371>`_.
+    """The SVARM-IQ [1]_ approximator for Shapley interactions.
+
+    SVARM-IQ utilizes MonteCarlo approximation with two stratification strategies. SVARM-IQ is a
+    generalization of the SVARM algorithm [2]_ and can approximate any-order Shapley interactions
+    efficiently. For details about the algorithm see the original paper by Kolpaczki et al.
+    (2024) [1]_.
 
     Args:
         n: The number of players.
@@ -18,17 +21,13 @@ class SVARMIQ(MonteCarlo):
         pairing_trick: If ``True``, the pairing trick is applied to the sampling procedure. Defaults
             to ``False``.
         sampling_weights: An optional array of weights for the sampling procedure. The weights must
-            be of shape ``(n + 1,)`` and are used to determine the probability of sampling a coalition
-            of a certain size. Defaults to `None`.
+            be of shape ``(n + 1,)`` and are used to determine the probability of sampling a
+            coalition of a certain size. Defaults to ``None``.
 
+    References:
+        .. [1] Kolpaczki, P., Muschalik, M., Fumagalli, F., Hammer, B., and Hüllermeier, E. (2024). SVARM-IQ: Efficient Approximation of Any-order Shapley Interactions through Stratification. Proceedings of The 27th International Conference on Artificial Intelligence and Statistics, PMLR 238:3520-3528. url: https://proceedings.mlr.press/v238/kolpaczki24a
 
-    Attributes:
-        n: The number of players.
-        N: The set of players (starting from ``0`` to ``n - 1``).
-        max_order: The interaction order of the approximation.
-        min_order: The minimum order of the approximation. For the regression estimator, ``min_order``
-            is equal to ``1``.
-        iteration_cost: The cost of a single iteration of the regression SII.
+        .. [2] Kolpaczki, P., Bengs, V., Muschalik, M., & Hüllermeier, E. (2024). Approximating the Shapley Value without Marginal Contributions. Proceedings of the AAAI Conference on Artificial Intelligence, 38(12), 13246-13255. https://doi.org/10.1609/aaai.v38i12.29225
     """
 
     def __init__(
@@ -55,26 +54,22 @@ def __init__(
 
 
 class SVARM(SVARMIQ):
-    """The SVARM approximator for estimating the Shapley value (SV).
+    """The SVARM [1]_ approximator for estimating the Shapley value (SV).
 
-    For details, refer to `Kolpaczki et al. (2024) <https://doi.org/10.48550/arXiv.2302.00736>`_.
+    SVARM is a MonteCarlo approximation algorithm that estimates the Shapley value. For details
+    about the algorithm see the original paper by Kolpaczki et al. (2024) [1]_.
 
     Args:
         n: The number of players.
         random_state: The random state of the estimator. Defaults to ``None``.
         pairing_trick: If ``True``, the pairing trick is applied to the sampling procedure. Defaults
             to ``False``.
         sampling_weights: An optional array of weights for the sampling procedure. The weights must
-            be of shape ``(n + 1,)`` and are used to determine the probability of sampling a coalition
-            of a certain size. Defaults to `None`.
+            be of shape ``(n + 1,)`` and are used to determine the probability of sampling a
+            coalition of a certain size. Defaults to ``None``.
 
-    Attributes:
-        n: The number of players.
-        N: The set of players (starting from ``0`` to ``n - 1``).
-        max_order: The interaction order of the approximation.
-        min_order: The minimum order of the approximation. For the regression estimator, ``min_order``
-            is equal to ``1``.
-        iteration_cost: The cost of a single iteration of the regression SII.
+    References:
+        .. [1] Kolpaczki, P., Bengs, V., Muschalik, M., & Hüllermeier, E. (2024). Approximating the Shapley Value without Marginal Contributions. Proceedings of the AAAI Conference on Artificial Intelligence, 38(12), 13246-13255. https://doi.org/10.1609/aaai.v38i12.29225
     """
 
     def __init__(

diff --git a/shapiq/approximator/permutation/sii.py b/shapiq/approximator/permutation/sii.py
@@ -20,13 +20,11 @@ class PermutationSamplingSII(Approximator):
             to the specified order (``False``, default).
         random_state: The random state to use for the permutation sampling. Defaults to ``None``.
 
-    Attributes:
-        n: The number of players.
-        max_order: The interaction order of the approximation.
-        top_order: Whether to approximate only the top order interactions (``True``) or all orders up
-            to the specified order (``False``).
-        min_order: The minimum order to approximate.
-        iteration_cost: The cost of a single iteration of the permutation sampling.
+    See Also:
+        - :class:`~shapiq.approximator.permutation.stii.PermutationSamplingSTII`: The Permutation
+            Sampling approximator for the STII index
+        - :class:`~shapiq.approximator.permutation.sv.PermutationSamplingSV`: The Permutation
+            Sampling approximator for the SV index
     """
 
     def __init__(

diff --git a/shapiq/approximator/permutation/stii.py b/shapiq/approximator/permutation/stii.py
@@ -20,14 +20,14 @@ class PermutationSamplingSTII(Approximator):
         random_state: The random state to use for the permutation sampling.
             Defaults to ``None``.
 
-    Attributes:
-        n: The number of players.
-        max_order: The interaction order of the approximation.
-        min_order: The minimum order to approximate.
-        iteration_cost: The cost of a single iteration of the permutation sampling.
+    See Also:
+        - :class:`~shapiq.approximator.permutation.sii.PermutationSamplingSII`: The Permutation
+            Sampling approximator for the SII index
+        - :class:`~shapiq.approximator.permutation.sv.PermutationSamplingSV`: The Permutation
+            Sampling approximator for the SV index
 
     Example:
-        >>> from shapiq.games import DummyGame
+        >>> from shapiq.games.benchmark import DummyGame
         >>> from shapiq.approximator import PermutationSamplingSTII
         >>> game = DummyGame(n=5, interaction=(1, 2))
         >>> approximator = PermutationSamplingSTII(n=5, max_order=2)

diff --git a/shapiq/approximator/permutation/sv.py b/shapiq/approximator/permutation/sv.py
@@ -1,6 +1,4 @@
-"""This module contains the permutation sampling approximation method for the Shapley value (SV).
-It estimates the Shapley values by sampling random permutations of the player set
-and extracting all marginal contributions from each permutation."""
+"""This module contains the permutation sampling approximation method for the Shapley value (SV)."""
 
 from typing import Callable, Optional
 
@@ -11,19 +9,25 @@
 
 
 class PermutationSamplingSV(Approximator):
-    """The  Permutation Sampling algorithm ApproShapley estimates the Shapley values by
-    sampling random permutations of the player set and extracting all marginal contributions
-    from each permutation. For details, refer to `Castro et al. (2009) <https://doi.org/10.1016/j.cor.2008.04.004>`_.
+    """The Permutation Sampling algorithm for estimating the Shapley values.
+
+    Permutation Sampling [1]_ (also known as ApproShapley) estimates the Shapley values by drawing
+    random permutations of the player set and extracting all marginal contributions from each
+    permutation. For details, see Castro et al. (2009) [1]_.
 
     Args:
         n: The number of players.
         random_state: The random state to use for the permutation sampling. Defaults to ``None``.
 
-    Attributes:
-        n: The number of players.
-        N: The set of players (starting from ``0`` to ``n - 1``).
-        _grand_coalition_array: The array of players (starting from ``0`` to ``n``).
-        iteration_cost: The cost of a single iteration of the approximator.
+    See Also:
+        - :class:`~shapiq.approximator.permutation.sii.PermutationSamplingSII`: The Permutation
+            Sampling approximator for the SII index
+        - :class:`~shapiq.approximator.permutation.stii.PermutationSamplingSTII`: The Permutation
+            Sampling approximator for the STII index
+
+    References:
+        .. [1] Castro, J., Gómez, D., and Tejada, J. (2009). Polynomial calculation of the Shapley value based on sampling. In Computers & Operations Research 36(5), 1726-1730. doi: https://doi.org/10.1016/j.cor.2008.04.004
+
     """
 
     def __init__(

diff --git a/shapiq/approximator/regression/fsi.py b/shapiq/approximator/regression/fsi.py
@@ -8,8 +8,10 @@
 
 
 class RegressionFSII(Regression):
-    """Estimates the FSII values using KernelSHAP. The Algorithm is described
-    in `Tsai et al. (2023) <https://doi.org/10.48550/arXiv.2203.00870>`_.
+    """Estimates the FSII values using KernelSHAP.
+
+    The Faithful KernelSHAP regression is described in Tsai et al. (2023) [1]_. The method estimates
+    the Faithful Shapley interaction index (FSII).
 
     Args:
         n: The number of players.
@@ -21,13 +23,16 @@ class RegressionFSII(Regression):
             of a certain size. Defaults to ``None``.
         random_state: The random state of the estimator. Defaults to ``None``.
 
-    Attributes:
-        n: The number of players.
-        N: The set of players (starting from ``0`` to ``n - 1``).
-        max_order: The interaction order of the approximation.
-        min_order: The minimum order of the approximation. For the regression estimator, min_order
-            is equal to ``1``.
-        iteration_cost: The cost of a single iteration of the regression SII.
+    See Also:
+        - :class:`~shapiq.approximator.regression.kernelshap.KernelSHAP`: The KernelSHAP
+            approximator for estimating the Shapley values.
+        - :class:`~shapiq.approximator.regression.kernelshapiq.KernelSHAPIQ`: The KernelSHAPIQ
+            approximator for estimating the Shapley interaction index (SII) and the
+            k-Shapley interaction index (k-SII).
+
+    References:
+        .. [1] Tsai, C.-P., Yeh, C.-K., and Ravikumar, P. (2023). Faith-Shap: The Faithful Shapley Interaction Index. In Journal of Machine Learning Research 24(94), pp. 1--42. url: http://jmlr.org/papers/v24/22-0202.html
+
     """
 
     def __init__(