diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 1fb01a29..7598771f 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -21,4 +21,4 @@ repos: name: ruff language: python types: [python] - entry: ruff --fix + entry: ruff --no-cache --fix diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 00000000..a0f77968 --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,134 @@ + +# ✌️ Contributor Covenant Code of Conduct + +## Our Pledge + +We as members, contributors, and leaders pledge to make participation in our +community a harassment-free experience for everyone, regardless of age, body +size, visible or invisible disability, ethnicity, sex characteristics, gender +identity and expression, level of experience, education, socio-economic status, +nationality, personal appearance, race, caste, color, religion, or sexual +identity and orientation. + +We pledge to act and interact in ways that contribute to an open, welcoming, +diverse, inclusive, and healthy community. 
+ +## Our Standards + +Examples of behavior that contributes to a positive environment for our +community include: + +* Demonstrating empathy and kindness toward other people +* Being respectful of differing opinions, viewpoints, and experiences +* Giving and gracefully accepting constructive feedback +* Accepting responsibility and apologizing to those affected by our mistakes, + and learning from the experience +* Focusing on what is best not just for us as individuals, but for the overall + community + +Examples of unacceptable behavior include: + +* The use of sexualized language or imagery, and sexual attention or advances of + any kind +* Trolling, insulting or derogatory comments, and personal or political attacks +* Public or private harassment +* Publishing others' private information, such as a physical or email address, + without their explicit permission +* Other conduct which could reasonably be considered inappropriate in a + professional setting + +## Enforcement Responsibilities + +Community leaders are responsible for clarifying and enforcing our standards of +acceptable behavior and will take appropriate and fair corrective action in +response to any behavior that they deem inappropriate, threatening, offensive, +or harmful. + +Community leaders have the right and responsibility to remove, edit, or reject +comments, commits, code, wiki edits, issues, and other contributions that are +not aligned to this Code of Conduct, and will communicate reasons for moderation +decisions when appropriate. + +## Scope + +This Code of Conduct applies within all community spaces, and also applies when +an individual is officially representing the community in public spaces. +Examples of representing our community include using an official email address, +posting via an official social media account, or acting as an appointed +representative at an online or offline event. 
+ +## Enforcement + +Instances of abusive, harassing, or otherwise unacceptable behavior may be +reported to the community leaders responsible for enforcement at +`maximilian.muschalik"at"ifi.lmu.de`. +All complaints will be reviewed and investigated promptly and fairly. + +All community leaders are obligated to respect the privacy and security of the +reporter of any incident. + +## Enforcement Guidelines + +Community leaders will follow these Community Impact Guidelines in determining +the consequences for any action they deem in violation of this Code of Conduct: + +### 1. Correction + +**Community Impact**: Use of inappropriate language or other behavior deemed +unprofessional or unwelcome in the community. + +**Consequence**: A private, written warning from community leaders, providing +clarity around the nature of the violation and an explanation of why the +behavior was inappropriate. A public apology may be requested. + +### 2. Warning + +**Community Impact**: A violation through a single incident or series of +actions. + +**Consequence**: A warning with consequences for continued behavior. No +interaction with the people involved, including unsolicited interaction with +those enforcing the Code of Conduct, for a specified period of time. This +includes avoiding interactions in community spaces as well as external channels +like social media. Violating these terms may lead to a temporary or permanent +ban. + +### 3. Temporary Ban + +**Community Impact**: A serious violation of community standards, including +sustained inappropriate behavior. + +**Consequence**: A temporary ban from any sort of interaction or public +communication with the community for a specified period of time. No public or +private interaction with the people involved, including unsolicited interaction +with those enforcing the Code of Conduct, is allowed during this period. +Violating these terms may lead to a permanent ban. + +### 4. 
Permanent Ban + +**Community Impact**: Demonstrating a pattern of violation of community +standards, including sustained inappropriate behavior, harassment of an +individual, or aggression toward or disparagement of classes of individuals. + +**Consequence**: A permanent ban from any sort of public interaction within the +community. + +## Attribution + +This Code of Conduct is adapted from the [Contributor Covenant][homepage], +version 2.1, available at +[https://www.contributor-covenant.org/version/2/1/code_of_conduct.html][v2.1]. + +Community Impact Guidelines were inspired by +[Mozilla's code of conduct enforcement ladder][Mozilla CoC]. + +For answers to common questions about this code of conduct, see the FAQ at +[https://www.contributor-covenant.org/faq][FAQ]. Translations are available at +[https://www.contributor-covenant.org/translations][translations]. + +[homepage]: https://www.contributor-covenant.org +[v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html +[Mozilla CoC]: https://github.com/mozilla/diversity +[FAQ]: https://www.contributor-covenant.org/faq +[translations]: https://www.contributor-covenant.org/translations + diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 00000000..171c8c45 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,152 @@ +# ✌️ Contributing Guidelines + +This document outlines the guidelines for contributing to the project. It should enable contributors +to understand the process for applying changes to the project and how to interact with the community. +For the code of conduct, please refer to the [CODE_OF_CONDUCT.md](./CODE_OF_CONDUCT.md). + +## 🗺️ What to Work On +First, we welcome contributions from everyone in every form. If you feel that something is missing +or could be improved, feel free to change it. 
However, to streamline the process of contributing +higher-tier changes or features to the project, we maintain an open +[roadmap](https://github.com/users/mmschlk/projects/4/views/4). There, we collect ideas and features +that we want to add to the project. If you want to work on something, please check the roadmap first +to see if the feature is already planned or if there is a similar feature that you could contribute +to. + +### 🙏 Discussions +If you have an idea for a new feature or a change, we encourage you to open a discussion in the +[Discussions](https://github.com/mmschlk/shapiq/discussions/new/choose) section. +This way, we can align on the work to be done before you start. It's generally a +good idea to have a quick discussion before opening a pull request that is potentially out-of-scope. + + +## 📝 Typical Setup: Fork, Clone, and Pull + +The typical workflow for contributing to `shapiq` is: + +1. Fork the `main` branch from the [GitHub repository](https://github.com/mmschlk/shapiq/). +2. Clone your fork locally. +3. Commit changes. +4. Push the changes to your fork. +5. Send a pull request from your fork back to the original `main` branch. + +## 📦 Development Setup + +Start by cloning the repository: + +```sh +git clone https://github.com/mmschlk/shapiq/ +``` + +Next, you need a Python environment with a supported version of Python. We recommend using +[pyenv](https://github.com/pyenv/pyenv-installer). Once you have pyenv, you can install the latest +Python version `shapiq` supports: + +```sh +pyenv install 3.9 +``` + +Then, create a virtual environment and install the development dependencies: + +```sh +cd shapiq +python -m venv .venv +source .venv/bin/activate +pip install -e .[dev] +``` + +Finally, install the [pre-commit](https://pre-commit.com/) push hooks. This will run some code +quality checks every time you push to GitHub. 
+ +```sh +pre-commit install --hook-type pre-push +``` + +If you want, you can optionally run `pre-commit` at any time like so: + +```sh +pre-commit run --all-files +``` + +## 📝 Commit Messages + +We do not enforce a strict commit message format, but we encourage you to follow good practices. +We recommend using action words to automatically close issues or pull requests (example: `closes #123`). +Also, start the commit message with a verb in the imperative mood, and keep the message short +and concise. For example: + +``` +add feature-xyz and closes #123 +``` + +## 🛠️ Making Changes + +Now, you're ready to make changes to the code. We recommend that you check out `shapiq`'s source +code for inspiration before getting started. How you make changes is, of course, up to you. However, +we can give you some tips on how to document and test your changes. + +### 📖 Documenting Changes +If you are adding a new class or function, you will need to add a docstring to the class or +function. With `shapiq`, we use the [Google Style Convention](https://sphinxcontrib-napoleon.readthedocs.io/en/latest/example_google.html). +Please add a docstring in the same style. + +To build the documentation on your end and to check if your changes are documented correctly, you +need to install the documentation dependencies: + +```sh +pip install -e .[docs] +``` + +Then, you can build the documentation from the root of the repository with: + +```sh +sphinx-build docs/source docs/build +``` + +This will render the documentation in the `docs/build` directory. You can open the `index.html` file +in your browser to see the rendered documentation. + +### 🎯 Testing Changes + +We use `pytest` for running unit tests and coverage. In the near future, we will add `mypy` for +static type checking. + +#### Unit Tests + +Unit tests **absolutely need to pass**. Write unit tests for your changes. If you are adding a new +feature, you need to add tests for the new feature. 
If you are fixing a bug, it is a good idea to add +a test that shows the bug and that your fix works. +Unit tests are located in the `tests` directory. To run the tests, you can use the following command: + +```sh +pytest +``` + +#### Coverage + +With `shapiq`, we aim to have a high test coverage (95%-100%). Every pull request should +not decrease the test coverage. +We use `pytest-cov` to measure the test coverage. To run the tests with coverage, you can use the +following command: + +```sh +pytest --cov=shapiq +``` + +#### Static Type Checking and Code Quality + +Currently, we do not have static type checking in place. We use `pre-commit` to run some code quality +checks. These checks **absolutely need to pass**. You can run the checks with the following command: + +```sh +pre-commit run --all-files +``` + +In the near future, we aim to use `mypy` for type checking. Once added, this will also be part of the +pre-commit pipeline and hence **absolutely needs to pass**. + +If you want, you can run `mypy` with the following command: + +```sh +mypy shapiq +``` \ No newline at end of file diff --git a/setup.py b/setup.py index e1d181d8..ebc6dba2 100644 --- a/setup.py +++ b/setup.py @@ -18,7 +18,7 @@ with io.open(os.path.join(work_directory, "README.md"), encoding="utf-8") as f: long_description = "\n" + f.read() -base_packages = ["numpy", "scipy", "pandas"] +base_packages = ["numpy", "scipy", "pandas", "tqdm"] plotting_packages = ["matplotlib", "colour", "networkx"] @@ -76,6 +76,15 @@ "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", ], - keywords=["python", "machine learning", "shap", "xai", "interaction"], + keywords=[ + "python", + "machine learning", + "shap", + "xai", + "interaction", + "shapley interactions", + "shapley values", + "feature interaction", + ], zip_safe=True, ) diff --git a/shapiq/__init__.py b/shapiq/__init__.py index 717989af..ed65d25e 100644 --- a/shapiq/__init__.py +++ b/shapiq/__init__.py @@ -7,10 +7,11 
@@ from .approximator import ( PermutationSamplingSII, PermutationSamplingSTI, - RegressionSII, RegressionFSI, + RegressionSII, ShapIQ, ) +from .datasets import load_bike # explainer classes from .explainer import InteractionExplainer @@ -27,12 +28,10 @@ get_explicit_subsets, get_parent_array, powerset, - split_subsets_budget, safe_isinstance, + split_subsets_budget, ) -from .datasets import load_bike - __all__ = [ # version "__version__", diff --git a/shapiq/approximator/__init__.py b/shapiq/approximator/__init__.py index b4743192..ef1f236b 100644 --- a/shapiq/approximator/__init__.py +++ b/shapiq/approximator/__init__.py @@ -2,7 +2,7 @@ from .k_sii import convert_ksii_into_one_dimension, transforms_sii_to_ksii from .permutation.sii import PermutationSamplingSII from .permutation.sti import PermutationSamplingSTI -from .regression import RegressionSII, RegressionFSI, KernelSHAP +from .regression import KernelSHAP, RegressionFSI, RegressionSII from .shapiq import ShapIQ __all__ = [ @@ -12,4 +12,6 @@ "RegressionFSI", "RegressionSII", "ShapIQ", + "transforms_sii_to_ksii", + "convert_ksii_into_one_dimension", ] diff --git a/shapiq/approximator/_base.py b/shapiq/approximator/_base.py index d0708d4a..e1db3d59 100644 --- a/shapiq/approximator/_base.py +++ b/shapiq/approximator/_base.py @@ -3,7 +3,6 @@ from typing import Callable, Optional import numpy as np - from approximator._config import AVAILABLE_INDICES from approximator._interaction_values import InteractionValues from approximator._utils import _generate_interaction_lookup diff --git a/shapiq/approximator/_interaction_values.py b/shapiq/approximator/_interaction_values.py index da34b1bd..f8c95a84 100644 --- a/shapiq/approximator/_interaction_values.py +++ b/shapiq/approximator/_interaction_values.py @@ -3,9 +3,9 @@ from typing import Optional import numpy as np - -from approximator._utils import _generate_interaction_lookup from approximator._config import AVAILABLE_INDICES +from approximator._utils import 
_generate_interaction_lookup + from shapiq.utils import powerset diff --git a/shapiq/approximator/k_sii.py b/shapiq/approximator/k_sii.py index c6ca65f8..73c4f48e 100644 --- a/shapiq/approximator/k_sii.py +++ b/shapiq/approximator/k_sii.py @@ -1,13 +1,12 @@ """This module provides the functionality to transform SII values into k-SII values.""" -from typing import Union, Optional +from typing import Optional, Union import numpy as np -from scipy.special import bernoulli - from approximator._base import Approximator +from approximator._interaction_values import InteractionValues from approximator._utils import _generate_interaction_lookup +from scipy.special import bernoulli -from approximator._interaction_values import InteractionValues from shapiq.utils import powerset diff --git a/shapiq/approximator/permutation/sii.py b/shapiq/approximator/permutation/sii.py index 2e7933dd..dcbc9ed3 100644 --- a/shapiq/approximator/permutation/sii.py +++ b/shapiq/approximator/permutation/sii.py @@ -3,8 +3,8 @@ import numpy as np from approximator._base import Approximator -from approximator.k_sii import KShapleyMixin from approximator._interaction_values import InteractionValues +from approximator.k_sii import KShapleyMixin from utils import powerset diff --git a/shapiq/approximator/regression/__init__.py b/shapiq/approximator/regression/__init__.py index 0693191b..1038f335 100644 --- a/shapiq/approximator/regression/__init__.py +++ b/shapiq/approximator/regression/__init__.py @@ -1,7 +1,7 @@ """This module contains the regression-based approximators to estimate Shapley interaction values. 
""" -from .sii import RegressionSII from .fsi import RegressionFSI +from .sii import RegressionSII from .sv import KernelSHAP __all__ = ["RegressionSII", "RegressionFSI", "KernelSHAP"] diff --git a/shapiq/approximator/regression/_base.py b/shapiq/approximator/regression/_base.py index 33e9d849..823a56f3 100644 --- a/shapiq/approximator/regression/_base.py +++ b/shapiq/approximator/regression/_base.py @@ -3,11 +3,10 @@ import numpy as np from approximator._base import Approximator -from approximator.sampling import ShapleySamplingMixin from approximator._interaction_values import InteractionValues -from scipy.special import binom, bernoulli - -from utils import powerset, get_explicit_subsets +from approximator.sampling import ShapleySamplingMixin +from scipy.special import bernoulli, binom +from utils import powerset AVAILABLE_INDICES_REGRESSION = ["FSI", "SII", "SV"] @@ -214,8 +213,8 @@ def _get_bernoulli_weight(self, intersection_size: int, r_prime: int) -> float: The Bernoulli weights. 
""" weight = 0 - for l in range(1, intersection_size + 1): - weight += binom(intersection_size, l) * self._bernoulli_numbers[r_prime - l] + for size in range(1, intersection_size + 1): + weight += binom(intersection_size, size) * self._bernoulli_numbers[r_prime - size] return weight def _get_bernoulli_weights( diff --git a/shapiq/approximator/regression/fsi.py b/shapiq/approximator/regression/fsi.py index 01035838..0497efc5 100644 --- a/shapiq/approximator/regression/fsi.py +++ b/shapiq/approximator/regression/fsi.py @@ -1,8 +1,8 @@ """Regression with Faithful Shapley Interaction (FSI) index approximation.""" from typing import Optional -from ._base import Regression from ..k_sii import KShapleyMixin +from ._base import Regression class RegressionFSI(Regression, KShapleyMixin): diff --git a/shapiq/approximator/regression/sii.py b/shapiq/approximator/regression/sii.py index 39b248e3..d371c053 100644 --- a/shapiq/approximator/regression/sii.py +++ b/shapiq/approximator/regression/sii.py @@ -1,8 +1,8 @@ """Regression with Shapley interaction index (SII) approximation.""" from typing import Optional -from ._base import Regression from ..k_sii import KShapleyMixin +from ._base import Regression class RegressionSII(Regression, KShapleyMixin): diff --git a/shapiq/approximator/regression/sv.py b/shapiq/approximator/regression/sv.py index d1f9beaf..0360040a 100644 --- a/shapiq/approximator/regression/sv.py +++ b/shapiq/approximator/regression/sv.py @@ -1,6 +1,5 @@ -"""This module contains the KernelSHAP regression approximator for estimating the SV.""" - -"""Regression with Faithful Shapley Interaction (FSI) index approximation.""" +"""This module contains the KernelSHAP regression approximator for estimating the SV. 
+Regression with Faithful Shapley Interaction (FSI) index approximation.""" from typing import Optional from ._base import Regression diff --git a/shapiq/approximator/sampling.py b/shapiq/approximator/sampling.py index 34320e82..1b961246 100644 --- a/shapiq/approximator/sampling.py +++ b/shapiq/approximator/sampling.py @@ -3,10 +3,10 @@ from typing import Union import numpy as np +from approximator._base import Approximator from scipy.special import binom -from approximator._base import Approximator -from shapiq.utils import split_subsets_budget, get_explicit_subsets +from shapiq.utils import get_explicit_subsets, split_subsets_budget class ShapleySamplingMixin(ABC): diff --git a/shapiq/approximator/shapiq/shapiq.py b/shapiq/approximator/shapiq/shapiq.py index ff868a7b..3514f102 100644 --- a/shapiq/approximator/shapiq/shapiq.py +++ b/shapiq/approximator/shapiq/shapiq.py @@ -4,9 +4,9 @@ import numpy as np from approximator._base import Approximator -from approximator.sampling import ShapleySamplingMixin -from approximator.k_sii import KShapleyMixin from approximator._interaction_values import InteractionValues +from approximator.k_sii import KShapleyMixin +from approximator.sampling import ShapleySamplingMixin from utils import powerset AVAILABLE_INDICES_SHAPIQ = {"SII", "STI", "FSI", "k-SII"} diff --git a/shapiq/datasets/_all.py b/shapiq/datasets/_all.py index 9053a107..ede37855 100644 --- a/shapiq/datasets/_all.py +++ b/shapiq/datasets/_all.py @@ -1,7 +1,7 @@ """This module contains functions to load datasets.""" import os -import pandas as pd +import pandas as pd GITHUB_DATA_URL = "https://github.com/mmschlk/shapiq/raw/main/data/" diff --git a/shapiq/explainer/_base.py b/shapiq/explainer/_base.py index 29afdf5c..5c342c77 100644 --- a/shapiq/explainer/_base.py +++ b/shapiq/explainer/_base.py @@ -3,7 +3,6 @@ from typing import Callable import numpy as np - from approximator._interaction_values import InteractionValues from explainer.imputer.marginal_imputer import 
MarginalImputer diff --git a/shapiq/explainer/imputer/marginal_imputer.py b/shapiq/explainer/imputer/marginal_imputer.py index 5403db65..505d2c4c 100644 --- a/shapiq/explainer/imputer/marginal_imputer.py +++ b/shapiq/explainer/imputer/marginal_imputer.py @@ -2,7 +2,6 @@ from typing import Callable, Optional import numpy as np - from explainer.imputer._base import Imputer diff --git a/shapiq/explainer/interaction.py b/shapiq/explainer/interaction.py index 205e80af..e055fcf1 100644 --- a/shapiq/explainer/interaction.py +++ b/shapiq/explainer/interaction.py @@ -1,20 +1,19 @@ """This module contains the interaction explainer for the shapiq package. This is the main interface for users of the shapiq package.""" -from typing import Callable, Union, Optional +from typing import Callable, Optional, Union import numpy as np - -from approximator._base import Approximator -from approximator._interaction_values import InteractionValues -from ._base import Explainer from approximator import ( - RegressionSII, - RegressionFSI, PermutationSamplingSII, PermutationSamplingSTI, + RegressionFSI, + RegressionSII, ShapIQ, ) +from approximator._base import Approximator +from approximator._interaction_values import InteractionValues +from ._base import Explainer __all__ = ["InteractionExplainer"] diff --git a/shapiq/explainer/tree.py b/shapiq/explainer/tree.py index 977e732d..c10638a7 100644 --- a/shapiq/explainer/tree.py +++ b/shapiq/explainer/tree.py @@ -1,11 +1,9 @@ """This module contains the TreeSHAP-IQ explainer for computing exact any order Shapley interactions for trees and tree ensembles.""" import numpy as np - from approximator._interaction_values import InteractionValues from explainer._base import Explainer - __all__ = ["TreeExplainer"] diff --git a/shapiq/plot/network.py b/shapiq/plot/network.py index 5a8d9a7c..b601a965 100644 --- a/shapiq/plot/network.py +++ b/shapiq/plot/network.py @@ -5,13 +5,12 @@ import networkx as nx import numpy as np +from 
approximator._interaction_values import InteractionValues from matplotlib import pyplot as plt from PIL import Image - -from approximator._interaction_values import InteractionValues from utils import powerset -from ._config import BLUE, RED, NEUTRAL, LINES +from ._config import BLUE, LINES, NEUTRAL, RED __all__ = [ "network_plot", diff --git a/shapiq/plot/stacked_bar.py b/shapiq/plot/stacked_bar.py index b727f528..535c4a91 100644 --- a/shapiq/plot/stacked_bar.py +++ b/shapiq/plot/stacked_bar.py @@ -2,7 +2,7 @@ __all__ = ["stacked_bar_plot"] from copy import deepcopy -from typing import Union, Optional +from typing import Optional, Union import numpy as np from matplotlib import pyplot as plt diff --git a/shapiq/utils/__init__.py b/shapiq/utils/__init__.py index 01f1ec3d..9f59657c 100644 --- a/shapiq/utils/__init__.py +++ b/shapiq/utils/__init__.py @@ -1,8 +1,8 @@ """This module contains utility functions for the shapiq package.""" +from .modules import safe_isinstance from .sets import get_explicit_subsets, pair_subset_sizes, powerset, split_subsets_budget from .tree import get_conditional_sample_weights, get_parent_array -from .modules import safe_isinstance __all__ = [ # sets