Skip to content

Commit

Permalink
Merge pull request #9 from BBVA/fix/linear_combinations_show_continou…
Browse files Browse the repository at this point in the history
…us_lin_comb

Show in LinearCombinationsTest the continuous variables with linear combinations
  • Loading branch information
DaniSanchezSantolaya authored May 27, 2024
2 parents 297e165 + 44f50c5 commit 9befee6
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 1 deletion.
9 changes: 8 additions & 1 deletion mercury/robust/data_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,14 @@ def run(self, *args, **kwargs):
)

if lin_combinations is not None:
raise FailedTestError("Test failed. Linear combinations for continuous features were encountered.")
# Create message with the linear combinations found
lin_combinations = np.around(lin_combinations, decimals=5)
lin_combs_found = []
for i in range(lin_combinations.shape[0]):
lin_comb_idx = np.where(lin_combinations[i] != 0)[0]
lin_comb_cols = self.base_dataset.loc[:, numeric_feats].columns[lin_comb_idx].tolist()
lin_combs_found.append(lin_comb_cols)
raise FailedTestError(f"Test failed. Linear combinations for continuous features were encountered: {lin_combs_found}.")

individually_redundant = CategoryStruct.individually_redundant(self.base_dataset, current_schema.categorical_feats)
if len(individually_redundant) > 0:
Expand Down
16 changes: 16 additions & 0 deletions tests/test_data_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -857,4 +857,20 @@ def test_no_duplicated_test():
with pytest.raises(FailedTestError):
test.run()

def test_lin_combinations_cont():
    """LinearCombinationsTest must fail and name the dependent continuous columns.

    The fixture holds two groups of linearly dependent columns
    (f1/f2/f4 and f6/f7/f8) plus two unrelated columns (f3, f5).
    Running the test is expected to raise ``FailedTestError`` with a
    message that mentions the ``'f1', 'f2'`` columns.
    """
    # A local, seeded generator keeps the fixture reproducible from run to
    # run and does not touch NumPy's global random state.
    rng = np.random.default_rng(0)

    # Columns are inserted in f1..f8 order, as in the original fixture.
    df = pd.DataFrame()
    df["f1"] = rng.uniform(size=100)
    df["f2"] = df["f1"] * 2  # multiple of f1
    df["f3"] = rng.uniform(size=100)  # unrelated column
    df["f4"] = df["f1"] + df["f2"]  # sum of f1 and f2
    df["f5"] = rng.uniform(size=100)  # unrelated column

    # Second group: f7 and f8 are both multiples of f6.
    df["f6"] = rng.uniform(size=100)
    df["f7"] = df["f6"] * 3
    df["f8"] = df["f6"] * 0.1

    schema_reference = DataSchema().generate(df).calculate_statistics()
    linear_comb_test = LinearCombinationsTest(df, dataset_schema=schema_reference)

    # `match` is applied as a regex search over the exception message.
    with pytest.raises(FailedTestError, match="'f1', 'f2'"):
        linear_comb_test.run()

0 comments on commit 9befee6

Please sign in to comment.