From 0f06ac9079869280523a34cd012e9c90878739f8 Mon Sep 17 00:00:00 2001 From: Gwenneth Straub Date: Tue, 15 Oct 2024 15:31:53 -0700 Subject: [PATCH] Switched to Geometric Mean for Peptide Level Scores (#392) * switched to geometric mean for peptide level score * non zero aa score test case, changelog item --- CHANGELOG.md | 1 + casanovo/denovo/model.py | 2 +- tests/unit_tests/test_unit.py | 13 +++++++++---- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index fb7afa81..240185d1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,6 +18,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - The `--output` option has been split into two options, `--output_dir` and `--output_root`. - The `--validation_peak_path` is now optional when training; if `--validation_peak_path` is not set then the `train_peak_path` will also be used for validation. - The `tb_summarywriter` config option is now a boolean config option, and if set to true the TensorBoard summary will be written to a sub-directory of the output directory named `tensorboard`. +- The Casanovo model peptide level score is now reported as the geometric mean of the raw amino acid scores, rather than the arithmetic mean. ### Fixed diff --git a/casanovo/denovo/model.py b/casanovo/denovo/model.py index ce8621d8..8a2b421f 100644 --- a/casanovo/denovo/model.py +++ b/casanovo/denovo/model.py @@ -1076,7 +1076,7 @@ def _aa_pep_score( peptide_score : float The peptide score. 
""" - peptide_score = np.mean(aa_scores) + peptide_score = np.exp(np.mean(np.log(aa_scores))) aa_scores = (aa_scores + peptide_score) / 2 if not fits_precursor_mz: peptide_score -= 1 diff --git a/tests/unit_tests/test_unit.py b/tests/unit_tests/test_unit.py index c2c5b628..985cfb4b 100644 --- a/tests/unit_tests/test_unit.py +++ b/tests/unit_tests/test_unit.py @@ -439,12 +439,17 @@ def test_aa_pep_score(): aa_scores_raw = np.asarray([0.0, 0.5, 1.0]) aa_scores, peptide_score = _aa_pep_score(aa_scores_raw, True) - np.testing.assert_array_equal(aa_scores, np.asarray([0.25, 0.5, 0.75])) - assert peptide_score == pytest.approx(0.5) + np.testing.assert_array_equal(aa_scores, np.asarray([0.0, 0.25, 0.5])) + assert peptide_score == pytest.approx(0.0) aa_scores, peptide_score = _aa_pep_score(aa_scores_raw, False) - np.testing.assert_array_equal(aa_scores, np.asarray([0.25, 0.5, 0.75])) - assert peptide_score == pytest.approx(-0.5) + np.testing.assert_array_equal(aa_scores, np.asarray([0.0, 0.25, 0.5])) + assert peptide_score == pytest.approx(-1.0) + + aa_scores_raw = np.asarray([1.0, 0.25]) + aa_scores, peptide_score = _aa_pep_score(aa_scores_raw, True) + np.testing.assert_array_equal(aa_scores, np.asarray([0.75, 0.375])) + assert peptide_score == pytest.approx(0.5) def test_beam_search_decode():