From 1fcec6a6130b705acb47079e4b7296e3a95e19db Mon Sep 17 00:00:00 2001
From: Wout Bittremieux <bittremieux@users.noreply.github.com>
Date: Tue, 14 May 2024 20:33:47 +0200
Subject: [PATCH] Prepare release v4.2.0 (#331)

* Remove `train_from_scratch` config option (#275)

Instead of having to specify `train_from_scratch` in the config file, training will proceed from an existing model weights file if this is given as an argument to `casanovo train`.

Fixes #263.

* Stabilize torch.topk() behavior (#290)

* Add epsilon to index zero

* Fix typo

* Use base PyTorch for repeating along the vocabulary size

* Combine masking steps

* Lint with updated black version

* Lint test files

* Add topk unit test

* Fix lint

* Add fixme comment for future

* Update changelog

* Generate new screengrabs with rich-codex

---------

Co-authored-by: Wout Bittremieux <wout@bittremieux.be>
Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>

* Rename max_iters to cosine_schedule_period_iters (#300)

* Rename max_iters to cosine_schedule_period_iters

* Add deprecated config option unit test

* Fix missed rename

* Proper linting

* Remove unnecessary logging

* Test that checkpoints with deprecated config options can be loaded

* Minor change

* Add test for fine-tuning with deprecated config options

* Remove deprecated hyperparameters during model loading

* Include deprecated hyperparameter warning

* Test whether the warning is issued

* Verify that the deprecated option is removed

* Fix comments

* Avoid defining deprecated options twice

* Remap previous renamed config option `every_n_train_steps`

* Update changelog

---------

Co-authored-by: melihyilmaz <yilmazmelih97@gmail.com>

* Add FAQ entry about antibody sequencing

* Don't crash when multiple beams have identical peptide scores (#306)

* Test different beams with identical scores

* Randomly break ties for beams with identical peptide score

* Update changelog

* Don't remove unit test

* Allow csv to handle all newlines (#316)

* Add 9-species model weights link to FAQ (#303)

* Add model weights link

* Generate new screengrabs with rich-codex

* Clarify that these weights should only be used for benchmarking

---------

Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
Co-authored-by: Wout Bittremieux <wout@bittremieux.be>

* Add FAQ entry about antibody sequencing (#304)

* Add FAQ entry about antibody sequencing

* Generate new screengrabs with rich-codex

---------

Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
Co-authored-by: Melih Yilmaz <32707537+melihyilmaz@users.noreply.github.com>

* Allow csv to handle all newlines

The `csv` module tries to handle newlines itself. On Windows, this leads to line endings of `\r\r\n` instead of `\r\n`.

Setting `newline=''` produces the intended output on both platforms.

* Update CHANGELOG.md

* Fix linting issue

* Delete docs/images/help.svg

---------

Co-authored-by: Melih Yilmaz <32707537+melihyilmaz@users.noreply.github.com>
Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
Co-authored-by: Wout Bittremieux <wout@bittremieux.be>
Co-authored-by: William Stafford Noble <wnoble@uw.edu>
Co-authored-by: Wout Bittremieux <bittremieux@users.noreply.github.com>

* Don't test on macOS versions with MPS (#327)

* Prepare for release v4.2.0

* Update CHANGELOG.md (#332)

---------

Co-authored-by: Melih Yilmaz <32707537+melihyilmaz@users.noreply.github.com>
Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
Co-authored-by: melihyilmaz <yilmazmelih97@gmail.com>
Co-authored-by: wsnoble <wnoble@uw.edu>
Co-authored-by: Joshua Klein <mobiusklein@gmail.com>
---
 .github/workflows/tests.yml     |  2 +-
 CHANGELOG.md                    | 18 ++++++-
 casanovo/config.py              | 20 ++++++-
 casanovo/config.yaml            | 89 +++++++++++++++---------------
 casanovo/data/ms_io.py          |  2 +-
 casanovo/denovo/model.py        | 96 +++++++++++++++++++++------------
 casanovo/denovo/model_runner.py | 12 ++---
 tests/conftest.py               |  2 +-
 tests/unit_tests/test_config.py | 12 +++++
 tests/unit_tests/test_runner.py | 32 ++++++++++-
 tests/unit_tests/test_unit.py   | 43 ++++++++++++---
 11 files changed, 231 insertions(+), 97 deletions(-)

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 53483060..7e79b5a3 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -18,7 +18,7 @@ jobs:
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
-        os: [ubuntu-latest, windows-latest, macos-latest]
+        os: [ubuntu-latest, windows-latest, macos-13]
 
     steps:
     - uses: actions/checkout@v4
diff --git a/CHANGELOG.md b/CHANGELOG.md
index a9f5f939..20f7565d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,6 +6,21 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 
 ## [Unreleased]
 
+## [4.2.0] - 2024-05-14
+
+### Added
+
+- A deprecation warning will be issued when deprecated config options are used in the config file or in the model weights file.
+
+### Changed
+
+- Config option `max_iters` has been renamed to `cosine_schedule_period_iters` to better reflect that it controls the number of iterations for the cosine half period of the learning rate.
+
+### Fixed
+
+- Fix beam search caching failure when multiple beams have an equal predicted peptide score by breaking ties randomly.
+- The mzTab output file now has proper line endings regardless of platform, fixing the extra `\r` found when run on Windows.
+
 ## [4.1.0] - 2024-02-16
 
 ### Changed
@@ -233,7 +248,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 
 - Initial Casanovo version.
 
-[Unreleased]: https://github.com/Noble-Lab/casanovo/compare/v4.1.0...HEAD
+[Unreleased]: https://github.com/Noble-Lab/casanovo/compare/v4.2.0...HEAD
+[4.2.0]: https://github.com/Noble-Lab/casanovo/compare/v4.1.0...v4.2.0
 [4.1.0]: https://github.com/Noble-Lab/casanovo/compare/v4.0.1...v4.1.0
 [4.0.1]: https://github.com/Noble-Lab/casanovo/compare/v4.0.0...v4.0.1
 [4.0.0]: https://github.com/Noble-Lab/casanovo/compare/v3.5.0...v4.0.0
diff --git a/casanovo/config.py b/casanovo/config.py
index 817766ac..792da35a 100644
--- a/casanovo/config.py
+++ b/casanovo/config.py
@@ -2,6 +2,7 @@
 
 import logging
 import shutil
+import warnings
 from pathlib import Path
 from typing import Optional, Dict, Callable, Tuple, Union
 
@@ -12,6 +13,14 @@
 logger = logging.getLogger("casanovo")
 
 
+# FIXME: This contains deprecated config options to be removed in the next major
+#  version update.
+_config_deprecated = dict(
+    every_n_train_steps="val_check_interval",
+    max_iters="cosine_schedule_period_iters",
+)
+
+
 class Config:
     """The Casanovo configuration options.
 
@@ -56,7 +65,7 @@ class Config:
         tb_summarywriter=str,
         train_label_smoothing=float,
         warmup_iters=int,
-        max_iters=int,
+        cosine_schedule_period_iters=int,
         learning_rate=float,
         weight_decay=float,
         train_batch_size=int,
@@ -84,6 +93,15 @@ def __init__(self, config_file: Optional[str] = None):
         else:
             with Path(config_file).open() as f_in:
                 self._user_config = yaml.safe_load(f_in)
+                # Remap deprecated config entries.
+                for old, new in _config_deprecated.items():
+                    if old in self._user_config:
+                        self._user_config[new] = self._user_config.pop(old)
+                        warnings.warn(
+                            f"Deprecated config option '{old}' remapped to "
+                            f"'{new}'",
+                            DeprecationWarning,
+                        )
                 # Check for missing entries in config file.
                 config_missing = self._params.keys() - self._user_config.keys()
                 if len(config_missing) > 0:
diff --git a/casanovo/config.yaml b/casanovo/config.yaml
index 24bf4623..c7186ff7 100644
--- a/casanovo/config.yaml
+++ b/casanovo/config.yaml
@@ -4,30 +4,29 @@
 ###
 
 ###
-# The following parameters can be modified when running inference or
-# when fine-tuning an existing Casanovo model.
+# The following parameters can be modified when running inference or when
+# fine-tuning an existing Casanovo model.
 ###
 
-# Max absolute difference allowed with respect to observed precursor m/z
+# Max absolute difference allowed with respect to observed precursor m/z.
 # Predictions outside the tolerance range are assigned a negative peptide score.
 precursor_mass_tol: 50  # ppm
-# Isotopes to consider when comparing predicted and observed precursor m/z's
+# Isotopes to consider when comparing predicted and observed precursor m/z's.
 isotope_error_range: [0, 1]
-# The minimum length of predicted peptides
+# The minimum length of predicted peptides.
 min_peptide_len: 6
-# Number of spectra in one inference batch
+# Number of spectra in one inference batch.
 predict_batch_size: 1024
-# Number of beams used in beam search
+# Number of beams used in beam search.
 n_beams: 1
-# Number of PSMs for each spectrum
+# Number of PSMs for each spectrum.
 top_match: 1
 # The hardware accelerator to use. Must be one of:
-# "cpu", "gpu", "tpu", "ipu", "hpu", "mps", or "auto"
+# "cpu", "gpu", "tpu", "ipu", "hpu", "mps", or "auto".
 accelerator: "auto"
-# The devices to use. Can be set to a positive number int,
-# or the value -1 to indicate all available devices should be used,
-# If left empty, the appropriate number will be automatically
-# selected for automatic selected on the chosen accelerator.
+# The devices to use. Can be set to a positive integer, or the value -1 to
+# indicate all available devices should be used. If left empty, the appropriate
+# number will be automatically selected based on the chosen accelerator.
 devices:
 
 ###
@@ -35,72 +34,72 @@ devices:
 # Casanovo model from scratch.
 ###
 
-# Random seed to ensure reproducible results
+# Random seed to ensure reproducible results.
 random_seed: 454
 
 # OUTPUT OPTIONS
-# Logging frequency in training steps
+# Logging frequency in training steps.
 n_log: 1
-# Tensorboard directory to use for keeping track of training metrics
+# Tensorboard directory to use for keeping track of training metrics.
 tb_summarywriter:
-# Save the top k model checkpoints during training. -1 saves all, and
-# leaving this field empty saves none.
+# Save the top k model checkpoints during training. -1 saves all, and leaving
+# this field empty saves none.
 save_top_k: 5
-# Path to saved checkpoints
+# Path to saved checkpoints.
 model_save_folder_path: ""
-# Model validation and checkpointing frequency in training steps
+# Model validation and checkpointing frequency in training steps.
 val_check_interval: 50_000
 
 # SPECTRUM PROCESSING OPTIONS
-# Number of the most intense peaks to retain, any remaining peaks are discarded
+# Number of the most intense peaks to retain, any remaining peaks are discarded.
 n_peaks: 150
-# Min peak m/z allowed, peaks with smaller m/z are discarded
+# Min peak m/z allowed, peaks with smaller m/z are discarded.
 min_mz: 50.0
-# Max peak m/z allowed, peaks with larger m/z are discarded
+# Max peak m/z allowed, peaks with larger m/z are discarded.
 max_mz: 2500.0
-# Min peak intensity allowed, less intense peaks are discarded
+# Min peak intensity allowed, less intense peaks are discarded.
 min_intensity: 0.01
-# Max absolute m/z difference allowed when removing the precursor peak
+# Max absolute m/z difference allowed when removing the precursor peak.
 remove_precursor_tol: 2.0  # Da
-# Max precursor charge allowed, spectra with larger charge are skipped
+# Max precursor charge allowed, spectra with larger charge are skipped.
 max_charge: 10
 
 # MODEL ARCHITECTURE OPTIONS
-# Dimensionality of latent representations, i.e. peak embeddings
+# Dimensionality of latent representations, i.e. peak embeddings.
 dim_model: 512
-# Number of attention heads
+# Number of attention heads.
 n_head: 8
-# Dimensionality of fully connected layers
+# Dimensionality of fully connected layers.
 dim_feedforward: 1024
-# Number of transformer layers in spectrum encoder and peptide decoder
+# Number of transformer layers in spectrum encoder and peptide decoder.
 n_layers: 9
-# Dropout rate for model weights
+# Dropout rate for model weights.
 dropout: 0.0
-# Number of dimensions to use for encoding peak intensity
-# Projected up to ``dim_model`` by default and summed with the peak m/z encoding
+# Number of dimensions to use for encoding peak intensity.
+# Projected up to `dim_model` by default and summed with the peak m/z encoding.
 dim_intensity:
-# Max decoded peptide length
+# Max decoded peptide length.
 max_length: 100
-# Number of warmup iterations for learning rate scheduler
+# The number of iterations for the linear warm-up of the learning rate.
 warmup_iters: 100_000
-# Max number of iterations for learning rate scheduler
-max_iters: 600_000
-# Learning rate for weight updates during training
+# The number of iterations for the cosine half period of the learning rate.
+cosine_schedule_period_iters: 600_000
+# Learning rate for weight updates during training.
 learning_rate: 5e-4
-# Regularization term for weight updates
+# Regularization term for weight updates.
 weight_decay: 1e-5
-# Amount of label smoothing when computing the training loss
+# Amount of label smoothing when computing the training loss.
 train_label_smoothing: 0.01
 
 # TRAINING/INFERENCE OPTIONS
-# Number of spectra in one training batch
+# Number of spectra in one training batch.
 train_batch_size: 32
-# Max number of training epochs
+# Max number of training epochs.
 max_epochs: 30
-# Number of validation steps to run before training begins
+# Number of validation steps to run before training begins.
 num_sanity_val_steps: 0
-# Calculate peptide and amino acid precision during training. this
-# is expensive, so we recommend against it.
+# Calculate peptide and amino acid precision during training.
+# This is expensive, so we recommend against it.
 calculate_precision: False
 
 # AMINO ACID AND MODIFICATION VOCABULARY
diff --git a/casanovo/data/ms_io.py b/casanovo/data/ms_io.py
index 7be6ea8c..de69592e 100644
--- a/casanovo/data/ms_io.py
+++ b/casanovo/data/ms_io.py
@@ -147,7 +147,7 @@ def save(self) -> None:
         """
         Export the spectrum identifications to the mzTab file.
         """
-        with open(self.filename, "w") as f:
+        with open(self.filename, "w", newline="") as f:
             writer = csv.writer(f, delimiter="\t", lineterminator=os.linesep)
             # Write metadata.
             for row in self.metadata:
diff --git a/casanovo/denovo/model.py b/casanovo/denovo/model.py
index 9ea9cb23..77df6df5 100644
--- a/casanovo/denovo/model.py
+++ b/casanovo/denovo/model.py
@@ -3,6 +3,7 @@
 import collections
 import heapq
 import logging
+import warnings
 from typing import Any, Dict, Iterable, List, Optional, Tuple, Union
 
 import depthcharge.masses
@@ -14,6 +15,7 @@
 from depthcharge.components import ModelMixin, PeptideDecoder, SpectrumEncoder
 
 from . import evaluate
+from .. import config
 from ..data import ms_io
 
 logger = logging.getLogger("casanovo")
@@ -46,7 +48,7 @@ class Spec2Pep(pl.LightningModule, ModelMixin):
         linear layer, then summed with the m/z encoding for each peak.
     max_length : int
         The maximum peptide length to decode.
-    residues: Union[Dict[str, float], str]
+    residues : Union[Dict[str, float], str]
         The amino acid dictionary and their masses. By default ("canonical) this
         is only the 20 canonical amino acids, with cysteine carbamidomethylated.
         If "massivekb", this dictionary will include the modifications found in
@@ -65,24 +67,24 @@ class Spec2Pep(pl.LightningModule, ModelMixin):
         < precursor_mass_tol`
     min_peptide_len : int
         The minimum length of predicted peptides.
-    n_beams: int
+    n_beams : int
         Number of beams used during beam search decoding.
-    top_match: int
+    top_match : int
         Number of PSMs to return for each spectrum.
     n_log : int
         The number of epochs to wait between logging messages.
-    tb_summarywriter: Optional[str]
+    tb_summarywriter : Optional[str]
         Folder path to record performance metrics during training. If ``None``,
         don't use a ``SummaryWriter``.
-    train_label_smoothing: float
+    train_label_smoothing : float
         Smoothing factor when calculating the training loss.
-    warmup_iters: int
-        The number of warm up iterations for the learning rate scheduler.
-    max_iters: int
-        The total number of iterations for the learning rate scheduler.
-    out_writer: Optional[str]
+    warmup_iters : int
+        The number of iterations for the linear warm-up of the learning rate.
+    cosine_schedule_period_iters : int
+        The number of iterations for the cosine half period of the learning rate.
+    out_writer : Optional[str]
         The output writer for the prediction results.
-    calculate_precision: bool
+    calculate_precision : bool
         Calculate the validation set precision during training.
         This is expensive.
     **kwargs : Dict
@@ -111,7 +113,7 @@ def __init__(
         ] = None,
         train_label_smoothing: float = 0.01,
         warmup_iters: int = 100_000,
-        max_iters: int = 600_000,
+        cosine_schedule_period_iters: int = 600_000,
         out_writer: Optional[ms_io.MztabWriter] = None,
         calculate_precision: bool = False,
         **kwargs: Dict,
@@ -144,7 +146,15 @@ def __init__(
         self.val_celoss = torch.nn.CrossEntropyLoss(ignore_index=0)
         # Optimizer settings.
         self.warmup_iters = warmup_iters
-        self.max_iters = max_iters
+        self.cosine_schedule_period_iters = cosine_schedule_period_iters
+        # `kwargs` will contain additional arguments as well as unrecognized
+        # arguments, including deprecated ones. Remove the deprecated ones.
+        for k in config._config_deprecated:
+            kwargs.pop(k, None)
+            warnings.warn(
+                f"Deprecated hyperparameter '{k}' removed from the model.",
+                DeprecationWarning,
+            )
         self.opt_kwargs = kwargs
 
         # Data properties.
@@ -472,7 +482,9 @@ def _cache_finished_beams(
         step: int,
         beams_to_cache: torch.Tensor,
         beam_fits_precursor: torch.Tensor,
-        pred_cache: Dict[int, List[Tuple[float, np.ndarray, torch.Tensor]]],
+        pred_cache: Dict[
+            int, List[Tuple[float, float, np.ndarray, torch.Tensor]]
+        ],
     ):
         """
         Cache terminated beams.
@@ -493,11 +505,13 @@ def _cache_finished_beams(
         beam_fits_precursor: torch.Tensor of shape (n_spectra * n_beams)
             Boolean tensor indicating whether the beams are within the
             precursor m/z tolerance.
-        pred_cache : Dict[int, List[Tuple[float, np.ndarray, torch.Tensor]]]
+        pred_cache : Dict[
+                int, List[Tuple[float, float, np.ndarray, torch.Tensor]]
+        ]
             Priority queue with finished beams for each spectrum, ordered by
             peptide score. For each finished beam, a tuple with the (negated)
-            peptide score, amino acid-level scores, and the predicted tokens is
-            stored.
+            peptide score, a random tie-breaking float, the amino acid-level
+            scores, and the predicted tokens is stored.
         """
         for i in range(len(beams_to_cache)):
             if not beams_to_cache[i]:
@@ -538,7 +552,12 @@ def _cache_finished_beams(
                 heapadd = heapq.heappushpop
             heapadd(
                 pred_cache[spec_idx],
-                (peptide_score, aa_scores, torch.clone(pred_peptide)),
+                (
+                    peptide_score,
+                    np.random.random_sample(),
+                    aa_scores,
+                    torch.clone(pred_peptide),
+                ),
             )
 
     def _get_topk_beams(
@@ -636,17 +655,22 @@ def _get_topk_beams(
 
     def _get_top_peptide(
         self,
-        pred_cache: Dict[int, List[Tuple[float, np.ndarray, torch.Tensor]]],
+        pred_cache: Dict[
+            int, List[Tuple[float, float, np.ndarray, torch.Tensor]]
+        ],
     ) -> Iterable[List[Tuple[float, np.ndarray, str]]]:
         """
         Return the peptide with the highest confidence score for each spectrum.
 
         Parameters
         ----------
-        pred_cache : Dict[int, List[Tuple[float, np.ndarray, torch.Tensor]]]
+        pred_cache : Dict[
+                int, List[Tuple[float, float, np.ndarray, torch.Tensor]]
+        ]
             Priority queue with finished beams for each spectrum, ordered by
             peptide score. For each finished beam, a tuple with the peptide
-            score, amino acid-level scores, and the predicted tokens is stored.
+            score, a random tie-breaking float, the amino acid-level scores,
+            and the predicted tokens is stored.
 
         Returns
         -------
@@ -663,7 +687,7 @@ def _get_top_peptide(
                         aa_scores,
                         "".join(self.decoder.detokenize(pred_tokens)),
                     )
-                    for pep_score, aa_scores, pred_tokens in heapq.nlargest(
+                    for pep_score, _, aa_scores, pred_tokens in heapq.nlargest(
                         self.top_match, peptides
                     )
                 ]
@@ -960,29 +984,33 @@ def configure_optimizers(
         optimizer = torch.optim.Adam(self.parameters(), **self.opt_kwargs)
         # Apply learning rate scheduler per step.
         lr_scheduler = CosineWarmupScheduler(
-            optimizer, warmup=self.warmup_iters, max_iters=self.max_iters
+            optimizer, self.warmup_iters, self.cosine_schedule_period_iters
         )
         return [optimizer], {"scheduler": lr_scheduler, "interval": "step"}
 
 
 class CosineWarmupScheduler(torch.optim.lr_scheduler._LRScheduler):
     """
-    Learning rate scheduler with linear warm up followed by cosine shaped decay.
+    Learning rate scheduler with linear warm-up followed by cosine shaped decay.
 
     Parameters
     ----------
     optimizer : torch.optim.Optimizer
         Optimizer object.
-    warmup : int
-        The number of warm up iterations.
-    max_iters : torch.optim
-        The total number of iterations.
+    warmup_iters : int
+        The number of iterations for the linear warm-up of the learning rate.
+    cosine_schedule_period_iters : int
+        The number of iterations for the cosine half period of the learning rate.
     """
 
     def __init__(
-        self, optimizer: torch.optim.Optimizer, warmup: int, max_iters: int
+        self,
+        optimizer: torch.optim.Optimizer,
+        warmup_iters: int,
+        cosine_schedule_period_iters: int,
     ):
-        self.warmup, self.max_iters = warmup, max_iters
+        self.warmup_iters = warmup_iters
+        self.cosine_schedule_period_iters = cosine_schedule_period_iters
         super().__init__(optimizer)
 
     def get_lr(self):
@@ -990,9 +1018,11 @@ def get_lr(self):
         return [base_lr * lr_factor for base_lr in self.base_lrs]
 
     def get_lr_factor(self, epoch):
-        lr_factor = 0.5 * (1 + np.cos(np.pi * epoch / self.max_iters))
-        if epoch <= self.warmup:
-            lr_factor *= epoch / self.warmup
+        lr_factor = 0.5 * (
+            1 + np.cos(np.pi * epoch / self.cosine_schedule_period_iters)
+        )
+        if epoch <= self.warmup_iters:
+            lr_factor *= epoch / self.warmup_iters
         return lr_factor
 
 
diff --git a/casanovo/denovo/model_runner.py b/casanovo/denovo/model_runner.py
index 3253419a..4bd2165e 100644
--- a/casanovo/denovo/model_runner.py
+++ b/casanovo/denovo/model_runner.py
@@ -204,8 +204,8 @@ def initialize_model(self, train: bool) -> None:
         Parameters
         ----------
         train : bool
-            Determines whether to set the model up for model training
-            or evaluation / inference.
+            Determines whether to set the model up for model training or
+            evaluation / inference.
         """
         model_params = dict(
             dim_model=self.config.dim_model,
@@ -226,14 +226,14 @@ def initialize_model(self, train: bool) -> None:
             tb_summarywriter=self.config.tb_summarywriter,
             train_label_smoothing=self.config.train_label_smoothing,
             warmup_iters=self.config.warmup_iters,
-            max_iters=self.config.max_iters,
+            cosine_schedule_period_iters=self.config.cosine_schedule_period_iters,
             lr=self.config.learning_rate,
             weight_decay=self.config.weight_decay,
             out_writer=self.writer,
             calculate_precision=self.config.calculate_precision,
         )
 
-        # Reconfigurable non-architecture related parameters for a loaded model
+        # Reconfigurable non-architecture related parameters for a loaded model.
         loaded_model_params = dict(
             max_length=self.config.max_length,
             precursor_mass_tol=self.config.precursor_mass_tol,
@@ -245,7 +245,7 @@ def initialize_model(self, train: bool) -> None:
             tb_summarywriter=self.config.tb_summarywriter,
             train_label_smoothing=self.config.train_label_smoothing,
             warmup_iters=self.config.warmup_iters,
-            max_iters=self.config.max_iters,
+            cosine_schedule_period_iters=self.config.cosine_schedule_period_iters,
             lr=self.config.learning_rate,
             weight_decay=self.config.weight_decay,
             out_writer=self.writer,
@@ -300,7 +300,7 @@ def initialize_model(self, train: bool) -> None:
             except RuntimeError:
                 raise RuntimeError(
                     "Weights file incompatible with the current version of "
-                    "Casanovo. "
+                    "Casanovo."
                 )
 
     def initialize_data_module(
diff --git a/tests/conftest.py b/tests/conftest.py
index 3345824e..02a6d0f2 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -193,7 +193,7 @@ def tiny_config(tmp_path):
         "n_layers": 1,
         "train_label_smoothing": 0.01,
         "warmup_iters": 1,
-        "max_iters": 1,
+        "cosine_schedule_period_iters": 1,
         "max_epochs": 20,
         "val_check_interval": 1,
         "model_save_folder_path": str(tmp_path),
diff --git a/tests/unit_tests/test_config.py b/tests/unit_tests/test_config.py
index 1924d122..fbe10eee 100644
--- a/tests/unit_tests/test_config.py
+++ b/tests/unit_tests/test_config.py
@@ -37,3 +37,15 @@ def test_override(tmp_path, tiny_config):
 
     with pytest.raises(KeyError):
         Config(filename)
+
+
+def test_deprecated(tmp_path, tiny_config):
+    filename = str(tmp_path / "config_deprecated.yml")
+    with open(tiny_config, "r") as f_in, open(filename, "w") as f_out:
+        cfg = yaml.safe_load(f_in)
+        # Insert deprecated config option.
+        cfg["max_iters"] = 1
+        yaml.safe_dump(cfg, f_out)
+
+    with pytest.warns(DeprecationWarning):
+        Config(filename)
diff --git a/tests/unit_tests/test_runner.py b/tests/unit_tests/test_runner.py
index efaceb6b..7febf3f7 100644
--- a/tests/unit_tests/test_runner.py
+++ b/tests/unit_tests/test_runner.py
@@ -62,7 +62,7 @@ def test_save_and_load_weights(tmp_path, mgf_small, tiny_config):
     other_config = Config(tiny_config)
     other_config.n_layers = 50  # lol
     other_config.n_beams = 12
-    other_config.max_iters = 2
+    other_config.cosine_schedule_period_iters = 2
     with torch.device("meta"):
         # Now load the weights into a new model
         # The device should be meta for all the weights.
@@ -72,7 +72,7 @@ def test_save_and_load_weights(tmp_path, mgf_small, tiny_config):
     obs_layers = runner.model.encoder.transformer_encoder.num_layers
     assert obs_layers == 1  # Match the original arch.
     assert runner.model.n_beams == 12  # Match the config
-    assert runner.model.max_iters == 2  # Match the config
+    assert runner.model.cosine_schedule_period_iters == 2  # Match the config
     assert next(runner.model.parameters()).device == torch.device("meta")
 
     # If the Trainer correctly moves the weights to the accelerator,
@@ -99,6 +99,34 @@ def test_save_and_load_weights(tmp_path, mgf_small, tiny_config):
         runner.evaluate([mgf_small])
 
 
+def test_save_and_load_weights_deprecated(tmp_path, mgf_small, tiny_config):
+    """Test saving and loading weights with deprecated config options."""
+    config = Config(tiny_config)
+    config.max_epochs = 1
+    config.cosine_schedule_period_iters = 5
+    ckpt = tmp_path / "test.ckpt"
+
+    with ModelRunner(config=config) as runner:
+        runner.train([mgf_small], [mgf_small])
+        runner.trainer.save_checkpoint(ckpt)
+
+    # Replace the new config option with the deprecated one.
+    ckpt_data = torch.load(ckpt)
+    ckpt_data["hyper_parameters"]["max_iters"] = 5
+    del ckpt_data["hyper_parameters"]["cosine_schedule_period_iters"]
+    torch.save(ckpt_data, str(ckpt))
+
+    # Inference.
+    with ModelRunner(config=config, model_filename=str(ckpt)) as runner:
+        runner.initialize_model(train=False)
+        assert runner.model.cosine_schedule_period_iters == 5
+    # Fine-tuning.
+    with ModelRunner(config=config, model_filename=str(ckpt)) as runner:
+        with pytest.warns(DeprecationWarning):
+            runner.train([mgf_small], [mgf_small])
+            assert "max_iters" not in runner.model.opt_kwargs
+
+
 def test_calculate_precision(tmp_path, mgf_small, tiny_config):
     """Test that this parameter is working correctly."""
     config = Config(tiny_config)
diff --git a/tests/unit_tests/test_unit.py b/tests/unit_tests/test_unit.py
index 61d61efa..f615a099 100644
--- a/tests/unit_tests/test_unit.py
+++ b/tests/unit_tests/test_unit.py
@@ -203,7 +203,7 @@ def test_beam_search_decode():
     )
     # Verify that the correct peptides have been cached.
     correct_cached = 0
-    for _, _, pep in pred_cache[0]:
+    for _, _, _, pep in pred_cache[0]:
         if torch.equal(pep, torch.tensor([4, 14, 4, 13])):
             correct_cached += 1
         elif torch.equal(pep, torch.tensor([4, 14, 4, 18])):
@@ -220,13 +220,13 @@ def test_beam_search_decode():
     # Return the candidate peptide with the highest score
     test_cache = collections.OrderedDict((i, []) for i in range(batch))
     heapq.heappush(
-        test_cache[0], (0.93, 4 * [0.93], torch.tensor([4, 14, 4, 19]))
+        test_cache[0], (0.93, 0.1, 4 * [0.93], torch.tensor([4, 14, 4, 19]))
     )
     heapq.heappush(
-        test_cache[0], (0.95, 4 * [0.95], torch.tensor([4, 14, 4, 13]))
+        test_cache[0], (0.95, 0.2, 4 * [0.95], torch.tensor([4, 14, 4, 13]))
     )
     heapq.heappush(
-        test_cache[0], (0.94, 4 * [0.94], torch.tensor([4, 14, 4, 4]))
+        test_cache[0], (0.94, 0.3, 4 * [0.94], torch.tensor([4, 14, 4, 4]))
     )
 
     assert list(model._get_top_peptide(test_cache))[0][0][-1] == "PEPK"
@@ -296,7 +296,7 @@ def test_beam_search_decode():
     )
     # Verify predictions with matching/non-matching precursor m/z.
     positive_score = negative_score = 0
-    for peptide_score, _, _ in pred_cache[0]:
+    for peptide_score, _, _, _ in pred_cache[0]:
         positive_score += peptide_score >= 0
         negative_score += peptide_score < 0
     assert positive_score == 2
@@ -435,7 +435,7 @@ def test_beam_search_decode():
     vocab = model.decoder.vocab_size + 1  # V
     step = 4
 
-    # Initialize dummyy scores and tokens.
+    # Initialize dummy scores and tokens.
     scores = torch.full(
         size=(batch, length, vocab, beam), fill_value=torch.nan
     )
@@ -467,6 +467,37 @@ def test_beam_search_decode():
 
     assert torch.equal(new_tokens[:, : step + 1], expected_tokens)
 
+    # Test that duplicate peptide scores don't lead to a conflict in the cache.
+    model = Spec2Pep(n_beams=5, residues="massivekb", min_peptide_len=3)
+    batch = 2  # B
+    beam = model.n_beams  # S
+    model.decoder.reverse = True
+    length = model.max_length + 1  # L
+    vocab = model.decoder.vocab_size + 1  # V
+    step = 4
+
+    # Simulate beams with identical amino acid scores but different tokens.
+    scores = torch.zeros(size=(batch * beam, length, vocab))
+    scores[: batch * beam, : step + 1, :] = torch.rand(1)
+    tokens = torch.zeros(batch * beam, length, dtype=torch.int64)
+    tokens[: batch * beam, :step] = torch.randint(
+        1, vocab, (batch * beam, step)
+    )
+
+    pred_cache = collections.OrderedDict((i, []) for i in range(batch))
+    model._cache_finished_beams(
+        tokens,
+        scores,
+        step,
+        torch.ones(batch * beam, dtype=torch.bool),
+        torch.ones(batch * beam, dtype=torch.bool),
+        pred_cache,
+    )
+    for beam_i, preds in pred_cache.items():
+        assert len(preds) == beam
+        peptide_scores = [pep[0] for pep in preds]
+        assert np.allclose(peptide_scores, peptide_scores[0])
+
 
 def test_eval_metrics():
     """