From 94023b02c5fa9348381eed889ae40b98487963b6 Mon Sep 17 00:00:00 2001 From: Ayush Shri Date: Fri, 18 Oct 2024 01:19:35 +0530 Subject: [PATCH 01/10] cleanup_doc --- lightly/models/modules/center.py | 18 +++- lightly/models/modules/heads.py | 151 ++++++++++++++++++--------- lightly/models/modules/heads_timm.py | 41 ++++++++ 3 files changed, 159 insertions(+), 51 deletions(-) diff --git a/lightly/models/modules/center.py b/lightly/models/modules/center.py index 55eee220d..9282a6b2a 100644 --- a/lightly/models/modules/center.py +++ b/lightly/models/modules/center.py @@ -31,6 +31,11 @@ def __init__( mode: str = "mean", momentum: float = 0.9, ) -> None: + """Initializes the Center module with the specified parameters. + + Raises: + ValueError: If an unknown mode is provided. + """ super().__init__() center_fn = CENTER_MODE_TO_FUNCTION.get(mode) @@ -49,8 +54,7 @@ def __init__( @property def value(self) -> Tensor: - """The current value of the center. Use this property to do any operations based - on the center.""" + """The current value of the center. Use this property to do any operations based on the center.""" return self.center @torch.no_grad() @@ -75,7 +79,15 @@ def _center_mean(self, x: Tensor) -> Tensor: @torch.no_grad() def center_mean(x: Tensor, dim: Tuple[int, ...]) -> Tensor: - """Returns the center of the input tensor by calculating the mean.""" + """Returns the center of the input tensor by calculating the mean. + + Args: + x: Input tensor. + dim: Dimensions along which the mean is calculated. + + Returns: + The center of the input tensor. + """ batch_center = torch.mean(x, dim=dim, keepdim=True) if dist.is_available() and dist.is_initialized(): dist.all_reduce(batch_center) diff --git a/lightly/models/modules/heads.py b/lightly/models/modules/heads.py index d9dcb6989..905573b5e 100644 --- a/lightly/models/modules/heads.py +++ b/lightly/models/modules/heads.py @@ -29,7 +29,6 @@ class ProjectionHead(nn.Module): >>> (256, 256, nn.BatchNorm1d(256), nn.ReLU()), >>> (256, 128, None, None) >>> ]) - """ def __init__( @@ -41,6 +40,7 @@ def __init__( ], ], ) -> None: + """Initializes the ProjectionHead module with the specified blocks.""" super().__init__() layers: List[nn.Module] = [] @@ -58,9 +58,7 @@ def forward(self, x: Tensor) -> Tensor: """Computes one forward pass through the projection head. Args: - x: - Input of shape bsz x num_ftrs. - + x: Input of shape bsz x num_ftrs. """ projection: Tensor = self.layers(x) return projection @@ -73,13 +71,19 @@ class BarlowTwinsProjectionHead(ProjectionHead): units. The first two layers of the projector are followed by a batch normalization layer and rectified linear units." [0] - [0]: 2021, Barlow Twins, https://arxiv.org/abs/2103.03230 - + - [0]: 2021, Barlow Twins, https://arxiv.org/abs/2103.03230 """ def __init__( self, input_dim: int = 2048, hidden_dim: int = 8192, output_dim: int = 8192 ): + """Initializes the BarlowTwinsProjectionHead with the specified dimensions. + + Args: + input_dim: Dimensionality of the input features. + hidden_dim: Dimensionality of the hidden layers. + output_dim: Dimensionality of the output features. + """ super(BarlowTwinsProjectionHead, self).__init__( [ (input_dim, hidden_dim, nn.BatchNorm1d(hidden_dim), nn.ReLU()), @@ -96,13 +100,12 @@ class BYOLProjectionHead(ProjectionHead): batch normalization, rectified linear units (ReLU), and a final linear layer with output dimension 256." 
[0] - [0]: BYOL, 2020, https://arxiv.org/abs/2006.07733 - + - [0]: BYOL, 2020, https://arxiv.org/abs/2006.07733 """ - def __init__( self, input_dim: int = 2048, hidden_dim: int = 4096, output_dim: int = 256 ): + """Initializes the BYOLProjectionHead with the specified dimensions.""" super(BYOLProjectionHead, self).__init__( [ (input_dim, hidden_dim, nn.BatchNorm1d(hidden_dim), nn.ReLU()), @@ -118,8 +121,7 @@ class BYOLPredictionHead(ProjectionHead): batch normalization, rectified linear units (ReLU), and a final linear layer with output dimension 256." [0] - [0]: BYOL, 2020, https://arxiv.org/abs/2006.07733 - + - [0]: BYOL, 2020, https://arxiv.org/abs/2006.07733 """ def __init__( @@ -143,9 +145,9 @@ class MoCoProjectionHead(ProjectionHead): hidden layers of both MLPs are 4096-d and are with ReLU; the output layers of both MLPs are 256-d, without ReLU. In MoCo v3, all layers in both MLPs have BN" [2] - [0]: MoCo v1, 2020, https://arxiv.org/abs/1911.05722 - [1]: MoCo v2, 2020, https://arxiv.org/abs/2003.04297 - [2]: MoCo v3, 2021, https://arxiv.org/abs/2104.02057 + - [0]: MoCo v1, 2020, https://arxiv.org/abs/1911.05722 + - [1]: MoCo v2, 2020, https://arxiv.org/abs/2003.04297 + - [2]: MoCo v3, 2021, https://arxiv.org/abs/2104.02057 """ def __init__( @@ -163,8 +165,7 @@ def __init__( hidden_dim: Number of hidden dimensions (2048 for v2, 4096 for v3). output_dim: Number of output dimensions (128 for v2, 256 for v3). num_layers: Number of hidden layers (2 for v2, 3 for v3). - batch_norm: Whether or not to use batch norms. - (False for v2, True for v3) + batch_norm: Whether or not to use batch norms. (False for v2, True for v3). """ layers: List[Tuple[int, int, Optional[nn.Module], Optional[nn.Module]]] = [] layers.append( @@ -204,13 +205,19 @@ class NNCLRProjectionHead(ProjectionHead): layers are followed by batch-normalization [36]. All the batch-norm layers except the last layer are followed by ReLU activation." [0] - [0]: NNCLR, 2021, https://arxiv.org/abs/2104.14548 - + - [0]: NNCLR, 2021, https://arxiv.org/abs/2104.14548 """ def __init__( self, input_dim: int = 2048, hidden_dim: int = 2048, output_dim: int = 256 ): + """Initializes the NNCLRProjectionHead with the specified dimensions. + + Args: + input_dim: Dimensionality of the input features. + hidden_dim: Dimensionality of the hidden layers. + output_dim: Dimensionality of the output features. + """ super(NNCLRProjectionHead, self).__init__( [ (input_dim, hidden_dim, nn.BatchNorm1d(hidden_dim), nn.ReLU()), @@ -227,10 +234,8 @@ class NNCLRPredictionHead(ProjectionHead): of size [4096,d]. The hidden layer of the prediction MLP is followed by batch-norm and ReLU. The last layer has no batch-norm or activation." [0] - [0]: NNCLR, 2021, https://arxiv.org/abs/2104.14548 - + - [0]: NNCLR, 2021, https://arxiv.org/abs/2104.14548 """ - def __init__( self, input_dim: int = 256, hidden_dim: int = 4096, output_dim: int = 256 ): @@ -307,8 +312,7 @@ class SimSiamProjectionHead(ProjectionHead): layer, including its output fc. Its output fc has no ReLU. The hidden fc is 2048-d. This MLP has 3 layers." [0] - [0]: SimSiam, 2020, https://arxiv.org/abs/2011.10566 - + - [0]: SimSiam, 2020, https://arxiv.org/abs/2011.10566 """ def __init__( @@ -329,13 +333,19 @@ def __init__( class SMoGPrototypes(nn.Module): - """SMoG prototypes module for synchronous momentum grouping.""" + """SMoG prototypes module for synchronous momentum grouping. + + Args: + group_features: Tensor containing the group features. + beta: Beta parameter for momentum updating. 
+ """ def __init__( self, group_features: Tensor, beta: float, ): + """Initializes the SMoGPrototypes module with the specified parameter.""" super(SMoGPrototypes, self).__init__() self.group_features = nn.Parameter(group_features, requires_grad=False) self.beta = beta @@ -354,8 +364,7 @@ def forward( Temperature parameter for calculating the logits. Returns: - The logits. - + The computed logits. """ x = torch.nn.functional.normalize(x, dim=1) group_features = torch.nn.functional.normalize(group_features, dim=1) @@ -366,12 +375,10 @@ def get_updated_group_features(self, x: Tensor) -> Tensor: """Performs the synchronous momentum update of the group vectors. Args: - x: - Tensor of shape bsz x dim. + x: Tensor of shape bsz x dim. Returns: The updated group features. - """ assignments = self.assign_groups(x) group_features = torch.clone(self.group_features.data) @@ -392,11 +399,10 @@ def assign_groups(self, x: Tensor) -> Tensor: """Assigns each representation in x to a group based on cosine similarity. Args: - Tensor of shape bsz x dim. + Tensor of shape (bsz, dim). Returns: - Tensor of shape bsz indicating group assignments. - + Tensor of shape (bsz,) indicating group assignments. """ return torch.argmax(self.forward(x, self.group_features), dim=-1) @@ -408,13 +414,19 @@ class SMoGProjectionHead(ProjectionHead): followed by a BatchNorm [28] and an activation function. (...) The output layer of projection head also has BN" [0] - [0]: SMoG, 2022, https://arxiv.org/pdf/2207.06167.pdf - + - [0]: SMoG, 2022, https://arxiv.org/pdf/2207.06167.pdf """ def __init__( self, input_dim: int = 2048, hidden_dim: int = 2048, output_dim: int = 128 ): + """Initializes the SMoGProjectionHead with the specified dimensions. + + Args: + input_dim: Dimensionality of the input features. + hidden_dim: Dimensionality of the hidden layers. + output_dim: Dimensionality of the output features. + """ super(SMoGProjectionHead, self).__init__( [ (input_dim, hidden_dim, nn.BatchNorm1d(hidden_dim), nn.ReLU()), @@ -435,13 +447,20 @@ class SMoGPredictionHead(ProjectionHead): followed by a BatchNorm [28] and an activation function. (...) The output layer of projection head also has BN" [0] - [0]: SMoG, 2022, https://arxiv.org/pdf/2207.06167.pdf - + - [0]: SMoG, 2022, https://arxiv.org/pdf/2207.06167.pdf """ def __init__( self, input_dim: int = 128, hidden_dim: int = 2048, output_dim: int = 128 ): + """Initializes the SMoGPredictionHead with the specified dimensions. + + Args: + input_dim: Dimensionality of the input features. + hidden_dim: Dimensionality of the hidden layers. + output_dim: Dimensionality of the output features. + """ + super(SMoGPredictionHead, self).__init__( [ (input_dim, hidden_dim, nn.BatchNorm1d(hidden_dim), nn.ReLU()), @@ -456,13 +475,19 @@ class SimSiamPredictionHead(ProjectionHead): "The prediction MLP (h) has BN applied to its hidden fc layers. Its output fc does not have BN (...) or ReLU. This MLP has 2 layers." [0] - [0]: SimSiam, 2020, https://arxiv.org/abs/2011.10566 - + - [0]: SimSiam, 2020, https://arxiv.org/abs/2011.10566 """ def __init__( self, input_dim: int = 2048, hidden_dim: int = 512, output_dim: int = 2048 ): + """Initializes the SimSiamPredictionHead with the specified dimensions. + + Args: + input_dim: Dimensionality of the input features. + hidden_dim: Dimensionality of the hidden layers. + output_dim: Dimensionality of the output features. 
+ """ super(SimSiamPredictionHead, self).__init__( [ (input_dim, hidden_dim, nn.BatchNorm1d(hidden_dim), nn.ReLU()), @@ -474,12 +499,13 @@ def __init__( class SwaVProjectionHead(ProjectionHead): """Projection head used for SwaV. - [0]: SwAV, 2020, https://arxiv.org/abs/2006.09882 + - [0]: SwAV, 2020, https://arxiv.org/abs/2006.09882 """ def __init__( self, input_dim: int = 2048, hidden_dim: int = 2048, output_dim: int = 128 ): + """Initializes the SwaVProjectionHead with the specified dimensions.""" super(SwaVProjectionHead, self).__init__( [ (input_dim, hidden_dim, nn.BatchNorm1d(hidden_dim), nn.ReLU()), @@ -522,7 +548,9 @@ def __init__( n_prototypes: Union[List[int], int] = 3000, n_steps_frozen_prototypes: int = 0, ): + """Intializes the SwaVPrototypes module with the specified parameters""" super(SwaVPrototypes, self).__init__() + # Default to a list of 1 if n_prototypes is an int. self.n_prototypes = ( n_prototypes if isinstance(n_prototypes, list) else [n_prototypes] @@ -536,6 +564,16 @@ def __init__( def forward( self, x: Tensor, step: Optional[int] = None ) -> Union[Tensor, List[Tensor]]: + """Forward pass of the SwaVPrototypes module. + + Args: + x: Input tensor. + step: Current training step. + + Returns: + The logits after passing through the prototype heads. Returns a single tensor + if there's one prototype head, otherwise returns a list of tensors. + """ self._freeze_prototypes_if_required(step) out = [] for layer in self.heads: @@ -548,6 +586,7 @@ def normalize(self) -> None: utils.normalize_weight(layer.weight) def _freeze_prototypes_if_required(self, step: Optional[int] = None) -> None: + """Freezes the prototypes if the specified number of steps has been reached.""" if self.n_steps_frozen_prototypes > 0: if step is None: raise ValueError( @@ -588,7 +627,6 @@ class DINOProjectionHead(ProjectionHead): Whether or not to weight normalize the last layer of the DINO head. Not normalizing leads to better performance but can make the training unstable. - """ def __init__( @@ -601,6 +639,7 @@ def __init__( freeze_last_layer: int = -1, norm_last_layer: bool = True, ): + """Initializes the DINOProjectionHead with the specified dimensions.""" bn = nn.BatchNorm1d(hidden_dim) if batch_norm else None super().__init__( @@ -682,6 +721,8 @@ def __init__( layers: List[ Tuple[int, int, Optional[nn.Module], Optional[nn.Module], bool] ] = [] + + # Add the first layer layers.append( ( input_dim, @@ -691,6 +732,8 @@ def __init__( use_bias, ) ) + + # Add the hidden layers for _ in range(num_layers - 1): layers.append( ( @@ -701,6 +744,8 @@ def __init__( use_bias, ) ) + + # Add the output layer layers.append((hidden_dim, output_dim, None, None, use_bias)) super().__init__(layers) @@ -710,6 +755,7 @@ class MSNProjectionHead(ProjectionHead): "We train with a 3-layer projection head with output dimension 256 and batch-normalization at the input and hidden layers.." [0] + Code inspired by [1]. - [0]: Masked Siamese Networks, 2022, https://arxiv.org/abs/2204.07141 @@ -730,6 +776,7 @@ def __init__( hidden_dim: int = 2048, output_dim: int = 256, ): + """Initializes the MSNProjectionHead with the specified dimensions.""" super().__init__( blocks=[ (input_dim, hidden_dim, nn.BatchNorm1d(hidden_dim), nn.GELU()), @@ -746,13 +793,13 @@ class TiCoProjectionHead(ProjectionHead): batch normalization, rectified linear units (ReLU), and a final linear layer with output dimension 256." 
[0] - [0]: TiCo, 2022, https://arxiv.org/pdf/2206.10698.pdf - + - [0]: TiCo, 2022, https://arxiv.org/pdf/2206.10698.pdf """ def __init__( self, input_dim: int = 2048, hidden_dim: int = 4096, output_dim: int = 256 ): + """Initializes the TiCoProjectionHead with the specified dimensions.""" super(TiCoProjectionHead, self).__init__( [ (input_dim, hidden_dim, nn.BatchNorm1d(hidden_dim), nn.ReLU()), @@ -768,8 +815,7 @@ class VICRegProjectionHead(ProjectionHead): units. The first two layers of the projector are followed by a batch normalization layer and rectified linear units." [0] - [0]: 2022, VICReg, https://arxiv.org/pdf/2105.04906.pdf - + - [0]: 2022, VICReg, https://arxiv.org/pdf/2105.04906.pdf """ def __init__( @@ -779,6 +825,14 @@ def __init__( output_dim: int = 8192, num_layers: int = 3, ): + """Initializes the VICRegProjectionHead with the specified dimensions. + + Args: + input_dim: Dimensionality of the input features. + hidden_dim: Dimensionality of the hidden layers. + output_dim: Dimensionality of the output features. + num_layers: Number of layers in the projection head. + """ hidden_layers = [ (hidden_dim, hidden_dim, nn.BatchNorm1d(hidden_dim), nn.ReLU()) for _ in range(num_layers - 2) # Exclude first and last layer. @@ -795,16 +849,16 @@ def __init__( class VicRegLLocalProjectionHead(ProjectionHead): """Projection head used for the local head of VICRegL. - The projector network has three linear layers. The first two layers of the projector - are followed by a batch normalization layer and rectified linear units. - - 2022, VICRegL, https://arxiv.org/abs/2210.01571 + "The projector network has three linear layers. The first two layers of the projector + are followed by a batch normalization layer and rectified linear units." [0] + - [0]: 2022, VICRegL, https://arxiv.org/abs/2210.01571 """ def __init__( self, input_dim: int = 2048, hidden_dim: int = 8192, output_dim: int = 8192 ): + """Initializes the VicRegLLocalProjectionHead with the specified dimensions.""" super(VicRegLLocalProjectionHead, self).__init__( [ (input_dim, hidden_dim, nn.LayerNorm(hidden_dim), nn.ReLU()), @@ -826,6 +880,7 @@ class DenseCLProjectionHead(ProjectionHead): def __init__( self, input_dim: int = 2048, hidden_dim: int = 2048, output_dim: int = 128 ): + """Initializes the DenseCLProjectionHead with the specified dimensions.""" super().__init__( [ (input_dim, hidden_dim, None, nn.ReLU()), diff --git a/lightly/models/modules/heads_timm.py b/lightly/models/modules/heads_timm.py index 7ea20de21..3e33e8768 100644 --- a/lightly/models/modules/heads_timm.py +++ b/lightly/models/modules/heads_timm.py @@ -9,6 +9,15 @@ class AIMPredictionHeadBlock(Module): """Prediction head block for AIM [0]. - [0]: AIM, 2024, https://arxiv.org/abs/2401.08541 + + Args: + input_dim: Dimensionality of the input features. + output_dim: Dimensionality of the output features. + mlp_ratio: Ratio used to determine the hidden layer size in the MLP. + proj_drop: Dropout rate for the projection layer. + act_layer: Activation layer to use. + norm_layer: Normalization layer to use. + mlp_layer: MLP layer to use. 
""" def __init__( @@ -21,6 +30,8 @@ def __init__( norm_layer: Type[Module] = LayerNorm, mlp_layer: Type[Module] = Mlp, ) -> None: + """Initializes the AIMPredictionHeadBlock module with the specified parameters.""" + super().__init__() self.norm = norm_layer(input_dim) # type: ignore[call-arg] self.mlp = mlp_layer( # type: ignore[call-arg] @@ -33,6 +44,14 @@ def __init__( ) def forward(self, x: Tensor) -> Tensor: + """Forward pass of the AIMPredictionHeadBlock. + + Args: + x: Input tensor. + + Returns: + Output tensor after applying the MLP and normalization. + """ x = x + self.mlp(self.norm(x)) return x @@ -41,6 +60,18 @@ class AIMPredictionHead(Module): """Prediction head for AIM [0]. - [0]: AIM, 2024, https://arxiv.org/abs/2401.08541 + + Args: + input_dim: Dimensionality of the input features. + output_dim: Dimensionality of the output features. + hidden_dim: Dimensionality of the hidden layer. + num_blocks: Number of blocks in the prediction head. + mlp_ratio: Ratio used to determine the hidden layer size in the MLP. + proj_drop: Dropout rate for the projection layer. + act_layer: Activation layer to use. + norm_layer: Normalization layer to use. + mlp_layer: MLP layer to use. + block_fn: Block function to use for the prediction head. """ def __init__( @@ -56,6 +87,8 @@ def __init__( mlp_layer: Type[Module] = Mlp, block_fn: Type[Module] = AIMPredictionHeadBlock, ) -> None: + """Initializes the AIMPredictionHead module with the specified parameters.""" + super().__init__() self.blocks = Sequential( # Linear layer to project the input dimension to the hidden dimension. @@ -79,5 +112,13 @@ def __init__( ) def forward(self, x: Tensor) -> Tensor: + """Forward pass of the AIMPredictionHead. + + Args: + x: Input tensor. + + Returns: + Output tensor after processing through the prediction head blocks. + """ x = self.blocks(x) return x From e5b81615be3198101cc4e9f895c37b39ac4ce1f6 Mon Sep 17 00:00:00 2001 From: Ayush Shri Date: Fri, 18 Oct 2024 01:29:15 +0530 Subject: [PATCH 02/10] cleanup --- lightly/models/modules/heads.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/lightly/models/modules/heads.py b/lightly/models/modules/heads.py index 905573b5e..a9c6c42be 100644 --- a/lightly/models/modules/heads.py +++ b/lightly/models/modules/heads.py @@ -333,12 +333,12 @@ def __init__( class SMoGPrototypes(nn.Module): - """SMoG prototypes module for synchronous momentum grouping. + """SMoG prototypes module for synchronous momentum grouping. - Args: - group_features: Tensor containing the group features. - beta: Beta parameter for momentum updating. - """ + Args: + group_features: Tensor containing the group features. + beta: Beta parameter for momentum updating. + """ def __init__( self, @@ -539,7 +539,6 @@ class SwaVPrototypes(nn.Module): >>> >>> # logits has shape bsz x 512 >>> logits = prototypes(features) - """ def __init__( @@ -573,7 +572,7 @@ def forward( Returns: The logits after passing through the prototype heads. Returns a single tensor if there's one prototype head, otherwise returns a list of tensors. 
- """ + """ self._freeze_prototypes_if_required(step) out = [] for layer in self.heads: From b952f9c0e73a8bbe54be79f07e76b5cc177c8d24 Mon Sep 17 00:00:00 2001 From: Ayush Shri Date: Fri, 18 Oct 2024 01:31:26 +0530 Subject: [PATCH 03/10] format1 --- lightly/models/modules/center.py | 2 +- lightly/models/modules/heads_timm.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lightly/models/modules/center.py b/lightly/models/modules/center.py index 9282a6b2a..d27722f9d 100644 --- a/lightly/models/modules/center.py +++ b/lightly/models/modules/center.py @@ -80,7 +80,7 @@ def _center_mean(self, x: Tensor) -> Tensor: @torch.no_grad() def center_mean(x: Tensor, dim: Tuple[int, ...]) -> Tensor: """Returns the center of the input tensor by calculating the mean. - + Args: x: Input tensor. dim: Dimensions along which the mean is calculated. diff --git a/lightly/models/modules/heads_timm.py b/lightly/models/modules/heads_timm.py index 3e33e8768..d31668ba0 100644 --- a/lightly/models/modules/heads_timm.py +++ b/lightly/models/modules/heads_timm.py @@ -60,7 +60,7 @@ class AIMPredictionHead(Module): """Prediction head for AIM [0]. - [0]: AIM, 2024, https://arxiv.org/abs/2401.08541 - + Args: input_dim: Dimensionality of the input features. output_dim: Dimensionality of the output features. From f3823447e31b06ff888226391201f7332c6999d2 Mon Sep 17 00:00:00 2001 From: Ayush Shri Date: Fri, 18 Oct 2024 01:36:13 +0530 Subject: [PATCH 04/10] 11 --- lightly/models/modules/center.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/lightly/models/modules/center.py b/lightly/models/modules/center.py index d27722f9d..4b57a2921 100644 --- a/lightly/models/modules/center.py +++ b/lightly/models/modules/center.py @@ -62,9 +62,7 @@ def update(self, x: Tensor) -> None: """Update the center with a new batch of features. Args: - x: - Feature tensor used to update the center. Must have the same number of - dimensions as self.size. + x: Feature tensor used to update the center. Must have the same number of dimensions as self.size. """ batch_center = self._center_fn(x=x, dim=self.dim) self.center = center_momentum( From 790b6ede2b883c50d57c2a71b189c45b67f3f1c6 Mon Sep 17 00:00:00 2001 From: Ayush Shri Date: Fri, 18 Oct 2024 01:46:37 +0530 Subject: [PATCH 05/10] 111 --- lightly/models/modules/center.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/lightly/models/modules/center.py b/lightly/models/modules/center.py index 4b57a2921..58bec5b09 100644 --- a/lightly/models/modules/center.py +++ b/lightly/models/modules/center.py @@ -54,7 +54,9 @@ def __init__( @property def value(self) -> Tensor: - """The current value of the center. Use this property to do any operations based on the center.""" + """The current value of the center. Use this property to do any operations + based on the center. + """ return self.center @torch.no_grad() @@ -62,7 +64,9 @@ def update(self, x: Tensor) -> None: """Update the center with a new batch of features. Args: - x: Feature tensor used to update the center. Must have the same number of dimensions as self.size. + x: + Feature tensor used to update the center. Must have the same number of + dimensions as self.size. 
""" batch_center = self._center_fn(x=x, dim=self.dim) self.center = center_momentum( From 2d6258bae3d75eb3cca5751a27ce10699a736fbe Mon Sep 17 00:00:00 2001 From: Ayush Shri Date: Fri, 18 Oct 2024 01:47:35 +0530 Subject: [PATCH 06/10] format11 --- lightly/models/modules/center.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lightly/models/modules/center.py b/lightly/models/modules/center.py index 58bec5b09..9bef1b145 100644 --- a/lightly/models/modules/center.py +++ b/lightly/models/modules/center.py @@ -54,7 +54,7 @@ def __init__( @property def value(self) -> Tensor: - """The current value of the center. Use this property to do any operations + """The current value of the center. Use this property to do any operations based on the center. """ return self.center From 9d47498477894e9d41284727f374c5ba6ca2ef8c Mon Sep 17 00:00:00 2001 From: Ayush Shri Date: Fri, 18 Oct 2024 17:47:37 +0530 Subject: [PATCH 07/10] requested_changes --- lightly/models/modules/center.py | 11 ++- lightly/models/modules/heads.py | 126 ++++++++++++++++++--------- lightly/models/modules/heads_timm.py | 57 ++++++++---- 3 files changed, 129 insertions(+), 65 deletions(-) diff --git a/lightly/models/modules/center.py b/lightly/models/modules/center.py index 9bef1b145..05b3b2e0a 100644 --- a/lightly/models/modules/center.py +++ b/lightly/models/modules/center.py @@ -54,8 +54,9 @@ def __init__( @property def value(self) -> Tensor: - """The current value of the center. Use this property to do any operations - based on the center. + """The current value of the center. + + Use this property to do any operations based on the center. """ return self.center @@ -84,8 +85,10 @@ def center_mean(x: Tensor, dim: Tuple[int, ...]) -> Tensor: """Returns the center of the input tensor by calculating the mean. Args: - x: Input tensor. - dim: Dimensions along which the mean is calculated. + x: + Input tensor. + dim: + Dimensions along which the mean is calculated. Returns: The center of the input tensor. diff --git a/lightly/models/modules/heads.py b/lightly/models/modules/heads.py index a9c6c42be..58d7b78f3 100644 --- a/lightly/models/modules/heads.py +++ b/lightly/models/modules/heads.py @@ -58,7 +58,8 @@ def forward(self, x: Tensor) -> Tensor: """Computes one forward pass through the projection head. Args: - x: Input of shape bsz x num_ftrs. + x: + Input of shape bsz x num_ftrs. """ projection: Tensor = self.layers(x) return projection @@ -80,9 +81,12 @@ def __init__( """Initializes the BarlowTwinsProjectionHead with the specified dimensions. Args: - input_dim: Dimensionality of the input features. - hidden_dim: Dimensionality of the hidden layers. - output_dim: Dimensionality of the output features. + input_dim: + Dimensionality of the input features. + hidden_dim: + Dimensionality of the hidden layers. + output_dim: + Dimensionality of the output features. """ super(BarlowTwinsProjectionHead, self).__init__( [ @@ -161,11 +165,16 @@ def __init__( """Initialize a new MoCoProjectionHead instance. Args: - input_dim: Number of input dimensions. - hidden_dim: Number of hidden dimensions (2048 for v2, 4096 for v3). - output_dim: Number of output dimensions (128 for v2, 256 for v3). - num_layers: Number of hidden layers (2 for v2, 3 for v3). - batch_norm: Whether or not to use batch norms. (False for v2, True for v3). + input_dim: + Number of input dimensions. + hidden_dim: + Number of hidden dimensions (2048 for v2, 4096 for v3). + output_dim: + Number of output dimensions (128 for v2, 256 for v3). 
+ num_layers: + Number of hidden layers (2 for v2, 3 for v3). + batch_norm: + Whether or not to use batch norms. (False for v2, True for v3). """ layers: List[Tuple[int, int, Optional[nn.Module], Optional[nn.Module]]] = [] layers.append( @@ -214,9 +223,12 @@ def __init__( """Initializes the NNCLRProjectionHead with the specified dimensions. Args: - input_dim: Dimensionality of the input features. - hidden_dim: Dimensionality of the hidden layers. - output_dim: Dimensionality of the output features. + input_dim: + Dimensionality of the input features. + hidden_dim: + Dimensionality of the hidden layers. + output_dim: + Dimensionality of the output features. """ super(NNCLRProjectionHead, self).__init__( [ @@ -270,11 +282,16 @@ def __init__( """Initialize a new SimCLRProjectionHead instance. Args: - input_dim: Number of input dimensions. - hidden_dim: Number of hidden dimensions. - output_dim: Number of output dimensions. - num_layers: Number of hidden layers (2 for v1, 3+ for v2). - batch_norm: Whether or not to use batch norms. + input_dim: + Number of input dimensions. + hidden_dim: + Number of hidden dimensions. + output_dim: + Number of output dimensions. + num_layers: + Number of hidden layers (2 for v1, 3+ for v2). + batch_norm: + Whether or not to use batch norms. """ layers: List[Tuple[int, int, Optional[nn.Module], Optional[nn.Module]]] = [] layers.append( @@ -336,8 +353,10 @@ class SMoGPrototypes(nn.Module): """SMoG prototypes module for synchronous momentum grouping. Args: - group_features: Tensor containing the group features. - beta: Beta parameter for momentum updating. + group_features: + Tensor containing the group features. + beta: + Beta parameter for momentum updating. """ def __init__( @@ -375,7 +394,8 @@ def get_updated_group_features(self, x: Tensor) -> Tensor: """Performs the synchronous momentum update of the group vectors. Args: - x: Tensor of shape bsz x dim. + x: + Tensor of shape bsz x dim. Returns: The updated group features. @@ -399,7 +419,8 @@ def assign_groups(self, x: Tensor) -> Tensor: """Assigns each representation in x to a group based on cosine similarity. Args: - Tensor of shape (bsz, dim). + x: + Tensor of shape (bsz, dim). Returns: Tensor of shape (bsz,) indicating group assignments. @@ -423,9 +444,12 @@ def __init__( """Initializes the SMoGProjectionHead with the specified dimensions. Args: - input_dim: Dimensionality of the input features. - hidden_dim: Dimensionality of the hidden layers. - output_dim: Dimensionality of the output features. + input_dim: + Dimensionality of the input features. + hidden_dim: + Dimensionality of the hidden layers. + output_dim: + Dimensionality of the output features. """ super(SMoGProjectionHead, self).__init__( [ @@ -456,9 +480,12 @@ def __init__( """Initializes the SMoGPredictionHead with the specified dimensions. Args: - input_dim: Dimensionality of the input features. - hidden_dim: Dimensionality of the hidden layers. - output_dim: Dimensionality of the output features. + input_dim: + Dimensionality of the input features. + hidden_dim: + Dimensionality of the hidden layers. + output_dim: + Dimensionality of the output features. """ super(SMoGPredictionHead, self).__init__( @@ -484,9 +511,12 @@ def __init__( """Initializes the SimSiamPredictionHead with the specified dimensions. Args: - input_dim: Dimensionality of the input features. - hidden_dim: Dimensionality of the hidden layers. - output_dim: Dimensionality of the output features. + input_dim: + Dimensionality of the input features. 
+ hidden_dim: + Dimensionality of the hidden layers. + output_dim: + Dimensionality of the output features. """ super(SimSiamPredictionHead, self).__init__( [ @@ -566,8 +596,10 @@ def forward( """Forward pass of the SwaVPrototypes module. Args: - x: Input tensor. - step: Current training step. + x: + Input tensor. + step: + Current training step. Returns: The logits after passing through the prototype heads. Returns a single tensor @@ -710,12 +742,18 @@ def __init__( """Initialize a new MMCRProjectionHead instance. Args: - input_dim: Number of input dimensions. - hidden_dim: Number of hidden dimensions. - output_dim: Number of output dimensions. - num_layers: Number of hidden layers. - batch_norm: Whether or not to use batch norms. - use_bias: Whether or not to use bias in the linear layers. + input_dim: + Number of input dimensions. + hidden_dim: + Number of hidden dimensions. + output_dim: + Number of output dimensions. + num_layers: + Number of hidden layers. + batch_norm: + Whether or not to use batch norms. + use_bias: + Whether or not to use bias in the linear layers. """ layers: List[ Tuple[int, int, Optional[nn.Module], Optional[nn.Module], bool] @@ -827,10 +865,14 @@ def __init__( """Initializes the VICRegProjectionHead with the specified dimensions. Args: - input_dim: Dimensionality of the input features. - hidden_dim: Dimensionality of the hidden layers. - output_dim: Dimensionality of the output features. - num_layers: Number of layers in the projection head. + input_dim: + Dimensionality of the input features. + hidden_dim: + Dimensionality of the hidden layers. + output_dim: + Dimensionality of the output features. + num_layers: + Number of layers in the projection head. """ hidden_layers = [ (hidden_dim, hidden_dim, nn.BatchNorm1d(hidden_dim), nn.ReLU()) diff --git a/lightly/models/modules/heads_timm.py b/lightly/models/modules/heads_timm.py index d31668ba0..13c48a258 100644 --- a/lightly/models/modules/heads_timm.py +++ b/lightly/models/modules/heads_timm.py @@ -11,13 +11,20 @@ class AIMPredictionHeadBlock(Module): - [0]: AIM, 2024, https://arxiv.org/abs/2401.08541 Args: - input_dim: Dimensionality of the input features. - output_dim: Dimensionality of the output features. - mlp_ratio: Ratio used to determine the hidden layer size in the MLP. - proj_drop: Dropout rate for the projection layer. - act_layer: Activation layer to use. - norm_layer: Normalization layer to use. - mlp_layer: MLP layer to use. + input_dim: + Dimensionality of the input features. + output_dim: + Dimensionality of the output features. + mlp_ratio: + Ratio used to determine the hidden layer size in the MLP. + proj_drop: + Dropout rate for the projection layer. + act_layer: + Activation layer to use. + norm_layer: + Normalization layer to use. + mlp_layer: + MLP layer to use. """ def __init__( @@ -47,7 +54,8 @@ def forward(self, x: Tensor) -> Tensor: """Forward pass of the AIMPredictionHeadBlock. Args: - x: Input tensor. + x: + Input tensor. Returns: Output tensor after applying the MLP and normalization. @@ -62,16 +70,26 @@ class AIMPredictionHead(Module): - [0]: AIM, 2024, https://arxiv.org/abs/2401.08541 Args: - input_dim: Dimensionality of the input features. - output_dim: Dimensionality of the output features. - hidden_dim: Dimensionality of the hidden layer. - num_blocks: Number of blocks in the prediction head. - mlp_ratio: Ratio used to determine the hidden layer size in the MLP. - proj_drop: Dropout rate for the projection layer. - act_layer: Activation layer to use. 
- norm_layer: Normalization layer to use. - mlp_layer: MLP layer to use. - block_fn: Block function to use for the prediction head. + input_dim: + Dimensionality of the input features. + output_dim: + Dimensionality of the output features. + hidden_dim: + Dimensionality of the hidden layer. + num_blocks: + Number of blocks in the prediction head. + mlp_ratio: + Ratio used to determine the hidden layer size in the MLP. + proj_drop: + Dropout rate for the projection layer. + act_layer: + Activation layer to use. + norm_layer: + Normalization layer to use. + mlp_layer: + MLP layer to use. + block_fn: + Block function to use for the prediction head. """ def __init__( @@ -115,7 +133,8 @@ def forward(self, x: Tensor) -> Tensor: """Forward pass of the AIMPredictionHead. Args: - x: Input tensor. + x: + Input tensor. Returns: Output tensor after processing through the prediction head blocks. From d4fc1bf7ff168bf22b1142f2c144b4d9b20610c2 Mon Sep 17 00:00:00 2001 From: Ayush Shri Date: Fri, 18 Oct 2024 17:49:52 +0530 Subject: [PATCH 08/10] format_1 --- lightly/models/modules/center.py | 8 +++--- lightly/models/modules/heads_timm.py | 38 ++++++++++++++-------------- 2 files changed, 23 insertions(+), 23 deletions(-) diff --git a/lightly/models/modules/center.py b/lightly/models/modules/center.py index 05b3b2e0a..21be7b9bf 100644 --- a/lightly/models/modules/center.py +++ b/lightly/models/modules/center.py @@ -54,8 +54,8 @@ def __init__( @property def value(self) -> Tensor: - """The current value of the center. - + """The current value of the center. + Use this property to do any operations based on the center. """ return self.center @@ -85,9 +85,9 @@ def center_mean(x: Tensor, dim: Tuple[int, ...]) -> Tensor: """Returns the center of the input tensor by calculating the mean. Args: - x: + x: Input tensor. - dim: + dim: Dimensions along which the mean is calculated. Returns: diff --git a/lightly/models/modules/heads_timm.py b/lightly/models/modules/heads_timm.py index 13c48a258..44ae6a4ca 100644 --- a/lightly/models/modules/heads_timm.py +++ b/lightly/models/modules/heads_timm.py @@ -11,19 +11,19 @@ class AIMPredictionHeadBlock(Module): - [0]: AIM, 2024, https://arxiv.org/abs/2401.08541 Args: - input_dim: + input_dim: Dimensionality of the input features. - output_dim: + output_dim: Dimensionality of the output features. - mlp_ratio: + mlp_ratio: Ratio used to determine the hidden layer size in the MLP. - proj_drop: + proj_drop: Dropout rate for the projection layer. - act_layer: + act_layer: Activation layer to use. - norm_layer: + norm_layer: Normalization layer to use. - mlp_layer: + mlp_layer: MLP layer to use. """ @@ -54,7 +54,7 @@ def forward(self, x: Tensor) -> Tensor: """Forward pass of the AIMPredictionHeadBlock. Args: - x: + x: Input tensor. Returns: @@ -70,25 +70,25 @@ class AIMPredictionHead(Module): - [0]: AIM, 2024, https://arxiv.org/abs/2401.08541 Args: - input_dim: + input_dim: Dimensionality of the input features. - output_dim: + output_dim: Dimensionality of the output features. - hidden_dim: + hidden_dim: Dimensionality of the hidden layer. - num_blocks: + num_blocks: Number of blocks in the prediction head. - mlp_ratio: + mlp_ratio: Ratio used to determine the hidden layer size in the MLP. - proj_drop: + proj_drop: Dropout rate for the projection layer. - act_layer: + act_layer: Activation layer to use. - norm_layer: + norm_layer: Normalization layer to use. - mlp_layer: + mlp_layer: MLP layer to use. - block_fn: + block_fn: Block function to use for the prediction head. 
""" @@ -133,7 +133,7 @@ def forward(self, x: Tensor) -> Tensor: """Forward pass of the AIMPredictionHead. Args: - x: + x: Input tensor. Returns: From 166a46a4e560312d2a4a811cef146a995f012631 Mon Sep 17 00:00:00 2001 From: MalteEbner Date: Fri, 18 Oct 2024 16:01:43 +0200 Subject: [PATCH 09/10] move SMoGPrototypes docstring to correct indent --- lightly/models/modules/heads.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/lightly/models/modules/heads.py b/lightly/models/modules/heads.py index 58d7b78f3..5deaf50a9 100644 --- a/lightly/models/modules/heads.py +++ b/lightly/models/modules/heads.py @@ -350,14 +350,14 @@ def __init__( class SMoGPrototypes(nn.Module): - """SMoG prototypes module for synchronous momentum grouping. + """SMoG prototypes module for synchronous momentum grouping. - Args: - group_features: - Tensor containing the group features. - beta: - Beta parameter for momentum updating. - """ + Args: + group_features: + Tensor containing the group features. + beta: + Beta parameter for momentum updating. + """ def __init__( self, From 5b5c8be046a9c05cb219a2901f21eb18296146cd Mon Sep 17 00:00:00 2001 From: MalteEbner Date: Fri, 18 Oct 2024 16:04:10 +0200 Subject: [PATCH 10/10] format heads.py --- lightly/models/modules/heads.py | 96 +++++++++++++++++---------------- 1 file changed, 49 insertions(+), 47 deletions(-) diff --git a/lightly/models/modules/heads.py b/lightly/models/modules/heads.py index 5deaf50a9..1bfb7ccbb 100644 --- a/lightly/models/modules/heads.py +++ b/lightly/models/modules/heads.py @@ -58,7 +58,7 @@ def forward(self, x: Tensor) -> Tensor: """Computes one forward pass through the projection head. Args: - x: + x: Input of shape bsz x num_ftrs. """ projection: Tensor = self.layers(x) @@ -81,11 +81,11 @@ def __init__( """Initializes the BarlowTwinsProjectionHead with the specified dimensions. Args: - input_dim: + input_dim: Dimensionality of the input features. - hidden_dim: + hidden_dim: Dimensionality of the hidden layers. - output_dim: + output_dim: Dimensionality of the output features. """ super(BarlowTwinsProjectionHead, self).__init__( @@ -106,6 +106,7 @@ class BYOLProjectionHead(ProjectionHead): - [0]: BYOL, 2020, https://arxiv.org/abs/2006.07733 """ + def __init__( self, input_dim: int = 2048, hidden_dim: int = 4096, output_dim: int = 256 ): @@ -165,15 +166,15 @@ def __init__( """Initialize a new MoCoProjectionHead instance. Args: - input_dim: + input_dim: Number of input dimensions. - hidden_dim: + hidden_dim: Number of hidden dimensions (2048 for v2, 4096 for v3). - output_dim: + output_dim: Number of output dimensions (128 for v2, 256 for v3). - num_layers: + num_layers: Number of hidden layers (2 for v2, 3 for v3). - batch_norm: + batch_norm: Whether or not to use batch norms. (False for v2, True for v3). """ layers: List[Tuple[int, int, Optional[nn.Module], Optional[nn.Module]]] = [] @@ -223,11 +224,11 @@ def __init__( """Initializes the NNCLRProjectionHead with the specified dimensions. Args: - input_dim: + input_dim: Dimensionality of the input features. - hidden_dim: + hidden_dim: Dimensionality of the hidden layers. - output_dim: + output_dim: Dimensionality of the output features. 
""" super(NNCLRProjectionHead, self).__init__( @@ -248,6 +249,7 @@ class NNCLRPredictionHead(ProjectionHead): - [0]: NNCLR, 2021, https://arxiv.org/abs/2104.14548 """ + def __init__( self, input_dim: int = 256, hidden_dim: int = 4096, output_dim: int = 256 ): @@ -282,15 +284,15 @@ def __init__( """Initialize a new SimCLRProjectionHead instance. Args: - input_dim: + input_dim: Number of input dimensions. - hidden_dim: + hidden_dim: Number of hidden dimensions. - output_dim: + output_dim: Number of output dimensions. - num_layers: + num_layers: Number of hidden layers (2 for v1, 3+ for v2). - batch_norm: + batch_norm: Whether or not to use batch norms. """ layers: List[Tuple[int, int, Optional[nn.Module], Optional[nn.Module]]] = [] @@ -353,9 +355,9 @@ class SMoGPrototypes(nn.Module): """SMoG prototypes module for synchronous momentum grouping. Args: - group_features: + group_features: Tensor containing the group features. - beta: + beta: Beta parameter for momentum updating. """ @@ -394,7 +396,7 @@ def get_updated_group_features(self, x: Tensor) -> Tensor: """Performs the synchronous momentum update of the group vectors. Args: - x: + x: Tensor of shape bsz x dim. Returns: @@ -444,11 +446,11 @@ def __init__( """Initializes the SMoGProjectionHead with the specified dimensions. Args: - input_dim: + input_dim: Dimensionality of the input features. - hidden_dim: + hidden_dim: Dimensionality of the hidden layers. - output_dim: + output_dim: Dimensionality of the output features. """ super(SMoGProjectionHead, self).__init__( @@ -480,11 +482,11 @@ def __init__( """Initializes the SMoGPredictionHead with the specified dimensions. Args: - input_dim: + input_dim: Dimensionality of the input features. - hidden_dim: + hidden_dim: Dimensionality of the hidden layers. - output_dim: + output_dim: Dimensionality of the output features. """ @@ -511,11 +513,11 @@ def __init__( """Initializes the SimSiamPredictionHead with the specified dimensions. Args: - input_dim: + input_dim: Dimensionality of the input features. - hidden_dim: + hidden_dim: Dimensionality of the hidden layers. - output_dim: + output_dim: Dimensionality of the output features. """ super(SimSiamPredictionHead, self).__init__( @@ -579,7 +581,7 @@ def __init__( ): """Intializes the SwaVPrototypes module with the specified parameters""" super(SwaVPrototypes, self).__init__() - + # Default to a list of 1 if n_prototypes is an int. self.n_prototypes = ( n_prototypes if isinstance(n_prototypes, list) else [n_prototypes] @@ -596,13 +598,13 @@ def forward( """Forward pass of the SwaVPrototypes module. Args: - x: + x: Input tensor. - step: + step: Current training step. Returns: - The logits after passing through the prototype heads. Returns a single tensor + The logits after passing through the prototype heads. Returns a single tensor if there's one prototype head, otherwise returns a list of tensors. """ self._freeze_prototypes_if_required(step) @@ -742,23 +744,23 @@ def __init__( """Initialize a new MMCRProjectionHead instance. Args: - input_dim: + input_dim: Number of input dimensions. - hidden_dim: + hidden_dim: Number of hidden dimensions. - output_dim: + output_dim: Number of output dimensions. - num_layers: + num_layers: Number of hidden layers. - batch_norm: + batch_norm: Whether or not to use batch norms. - use_bias: + use_bias: Whether or not to use bias in the linear layers. 
""" layers: List[ Tuple[int, int, Optional[nn.Module], Optional[nn.Module], bool] ] = [] - + # Add the first layer layers.append( ( @@ -769,7 +771,7 @@ def __init__( use_bias, ) ) - + # Add the hidden layers for _ in range(num_layers - 1): layers.append( @@ -781,7 +783,7 @@ def __init__( use_bias, ) ) - + # Add the output layer layers.append((hidden_dim, output_dim, None, None, use_bias)) super().__init__(layers) @@ -792,7 +794,7 @@ class MSNProjectionHead(ProjectionHead): "We train with a 3-layer projection head with output dimension 256 and batch-normalization at the input and hidden layers.." [0] - + Code inspired by [1]. - [0]: Masked Siamese Networks, 2022, https://arxiv.org/abs/2204.07141 @@ -865,13 +867,13 @@ def __init__( """Initializes the VICRegProjectionHead with the specified dimensions. Args: - input_dim: + input_dim: Dimensionality of the input features. - hidden_dim: + hidden_dim: Dimensionality of the hidden layers. - output_dim: + output_dim: Dimensionality of the output features. - num_layers: + num_layers: Number of layers in the projection head. """ hidden_layers = [