Clean up the nested if-else

huggingface · Jan 29, 2025 · 172cfd7
1 parent 8c69579
commit 172cfd7
Show file tree

Hide file tree

Showing 32 changed files with 320 additions and 352 deletions.
diff --git a/src/transformers/models/bloom/modeling_bloom.py b/src/transformers/models/bloom/modeling_bloom.py
@@ -1123,18 +1123,17 @@ def forward(
             raise ValueError("Cannot handle batch sizes > 1 if no padding token is defined.")
         if self.config.pad_token_id is None:
             last_non_pad_token = -1
+        elif input_ids is not None:
+            # To handle both left- and right- padding, we take the rightmost token that is not equal to pad_token_id
+            non_pad_mask = (input_ids != self.config.pad_token_id).to(logits.device, torch.int32)
+            token_indices = torch.arange(input_ids.shape[-1], device=logits.device)
+            last_non_pad_token = (token_indices * non_pad_mask).max(-1).values
         else:
-            if input_ids is not None:
-                # To handle both left- and right- padding, we take the rightmost token that is not equal to pad_token_id
-                non_pad_mask = (input_ids != self.config.pad_token_id).to(logits.device, torch.int32)
-                token_indices = torch.arange(input_ids.shape[-1], device=logits.device)
-                last_non_pad_token = (token_indices * non_pad_mask).max(-1).values
-            else:
-                last_non_pad_token = -1
-                logger.warning_once(
-                    f"{self.__class__.__name__} will not detect padding tokens in `inputs_embeds`. Results may be "
-                    "unexpected if using padding tokens in conjunction with `inputs_embeds.`"
-                )
+            last_non_pad_token = -1
+            logger.warning_once(
+                f"{self.__class__.__name__} will not detect padding tokens in `inputs_embeds`. Results may be "
+                "unexpected if using padding tokens in conjunction with `inputs_embeds.`"
+            )
 
         pooled_logits = logits[torch.arange(batch_size, device=logits.device), last_non_pad_token]
 

diff --git a/src/transformers/models/ctrl/modeling_ctrl.py b/src/transformers/models/ctrl/modeling_ctrl.py
@@ -791,18 +791,17 @@ def forward(
             raise ValueError("Cannot handle batch sizes > 1 if no padding token is defined.")
         if self.config.pad_token_id is None:
             last_non_pad_token = -1
+        elif input_ids is not None:
+            # To handle both left- and right- padding, we take the rightmost token that is not equal to pad_token_id
+            non_pad_mask = (input_ids != self.config.pad_token_id).to(logits.device, torch.int32)
+            token_indices = torch.arange(input_ids.shape[-1], device=logits.device)
+            last_non_pad_token = (token_indices * non_pad_mask).max(-1).values
         else:
-            if input_ids is not None:
-                # To handle both left- and right- padding, we take the rightmost token that is not equal to pad_token_id
-                non_pad_mask = (input_ids != self.config.pad_token_id).to(logits.device, torch.int32)
-                token_indices = torch.arange(input_ids.shape[-1], device=logits.device)
-                last_non_pad_token = (token_indices * non_pad_mask).max(-1).values
-            else:
-                last_non_pad_token = -1
-                logger.warning_once(
-                    f"{self.__class__.__name__} will not detect padding tokens in `inputs_embeds`. Results may be "
-                    "unexpected if using padding tokens in conjunction with `inputs_embeds.`"
-                )
+            last_non_pad_token = -1
+            logger.warning_once(
+                f"{self.__class__.__name__} will not detect padding tokens in `inputs_embeds`. Results may be "
+                "unexpected if using padding tokens in conjunction with `inputs_embeds.`"
+            )
 
         pooled_logits = logits[torch.arange(batch_size, device=logits.device), last_non_pad_token]
 

diff --git a/src/transformers/models/diffllama/modeling_diffllama.py b/src/transformers/models/diffllama/modeling_diffllama.py
@@ -1217,18 +1217,17 @@ def forward(
             raise ValueError("Cannot handle batch sizes > 1 if no padding token is defined.")
         if self.config.pad_token_id is None:
             last_non_pad_token = -1
+        elif input_ids is not None:
+            # To handle both left- and right- padding, we take the rightmost token that is not equal to pad_token_id
+            non_pad_mask = (input_ids != self.config.pad_token_id).to(logits.device, torch.int32)
+            token_indices = torch.arange(input_ids.shape[-1], device=logits.device)
+            last_non_pad_token = (token_indices * non_pad_mask).max(-1).values
         else:
-            if input_ids is not None:
-                # To handle both left- and right- padding, we take the rightmost token that is not equal to pad_token_id
-                non_pad_mask = (input_ids != self.config.pad_token_id).to(logits.device, torch.int32)
-                token_indices = torch.arange(input_ids.shape[-1], device=logits.device)
-                last_non_pad_token = (token_indices * non_pad_mask).max(-1).values
-            else:
-                last_non_pad_token = -1
-                logger.warning_once(
-                    f"{self.__class__.__name__} will not detect padding tokens in `inputs_embeds`. Results may be "
-                    "unexpected if using padding tokens in conjunction with `inputs_embeds.`"
-                )
+            last_non_pad_token = -1
+            logger.warning_once(
+                f"{self.__class__.__name__} will not detect padding tokens in `inputs_embeds`. Results may be "
+                "unexpected if using padding tokens in conjunction with `inputs_embeds.`"
+            )
 
         pooled_logits = logits[torch.arange(batch_size, device=logits.device), last_non_pad_token]
 

diff --git a/src/transformers/models/falcon/modeling_falcon.py b/src/transformers/models/falcon/modeling_falcon.py
@@ -1360,18 +1360,17 @@ def forward(
             raise ValueError("Cannot handle batch sizes > 1 if no padding token is defined.")
         if self.config.pad_token_id is None:
             last_non_pad_token = -1
+        elif input_ids is not None:
+            # To handle both left- and right- padding, we take the rightmost token that is not equal to pad_token_id
+            non_pad_mask = (input_ids != self.config.pad_token_id).to(logits.device, torch.int32)
+            token_indices = torch.arange(input_ids.shape[-1], device=logits.device)
+            last_non_pad_token = (token_indices * non_pad_mask).max(-1).values
         else:
-            if input_ids is not None:
-                # To handle both left- and right- padding, we take the rightmost token that is not equal to pad_token_id
-                non_pad_mask = (input_ids != self.config.pad_token_id).to(logits.device, torch.int32)
-                token_indices = torch.arange(input_ids.shape[-1], device=logits.device)
-                last_non_pad_token = (token_indices * non_pad_mask).max(-1).values
-            else:
-                last_non_pad_token = -1
-                logger.warning_once(
-                    f"{self.__class__.__name__} will not detect padding tokens in `inputs_embeds`. Results may be "
-                    "unexpected if using padding tokens in conjunction with `inputs_embeds.`"
-                )
+            last_non_pad_token = -1
+            logger.warning_once(
+                f"{self.__class__.__name__} will not detect padding tokens in `inputs_embeds`. Results may be "
+                "unexpected if using padding tokens in conjunction with `inputs_embeds.`"
+            )
 
         pooled_logits = logits[torch.arange(batch_size, device=logits.device), last_non_pad_token]
 

diff --git a/src/transformers/models/gemma/modeling_gemma.py b/src/transformers/models/gemma/modeling_gemma.py
@@ -949,18 +949,17 @@ def forward(
             raise ValueError("Cannot handle batch sizes > 1 if no padding token is defined.")
         if self.config.pad_token_id is None:
             last_non_pad_token = -1
+        elif input_ids is not None:
+            # To handle both left- and right- padding, we take the rightmost token that is not equal to pad_token_id
+            non_pad_mask = (input_ids != self.config.pad_token_id).to(logits.device, torch.int32)
+            token_indices = torch.arange(input_ids.shape[-1], device=logits.device)
+            last_non_pad_token = (token_indices * non_pad_mask).max(-1).values
         else:
-            if input_ids is not None:
-                # To handle both left- and right- padding, we take the rightmost token that is not equal to pad_token_id
-                non_pad_mask = (input_ids != self.config.pad_token_id).to(logits.device, torch.int32)
-                token_indices = torch.arange(input_ids.shape[-1], device=logits.device)
-                last_non_pad_token = (token_indices * non_pad_mask).max(-1).values
-            else:
-                last_non_pad_token = -1
-                logger.warning_once(
-                    f"{self.__class__.__name__} will not detect padding tokens in `inputs_embeds`. Results may be "
-                    "unexpected if using padding tokens in conjunction with `inputs_embeds.`"
-                )
+            last_non_pad_token = -1
+            logger.warning_once(
+                f"{self.__class__.__name__} will not detect padding tokens in `inputs_embeds`. Results may be "
+                "unexpected if using padding tokens in conjunction with `inputs_embeds.`"
+            )
 
         pooled_logits = logits[torch.arange(batch_size, device=logits.device), last_non_pad_token]
 

diff --git a/src/transformers/models/gemma2/modeling_gemma2.py b/src/transformers/models/gemma2/modeling_gemma2.py
@@ -1039,18 +1039,17 @@ def forward(
             raise ValueError("Cannot handle batch sizes > 1 if no padding token is defined.")
         if self.config.pad_token_id is None:
             last_non_pad_token = -1
+        elif input_ids is not None:
+            # To handle both left- and right- padding, we take the rightmost token that is not equal to pad_token_id
+            non_pad_mask = (input_ids != self.config.pad_token_id).to(logits.device, torch.int32)
+            token_indices = torch.arange(input_ids.shape[-1], device=logits.device)
+            last_non_pad_token = (token_indices * non_pad_mask).max(-1).values
         else:
-            if input_ids is not None:
-                # To handle both left- and right- padding, we take the rightmost token that is not equal to pad_token_id
-                non_pad_mask = (input_ids != self.config.pad_token_id).to(logits.device, torch.int32)
-                token_indices = torch.arange(input_ids.shape[-1], device=logits.device)
-                last_non_pad_token = (token_indices * non_pad_mask).max(-1).values
-            else:
-                last_non_pad_token = -1
-                logger.warning_once(
-                    f"{self.__class__.__name__} will not detect padding tokens in `inputs_embeds`. Results may be "
-                    "unexpected if using padding tokens in conjunction with `inputs_embeds.`"
-                )
+            last_non_pad_token = -1
+            logger.warning_once(
+                f"{self.__class__.__name__} will not detect padding tokens in `inputs_embeds`. Results may be "
+                "unexpected if using padding tokens in conjunction with `inputs_embeds.`"
+            )
 
         pooled_logits = logits[torch.arange(batch_size, device=logits.device), last_non_pad_token]
 

diff --git a/src/transformers/models/glm/modeling_glm.py b/src/transformers/models/glm/modeling_glm.py
@@ -959,18 +959,17 @@ def forward(
             raise ValueError("Cannot handle batch sizes > 1 if no padding token is defined.")
         if self.config.pad_token_id is None:
             last_non_pad_token = -1
+        elif input_ids is not None:
+            # To handle both left- and right- padding, we take the rightmost token that is not equal to pad_token_id
+            non_pad_mask = (input_ids != self.config.pad_token_id).to(logits.device, torch.int32)
+            token_indices = torch.arange(input_ids.shape[-1], device=logits.device)
+            last_non_pad_token = (token_indices * non_pad_mask).max(-1).values
         else:
-            if input_ids is not None:
-                # To handle both left- and right- padding, we take the rightmost token that is not equal to pad_token_id
-                non_pad_mask = (input_ids != self.config.pad_token_id).to(logits.device, torch.int32)
-                token_indices = torch.arange(input_ids.shape[-1], device=logits.device)
-                last_non_pad_token = (token_indices * non_pad_mask).max(-1).values
-            else:
-                last_non_pad_token = -1
-                logger.warning_once(
-                    f"{self.__class__.__name__} will not detect padding tokens in `inputs_embeds`. Results may be "
-                    "unexpected if using padding tokens in conjunction with `inputs_embeds.`"
-                )
+            last_non_pad_token = -1
+            logger.warning_once(
+                f"{self.__class__.__name__} will not detect padding tokens in `inputs_embeds`. Results may be "
+                "unexpected if using padding tokens in conjunction with `inputs_embeds.`"
+            )
 
         pooled_logits = logits[torch.arange(batch_size, device=logits.device), last_non_pad_token]
 

diff --git a/src/transformers/models/gpt2/modeling_gpt2.py b/src/transformers/models/gpt2/modeling_gpt2.py
@@ -1397,18 +1397,17 @@ def forward(
             raise ValueError("Cannot handle batch sizes > 1 if no padding token is defined.")
         if self.config.pad_token_id is None:
             last_non_pad_token = -1
+        elif input_ids is not None:
+            # To handle both left- and right- padding, we take the rightmost token that is not equal to pad_token_id
+            non_pad_mask = (input_ids != self.config.pad_token_id).to(logits.device, torch.int32)
+            token_indices = torch.arange(input_ids.shape[-1], device=logits.device)
+            last_non_pad_token = (token_indices * non_pad_mask).max(-1).values
         else:
-            if input_ids is not None:
-                # To handle both left- and right- padding, we take the rightmost token that is not equal to pad_token_id
-                non_pad_mask = (input_ids != self.config.pad_token_id).to(logits.device, torch.int32)
-                token_indices = torch.arange(input_ids.shape[-1], device=logits.device)
-                last_non_pad_token = (token_indices * non_pad_mask).max(-1).values
-            else:
-                last_non_pad_token = -1
-                logger.warning_once(
-                    f"{self.__class__.__name__} will not detect padding tokens in `inputs_embeds`. Results may be "
-                    "unexpected if using padding tokens in conjunction with `inputs_embeds.`"
-                )
+            last_non_pad_token = -1
+            logger.warning_once(
+                f"{self.__class__.__name__} will not detect padding tokens in `inputs_embeds`. Results may be "
+                "unexpected if using padding tokens in conjunction with `inputs_embeds.`"
+            )
 
         pooled_logits = logits[torch.arange(batch_size, device=logits.device), last_non_pad_token]
 

diff --git a/src/transformers/models/gpt_bigcode/modeling_gpt_bigcode.py b/src/transformers/models/gpt_bigcode/modeling_gpt_bigcode.py
@@ -1284,18 +1284,17 @@ def forward(
             raise ValueError("Cannot handle batch sizes > 1 if no padding token is defined.")
         if self.config.pad_token_id is None:
             last_non_pad_token = -1
+        elif input_ids is not None:
+            # To handle both left- and right- padding, we take the rightmost token that is not equal to pad_token_id
+            non_pad_mask = (input_ids != self.config.pad_token_id).to(logits.device, torch.int32)
+            token_indices = torch.arange(input_ids.shape[-1], device=logits.device)
+            last_non_pad_token = (token_indices * non_pad_mask).max(-1).values
         else:
-            if input_ids is not None:
-                # To handle both left- and right- padding, we take the rightmost token that is not equal to pad_token_id
-                non_pad_mask = (input_ids != self.config.pad_token_id).to(logits.device, torch.int32)
-                token_indices = torch.arange(input_ids.shape[-1], device=logits.device)
-                last_non_pad_token = (token_indices * non_pad_mask).max(-1).values
-            else:
-                last_non_pad_token = -1
-                logger.warning_once(
-                    f"{self.__class__.__name__} will not detect padding tokens in `inputs_embeds`. Results may be "
-                    "unexpected if using padding tokens in conjunction with `inputs_embeds.`"
-                )
+            last_non_pad_token = -1
+            logger.warning_once(
+                f"{self.__class__.__name__} will not detect padding tokens in `inputs_embeds`. Results may be "
+                "unexpected if using padding tokens in conjunction with `inputs_embeds.`"
+            )
 
         pooled_logits = logits[torch.arange(batch_size, device=logits.device), last_non_pad_token]
 

diff --git a/src/transformers/models/gpt_neo/modeling_gpt_neo.py b/src/transformers/models/gpt_neo/modeling_gpt_neo.py
@@ -1102,18 +1102,17 @@ def forward(
             raise ValueError("Cannot handle batch sizes > 1 if no padding token is defined.")
         if self.config.pad_token_id is None:
             last_non_pad_token = -1
+        elif input_ids is not None:
+            # To handle both left- and right- padding, we take the rightmost token that is not equal to pad_token_id
+            non_pad_mask = (input_ids != self.config.pad_token_id).to(logits.device, torch.int32)
+            token_indices = torch.arange(input_ids.shape[-1], device=logits.device)
+            last_non_pad_token = (token_indices * non_pad_mask).max(-1).values
         else:
-            if input_ids is not None:
-                # To handle both left- and right- padding, we take the rightmost token that is not equal to pad_token_id
-                non_pad_mask = (input_ids != self.config.pad_token_id).to(logits.device, torch.int32)
-                token_indices = torch.arange(input_ids.shape[-1], device=logits.device)
-                last_non_pad_token = (token_indices * non_pad_mask).max(-1).values
-            else:
-                last_non_pad_token = -1
-                logger.warning_once(
-                    f"{self.__class__.__name__} will not detect padding tokens in `inputs_embeds`. Results may be "
-                    "unexpected if using padding tokens in conjunction with `inputs_embeds.`"
-                )
+            last_non_pad_token = -1
+            logger.warning_once(
+                f"{self.__class__.__name__} will not detect padding tokens in `inputs_embeds`. Results may be "
+                "unexpected if using padding tokens in conjunction with `inputs_embeds.`"
+            )
 
         pooled_logits = logits[torch.arange(batch_size, device=logits.device), last_non_pad_token]
 

diff --git a/src/transformers/models/gpt_neox/modeling_gpt_neox.py b/src/transformers/models/gpt_neox/modeling_gpt_neox.py
@@ -1207,18 +1207,17 @@ def forward(
             raise ValueError("Cannot handle batch sizes > 1 if no padding token is defined.")
         if self.config.pad_token_id is None:
             last_non_pad_token = -1
+        elif input_ids is not None:
+            # To handle both left- and right- padding, we take the rightmost token that is not equal to pad_token_id
+            non_pad_mask = (input_ids != self.config.pad_token_id).to(logits.device, torch.int32)
+            token_indices = torch.arange(input_ids.shape[-1], device=logits.device)
+            last_non_pad_token = (token_indices * non_pad_mask).max(-1).values
         else:
-            if input_ids is not None:
-                # To handle both left- and right- padding, we take the rightmost token that is not equal to pad_token_id
-                non_pad_mask = (input_ids != self.config.pad_token_id).to(logits.device, torch.int32)
-                token_indices = torch.arange(input_ids.shape[-1], device=logits.device)
-                last_non_pad_token = (token_indices * non_pad_mask).max(-1).values
-            else:
-                last_non_pad_token = -1
-                logger.warning_once(
-                    f"{self.__class__.__name__} will not detect padding tokens in `inputs_embeds`. Results may be "
-                    "unexpected if using padding tokens in conjunction with `inputs_embeds.`"
-                )
+            last_non_pad_token = -1
+            logger.warning_once(
+                f"{self.__class__.__name__} will not detect padding tokens in `inputs_embeds`. Results may be "
+                "unexpected if using padding tokens in conjunction with `inputs_embeds.`"
+            )
 
         pooled_logits = logits[torch.arange(batch_size, device=logits.device), last_non_pad_token]