From 5a1af71aeaec1fc9048771cb54f5806f4190fe38 Mon Sep 17 00:00:00 2001
From: Michael Benayoun <mickbenayoun@gmail.com>
Date: Fri, 10 Nov 2023 18:08:59 +0100
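Subject: [PATCH] Experiment with new mixin class

List TrainiumTestMixin before TestCase in the bases of the Trainium test
classes. unittest.TestCase defines its own setUpClass/tearDownClass hooks,
so with TestCase listed first those hooks came earlier in the MRO and
shadowed the class-level fixtures provided by the mixin.

Also:
- remove the setUpClass/tearDownClass overrides from
  ModelParallelizationTestCase, together with the imports they needed,
  and rely on the mixin instead
- in DistributedTrainingTestCase.tearDownClass, delete the custom cache
  repo name from the HF home when none was set before the tests ran
- import TestCase directly in tests/distributed/test_utils.py
- drop the `-> None` annotations from the setUpClass/tearDownClass
  signatures touched by this change
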
---
 tests/distributed/test_model_parallelization.py | 17 +----------------
 tests/distributed/test_training.py              |  7 +++++--
 tests/distributed/test_utils.py                 |  4 ++--
 tests/inference/inference_utils.py              |  4 ++--
 tests/test_cache_utils.py                       |  2 +-
 tests/test_examples.py                          |  2 +-
 tests/test_generate.py                          |  2 +-
 tests/test_runner.py                            |  4 ++--
 tests/test_trainers.py                          |  2 +-
 tests/utils.py                                  |  9 +++++----
 10 files changed, 21 insertions(+), 32 deletions(-)

diff --git a/tests/distributed/test_model_parallelization.py b/tests/distributed/test_model_parallelization.py
index 8c523bef9..efbd4257a 100644
--- a/tests/distributed/test_model_parallelization.py
+++ b/tests/distributed/test_model_parallelization.py
@@ -23,7 +23,6 @@
 
 import pytest
 import torch
-from huggingface_hub import HfFolder
 from parameterized import parameterized
 from transformers.models.auto.modeling_auto import (
     MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING_NAMES,
@@ -47,8 +46,6 @@
 
 from optimum.neuron.utils.cache_utils import (
     get_num_neuron_cores,
-    load_custom_cache_repo_name_from_hf_home,
-    set_custom_cache_repo_name_in_hf_home,
     set_neuron_cache_path,
 )
 from optimum.neuron.utils.import_utils import is_neuronx_available
@@ -151,25 +148,13 @@ def _generate_supported_model_class_names(
 
 
 @is_trainium_test
-class ModelParallelizationTestCase(TestCase, TrainiumTestMixin):
+class ModelParallelizationTestCase(TrainiumTestMixin, TestCase):
     OUTPUTS_TO_IGNORE = {
         # It might not match in the sequence parallel setting because of mistmatched shapes.
         # Since these outputs are not needed during training, we do not want to perform an expensive gather for them.
         "encoder_last_hidden_state",
     }
 
-    @classmethod
-    def setUpClass(cls) -> None:
-        cls._token = HfFolder.get_token()
-        cls._cache_repo = load_custom_cache_repo_name_from_hf_home()
-
-    @classmethod
-    def tearDownClass(cls) -> None:
-        if cls._token is not None:
-            HfFolder.save_token(cls._token)
-        if cls._cache_repo is not None:
-            set_custom_cache_repo_name_in_hf_home(cls._cache_repo)
-
     def _check_output(self, name: str, original_output, output, lazy_load: bool):
         assert type(original_output) is type(output)
         if isinstance(original_output, (tuple, list, set)):
diff --git a/tests/distributed/test_training.py b/tests/distributed/test_training.py
index fb35e4571..f0bfc7351 100644
--- a/tests/distributed/test_training.py
+++ b/tests/distributed/test_training.py
@@ -22,6 +22,7 @@
 from huggingface_hub import HfFolder
 
 from optimum.neuron.utils.cache_utils import (
+    delete_custom_cache_repo_name_from_hf_home,
     load_custom_cache_repo_name_from_hf_home,
     set_custom_cache_repo_name_in_hf_home,
 )
@@ -37,7 +38,7 @@ class DistributedTrainingTestCase(TestCase):
     CACHE_REPO_NAME = "optimum-internal-testing/optimum-neuron-cache-for-testing"
 
     @classmethod
-    def setUpClass(cls) -> None:
+    def setUpClass(cls):
         orig_token = HfFolder.get_token()
         orig_cache_repo = load_custom_cache_repo_name_from_hf_home()
         ci_token = os.environ.get("HF_TOKEN_OPTIMUM_NEURON_CI", None)
@@ -49,12 +50,14 @@ def setUpClass(cls) -> None:
         cls._env = dict(os.environ)
 
     @classmethod
-    def tearDownClass(cls) -> None:
+    def tearDownClass(cls):
         os.environ = cls._env
         if cls._token is not None:
             HfFolder.save_token(cls._token)
         if cls._cache_repo is not None:
             set_custom_cache_repo_name_in_hf_home(cls._cache_repo)
+        else:
+            delete_custom_cache_repo_name_from_hf_home()
 
     def test_tp_save_and_resume_from_checkpoint(self):
         num_cores = 8
diff --git a/tests/distributed/test_utils.py b/tests/distributed/test_utils.py
index 4057dabe7..1d450f202 100644
--- a/tests/distributed/test_utils.py
+++ b/tests/distributed/test_utils.py
@@ -15,10 +15,10 @@
 """Tests for distributed utility functions and classes."""
 
 import copy
-import unittest
 from pathlib import Path
 from tempfile import TemporaryDirectory
 from typing import Literal, Union
+from unittest import TestCase
 
 import torch
 from safetensors.torch import save_file
@@ -63,7 +63,7 @@ def test_load_tensor_for_weight():
 
 
 @is_trainium_test
-class ParallelUtilsTestCase(unittest.TestCase, TrainiumTestMixin):
+class ParallelUtilsTestCase(TrainiumTestMixin, TestCase):
     TP_GROUP = 0
     TP_SIZE = 8
     TP_RANK = 0
diff --git a/tests/inference/inference_utils.py b/tests/inference/inference_utils.py
index c1e63ff10..c7e59630b 100644
--- a/tests/inference/inference_utils.py
+++ b/tests/inference/inference_utils.py
@@ -59,7 +59,7 @@ class NeuronModelIntegrationTestMixin(unittest.TestCase):
     STATIC_INPUTS_SHAPES = {}
 
     @classmethod
-    def setUpClass(cls) -> None:
+    def setUpClass(cls):
         if os.environ.get("HF_TOKEN_OPTIMUM_NEURON_CI", None) is not None:
             token = os.environ.get("HF_TOKEN_OPTIMUM_NEURON_CI")
             HfFolder.save_token(token)
@@ -80,7 +80,7 @@ def setUpClass(cls) -> None:
         neuron_model.push_to_hub(model_dir, repository_id=cls.neuron_model_id, use_auth_token=cls._token)
 
     @classmethod
-    def tearDownClass(cls) -> None:
+    def tearDownClass(cls):
         if cls._token is not None:
             HfFolder.save_token(cls._token)
         if cls.local_model_path is not None:
diff --git a/tests/test_cache_utils.py b/tests/test_cache_utils.py
index 7110d2745..13cbc297c 100644
--- a/tests/test_cache_utils.py
+++ b/tests/test_cache_utils.py
@@ -62,7 +62,7 @@
 
 
 @is_trainium_test
-class NeuronUtilsTestCase(TestCase, TrainiumTestMixin):
+class NeuronUtilsTestCase(TrainiumTestMixin, TestCase):
     def tearDown(self):
         # Cleaning the Neuron compiler flags to avoid breaking other tests.
         os.environ["NEURON_CC_FLAGS"] = ""
diff --git a/tests/test_examples.py b/tests/test_examples.py
index cc8119c35..943fe0276 100644
--- a/tests/test_examples.py
+++ b/tests/test_examples.py
@@ -430,7 +430,7 @@ def test(self):
         return test
 
 
-class ExampleTesterBase(TestCase, TrainiumTestMixin):
+class ExampleTesterBase(TrainiumTestMixin, TestCase):
     """
     Base example tester class.
     """
diff --git a/tests/test_generate.py b/tests/test_generate.py
index 6afcdc46f..0dd5c7b02 100644
--- a/tests/test_generate.py
+++ b/tests/test_generate.py
@@ -87,7 +87,7 @@ def _test_generative_decoding(
 ]
 
 
-class GenerateTestCase(TestCase, TrainiumTestMixin):
+class GenerateTestCase(TrainiumTestMixin, TestCase):
     @pytest.mark.skip("Remove once generate fix (#262) has been merged.")
     @is_trainium_test
     @parameterized.expand(GREEDY_TESTDATA)
diff --git a/tests/test_runner.py b/tests/test_runner.py
index 9ff52e59d..6c405e686 100644
--- a/tests/test_runner.py
+++ b/tests/test_runner.py
@@ -52,7 +52,7 @@ class TestExampleRunner(TestCase):
     CACHE_REPO_NAME = "optimum-internal-testing/optimum-neuron-cache-for-testing"
 
     @classmethod
-    def setUpClass(cls) -> None:
+    def setUpClass(cls):
         cls._token = HfFolder.get_token()
         cls._cache_repo = load_custom_cache_repo_name_from_hf_home()
         cls._env = dict(os.environ)
@@ -64,7 +64,7 @@ def setUpClass(cls) -> None:
             raise RuntimeError("Please specify the token via the HF_TOKEN_OPTIMUM_NEURON_CI environment variable.")
 
     @classmethod
-    def tearDownClass(cls) -> None:
+    def tearDownClass(cls):
         os.environ = cls._env
         if cls._token is not None:
             HfFolder.save_token(cls._token)
diff --git a/tests/test_trainers.py b/tests/test_trainers.py
index 93a122314..09a5e1671 100644
--- a/tests/test_trainers.py
+++ b/tests/test_trainers.py
@@ -315,7 +315,7 @@ def test_train_and_eval_multiple_workers(self):
 
 
 @is_trainium_test
-class NeuronTrainerTestCase(TestCase, TrainiumTestMixin):
+class NeuronTrainerTestCase(TrainiumTestMixin, TestCase):
     def _test_training_with_fsdp_mode(self, fsdp_mode: str):
         model_name = "prajjwal1/bert-tiny"
         task_name = "sst2"
diff --git a/tests/utils.py b/tests/utils.py
index b9b606984..7b9e17be2 100644
--- a/tests/utils.py
+++ b/tests/utils.py
@@ -130,13 +130,13 @@ def create_tiny_pretrained_model(
 
 class TrainiumTestMixin:
     @classmethod
-    def setUpClass(cls) -> None:
+    def setUpClass(cls):
         cls._token = HfFolder.get_token()
         cls._cache_repo = load_custom_cache_repo_name_from_hf_home()
         cls._env = dict(os.environ)
 
     @classmethod
-    def tearDownClass(cls) -> None:
+    def tearDownClass(cls):
         os.environ = cls._env
         if cls._token is not None:
             HfFolder.save_token(cls._token)
@@ -145,6 +145,7 @@ def tearDownClass(cls) -> None:
         else:
             delete_custom_cache_repo_name_from_hf_home()
 
+
 class StagingTestMixin:
     CUSTOM_CACHE_REPO_NAME = "optimum-neuron-cache-testing"
     CUSTOM_CACHE_REPO = f"{USER}/{CUSTOM_CACHE_REPO_NAME}"
@@ -160,7 +161,7 @@ def set_hf_hub_token(cls, token: str) -> str:
         return orig_token
 
     @classmethod
-    def setUpClass(cls) -> None:
+    def setUpClass(cls):
         cls._staging_token = TOKEN
         cls._token = cls.set_hf_hub_token(TOKEN)
         cls._custom_cache_repo_name = load_custom_cache_repo_name_from_hf_home()
@@ -178,7 +179,7 @@ def setUpClass(cls) -> None:
         cls.visited_num_linears = set()
 
     @classmethod
-    def tearDownClass(cls) -> None:
+    def tearDownClass(cls):
         delete_repo(repo_id=cls.CUSTOM_CACHE_REPO, repo_type="model")
         delete_repo(repo_id=cls.CUSTOM_PRIVATE_CACHE_REPO, repo_type="model")
         if cls._token: