Merge branch 'main' into v1.1-release

huggingface · Jan 13, 2025 · a68b056 · a68b056
2 parents f650560 + b961717
commit a68b056
Show file tree

Hide file tree

Showing 10 changed files with 22 additions and 17 deletions.
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
@@ -18,7 +18,7 @@ jobs:
     name: Run unit tests
     strategy:
       matrix:
-        python-version: ['3.8', '3.9', '3.10', '3.11', '3.12']
+        python-version: ['3.9', '3.10', '3.11', '3.12']
         os: [ubuntu-latest, windows-latest]
         requirements: ['.[tests]', '.[compat_tests]']
       fail-fast: false

diff --git a/docs/source/en/conceptual_guides/setfit.mdx b/docs/source/en/conceptual_guides/setfit.mdx
@@ -11,7 +11,7 @@ The first phase has one primary goal: finetune a sentence transformer embedding
 
 However, models that are good at Semantic Textual Similarity (STS) are not necessarily immediately good at *our* classification task. For example, according to an embedding model, the sentence of 1) `"He biked to work."` will be much more similar to 2) `"He drove his car to work."` than to 3) `"Peter decided to take the bicycle to the beach party!"`. But if our classification task involves classifying texts into transportation modes, then we want our embedding model to place sentences 1 and 3 closely together, and 2 further away.
 
-To do so, we can finetune the chosen sentence transformer embedding model. The goal here is to nudge the model to use its pretrained knowledge in a different way that better aligns with our classification task, rather than making the completely forget what it has learned. 
+To do so, we can finetune the chosen sentence transformer embedding model. The goal here is to nudge the model to use its pretrained knowledge in a different way that better aligns with our classification task, rather than making it completely forget what it has learned. 
 
 For finetuning, SetFit uses **contrastive learning**. This training approach involves creating **positive and negative pairs** of sentences. A sentence pair will be positive if both of the sentences are of the same class, and negative otherwise. For example, in the case of binary "positive"-"negative" sentiment analysis, `("The movie was awesome", "I loved it")` is a positive pair, and `("The movie was awesome", "It was quite disappointing")` is a negative pair.
 
@@ -25,4 +25,4 @@ Once the sentence transformer embedding model has been finetuned for our task at
 
 Unlike with the first phase, training the classifier is done from scratch and using the labeled samples directly, rather than using pairs. By default, the classifier is a simple **logistic regression** classifier from scikit-learn. First, all training sentences are fed through the now-finetuned sentence transformer embedding model, and then the sentence embeddings and labels are used to fit the logistic regression classifier. The result is a strong and efficient classifier. 
 
-Using these two parts, SetFit models are efficient, performant and easy to train, even on CPU-only devices.
+Using these two parts, SetFit models are efficient, performant and easy to train, even on CPU-only devices.
diff --git a/docs/source/en/installation.mdx b/docs/source/en/installation.mdx
@@ -1,7 +1,7 @@
 
 # Installation
 
-Before you start, you'll need to setup your environment and install the appropriate packages. 🤗 SetFit is tested on **Python 3.8+**.
+Before you start, you'll need to setup your environment and install the appropriate packages. 🤗 SetFit is tested on **Python 3.9+**.
 
 ## pip
 

diff --git a/setup.py b/setup.py
@@ -15,7 +15,7 @@
     "sentence-transformers[train]>=3",
     "transformers>=4.41.0",
     "evaluate>=0.3.0",
-    "huggingface_hub>=0.23.0",
+    "huggingface_hub>=0.24.0",
     "scikit-learn",
     "packaging",
 ]
@@ -79,7 +79,6 @@ def combine_requirements(base_keys):
         "License :: OSI Approved :: Apache Software License",
         "Operating System :: OS Independent",
         "Programming Language :: Python :: 3",
-        "Programming Language :: Python :: 3.8",
         "Programming Language :: Python :: 3.9",
         "Programming Language :: Python :: 3.10",
         "Programming Language :: Python :: 3.11",

diff --git a/src/setfit/span/trainer.py b/src/setfit/span/trainer.py
@@ -291,8 +291,8 @@ def push_to_hub(self, repo_id: str, polarity_repo_id: Optional[str] = None, **kw
                 Configuration object to be saved alongside the model weights.
             commit_message (`str`, *optional*):
                 Message to commit while pushing.
-            private (`bool`, *optional*, defaults to `False`):
-                Whether the repository created should be private.
+            private (`bool`, *optional*):
+                Whether to make the repo private. If `None` (default), the repo will be public unless the organization's default is private. This value is ignored if the repo already exists.
             api_endpoint (`str`, *optional*):
                 The API endpoint to use when pushing the model to the hub.
             token (`str`, *optional*):

diff --git a/src/setfit/trainer.py b/src/setfit/trainer.py
@@ -4,12 +4,14 @@
 import evaluate
 import torch
 from datasets import Dataset, DatasetDict
+from packaging.version import parse as parse_version
 from sentence_transformers import SentenceTransformerTrainer, losses
 from sentence_transformers.losses.BatchHardTripletLoss import BatchHardTripletLossDistanceFunction
 from sentence_transformers.model_card import ModelCardCallback as STModelCardCallback
 from sentence_transformers.training_args import BatchSamplers
 from sklearn.preprocessing import LabelEncoder
 from torch import nn
+from transformers import __version__ as transformers_version
 from transformers.integrations import CodeCarbonCallback
 from transformers.trainer_callback import IntervalStrategy, TrainerCallback
 from transformers.trainer_utils import HPSearchBackend, default_compute_objective, number_of_arguments, set_seed
@@ -72,7 +74,11 @@ def overwritten_call_event(self, event, args, state, control, **kwargs):
                     model=self.setfit_model,
                     st_model=self.model,
                     st_args=args,
-                    tokenizer=self.tokenizer,
+                    tokenizer=(
+                        self.processing_class
+                        if parse_version(transformers_version) >= parse_version("4.46.0")
+                        else self.tokenizer
+                    ),
                     optimizer=self.optimizer,
                     lr_scheduler=self.lr_scheduler,
                     train_dataloader=self.train_dataloader,
@@ -156,9 +162,9 @@ def _set_logs_prefix(self, logs_prefix: str) -> None:
         """
         self.logs_prefix = logs_prefix
 
-    def log(self, logs: Dict[str, float]) -> None:
+    def log(self, logs: Dict[str, float], *args, **kwargs) -> None:
         logs = {f"{self.logs_prefix}_{k}" if k == "loss" else k: v for k, v in logs.items()}
-        return super().log(logs)
+        return super().log(logs, *args, **kwargs)
 
     def evaluate(
         self,
@@ -792,8 +798,8 @@ def push_to_hub(self, repo_id: str, **kwargs) -> str:
                 Configuration object to be saved alongside the model weights.
             commit_message (`str`, *optional*):
                 Message to commit while pushing.
-            private (`bool`, *optional*, defaults to `False`):
-                Whether the repository created should be private.
+            private (`bool`, *optional*):
+                Whether to make the repo private. If `None` (default), the repo will be public unless the organization's default is private. This value is ignored if the repo already exists.
             api_endpoint (`str`, *optional*):
                 The API endpoint to use when pushing the model to the hub.
             token (`str`, *optional*):

diff --git a/src/setfit/training_args.py b/src/setfit/training_args.py
@@ -243,8 +243,6 @@ def __post_init__(self) -> None:
 
         if self.report_to in (None, "all", ["all"]):
             self.report_to = get_available_reporting_integrations()
-        elif self.report_to in ("none", ["none"]):
-            self.report_to = []
         elif not isinstance(self.report_to, list):
             self.report_to = [self.report_to]
 

diff --git a/tests/span/test_model_card.py b/tests/span/test_model_card.py
@@ -26,6 +26,7 @@ def test_model_card(absa_dataset: Dataset, tmp_path: Path) -> None:
         logging_steps=1,
         max_steps=2,
         eval_strategy="steps",
+        save_strategy="no",
     )
     trainer = AbsaTrainer(
         model=model,

diff --git a/tests/test_model_card.py b/tests/test_model_card.py
@@ -36,6 +36,7 @@ def test_model_card(tmp_path: Path) -> None:
         logging_steps=1,
         max_steps=2,
         eval_strategy="steps",
+        save_strategy="no",
     )
     trainer = Trainer(
         model=model,

diff --git a/tests/test_training_args.py b/tests/test_training_args.py
@@ -64,9 +64,9 @@ def test_learning_rates(self):
 
     def test_report_to(self):
         args = TrainingArguments(report_to="none")
-        self.assertEqual(args.report_to, [])
+        self.assertEqual(args.report_to, ["none"])
         args = TrainingArguments(report_to=["none"])
-        self.assertEqual(args.report_to, [])
+        self.assertEqual(args.report_to, ["none"])
         args = TrainingArguments(report_to="hello")
         self.assertEqual(args.report_to, ["hello"])