Merge branch 'main' into generate_forward_compile_fix
gante authored Jan 28, 2025
2 parents 7777cf1 + b764c20 commit 16804ea
Showing 416 changed files with 13,134 additions and 2,068 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/self-comment-ci.yml
@@ -30,7 +30,7 @@ jobs:
runs-on: ubuntu-22.04
name: Get PR number
# For security: only allow team members to run
-if: ${{ github.event.issue.state == 'open' && contains(fromJSON('["ydshieh", "ArthurZucker", "zucchini-nlp", "qubvel", "molbap", "gante", "LysandreJik", "Cyrilvallez"]'), github.actor) && (startsWith(github.event.comment.body, 'run-slow') || startsWith(github.event.comment.body, 'run slow') || startsWith(github.event.comment.body, 'run_slow')) }}
+if: ${{ github.event.issue.state == 'open' && contains(fromJSON('["ydshieh", "ArthurZucker", "zucchini-nlp", "qubvel", "molbap", "gante", "LysandreJik", "Cyrilvallez", "Rocketknight1"]'), github.actor) && (startsWith(github.event.comment.body, 'run-slow') || startsWith(github.event.comment.body, 'run slow') || startsWith(github.event.comment.body, 'run_slow')) }}
outputs:
PR_NUMBER: ${{ steps.set_pr_number.outputs.PR_NUMBER }}
steps:
6 changes: 2 additions & 4 deletions docker/transformers-pytorch-amd-gpu/Dockerfile
@@ -1,4 +1,4 @@
-FROM rocm/dev-ubuntu-22.04:6.3
+FROM rocm/dev-ubuntu-22.04:6.2.4
LABEL maintainer="Hugging Face"

ARG DEBIAN_FRONTEND=noninteractive
@@ -8,11 +8,9 @@ RUN apt update && \
apt clean && \
rm -rf /var/lib/apt/lists/*

-RUN export PATH="${PATH:+${PATH}:}~/opt/rocm/bin"

RUN python3 -m pip install --no-cache-dir --upgrade pip numpy

-RUN python3 -m pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/rocm6.3/
+RUN python3 -m pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/rocm6.2

RUN python3 -m pip install --no-cache-dir --upgrade importlib-metadata setuptools ninja git+https://github.com/facebookresearch/detectron2.git pytesseract "itsdangerous<2.1.0"

12 changes: 6 additions & 6 deletions docker/transformers-pytorch-deepspeed-amd-gpu/Dockerfile
@@ -1,11 +1,11 @@
-FROM rocm/dev-ubuntu-22.04:5.6
+FROM rocm/dev-ubuntu-22.04:6.2.4
LABEL maintainer="Hugging Face"

ARG DEBIAN_FRONTEND=noninteractive
-ARG PYTORCH='2.1.1'
-ARG TORCH_VISION='0.16.1'
-ARG TORCH_AUDIO='2.1.1'
-ARG ROCM='5.6'
+ARG PYTORCH='2.5.1'
+ARG TORCH_VISION='0.20.0'
+ARG TORCH_AUDIO='2.5.0'
+ARG ROCM='6.2'

RUN apt update && \
apt install -y --no-install-recommends \
@@ -45,4 +45,4 @@ RUN cd transformers && python3 setup.py develop
RUN python3 -c "from deepspeed.launcher.runner import main"

# Remove nvml as it is not compatible with ROCm
-RUN python3 -m pip uninstall py3nvml pynvml -y
+RUN python3 -m pip uninstall py3nvml pynvml nvidia-ml-py apex -y
6 changes: 6 additions & 0 deletions docs/source/en/_toctree.yml
@@ -448,6 +448,8 @@
title: Granite
- local: model_doc/granitemoe
title: GraniteMoe
+- local: model_doc/granitevision
+title: GraniteVision
- local: model_doc/helium
title: Helium
- local: model_doc/herbert
@@ -624,6 +626,8 @@
title: YOSO
- local: model_doc/zamba
title: Zamba
+- local: model_doc/zamba2
+title: Zamba2
title: Text models
- isExpanded: false
sections:
@@ -928,6 +932,8 @@
title: Pix2Struct
- local: model_doc/pixtral
title: Pixtral
+- local: model_doc/qwen2_5_vl
+title: Qwen2.5-VL
- local: model_doc/qwen2_audio
title: Qwen2Audio
- local: model_doc/qwen2_vl
2 changes: 1 addition & 1 deletion docs/source/en/agents_advanced.md
@@ -162,7 +162,7 @@ agent.run(
improved_prompt could be "A bright blue space suit wearing rabbit, on the surface of the moon, under a bright orange sunset, with the Earth visible in the background"
Now that I have improved the prompt, I can use the image generator tool to generate an image based on this prompt.
->>> Agent is executing the code below:
+=== Agent is executing the code below:
image = image_generator(prompt="A bright blue space suit wearing rabbit, on the surface of the moon, under a bright orange sunset, with the Earth visible in the background")
final_answer(image)
```
195 changes: 101 additions & 94 deletions docs/source/en/chat_templating.md

Large diffs are not rendered by default.

28 changes: 17 additions & 11 deletions docs/source/en/generation_strategies.md
@@ -41,6 +41,13 @@ This guide describes:
* common decoding strategies and their main parameters
* saving and sharing custom generation configurations with your fine-tuned model on 🤗 Hub

+<Tip>
+
+`generate()` is a critical component of our [chat CLI](quicktour#chat-with-text-generation-models).
+You can apply the learnings of this guide there as well.
+
+</Tip>
+
## Default text generation configuration

A decoding strategy for a model is defined in its generation configuration. When using pre-trained models for inference
@@ -224,7 +231,7 @@ to check if the text is machine-generated (outputs `True` for machine-generated
>>> detector = WatermarkDetector(model_config=model.config, device="cpu", watermarking_config=watermarking_config)
>>> detection_out = detector(out, return_dict=True)
>>> detection_out.prediction
-array([True, True])
+array([ True, True])
```
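The hunk above shows only the detection half of the watermarking example; the generation half falls outside the rendered range. Below is a minimal end-to-end sketch using the same detector API; the checkpoint and the `bias`/`seeding_scheme` values are illustrative assumptions, not taken from the diff.

```python
from transformers import AutoModelForCausalLM, AutoTokenizer, WatermarkDetector, WatermarkingConfig

model_id = "openai-community/gpt2"  # illustrative checkpoint, not the one in the doc
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

inputs = tokenizer(["Alice and Bob are"], return_tensors="pt")

# Bias the logits of "green" tokens during generation (parameter values are illustrative).
watermarking_config = WatermarkingConfig(bias=2.5, seeding_scheme="selfhash")
out = model.generate(**inputs, watermarking_config=watermarking_config, do_sample=False, max_new_tokens=20)

# The detector re-derives the green-token partition from the same config and flags
# sequences whose green-token fraction is improbably high for unwatermarked text.
detector = WatermarkDetector(model_config=model.config, device="cpu", watermarking_config=watermarking_config)
detection_out = detector(out, return_dict=True)
print(detection_out.prediction)  # e.g. array([ True])
```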


@@ -262,7 +269,7 @@ dimension you can act upon, in addition to selecting a decoding strategy. Popular
>>> model = AutoModelForCausalLM.from_pretrained(checkpoint)
>>> outputs = model.generate(**inputs)
>>> tokenizer.batch_decode(outputs, skip_special_tokens=True)
-['I look forward to seeing you all again!\n\n\n\n\n\n\n\n\n\n\n']
+['I look forward to seeing you all again!\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n']
```

### Contrastive search
@@ -438,7 +445,7 @@ To enable assisted decoding, set the `assistant_model` argument with a model.
>>> assistant_model = AutoModelForCausalLM.from_pretrained(assistant_checkpoint)
>>> outputs = model.generate(**inputs, assistant_model=assistant_model)
>>> tokenizer.batch_decode(outputs, skip_special_tokens=True)
-['Alice and Bob are sitting in a bar. Alice is drinking a beer and Bob is drinking a']
+['Alice and Bob are sitting in a bar. Alice is drinking a beer and Bob is drinking a glass of wine.']
```
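The hunk starts mid-example, so the checkpoint and tokenizer setup is not visible here. A self-contained sketch of the same assisted-generation calls follows; both checkpoints are illustrative placeholders. Plain assisted decoding requires the main and assistant models to share a tokenizer; the universal variant further down lifts that restriction.

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

# Illustrative checkpoints: any causal LM pair sharing a tokenizer works.
checkpoint = "EleutherAI/pythia-1.4b-deduped"
assistant_checkpoint = "EleutherAI/pythia-160m-deduped"

tokenizer = AutoTokenizer.from_pretrained(checkpoint)
inputs = tokenizer("Alice and Bob", return_tensors="pt")

model = AutoModelForCausalLM.from_pretrained(checkpoint)
# The small assistant drafts candidate tokens; the main model verifies them in one forward pass.
assistant_model = AutoModelForCausalLM.from_pretrained(assistant_checkpoint)

outputs = model.generate(**inputs, assistant_model=assistant_model)
print(tokenizer.batch_decode(outputs, skip_special_tokens=True))
```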

<Tip>
@@ -454,7 +461,7 @@ If you're using a `pipeline` object, all you need to do is to pass the assistant
... model="meta-llama/Llama-3.1-8B",
... assistant_model="meta-llama/Llama-3.2-1B", # This extra line is all that's needed, also works with UAD
... torch_dtype=torch.bfloat16
->>> )
+... )
>>> pipe_output = pipe("Once upon a time, ", max_new_tokens=50, do_sample=False)
>>> pipe_output[0]["generated_text"]
'Once upon a time, 3D printing was a niche technology that was only'
Expand All @@ -481,7 +488,7 @@ just like in multinomial sampling. However, in assisted decoding, reducing the t
>>> assistant_model = AutoModelForCausalLM.from_pretrained(assistant_checkpoint)
>>> outputs = model.generate(**inputs, assistant_model=assistant_model, do_sample=True, temperature=0.5)
>>> tokenizer.batch_decode(outputs, skip_special_tokens=True)
-['Alice and Bob, a couple of friends of mine, who are both in the same office as']
+['Alice and Bob are two people who are very different, but they are both very good at what they do. Alice']
```

We recommend installing the `scikit-learn` library to enhance the candidate generation strategy and achieve additional speedup.
@@ -511,7 +518,7 @@ to ensure the new tokens include the correct prompt suffix.
>>> assistant_model = AutoModelForCausalLM.from_pretrained(assistant_checkpoint)
>>> outputs = model.generate(**inputs, assistant_model=assistant_model, tokenizer=tokenizer, assistant_tokenizer=assistant_tokenizer)
>>> tokenizer.batch_decode(outputs, skip_special_tokens=True)
-['Alice and Bob are sitting in a bar. Alice is drinking a beer and Bob is drinking a']
+['Alice and Bob are playing a game. Alice has a set of $n$ integers $a_1, a']
```
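As above, the setup lines fall outside the rendered hunk. A self-contained sketch of universal assisted decoding with mismatched tokenizers; both checkpoints are illustrative placeholders.

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

# Illustrative placeholders: the main and assistant models use different tokenizers.
checkpoint = "microsoft/Phi-3-mini-4k-instruct"
assistant_checkpoint = "double7/vicuna-68m"

tokenizer = AutoTokenizer.from_pretrained(checkpoint)
assistant_tokenizer = AutoTokenizer.from_pretrained(assistant_checkpoint)
inputs = tokenizer("Alice and Bob", return_tensors="pt")

model = AutoModelForCausalLM.from_pretrained(checkpoint)
assistant_model = AutoModelForCausalLM.from_pretrained(assistant_checkpoint)

# Passing both tokenizers switches generate() to universal assisted decoding: draft tokens are
# re-encoded between the two vocabularies, so the assistant no longer needs a matching tokenizer.
outputs = model.generate(
    **inputs,
    assistant_model=assistant_model,
    tokenizer=tokenizer,
    assistant_tokenizer=assistant_tokenizer,
)
print(tokenizer.batch_decode(outputs, skip_special_tokens=True))
```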

#### Prompt Lookup
@@ -540,7 +547,7 @@ If the model you're using was trained to do early exit, you can pass
>>> model = AutoModelForCausalLM.from_pretrained(checkpoint)
>>> outputs = model.generate(**inputs, assistant_early_exit=4, do_sample=False, max_new_tokens=20)
>>> tokenizer.batch_decode(outputs, skip_special_tokens=True)
-['Alice and Bob are sitting in a bar. Alice is drinking a beer and Bob is drinking a']
+['Alice and Bob are playing a game. Alice has a set of $n$ integers $a_1, a']
```

### DoLa Decoding
@@ -564,10 +571,9 @@ See the following examples for DoLa decoding with the 32-layer LLaMA-7B model.
>>> import torch
>>> from accelerate.test_utils.testing import get_backend

->>> tokenizer = AutoTokenizer.from_pretrained("huggyllama/llama-7b")
->>> model = AutoModelForCausalLM.from_pretrained("huggyllama/llama-7b", torch_dtype=torch.float16)
>>> device, _, _ = get_backend() # automatically detects the underlying device type (CUDA, CPU, XPU, MPS, etc.)
->>> model.to(device)
+>>> tokenizer = AutoTokenizer.from_pretrained("huggyllama/llama-7b")
+>>> model = AutoModelForCausalLM.from_pretrained("huggyllama/llama-7b", torch_dtype=torch.float16).to(device)
>>> set_seed(42)

>>> text = "On what date was the Declaration of Independence officially signed?"
@@ -586,7 +592,7 @@
# DoLa decoding with contrasting specific layers (layers 28 and 30)
>>> dola_custom_output = model.generate(**inputs, do_sample=False, max_new_tokens=50, dola_layers=[28,30], repetition_penalty=1.2)
>>> tokenizer.batch_decode(dola_custom_output[:, inputs.input_ids.shape[-1]:], skip_special_tokens=True)
-['\nIt was officially signed on 2 August 1776, when 56 members of the Second Continental Congress, representing the original 13 American colonies, voted unanimously for the resolution for independence. The 2']
+['\nIn 1891, when he was 54 years old, John Jacob Astor founded his empire. He opened a one-man business and spent the next 27 years working 10-hour days. When']
```

#### Understanding the `dola_layers` argument
2 changes: 2 additions & 0 deletions docs/source/en/index.md
@@ -285,6 +285,7 @@ Flax), PyTorch, and/or TensorFlow.
| [PVTv2](model_doc/pvt_v2) ||||
| [QDQBert](model_doc/qdqbert) ||||
| [Qwen2](model_doc/qwen2) ||||
+| [Qwen2_5_VL](model_doc/qwen2_5_vl) ||||
| [Qwen2Audio](model_doc/qwen2_audio) ||||
| [Qwen2MoE](model_doc/qwen2_moe) ||||
| [Qwen2VL](model_doc/qwen2_vl) ||||
@@ -384,6 +385,7 @@ Flax), PyTorch, and/or TensorFlow.
| [YOLOS](model_doc/yolos) ||||
| [YOSO](model_doc/yoso) ||||
| [Zamba](model_doc/zamba) ||||
+| [Zamba2](model_doc/zamba2) ||||
| [ZoeDepth](model_doc/zoedepth) ||||

<!-- End table-->
22 changes: 21 additions & 1 deletion docs/source/en/installation.md
@@ -32,12 +32,32 @@ Install 🤗 Transformers for whichever deep learning library you're working wit

You should install 🤗 Transformers in a [virtual environment](https://docs.python.org/3/library/venv.html). If you're unfamiliar with Python virtual environments, take a look at this [guide](https://packaging.python.org/guides/installing-using-pip-and-virtual-environments/). A virtual environment makes it easier to manage different projects, and avoid compatibility issues between dependencies.

-Now you're ready to install 🤗 Transformers with the following command:
+Create a virtual environment with [uv](https://docs.astral.sh/uv/) (refer to [Installation](https://docs.astral.sh/uv/getting-started/installation/) for installation instructions), a fast Rust-based Python package and project manager.
+
+```bash
+uv venv my-env
+source my-env/bin/activate
+```
+
+Now you're ready to install 🤗 Transformers with pip or uv.
+
+<hfoptions id="install">
+<hfoption id="uv">
+
+```bash
+uv pip install transformers
+```
+
+</hfoption>
+<hfoption id="pip">
+
```bash
pip install transformers
```

+</hfoption>
+</hfoptions>

For GPU acceleration, install the appropriate CUDA drivers for [PyTorch](https://pytorch.org/get-started/locally) and [TensorFlow](https://www.tensorflow.org/install/pip).

Run the command below to check if your system detects an NVIDIA GPU.