Fix Aria CI and testing #35674

Status: Open. Wants to merge 1 commit into base: main. Changes shown from all commits.
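The change is uniform: each AriaForConditionalGeneration.from_pretrained call touched by the diff gains a pinned revision, so the slow integration tests keep loading a known checkpoint state even if the rhymes-ai/Aria Hub repository is updated. A minimal sketch of the pattern (revision is the standard from_pretrained argument and accepts a branch, tag, or commit hash; the hash below is the one pinned throughout this diff):

from transformers import AriaForConditionalGeneration, AutoProcessor

model_id = "rhymes-ai/Aria"
# Commit hash pinned by this PR; a fixed revision keeps the integration
# tests reproducible even when the Hub repository gains new commits.
revision = "6583f58908d092e52f348069485e64fef4867730"

# load_in_4bit needs bitsandbytes; the affected tests guard this with
# @require_bitsandbytes, mirroring the diff below.
model = AriaForConditionalGeneration.from_pretrained(
    model_id, revision=revision, load_in_4bit=True
)
processor = AutoProcessor.from_pretrained(model_id)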
56 changes: 43 additions & 13 deletions tests/models/aria/test_modeling_aria.py
@@ -318,7 +318,9 @@ def tearDown(self):
     @require_bitsandbytes
     def test_small_model_integration_test(self):
         # Let's make sure we test the preprocessing to replace what is used
-        model = AriaForConditionalGeneration.from_pretrained("rhymes-ai/Aria", load_in_4bit=True)
+        model = AriaForConditionalGeneration.from_pretrained(
+            "rhymes-ai/Aria", revision="6583f58908d092e52f348069485e64fef4867730", load_in_4bit=True
+        )
 
         prompt = "<image>\nUSER: What are the things I should be cautious about when I visit this place?\nASSISTANT:"
         image_file = "https://aria-vl.github.io/static/images/view.jpg"
@@ -342,9 +344,11 @@ def test_small_model_integration_test_llama_single(self):
         # Let's make sure we test the preprocessing to replace what is used
         model_id = "rhymes-ai/Aria"
 
-        model = AriaForConditionalGeneration.from_pretrained(model_id, load_in_4bit=True)
+        model = AriaForConditionalGeneration.from_pretrained(
+            model_id, revision="6583f58908d092e52f348069485e64fef4867730", load_in_4bit=True
+        )
         processor = AutoProcessor.from_pretrained(model_id)
 
+        breakpoint()
Collaborator
Suggested change (removes the stray debugging call):
-        breakpoint()
prompt = "USER: <image>\nWhat are the things I should be cautious about when I visit this place? ASSISTANT:"
image_file = "https://aria-vl.github.io/static/images/view.jpg"
raw_image = Image.open(requests.get(image_file, stream=True).raw)
@@ -364,7 +368,9 @@ def test_small_model_integration_test_llama_batched(self):
         # Let's make sure we test the preprocessing to replace what is used
         model_id = "rhymes-ai/Aria"
 
-        model = AriaForConditionalGeneration.from_pretrained(model_id, load_in_4bit=True)
+        model = AriaForConditionalGeneration.from_pretrained(
+            model_id, revision="6583f58908d092e52f348069485e64fef4867730", load_in_4bit=True
+        )
         processor = AutoProcessor.from_pretrained(model_id)
 
         prompts = [
@@ -389,7 +395,9 @@ def test_small_model_integration_test_llama_batched(self):
     @require_bitsandbytes
     def test_small_model_integration_test_batch(self):
         # Let's make sure we test the preprocessing to replace what is used
-        model = AriaForConditionalGeneration.from_pretrained("rhymes-ai/Aria", load_in_4bit=True)
+        model = AriaForConditionalGeneration.from_pretrained(
+            "rhymes-ai/Aria", revision="6583f58908d092e52f348069485e64fef4867730", load_in_4bit=True
+        )
         # The first batch is longer in terms of text, but only has 1 image. The second batch will be padded in text, but the first will be padded because images take more space!
         prompts = [
             "USER: <image>\nWhat are the things I should be cautious about when I visit this place? What should I bring with me?\nASSISTANT:",
@@ -418,7 +426,12 @@ def test_small_model_integration_test_llama_batched_regression(self):
         model_id = "rhymes-ai/Aria"
 
         # Multi-image & multi-prompt (e.g. 3 images and 2 prompts now fails with SDPA, this tests if "eager" works as before)
-        model = AriaForConditionalGeneration.from_pretrained(model_id, load_in_4bit=True, attn_implementation="eager")
+        model = AriaForConditionalGeneration.from_pretrained(
+            model_id,
+            revision="6583f58908d092e52f348069485e64fef4867730",
+            load_in_4bit=True,
+            attn_implementation="eager",
+        )
         processor = AutoProcessor.from_pretrained(model_id, pad_token="<pad>")
 
         prompts = [
@@ -443,7 +456,9 @@ def test_small_model_integration_test_llama_batched_regression(self):
     @require_torch
     @require_vision
     def test_batched_generation(self):
-        model = AriaForConditionalGeneration.from_pretrained("rhymes-ai/Aria", load_in_4bit=True)
+        model = AriaForConditionalGeneration.from_pretrained(
+            "rhymes-ai/Aria", revision="6583f58908d092e52f348069485e64fef4867730", load_in_4bit=True
+        )
 
         processor = AutoProcessor.from_pretrained("rhymes-ai/Aria")
 
@@ -481,7 +496,9 @@ def test_aria_index_error_bug(self):
         # Please refer to that PR, or specifically https://github.com/huggingface/transformers/pull/28032#issuecomment-1860650043 for
         # more details
         model_id = "rhymes-ai/Aria"
-        model = AriaForConditionalGeneration.from_pretrained(model_id, load_in_4bit=True)
+        model = AriaForConditionalGeneration.from_pretrained(
+            model_id, revision="6583f58908d092e52f348069485e64fef4867730", load_in_4bit=True
+        )
 
         processor = AutoProcessor.from_pretrained(model_id)
 
@@ -501,7 +518,9 @@ def test_aria_index_error_bug(self):
     def test_aria_merge_inputs_error_bug(self):
         # This is a reproducer of https://github.com/huggingface/transformers/pull/28333 and makes sure it does not happen anymore
         model_id = "rhymes-ai/Aria"
-        model = AriaForConditionalGeneration.from_pretrained(model_id, load_in_4bit=True)
+        model = AriaForConditionalGeneration.from_pretrained(
+            model_id, revision="6583f58908d092e52f348069485e64fef4867730", load_in_4bit=True
+        )
 
         # Simulate some user inputs
         pixel_values = torch.randn(
@@ -556,7 +575,9 @@ def test_tokenizer_integration(self):
     @require_bitsandbytes
     def test_generation_no_images(self):
         model_id = "rhymes-ai/Aria"
-        model = AriaForConditionalGeneration.from_pretrained(model_id, load_in_4bit=True)
+        model = AriaForConditionalGeneration.from_pretrained(
+            model_id, revision="6583f58908d092e52f348069485e64fef4867730", load_in_4bit=True
+        )
         processor = AutoProcessor.from_pretrained(model_id)
 
         # Prepare inputs with no images
@@ -569,7 +590,12 @@
     @require_bitsandbytes
     def test_generation_siglip_backbone(self):
         model_id = "rhymes-ai/Aria"
-        model = AriaForConditionalGeneration.from_pretrained(model_id, torch_dtype="float16", device_map=torch_device)
+        model = AriaForConditionalGeneration.from_pretrained(
+            model_id,
+            revision="6583f58908d092e52f348069485e64fef4867730",
+            torch_dtype="float16",
+            device_map=torch_device,
+        )
         processor = AutoProcessor.from_pretrained(model_id)
 
         # check processing with expansion of inputs (w/o expansion should work with any backbone)
@@ -594,7 +620,9 @@ def test_generation_siglip_backbone(self):
     @require_bitsandbytes
     def test_expansion_in_processing(self):
         model_id = "rhymes-ai/Aria"
-        model = AriaForConditionalGeneration.from_pretrained(model_id, load_in_4bit=True)
+        model = AriaForConditionalGeneration.from_pretrained(
+            model_id, revision="6583f58908d092e52f348069485e64fef4867730", load_in_4bit=True
+        )
         processor = AutoProcessor.from_pretrained(model_id)
 
         prompt = "USER: <image>\nDescribe the image:\nASSISTANT:"
@@ -624,7 +652,9 @@ def test_expansion_in_processing(self):
     @require_bitsandbytes
     def test_pixtral(self):
         model_id = "rhymes-ai/Aria"
-        model = AriaForConditionalGeneration.from_pretrained(model_id)
+        model = AriaForConditionalGeneration.from_pretrained(
+            model_id, revision="6583f58908d092e52f348069485e64fef4867730"
+        )
         processor = AutoProcessor.from_pretrained(model_id)
 
         IMG_URLS = [
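Note: these are slow integration tests, so a local run is gated by the transformers slow-test flag, e.g. RUN_SLOW=1 python -m pytest tests/models/aria/test_modeling_aria.py -v; the 4-bit tests additionally need a GPU with bitsandbytes installed, per the @require_bitsandbytes decorators.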