diff --git a/vlmeval/vlm/llava/llava.py b/vlmeval/vlm/llava/llava.py
index 2099d02f..3f350b13 100644
--- a/vlmeval/vlm/llava/llava.py
+++ b/vlmeval/vlm/llava/llava.py
@@ -815,12 +815,12 @@ def generate_inner_image(self, message, dataset=None):
         {
             "role": "user",
             "content": [
-                {"type": "text", "text": content.split("\n", 1)[-1]},
+                {"type": "text", "text": content},
                 {"type": "image"},
             ],
         }
     ]
     prompt = self.processor.apply_chat_template(conversation, add_generation_prompt=True)
     inputs = self.processor(images=images, text=prompt, return_tensors="pt").to('cuda', torch.float16)
     output = self.model.generate(**inputs, max_new_tokens=512)