Fix OWLV2 annotator (#40)

* Added padding & truncation to the processing
* Format code
* fix: change labels dtype to int64
* format: black

---------

Co-authored-by: Jan Cuhel <[email protected]>
Co-authored-by: Nikita Sokovnin <[email protected]>
luxonis · Mar 2, 2024 · 342945e · 342945e
1 parent 9784695
commit 342945e
Showing 1 changed file with 8 additions and 2 deletions.
diff --git a/datadreamer/dataset_annotation/owlv2_annotator.py b/datadreamer/dataset_annotation/owlv2_annotator.py
@@ -86,7 +86,11 @@ def _generate_annotations(
         # resize the images to the model's input size
         images = [images[i].resize((960, 960)) for i in range(n)]
         inputs = self.processor(
-            text=batched_prompts, images=images, return_tensors="pt"
+            text=batched_prompts,
+            images=images,
+            return_tensors="pt",
+            padding="max_length",
+            truncation=True,
         ).to(self.device)
         with torch.no_grad():
             outputs = self.model(**inputs)
@@ -128,7 +132,9 @@ def _get_annotations(
             boxes[:, [0, 2]] = img_dim - boxes[:, [2, 0]]
 
         if synonym_dict is not None:
-            labels = torch.tensor([synonym_dict_rev[label.item()] for label in labels])
+            labels = torch.tensor(
+                [synonym_dict_rev[label.item()] for label in labels], dtype=torch.int64
+            )
 
         return boxes, scores, labels