brycedrennan · brycedrennan · Dec 15, 2023 · Dec 5, 2023 · Dec 13, 2023
diff --git a/imaginairy/cli/imagine.py b/imaginairy/cli/imagine.py
@@ -59,6 +59,7 @@
             "inpaint",
             "details",
             "colorize",
+            "qrcode",
         ]
     ),
     help="how the control image is used as signal",

diff --git a/imaginairy/config.py b/imaginairy/config.py
@@ -276,6 +276,13 @@ class ControlConfig:
         config_path="configs/control-net-v15.yaml",
         weights_location="https://huggingface.co/ioclab/control_v1p_sd15_brightness/resolve/8509361eb1ba89c03839040ed8c75e5f11bbd9c5/diffusion_pytorch_model.safetensors",
     ),
+    ControlConfig(
+        name="qrcode",
+        control_type="qrcode",
+        config_path="configs/control-net-v15.yaml",
+        weights_location="https://huggingface.co/monster-labs/control_v1p_sd15_qrcode_monster/resolve/4a946e610f670c4cd6cf46b8641fca190e4f56c4/diffusion_pytorch_model.safetensors",
+        aliases=["qrcode"],
+    ),
 ]
 
 CONTROL_CONFIG_SHORTCUTS: dict[str, ControlConfig] = {}

diff --git a/imaginairy/img_processors/control_modes.py b/imaginairy/img_processors/control_modes.py
@@ -239,6 +239,64 @@ def noop(img: "Tensor") -> "Tensor":
 
 FunctionType = Union["Callable[[Tensor, Tensor], Tensor]", "Callable[[Tensor], Tensor]"]
 
+
+def adaptive_threshold_binarize(img: "Tensor") -> "Tensor":
+    """
+    Binarize an image batch with OpenCV adaptive (Gaussian) thresholding.
+
+    Expects a 4d tensor with 3 channels in dim 1 and values in [-1, 1].
+    The output tensor will have values between 0 and 1. Input whose three
+    channels already match is only rescaled to [0, 1], not thresholded;
+    otherwise every value is 0 or 1 and the result is on `get_device()`.
+
+    Raises ValueError if the rank, channel count or value range is wrong.
+    """
+    import cv2
+    import numpy as np
+    import torch
+
+    from imaginairy.utils import get_device
+
+    if img.dim() != 4:
+        raise ValueError("Input should be a 4d tensor")
+    if img.size(1) != 3:
+        raise ValueError("Input should have 3 channels")
+
+    if not torch.all((img >= -1) & (img <= 1)):
+        raise ValueError("All tensor values must be between -1 and 1")
+
+    normalized = (img + 1) / 2
+
+    # already grayscale: hand back the rescaled input without thresholding
+    if torch.allclose(
+        normalized[:, 0, :, :], normalized[:, 1, :, :]
+    ) and torch.allclose(normalized[:, 1, :, :], normalized[:, 2, :, :]):
+        return normalized
+
+    # NOTE(review): assumes to_grayscale() returns values in [0, 1] - confirm
+    grayscale = to_grayscale(img)[:, 0:1, :, :]
+
+    # Tensor.numpy() raises for tensors that are not on the CPU, so copy to
+    # host memory first; detach() covers inputs that still track gradients.
+    gray_u8 = (grayscale.squeeze(1).detach().cpu().numpy() * 255).astype(np.uint8)
+
+    block_size = 129  # neighbourhood size in pixels; OpenCV requires it odd
+    offset = 2  # constant subtracted from the weighted neighbourhood mean
+    binary_np = np.empty(gray_u8.shape, dtype=np.float32)
+    for i, plane in enumerate(gray_u8):
+        binary_np[i] = cv2.adaptiveThreshold(
+            plane,
+            255,
+            cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
+            cv2.THRESH_BINARY,
+            block_size,
+            offset,
+        )
+    binary = torch.from_numpy(binary_np / 255).unsqueeze(1).to(get_device()).float()
+
+    return binary.repeat(1, 3, 1, 1)
+
+
 CONTROL_MODES: Dict[str, FunctionType] = {
     "canny": create_canny_edges,
     "depth": create_depth_map,
@@ -252,4 +310,5 @@ def noop(img: "Tensor") -> "Tensor":
     "inpaint": inpaint_prep,
     # "details": noop,
     "colorize": to_grayscale,
+    "qrcode": adaptive_threshold_binarize,
 }
diff --git a/tests/conftest.py b/tests/conftest.py
@@ -208,7 +208,7 @@ def pytest_sessionfinish(session, exitstatus):
 
     # Write updated, sorted list of node IDs to file
     with open(cuda_test_tracker_filepath, "w", newline="") as file:
-        writer = csv.writer(file)
+        writer = csv.writer(file, lineterminator="\n")
         for node_id in sorted(updated_node_ids):
             writer.writerow([node_id])
 

diff --git a/tests/data/swirl.jpeg b/tests/data/swirl.jpeg
diff --git a/tests/expected_output/test_control_images[qrcode-adaptive_threshold_binarize]_.png b/tests/expected_output/test_control_images[qrcode-adaptive_threshold_binarize]_.png
diff --git a/tests/expected_output/test_controlnet[qrcode]_.png b/tests/expected_output/test_controlnet[qrcode]_.png
diff --git a/tests/test_api.py b/tests/test_api.py
@@ -325,6 +325,13 @@ def test_controlnet(filename_base_for_outputs, control_mode):
             mode=control_mode,
             image=mask_image,
         )
+    elif control_mode == "qrcode":
+        prompt_text = "a fruit salad"
+        swirl_img = LazyLoadingImage(filepath=f"{TESTS_FOLDER}/data/swirl.jpeg")
+        control_input = ControlInput(
+            mode=control_mode,
+            image=swirl_img,
+        )
 
     prompt = ImaginePrompt(
         prompt_text,