Skip to content

Commit

Permalink
Update colab notebook
Browse files Browse the repository at this point in the history
  • Loading branch information
dominikWin committed Jan 22, 2025
1 parent 79b013b commit 4c9c598
Showing 1 changed file with 148 additions and 61 deletions.
209 changes: 148 additions & 61 deletions misc/Colab_Vidformer.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -4,127 +4,204 @@
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "N1tb10v9WSZJ"
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "1gI19RpsKVwl",
"outputId": "3acd26c0-90d1-4947-cff3-9d163924255b"
},
"outputs": [],
"source": [
"!wget https://github.com/ixlab/vidformer/releases/download/v0.9.0/vidformer-cli-ubuntu22.04-amd64\n",
"!mv vidformer-cli-ubuntu22.04-amd64 /usr/local/bin/vidformer-cli\n",
"!chmod +x /usr/local/bin/vidformer-cli\n",
"!apt update && apt upgrade -y && apt install -y libopencv-dev libfdk-aac-dev\n",
"!pip3 install vidformer==0.9.0 --upgrade"
"!pip3 install vidformer supervision ultralytics"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "_M0bd0brLXw2"
"id": "ZcayluwkjWF2"
},
"source": [
"## [Vidformer](https://github.com/ixlab/vidformer) colab demo"
"# [Vidformer](https://github.com/ixlab/vidformer): Video Data Transformation\n",
"\n",
"Vidformer uses a `cv2`-compatibility layer allowing `import vidformer.cv2 as cv2` conversion:"
]
},
{
"cell_type": "markdown",
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "FYQvFyR7WwJ5"
"id": "dfvXyItHKbOS"
},
"source": []
"outputs": [],
"source": [
"import vidformer as vf\n",
"import vidformer.cv2 as cv2\n",
"import vidformer.supervision as vf_sv\n",
"import math\n",
"\n",
"# Use the api.vidformer.org guest account\n",
"# The guest account has few permissions (can't access other videos) and low limits\n",
"# To get around this:\n",
"# 1) Ask for a regular account\n",
"# 2) Use the locally-running YrdenServer instead\n",
"# 3) Self-host\n",
"server = vf.IgniServer(\"https://api.vidformer.org\", api_key=\"VF_GUEST\")\n",
"cv2.set_server(server)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "JyRdCYYPWsel"
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
},
"id": "579M7jwiKgu2",
"outputId": "3006cbd1-334e-4c65-c103-e92fb0ffcc4e"
},
"outputs": [],
"source": [
"import vidformer as vf\n",
"from fractions import Fraction\n",
"\n",
"server = vf.YrdenServer()\n",
"tos = vf.YrdenSource(server, \"tos_720p\", \"https://f.dominik.win/data/dve2/tos_720p.mp4\", stream=0)\n",
"\n",
"print(tos.fmt())\n",
"\n",
"domain = [Fraction(i, 24) for i in range(24 * 30)]\n",
"def render(t: Fraction, i: int):\n",
" clip_start_point = Fraction(5 * 60, 1) # start at 5 * 60 seconds\n",
" return tos[t + clip_start_point]\n",
"cap = cv2.VideoCapture(\n",
" \"https://vidformer-sample-media.nyc3.digitaloceanspaces.com/tos_720p.mp4\"\n",
")\n",
"assert cap.isOpened()\n",
"width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)\n",
"height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)\n",
"fps = cap.get(cv2.CAP_PROP_FPS)\n",
"out = cv2.VideoWriter(None, cv2.VideoWriter_fourcc(*\"mp4v\"), 24, [1280, 720])\n",
"\n",
"spec = vf.YrdenSpec(domain, render, tos.fmt())\n",
"spec.save(server, \"my-clip.mp4\")"
"# Play the video in the notebook cell (outside jupyter add method=\"link\")\n",
"cv2.vidplay(out)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "ZWgu_US1YC_C"
"id": "BWHooZspKnhw"
},
"outputs": [],
"source": [
"from IPython.display import Video\n",
"Video(\"my-clip.mp4\", embed=True)"
"radius = 100\n",
"center_x, center_y = 300, 300\n",
"speed = 2 * math.pi / 100\n",
"i = 0\n",
"while True:\n",
" ret, frame = cap.read()\n",
" if not ret:\n",
" break\n",
" angle = i * speed\n",
" text_x = int(center_x + radius * math.cos(angle))\n",
" text_y = int(center_y + radius * math.sin(angle))\n",
" cv2.putText(\n",
" frame,\n",
" \"Hello, world!\",\n",
" (text_x, text_y),\n",
" cv2.FONT_HERSHEY_SIMPLEX,\n",
" 1,\n",
" (0, 255, 0),\n",
" 2,\n",
" cv2.LINE_AA,\n",
" )\n",
" out.write(frame)\n",
" i += 1\n",
"out.release()"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "xfi0dYKSMsp-"
"id": "uLvbF0ryjdZx"
},
"source": [
"# Or you can use the cv2 frontend; just change `import cv2` to `import vidformer.cv2 as cv2`:"
"## Vidformer for CV Annotation with supervision"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "LiNdbdD-MrMf"
"colab": {
"base_uri": "https://localhost:8080/",
"height": 382
},
"id": "51o-2E-2lnoo",
"outputId": "71738d9b-93f1-463a-f56c-543d4fed8dbb"
},
"outputs": [],
"source": [
"import cv2\n",
"# import vidformer.cv2 as cv2 # Use me instead!\n",
"# # Run Yolov8m on the video\n",
"# import cv2 as ocv_cv2\n",
"# import supervision as sv\n",
"# from ultralytics import YOLO\n",
"\n",
"video_url = \"https://f.dominik.win/data/dve2/tos_720p.mp4\"\n",
"cap = cv2.VideoCapture(video_url)\n",
"assert cap.isOpened()\n",
"# model = YOLO(\"yolov8m.pt\")\n",
"\n",
"start_time = 5 * 60\n",
"clip_duration = 5\n",
"fps = cap.get(cv2.CAP_PROP_FPS)\n",
"frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))\n",
"frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))\n",
"# ocv_cap = ocv_cv2.VideoCapture(\n",
"# \"https://vidformer-sample-media.nyc3.digitaloceanspaces.com/tos_720p.mp4\"\n",
"# )\n",
"# assert ocv_cap.isOpened()\n",
"\n",
"start_frame = int(start_time * fps)\n",
"total_frames = int(clip_duration * fps)\n",
"# detections = []\n",
"# while True:\n",
"# ret, frame = ocv_cap.read()\n",
"# if not ret:\n",
"# break\n",
"# detections.append(sv.Detections.from_ultralytics(model(frame)[0]))\n",
"\n",
"cap.set(cv2.CAP_PROP_POS_FRAMES, start_frame)\n",
"\n",
"fourcc = cv2.VideoWriter_fourcc(*'mp4v')\n",
"out = cv2.VideoWriter('output_clip.mp4', fourcc, fps, (frame_width, frame_height))\n",
"# Load pre-computed detections\n",
"import pickle\n",
"import urllib\n",
"\n",
"for _ in range(total_frames):\n",
" ret, frame = cap.read()\n",
" if not ret:\n",
" print(\"Error: Could not read frame.\")\n",
" break\n",
" out.write(frame)\n",
"with urllib.request.urlopen(\n",
" \"https://vidformer-sample-media.nyc3.cdn.digitaloceanspaces.com/tos_720p-yolov8l-detections.pkl\"\n",
") as response:\n",
" detections = pickle.load(response)\n",
"\n",
"cap.release()\n",
"out.release()\n",
"print(\"Clip saved as 'output_clip.mp4'.\")"
"out = cv2.VideoWriter(None, cv2.VideoWriter_fourcc(*\"mp4v\"), 24, [1280, 720])\n",
"cv2.vidplay(out)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "uJaEbrHPLS2w"
"id": "g93941WT6W83"
},
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "Il7LiPRJ6KHg"
},
"outputs": [],
"source": [
"Want to go further? See https://ixlab.github.io/vidformer/getting-started.html"
"cap.set(cv2.CAP_PROP_POS_FRAMES, 0)\n",
"\n",
"box_anot = vf_sv.BoxAnnotator()\n",
"label_anot = vf_sv.LabelAnnotator()\n",
"i = 0\n",
"while True:\n",
" ret, frame = cap.read()\n",
" if not ret:\n",
" break\n",
"\n",
" det = detections[i]\n",
" det = det[det.confidence > 0.5]\n",
"\n",
" labels = [\n",
" f\"{class_name} {confidence:.2f}\"\n",
" for class_name, confidence in zip(det[\"class_name\"], det.confidence)\n",
" ]\n",
" frame = box_anot.annotate(frame.copy(), det)\n",
" frame = label_anot.annotate(frame.copy(), det, labels)\n",
"\n",
" out.write(frame)\n",
" i += 1\n",
"out.release()"
]
}
],
Expand All @@ -133,13 +210,23 @@
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"name": "python"
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.2"
}
},
"nbformat": 4,
"nbformat_minor": 0
"nbformat_minor": 4
}

0 comments on commit 4c9c598

Please sign in to comment.