diff --git a/api/Dockerfile b/api/Dockerfile index 0b09593613a..c3fc531d8f8 100644 --- a/api/Dockerfile +++ b/api/Dockerfile @@ -59,8 +59,6 @@ ENV PATH="/.venv/bin:$PATH" WORKDIR /api -COPY api/utils/fonts/SourceSansPro-Bold.ttf /usr/share/fonts/truetype/SourceSansPro-Bold.ttf - # Copy virtualenv from the builder image COPY --from=builder /.venv /.venv @@ -69,7 +67,6 @@ ARG AUDIOWAVEFORM_RELEASE=1.10.1 ARG AUDIOWAVEFORM_DEB=audiowaveform_${AUDIOWAVEFORM_RELEASE}-1-12_${BUILDARCH}.deb # - Install system packages needed for running Python dependencies -# - libexempi8: required for watermarking # - Create directory for dumping API logs # - apt --fix-broken install required to install missing dependencies for audiowaveform and audiowaveform itself # dpkg -i marks the dependencies for installation, apt-get installs them @@ -77,7 +74,6 @@ ARG AUDIOWAVEFORM_DEB=audiowaveform_${AUDIOWAVEFORM_RELEASE}-1-12_${BUILDARCH}.d RUN apt-get update \ && apt-get install -yqq --no-install-recommends \ curl \ - libexempi8 \ postgresql-client \ && curl -sLO https://github.com/bbc/audiowaveform/releases/download/${AUDIOWAVEFORM_RELEASE}/${AUDIOWAVEFORM_DEB} \ && (dpkg -i ${AUDIOWAVEFORM_DEB} || apt-get --fix-broken -y --no-install-recommends install) \ diff --git a/api/api/docs/image_docs.py b/api/api/docs/image_docs.py index b200120049c..7a01dc27fd3 100644 --- a/api/api/docs/image_docs.py +++ b/api/api/docs/image_docs.py @@ -141,11 +141,3 @@ }, eg=[image_oembed_curl], ) - -watermark = extend_schema( - deprecated=True, - responses={ - 401: AuthenticationFailed, - 404: NotFound, - }, -) diff --git a/api/api/serializers/image_serializers.py b/api/api/serializers/image_serializers.py index feb9a914f61..6b08058025c 100644 --- a/api/api/serializers/image_serializers.py +++ b/api/api/serializers/image_serializers.py @@ -181,14 +181,3 @@ def get_width(self, obj) -> int: def get_height(self, obj) -> int: return self.context.get("height", obj.height) - - -class WatermarkRequestSerializer(serializers.Serializer): - embed_metadata = serializers.BooleanField( - help_text="Whether to embed ccREL metadata via XMP.", default=True - ) - watermark = serializers.BooleanField( - help_text="Whether to draw a frame around the image with attribution" - " text at the bottom.", - default=True, - ) diff --git a/api/api/utils/ccrel.py b/api/api/utils/ccrel.py deleted file mode 100644 index e12dc58803f..00000000000 --- a/api/api/utils/ccrel.py +++ /dev/null @@ -1,78 +0,0 @@ -""" -Tools for embedding ccREL data into files using XMP. - -ccREL stands for Creative Commons Rights Expression Language. XMP stands for Extensible -Metadata Platform. - -This implementation is specifically for embedding ccREL inside of images, but it -could be extended to handle other types of content. - -For more information, see the [ccREL W3 standard](https://www.w3.org/Submission/ccREL/). -""" - -import io -import os -import uuid - -import libxmp -from libxmp.consts import XMP_NS_CC, XMP_NS_XMP, XMP_NS_XMP_Rights - - -def embed_xmp_bytes(image: io.BytesIO, work_properties): - """ - Embed ccREL metadata inside a file-like `io.BytesIO` object. - - For our purposes, we assume that the file is an image. - - :param image: A BytesIO representation of an image. - :param work_properties: A dictionary with keys 'license_url' and - 'attribution'. 'creator', and 'work_landing_page' are optional (but highly - recommended) - :return: An `io.BytesIO` object containing XMP metadata. - """ - - # libxmp only works with actual file locations on the disk. To work around - # this limitation, rather than embedding the metadata directly into the - # `io.BytesIO` object, we have to use a temporary file and then convert it - # back. - # https://github.com/python-xmp-toolkit/python-xmp-toolkit/issues/46 - filename = f"/tmp/{uuid.uuid4()}" - with open(filename, "w+b") as xmp_temp: - xmp_temp.write(image.getvalue()) - xmp_temp.flush() - xmpfile = libxmp.XMPFiles(file_path=xmp_temp.name, open_forupdate=True) - - # Set CC rights. - xmp = xmpfile.get_xmp() - xmp.register_namespace(XMP_NS_CC, "cc") - xmp.set_property(XMP_NS_CC, "license", work_properties["license_url"]) - if "creator" in work_properties: - if not xmp.does_property_exist(XMP_NS_CC, "attributionName"): - xmp.set_property( - XMP_NS_CC, "attributionName", work_properties["creator"] - ) - if "work_landing_page" in work_properties: - if not xmp.does_property_exist(XMP_NS_CC, "attributionURL"): - xmp.set_property( - XMP_NS_CC, "attributionURL", work_properties["work_landing_page"] - ) - xmp.register_namespace(XMP_NS_XMP, "xmp") - if "identifier" in work_properties: - if not xmp.does_property_exist(XMP_NS_XMP, "Identifier"): - xmp.set_property( - XMP_NS_XMP, "Identifier", work_properties["identifier"] - ) - # Set generic XMP rights. - xmp.register_namespace(XMP_NS_XMP_Rights, "xmpRights") - if not xmp.does_property_exist(XMP_NS_XMP_Rights, "XMP_NS_XMP_Rights"): - xmp.set_property_bool(XMP_NS_XMP_Rights, "Marked", True) - if not xmp.does_property_exist(XMP_NS_XMP_Rights, "UsageTerms"): - usage = work_properties["attribution"] - xmp.set_property(XMP_NS_XMP_Rights, "UsageTerms", usage) - xmpfile.put_xmp(xmp) - xmpfile.close_file() - - with open(filename, "r+b") as xmpfile: - file_with_xmp = io.BytesIO(xmpfile.read()) - os.remove(filename) - return file_with_xmp diff --git a/api/api/utils/watermark.py b/api/api/utils/watermark.py deleted file mode 100644 index fbad4fe35e5..00000000000 --- a/api/api/utils/watermark.py +++ /dev/null @@ -1,245 +0,0 @@ -import os -from enum import Flag, auto -from io import BytesIO -from textwrap import wrap - -from django.conf import settings -from rest_framework import status -from rest_framework.exceptions import APIException - -import requests -import structlog -from openverse_attribution.license import License -from PIL import Image, ImageDraw, ImageFont, UnidentifiedImageError - - -logger = structlog.get_logger(__name__) - - -BREAKPOINT_DIMENSION = 400 # 400px -MARGIN_RATIO = 0.04 # 4% -FONT_RATIO = 0.04 # 4% - -FRAME_COLOR = "#fff" # White frame -TEXT_COLOR = "#000" # Black text -HEADERS = { - "User-Agent": settings.OUTBOUND_USER_AGENT_TEMPLATE.format(purpose="Watermark") -} - - -class UpstreamWatermarkException(APIException): - status_code = status.HTTP_424_FAILED_DEPENDENCY - default_detail = ( - "Could not render watermarked image due to upstream provider error." - ) - default_code = "upstream_watermark_failure" - - -class Dimension(Flag): - """This enum represents the two dimensions of an image.""" - - HEIGHT = auto() - WIDTH = auto() - BOTH = HEIGHT | WIDTH - NONE = 0 - - -# Utils - - -def _smaller_dimension(width, height): - """ - Determine which image dimensions are below the breakpoint dimensions. - - :param width: the width of the image - :param height: the height of the image - :return: True if the image is small, False otherwise - """ - - smaller_dimension = Dimension.NONE - if width < BREAKPOINT_DIMENSION: - smaller_dimension = smaller_dimension | Dimension.WIDTH - if height < BREAKPOINT_DIMENSION: - smaller_dimension = smaller_dimension | Dimension.HEIGHT - return smaller_dimension - - -def _get_font_path(monospace=False): - """ - Return the path to the TTF font file. - - :param monospace: True for monospaced font, False for variable-width font - :return: the path to the TTF font file - """ - - font_name = "SourceCodePro-Bold.ttf" if monospace else "SourceSansPro-Bold.ttf" - font_path = os.path.join(os.path.dirname(__file__), "fonts", font_name) - - return font_path - - -def _fit_in_width(text, font, max_width): - """ - Break the given text so that it fits in the given space. - - :param text: the text to fit in the limited width - :param font: the font containing size and other info - :param max_width: the maximum width the text is allowed to take - :return: the fitted text - """ - - char_length = font.getlength("x") # x has the closest to average width - max_chars = int( - max_width // char_length - ) # Must be an integer to be used with `wrap` below - - text = "\n".join(["\n".join(wrap(line, max_chars)) for line in text.split("\n")]) - - return text - - -# Framing - - -def _create_frame(dimensions): - """ - Create a frame with the given dimensions. - - :param dimensions: a tuple containing the width and height of the frame - :return: a white frame with the given dimensions - """ - - return Image.new("RGB", dimensions, FRAME_COLOR) - - -def _frame_image(image, frame, left_margin, top_margin): - """ - Fix the image in the frame with the specified spacing. - - :param image: the image to frame - :param frame: the frame in which to fit the image - :param left_margin: the margin to the left of the image - :param top_margin: the margin to the top of the image - :return: the framed image - """ - - frame.paste(image, (left_margin, top_margin)) - return frame - - -# Attribution - - -def _get_attribution_height(text, font): - draw = ImageDraw.Draw(Image.new("RGB", (0, 0))) - _, _, _, height = draw.multiline_textbbox((0, 0), text, font) - return height - - -# Actions - - -def _open_image(url): - """ - Read an image from a URL and convert it into a PIL Image object. - - :param url: the URL from where to read the image - :return: the PIL image object with the EXIF data - """ - try: - response = requests.get(url, headers=HEADERS) - response.raise_for_status() - img_bytes = BytesIO(response.content) - img = Image.open(img_bytes) - except requests.exceptions.RequestException as e: - logger.error("Error requesting image", exc=e, exc_info=True) - raise UpstreamWatermarkException(f"{e}") - except UnidentifiedImageError as e: - logger.error("Error loading image data", exc=e, exc_info=True) - raise UpstreamWatermarkException(f"{e}") - - return img, img.getexif() - - -def _print_attribution_on_image(img: Image.Image, image_info): - """ - Add a frame around the image and put the attribution text on the bottom. - - :param img: the image to frame and attribute - :param image_info: the information about a particular image - :return: return the framed and attributed image - """ - - try: - lic = License(image_info["license"], image_info["license_version"]) - except ValueError: - return img - - width, height = img.size - smaller_dimension = _smaller_dimension(width, height) - - if smaller_dimension is Dimension.NONE: - margin = round(MARGIN_RATIO * min(width, height)) - font_size = round(FONT_RATIO * min(width, height)) - new_width = width - else: - margin = round(MARGIN_RATIO * BREAKPOINT_DIMENSION) - font_size = round(FONT_RATIO * BREAKPOINT_DIMENSION) - new_width = ( - BREAKPOINT_DIMENSION if Dimension.WIDTH in smaller_dimension else width - ) - - font = ImageFont.truetype(_get_font_path(), size=font_size) - - text = lic.get_attribution_text( - image_info["title"], - image_info["creator"], - url=False, - ) - text = _fit_in_width(text, font, new_width) - attribution_height = _get_attribution_height(text, font) - - frame_width = margin + new_width + margin - frame_height = margin + height + margin + attribution_height + margin - left_margin = (frame_width - width) // 2 - - frame = _create_frame( - ( - frame_width, - frame_height, - ) - ) - _frame_image(img, frame, left_margin, margin) - - draw = ImageDraw.Draw(frame) - text_position_x = margin - text_position_y = margin + height + margin - draw.text( - xy=( - text_position_x, - text_position_y, - ), - text=text, - font=font, - fill=TEXT_COLOR, - ) - - return frame - - -def watermark(image_url, info, draw_frame=True): - """ - Return a PIL Image with a watermark and embedded metadata. - - :param image_url: The URL of the image. - :param info: A dictionary with keys title, creator, license, and - license_version - :param draw_frame: Whether to draw an attribution frame. - :returns: A PIL Image and its EXIF data, if included. - """ - - img, exif = _open_image(image_url) - if not draw_frame: - return img, exif - frame = _print_attribution_on_image(img, info) - return frame, exif diff --git a/api/api/views/image_views.py b/api/api/views/image_views.py index 6b5fc22ebdb..0ccafd19200 100644 --- a/api/api/views/image_views.py +++ b/api/api/views/image_views.py @@ -1,10 +1,8 @@ import io from django.conf import settings -from django.http.response import FileResponse, HttpResponse from django.shortcuts import aget_object_or_404 from rest_framework.decorators import action -from rest_framework.exceptions import NotFound from rest_framework.response import Response from drf_spectacular.utils import extend_schema, extend_schema_view @@ -20,7 +18,6 @@ stats, ) from api.docs.image_docs import thumbnail as thumbnail_docs -from api.docs.image_docs import watermark as watermark_doc from api.models import Image from api.serializers.image_serializers import ( ImageReportRequestSerializer, @@ -28,11 +25,9 @@ ImageSerializer, OembedRequestSerializer, OembedSerializer, - WatermarkRequestSerializer, ) from api.utils import image_proxy from api.utils.aiohttp import get_aiohttp_session -from api.utils.watermark import UpstreamWatermarkException, watermark from api.views.media_views import MediaViewSet @@ -125,69 +120,6 @@ async def thumbnail(self, request, identifier): """Retrieve the scaled down and compressed thumbnail of the image.""" return await super().thumbnail(request) - @watermark_doc - @action(detail=True, url_path="watermark", url_name="watermark") - def watermark(self, request, *_, **__): # noqa: D401 - """ - Note that this endpoint is deprecated. - - --- - - 🚧 **TODO:** Document this. - """ - - if not settings.WATERMARK_ENABLED: - raise NotFound("The watermark feature is currently disabled.") - - params = WatermarkRequestSerializer(data=request.query_params) - params.is_valid(raise_exception=True) - - image = self.get_object() - image_url = image.url - - if image_url.endswith(".svg") or getattr(image, "filetype") == "svg": - raise UpstreamWatermarkException( - "Unsupported media type: SVG images are not supported for watermarking." - ) - - image_info = { - attr: getattr(image, attr) - for attr in ["title", "creator", "license", "license_version"] - } - - # Create the actual watermarked image. - watermarked, exif = watermark(image_url, image_info, params.data["watermark"]) - img_bytes = io.BytesIO() - self._save_wrapper(watermarked, exif, img_bytes) - - if params.data["embed_metadata"]: - # Embed ccREL metadata with XMP. - work_properties = { - "creator": image.creator, - "license_url": image.license_url, - "attribution": image.attribution, - "work_landing_page": image.foreign_landing_url, - "identifier": str(image.identifier), - } - - # Import inside a function to allow server run without Exempi library - import libxmp - - from api.utils import ccrel - - try: - with_xmp = ccrel.embed_xmp_bytes(img_bytes, work_properties) - return FileResponse(with_xmp, content_type="image/jpeg") - except (libxmp.XMPError, AttributeError): - # Just send the EXIF-ified file if libxmp fails to add metadata - response = HttpResponse(content_type="image/jpeg") - self._save_wrapper(watermarked, exif, response) - return response - else: - response = HttpResponse(img_bytes, content_type="image/jpeg") - self._save_wrapper(watermarked, exif, response) - return response - @report @action( detail=True, @@ -203,17 +135,3 @@ def report(self, request, identifier): """ return super().report(request, identifier) - - # Helper functions - - @staticmethod - def _save_wrapper(pil_img, exif_bytes, destination): - """Prevent PIL from crashing if ``exif_bytes`` is ``None``.""" - - if exif_bytes: - # Re-insert EXIF metadata - pil_img.save(destination, "jpeg", exif=exif_bytes) - else: - pil_img.save(destination, "jpeg") - - pil_img.close() diff --git a/api/conf/settings/misc.py b/api/conf/settings/misc.py index 22ca88759cd..2d980744e89 100644 --- a/api/conf/settings/misc.py +++ b/api/conf/settings/misc.py @@ -13,9 +13,6 @@ "ENABLE_FILTERED_INDEX_QUERIES", cast=bool, default=False ) -# Whether to enable the image watermark endpoint -WATERMARK_ENABLED = config("WATERMARK_ENABLED", default=False, cast=bool) - # Log full Elasticsearch response VERBOSE_ES_RESPONSE = config("DEBUG_SCORES", default=False, cast=bool) diff --git a/api/env.docker b/api/env.docker index e31d7a5cc23..57eda9874b0 100644 --- a/api/env.docker +++ b/api/env.docker @@ -22,6 +22,4 @@ SEMANTIC_VERSION=1.0.0 ELASTICSEARCH_URL=es -WATERMARK_ENABLED=True - IPYTHONDIR=/api/.ipython diff --git a/api/env.template b/api/env.template index 4021ed5f361..0c14ebb34aa 100644 --- a/api/env.template +++ b/api/env.template @@ -31,8 +31,6 @@ ENVIRONMENT=local SEMANTIC_VERSION=1.0.0 -#WATERMARK_ENABLED=False - #SETUP_ES=True #ELASTICSEARCH_URL=es #ELASTICSEARCH_PORT=9200 diff --git a/api/pdm.lock b/api/pdm.lock index 6dcfb15f18d..f76ef6bff3d 100644 --- a/api/pdm.lock +++ b/api/pdm.lock @@ -5,7 +5,7 @@ groups = ["default", "dev", "overrides", "test"] strategy = ["inherit_metadata"] lock_version = "4.5.0" -content_hash = "sha256:4958f42c8951e1c07a6279084b470fd99787be84dc222aa78ff75980b54ebdef" +content_hash = "sha256:65555fe554ab1276e79c4a9347ff3b881562bf74e0fe6fd040c04031f089e2b8" [[metadata.targets]] requires_python = "==3.12.*" @@ -1639,29 +1639,6 @@ files = [ {file = "python_slugify-8.0.4-py2.py3-none-any.whl", hash = "sha256:276540b79961052b66b7d116620b36518847f52d5fd9e3a70164fc8c50faa6b8"}, ] -[[package]] -name = "python-xmp-toolkit" -version = "2.0.2" -summary = "Python XMP Toolkit for working with metadata." -groups = ["default"] -dependencies = [ - "pytz", -] -files = [ - {file = "python-xmp-toolkit-2.0.2.tar.gz", hash = "sha256:3a88431bb8222e9723da688b3ead50506f3ed3002749362845745dd995a7a68b"}, - {file = "python_xmp_toolkit-2.0.2-py3-none-any.whl", hash = "sha256:dad1c2dcf5392459ecf2d0e368a7bbcab099cac7ef9f75fa53612db4efbcda8a"}, -] - -[[package]] -name = "pytz" -version = "2024.1" -summary = "World timezone definitions, modern and historical" -groups = ["default"] -files = [ - {file = "pytz-2024.1-py2.py3-none-any.whl", hash = "sha256:328171f4e3623139da4983451950b28e95ac706e13f3f2630a879749e7a8b319"}, - {file = "pytz-2024.1.tar.gz", hash = "sha256:2a29735ea9c18baf14b448846bde5a48030ed267578472d8955cd0e7443a9812"}, -] - [[package]] name = "pyyaml" version = "6.0.2" diff --git a/api/pyproject.toml b/api/pyproject.toml index 8c7584028e7..630a96cc039 100644 --- a/api/pyproject.toml +++ b/api/pyproject.toml @@ -33,7 +33,6 @@ dependencies = [ "pillow >=11, <12", "psycopg[pool] >=3.2.3, <4", "python-decouple >=3.8, <4", - "python-xmp-toolkit >=2.0.2, <3", "sentry-sdk >=2.19, <3", "uvicorn[standard] >=0.34, <0.35", "openverse-attribution @ file:///${PROJECT_ROOT}/../packages/python/openverse-attribution", diff --git a/api/test/unit/utils/test_watermark.py b/api/test/unit/utils/test_watermark.py deleted file mode 100644 index 1266e8bb643..00000000000 --- a/api/test/unit/utils/test_watermark.py +++ /dev/null @@ -1,43 +0,0 @@ -import json -from pathlib import Path - -import pook -import pytest - -from api.utils.watermark import HEADERS, watermark - - -_MOCK_IMAGE_PATH = Path(__file__).parent / ".." / ".." / "factory" -_MOCK_IMAGE_BYTES = (_MOCK_IMAGE_PATH / "sample-image.jpg").read_bytes() -_MOCK_IMAGE_INFO = json.loads((_MOCK_IMAGE_PATH / "sample-image-info.json").read_text()) - - -@pytest.fixture -def mock_request(): - with pook.use(): - mock = ( - pook.get("http://example.com/") - .header("User-Agent", HEADERS["User-Agent"]) - .reply(200) - .body(_MOCK_IMAGE_BYTES) - .mock - ) - yield mock - - -def test_watermark_image_sends_ua_header(mock_request): - watermark("http://example.com/", _MOCK_IMAGE_INFO) - # ``pook`` will only match if UA header is sent. - assert mock_request.total_matches > 0 - - -# Previously, wrapped titles would throw a TypeError: -# slice indices must be integers or None or have an __index__ method. -# See: https://github.com/WordPress/openverse/issues/2466 -def test_long_title_wraps_correctly(mock_request): - # Make the title 400 chars long - _MOCK_IMAGE_INFO_LONG_TITLE = dict(_MOCK_IMAGE_INFO) - _MOCK_IMAGE_INFO_LONG_TITLE["title"] = "a" * 400 - - watermark("http://example.com/", _MOCK_IMAGE_INFO_LONG_TITLE) - assert mock_request.total_matches > 0 diff --git a/api/test/unit/views/test_image_views.py b/api/test/unit/views/test_image_views.py index daa5e2db538..b3f63e7dca2 100644 --- a/api/test/unit/views/test_image_views.py +++ b/api/test/unit/views/test_image_views.py @@ -1,10 +1,8 @@ import json from pathlib import Path -from unittest.mock import patch import pook import pytest -from PIL import UnidentifiedImageError from api.views.image_views import ImageViewSet from test.factory.models.image import ImageFactory @@ -61,45 +59,3 @@ def test_thumbnail_uses_upstream_thumb_for_smk( assert response.status_code == 200 assert mock_get.matched is True - - -@pytest.mark.django_db -def test_watermark_raises_424_for_invalid_image(api_client): - image = ImageFactory.create() - expected_error_message = ( - "cannot identify image file <_io.BytesIO object at 0xffff86d8fec0>" - ) - - with pook.use(): - pook.get(image.url).reply(200) - - with patch("PIL.Image.open") as mock_open: - mock_open.side_effect = UnidentifiedImageError(expected_error_message) - res = api_client.get(f"/v1/images/{image.identifier}/watermark/") - - assert res.status_code == 424 - assert res.data["detail"] == expected_error_message - - -@pytest.mark.django_db -def test_watermark_raises_424_for_404_image(api_client): - image = ImageFactory.create() - - with pook.use(): - pook.get(image.url).reply(404) - - res = api_client.get(f"/v1/images/{image.identifier}/watermark/") - assert res.status_code == 424 - assert res.data["detail"] == f"404 Client Error: Not Found for url: {image.url}" - - -@pytest.mark.django_db -def test_watermark_raises_424_for_SVG_image(api_client): - image = ImageFactory.create(url="http://example.com/image.svg") - - res = api_client.get(f"/v1/images/{image.identifier}/watermark/") - assert res.status_code == 424 - assert ( - res.data["detail"] - == "Unsupported media type: SVG images are not supported for watermarking." - )