Skip to content
This repository has been archived by the owner on Feb 22, 2023. It is now read-only.

Improve the thumbnail service to support compression and WEBP #630

Merged
merged 16 commits into from
Apr 12, 2022
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions api/catalog/api/docs/audio_docs.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
MediaSearch,
MediaStats,
fields_to_md,
refer_sample,
)
from catalog.api.examples import (
audio_complain_201_example,
Expand All @@ -31,6 +32,7 @@
InputErrorSerializer,
NotFoundErrorSerializer,
)
from catalog.api.serializers.media_serializers import MediaThumbnailRequestSerializer
from catalog.api.serializers.provider_serializers import ProviderSerializer
from drf_yasg import openapi

Expand Down Expand Up @@ -206,3 +208,17 @@ class AudioComplain(MediaComplain):
"responses": responses,
"code_examples": code_examples,
}


class AudioThumbnail:
    """Swagger settings documenting the audio ``thumbnail`` endpoint."""

    # The description lines stay flush-left because they are part of the
    # f-string's content.
    desc = f"""
thumbnail is an API endpoint to retrieve the scaled down and compressed thumbnail
of the artwork of an audio track or its audio set.

{refer_sample}"""

    # Keyword arguments meant to be unpacked into ``swagger_auto_schema``.
    swagger_setup = dict(
        operation_id="audio_thumbnail",
        operation_description=desc,
        query_serializer=MediaThumbnailRequestSerializer,
    )
15 changes: 15 additions & 0 deletions api/catalog/api/docs/image_docs.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
OembedRequestSerializer,
OembedSerializer,
)
from catalog.api.serializers.media_serializers import MediaThumbnailRequestSerializer
from catalog.api.serializers.provider_serializers import ProviderSerializer
from drf_yasg import openapi

Expand Down Expand Up @@ -240,3 +241,17 @@ class ImageOembed:
"responses": responses,
"code_examples": code_examples,
}


class ImageThumbnail:
    """Swagger settings documenting the image ``thumbnail`` endpoint."""

    # The description lines stay flush-left because they are part of the
    # f-string's content.
    desc = f"""
thumbnail is an API endpoint to retrieve the scaled down and compressed thumbnail
of an image.

{refer_sample}"""

    # Keyword arguments meant to be unpacked into ``swagger_auto_schema``.
    swagger_setup = dict(
        operation_id="image_thumbnail",
        operation_description=desc,
        query_serializer=MediaThumbnailRequestSerializer,
    )
27 changes: 27 additions & 0 deletions api/catalog/api/serializers/media_serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -420,3 +420,30 @@ class MediaSearchSerializer(serializers.Serializer):
page = serializers.IntegerField(
help_text="The current page number returned in the response."
)


class MediaThumbnailRequestSerializer(serializers.Serializer):
    """
    Parse and validate the query string parameters of a thumbnail request.

    Both parameters are optional. ``full_size`` falls back to ``False``. When
    ``compressed`` is omitted or null, it is set to the opposite of
    ``full_size`` during validation. The validated data is keyed by the
    fields' ``source`` names: ``is_full_size`` and ``is_compressed``.
    """

    full_size = serializers.BooleanField(
        source="is_full_size",
        allow_null=True,
        required=False,
        default=False,
        help_text="whether to render the actual image and not a thumbnail version",
    )
    compressed = serializers.BooleanField(
        source="is_compressed",
        allow_null=True,
        default=None,
        required=False,
        # Adjacent string literals are joined verbatim, so the separating
        # space has to live inside one of them.
        help_text="whether to compress the output image to reduce file size, "
        "defaults to opposite of `full_size`",
    )

    def validate(self, data):
        """
        Derive ``is_compressed`` from ``is_full_size`` when it was not supplied.

        :param data: the validated field values, keyed by ``source`` name
        :return: the same mapping with ``is_compressed`` always set to a boolean
        """

        if data.get("is_compressed") is None:
            # ``.get`` keeps a missing or null ``is_full_size`` from raising;
            # both are treated as "not full size", i.e. compressed.
            data["is_compressed"] = not data.get("is_full_size")
        return data
11 changes: 5 additions & 6 deletions api/catalog/api/views/audio_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
AudioRelated,
AudioSearch,
AudioStats,
AudioThumbnail,
)
from catalog.api.models import Audio
from catalog.api.serializers.audio_serializers import (
Expand All @@ -14,6 +15,7 @@
AudioSerializer,
AudioWaveformSerializer,
)
from catalog.api.serializers.media_serializers import MediaThumbnailRequestSerializer
from catalog.api.utils.exceptions import get_api_exception
from catalog.api.utils.throttle import OneThousandPerMinute
from catalog.api.views.media_views import MediaViewSet
Expand All @@ -31,7 +33,7 @@
@method_decorator(swagger_auto_schema(**AudioDetail.swagger_setup), "retrieve")
@method_decorator(swagger_auto_schema(**AudioRelated.swagger_setup), "related")
@method_decorator(swagger_auto_schema(**AudioComplain.swagger_setup), "report")
@method_decorator(swagger_auto_schema(auto_schema=None), "thumbnail")
@method_decorator(swagger_auto_schema(**AudioThumbnail.swagger_setup), "thumbnail")
@method_decorator(swagger_auto_schema(auto_schema=None), "waveform")
class AudioViewSet(MediaViewSet):
"""
Expand All @@ -51,6 +53,7 @@ class AudioViewSet(MediaViewSet):
detail=True,
url_path="thumb",
url_name="thumb",
serializer_class=MediaThumbnailRequestSerializer,
throttle_classes=[OneThousandPerMinute],
)
def thumbnail(self, request, *_, **__):
Expand All @@ -64,11 +67,7 @@ def thumbnail(self, request, *_, **__):
if not image_url:
raise get_api_exception("Could not find artwork.", 404)

is_full_size = request.query_params.get("full_size", False)
if is_full_size:
return self._get_proxied_image(image_url, None)
else:
return self._get_proxied_image(image_url)
return super().thumbnail(image_url, request)

@action(
detail=True,
Expand Down
11 changes: 5 additions & 6 deletions api/catalog/api/views/image_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
ImageRelated,
ImageSearch,
ImageStats,
ImageThumbnail,
)
from catalog.api.models import Image
from catalog.api.serializers.image_serializers import (
Expand All @@ -20,6 +21,7 @@
OembedSerializer,
WatermarkRequestSerializer,
)
from catalog.api.serializers.media_serializers import MediaThumbnailRequestSerializer
from catalog.api.utils.exceptions import get_api_exception
from catalog.api.utils.throttle import OneThousandPerMinute
from catalog.api.utils.watermark import watermark
Expand All @@ -42,7 +44,7 @@
@method_decorator(swagger_auto_schema(**ImageRelated.swagger_setup), "related")
@method_decorator(swagger_auto_schema(**ImageComplain.swagger_setup), "report")
@method_decorator(swagger_auto_schema(**ImageOembed.swagger_setup), "oembed")
@method_decorator(swagger_auto_schema(auto_schema=None), "thumbnail")
@method_decorator(swagger_auto_schema(**ImageThumbnail.swagger_setup), "thumbnail")
@method_decorator(swagger_auto_schema(auto_schema=None), "watermark")
class ImageViewSet(MediaViewSet):
"""
Expand Down Expand Up @@ -91,6 +93,7 @@ def oembed(self, request, *_, **__):
detail=True,
url_path="thumb",
url_name="thumb",
serializer_class=MediaThumbnailRequestSerializer,
throttle_classes=[OneThousandPerMinute],
)
def thumbnail(self, request, *_, **__):
Expand All @@ -100,11 +103,7 @@ def thumbnail(self, request, *_, **__):
if not image_url:
raise get_api_exception("Could not find image.", 404)

is_full_size = request.query_params.get("full_size", False)
if is_full_size:
return self._get_proxied_image(image_url, None)
else:
return self._get_proxied_image(image_url)
return super().thumbnail(image_url, request)

@action(detail=True, url_path="watermark", url_name="watermark")
def watermark(self, request, *_, **__):
Expand Down
89 changes: 75 additions & 14 deletions api/catalog/api/views/media_views.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
import json
import logging as log
from typing import List
from urllib.error import HTTPError
from urllib.request import urlopen
from urllib.parse import urlencode
from urllib.request import Request, urlopen

from catalog.api.controllers import search_controller
from catalog.api.models import ContentProvider
Expand Down Expand Up @@ -124,6 +128,15 @@ def report(self, request, *_, **__):
serializer = self.get_serializer(report)
return Response(data=serializer.data, status=status.HTTP_201_CREATED)

def thumbnail(self, image_url, request, *_, **__):
    """
    Validate the thumbnail query parameters and return the proxied image.

    :param image_url: the URL of the image to render through the proxy
    :param request: the incoming request; its query parameters are validated
        with the view's serializer and its ``Accept`` header (``image/*``
        when absent) is forwarded
    :return: whatever ``_get_proxied_image`` returns for these options
    """

    query_serializer = self.get_serializer(data=request.query_params)
    # Invalid query parameters abort here instead of reaching the proxy.
    query_serializer.is_valid(raise_exception=True)

    accepted_types = request.headers.get("Accept", "image/*")
    proxy_options = query_serializer.validated_data
    return self._get_proxied_image(
        image_url, accept_header=accepted_types, **proxy_options
    )

# Helper functions

@staticmethod
Expand All @@ -143,24 +156,72 @@ def _get_user_ip(request):
return ip

@staticmethod
def _get_proxied_image(image_url, width=settings.THUMBNAIL_WIDTH_PX):
if width is None: # full size
proxy_upstream = f"{settings.THUMBNAIL_PROXY_URL}/{image_url}"
else:
proxy_upstream = (
f"{settings.THUMBNAIL_PROXY_URL}/"
f"{settings.THUMBNAIL_WIDTH_PX},fit/"
f"{image_url}"
)
def _thumbnail_proxy_comm(
    path: str,
    params: dict,
    headers: List[tuple[str, str]] = None,
):
    """
    Send a request to the thumbnail proxy and return its response.

    :param path: the proxy endpoint, appended to ``settings.THUMBNAIL_PROXY_URL``
    :param params: the query parameters, URL-encoded into the query string
    :param headers: optional ``(name, value)`` pairs added as request headers
    :return: a tuple of the upstream response object, its status and its
        ``Content-Type`` header; reading the response is left to the caller
    :raise: the exception built by ``get_api_exception`` when the proxy
        answers with an HTTP error or cannot be reached at all
    """

    # Function-scope import: the module itself only imports ``HTTPError``.
    from urllib.error import URLError

    proxy_url = settings.THUMBNAIL_PROXY_URL
    query_string = urlencode(params)
    upstream_url = f"{proxy_url}/{path}?{query_string}"
    log.debug("Image proxy upstream URL: %s", upstream_url)

    req = Request(upstream_url)
    for key, val in headers or []:
        req.add_header(key, val)

    try:
        upstream_response = urlopen(req)
    except URLError as exc:
        # ``HTTPError`` is a subclass of ``URLError``, so this covers both
        # error responses and connection failures.
        raise get_api_exception("Failed to render thumbnail.") from exc

    res_status = upstream_response.status
    content_type = upstream_response.headers.get("Content-Type")
    log.debug(
        "Image proxy response status: %s, content-type: %s",
        res_status,
        content_type,
    )

    return upstream_response, res_status, content_type
dhruvkb marked this conversation as resolved.
Show resolved Hide resolved

@staticmethod
def _get_proxied_image(
    image_url: str,
    accept_header: str = "image/*",
    is_full_size: bool = False,
    is_compressed: bool = True,
):
    """
    Render an image through the thumbnail proxy and wrap it in a response.

    :param image_url: the URL of the image to render
    :param accept_header: the ``Accept`` header forwarded to the proxy
    :param is_full_size: keep the width reported by the proxy's ``info``
        endpoint instead of ``settings.THUMBNAIL_WIDTH_PX``
    :param is_compressed: use the configured JPG quality and PNG compression
        instead of quality 100 and compression 0
    :return: an ``HttpResponse`` carrying the proxy's body, status and
        content type
    """

    width = settings.THUMBNAIL_WIDTH_PX
    if is_full_size:
        info_res, *_ = MediaViewSet._thumbnail_proxy_comm("info", {"url": image_url})
        # Close the upstream connection as soon as the body has been read.
        with info_res:
            info = json.loads(info_res.read())
        width = info["width"]

    if is_compressed:
        quality = settings.THUMBNAIL_JPG_QUALITY
        compression = settings.THUMBNAIL_PNG_COMPRESSION
    else:
        quality, compression = 100, 0

    params = {
        "url": image_url,
        "width": width,
        "quality": quality,
        "compression": compression,
    }

    if "webp" in accept_header:
        # Use the ``Accept`` header to determine the output type: JPG and PNG
        # are converted to WEBP when the client accepts it, otherwise the
        # format stays as it is. This prevents issues with Safari, which
        # before Big Sur preferentially requests PNG; JPG images become
        # quite large if converted to PNG.
        params["type"] = "auto"

    img_res, res_status, content_type = MediaViewSet._thumbnail_proxy_comm(
        "resize", params, [("Accept", accept_header)]
    )
    with img_res:
        body = img_res.read()

    return HttpResponse(body, status=res_status, content_type=content_type)
4 changes: 3 additions & 1 deletion api/catalog/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,9 @@
# Produce CC-hosted thumbnails dynamically through a proxy.
THUMBNAIL_PROXY_URL = config("THUMBNAIL_PROXY_URL", default="http://localhost:8222")

THUMBNAIL_WIDTH_PX = 600
THUMBNAIL_WIDTH_PX = config("THUMBNAIL_WIDTH_PX", cast=int, default=600)
THUMBNAIL_JPG_QUALITY = config("THUMBNAIL_JPG_QUALITY", cast=int, default=80)
THUMBNAIL_PNG_COMPRESSION = config("THUMBNAIL_PNG_COMPRESSION", cast=int, default=6)

AUTHENTICATION_BACKENDS = (
"oauth2_provider.backends.OAuth2Backend",
Expand Down
2 changes: 1 addition & 1 deletion api/docs/guides/quickstart.md
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ The command `just up` spawns the following services:
- API application database
- [Elasticsearch](https://www.elastic.co/elasticsearch/)
- [Redis](https://redis.io/)
- [imageproxy](https://github.com/willnorris/imageproxy)
- [imaginary](https://github.com/h2non/imaginary)
- [NGINX](http://nginx.org)
- **web** (`api/`)
- **ingestion_server** and **indexer_worker** (`ingestion_server/`)
Expand Down
2 changes: 1 addition & 1 deletion api/env.docker
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ DJANGO_DEBUG_ENABLED="True"

REDIS_HOST="cache"

THUMBNAIL_PROXY_URL="http://thumbs:8222"
THUMBNAIL_PROXY_URL="http://thumbnails:8222"

DJANGO_DATABASE_HOST="db"

Expand Down
6 changes: 5 additions & 1 deletion api/env.template
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,11 @@ DJANGO_DEBUG_ENABLED="True"
#REDIS_PORT="6379"
#REDIS_PASSWORD=""

#THUMBNAIL_PROXY_URL="http://thumbs:8222"
#THUMBNAIL_PROXY_URL="http://thumbnails:8222"

#THUMBNAIL_WIDTH_PX="600"
#THUMBNAIL_JPG_QUALITY="80"
#THUMBNAIL_PNG_COMPRESSION="6"

#DJANGO_DATABASE_HOST="db"
#DJANGO_DATABASE_PORT="5432"
Expand Down
10 changes: 10 additions & 0 deletions api/test/audio_integration_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
search_special_chars,
stats,
thumb,
thumb_compression,
thumb_webp,
)

import pytest
Expand Down Expand Up @@ -73,5 +75,13 @@ def test_audio_thumb(audio_fixture):
thumb(audio_fixture)


def test_audio_thumb_compression(audio_fixture):
    """Run the shared ``thumb_compression`` check against the audio fixture."""
    thumb_compression(audio_fixture)


def test_audio_thumb_webp(audio_fixture):
    """Run the shared ``thumb_webp`` check against the audio fixture."""
    thumb_webp(audio_fixture)


def test_audio_report(audio_fixture):
    """Run the shared ``report`` check for the ``audio`` media type."""
    report("audio", audio_fixture)
10 changes: 10 additions & 0 deletions api/test/image_integration_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
search_special_chars,
stats,
thumb,
thumb_compression,
thumb_webp,
)
from urllib.parse import urlencode

Expand Down Expand Up @@ -72,6 +74,14 @@ def test_image_thumb(image_fixture):
thumb(image_fixture)


def test_image_thumb_compression(image_fixture):
    """Run the shared ``thumb_compression`` check against the image fixture."""
    thumb_compression(image_fixture)


def test_image_thumb_webp(image_fixture):
    """Run the shared ``thumb_webp`` check against the image fixture."""
    thumb_webp(image_fixture)


def test_audio_report(image_fixture):
    """
    Run the shared ``report`` check for the ``images`` media type.

    NOTE(review): the function name says "audio" although it exercises the
    images endpoint with the image fixture; consider renaming it.
    """
    report("images", image_fixture)

Expand Down
Loading