diff --git a/CHANGES/1288.feature b/CHANGES/1288.feature new file mode 100644 index 000000000..d5cc72ed3 --- /dev/null +++ b/CHANGES/1288.feature @@ -0,0 +1,3 @@ +Updated the Manifest model to no longer rely on artifacts, storing all manifest data internally +withing the database. This change dissociates the image manifest from external files on the storage +backend. diff --git a/pulp_container/app/management/commands/container-handle-image-data.py b/pulp_container/app/management/commands/container-handle-image-data.py index ed10e7547..786374fce 100644 --- a/pulp_container/app/management/commands/container-handle-image-data.py +++ b/pulp_container/app/management/commands/container-handle-image-data.py @@ -1,3 +1,5 @@ +import json + from json.decoder import JSONDecodeError from gettext import gettext as _ @@ -6,9 +8,12 @@ from django.core.exceptions import ObjectDoesNotExist from django.core.management import BaseCommand +from django.db.models import Q from pulp_container.app.models import Manifest +from pulp_container.app.utils import get_content_data + from pulp_container.constants import MEDIA_TYPE @@ -31,14 +36,15 @@ class Command(BaseCommand): def handle(self, *args, **options): manifests_updated_count = 0 - manifests = Manifest.objects.filter(labels={}, annotations={}) + manifests = Manifest.objects.filter(Q(data="") | Q(annotations={}, labels={})) manifests = manifests.exclude( media_type__in=[MEDIA_TYPE.MANIFEST_LIST, MEDIA_TYPE.INDEX_OCI, MEDIA_TYPE.MANIFEST_V1] ) manifests_updated_count += self.update_manifests(manifests) manifest_lists = Manifest.objects.filter( - media_type__in=[MEDIA_TYPE.MANIFEST_LIST, MEDIA_TYPE.INDEX_OCI], annotations={} + Q(media_type__in=[MEDIA_TYPE.MANIFEST_LIST, MEDIA_TYPE.INDEX_OCI]), + Q(data="") | Q(annotations={}), ) manifests_updated_count += self.update_manifests(manifest_lists) @@ -46,18 +52,36 @@ def handle(self, *args, **options): self.style.SUCCESS("Successfully updated %d manifests." % manifests_updated_count) ) + def init_manifest(self, manifest): + has_initialized_data = manifest.data != "" + if has_initialized_data: + manifest_data = json.loads(manifest.data) + else: + manifest_artifact = manifest._artifacts.get() + manifest_data, raw_bytes_data = get_content_data(manifest_artifact) + manifest.data = raw_bytes_data.decode("utf-8") + manifest._artifacts.clear() + + manifest.annotations = manifest_data.get("annotations", {}) + + has_annotations = bool(manifest.annotations) + has_labels = manifest.init_labels() + has_image_nature = manifest.init_image_nature() + + return has_annotations or has_labels or has_image_nature or (not has_initialized_data) + def update_manifests(self, manifests_qs): manifests_updated_count = 0 manifests_to_update = [] for manifest in manifests_qs.iterator(): # suppress non-existing/already migrated artifacts and corrupted JSON files with suppress(ObjectDoesNotExist, JSONDecodeError): - has_metadata = manifest.init_metadata() - if has_metadata: + needs_update = self.init_manifest(manifest) + if needs_update: manifests_to_update.append(manifest) if len(manifests_to_update) > 1000: - fields_to_update = ["annotations", "labels", "is_bootable", "is_flatpak"] + fields_to_update = ["annotations", "labels", "is_bootable", "is_flatpak", "data"] manifests_qs.model.objects.bulk_update( manifests_to_update, fields_to_update, @@ -65,7 +89,7 @@ def update_manifests(self, manifests_qs): manifests_updated_count += len(manifests_to_update) manifests_to_update.clear() if manifests_to_update: - fields_to_update = ["annotations", "labels", "is_bootable", "is_flatpak"] + fields_to_update = ["annotations", "labels", "is_bootable", "is_flatpak", "data"] manifests_qs.model.objects.bulk_update( manifests_to_update, fields_to_update, diff --git a/pulp_container/app/migrations/0039_manifest_data.py b/pulp_container/app/migrations/0039_manifest_data.py new file mode 100644 index 000000000..c58f61a39 --- /dev/null +++ b/pulp_container/app/migrations/0039_manifest_data.py @@ -0,0 +1,30 @@ +# Generated by Django 4.2.10 on 2024-03-05 11:22 +import warnings + +from django.db import migrations, models + + +def print_warning_for_initializing_manifest_data(apps, schema_editor): + warnings.warn( + "Run 'pulpcore-manager container-handle-image-data' to move the manifests' data from artifacts to the new 'data' database field." + ) + + +class Migration(migrations.Migration): + + dependencies = [ + ("container", "0038_add_manifest_metadata_fields"), + ] + + operations = [ + migrations.AddField( + model_name="manifest", + name="data", + field=models.TextField(default=""), + ), + migrations.RunPython( + print_warning_for_initializing_manifest_data, + reverse_code=migrations.RunPython.noop, + elidable=True, + ), + ] diff --git a/pulp_container/app/models.py b/pulp_container/app/models.py index 123229581..3ef866368 100644 --- a/pulp_container/app/models.py +++ b/pulp_container/app/models.py @@ -72,6 +72,7 @@ class Manifest(Content): digest (models.TextField): The manifest digest. schema_version (models.IntegerField): The manifest schema version. media_type (models.TextField): The manifest media type. + data (models.TextField): The manifest's data in text format. annotations (models.JSONField): Metadata stored inside the image manifest. labels (models.JSONField): Metadata stored inside the image configuration. is_bootable (models.BooleanField): Indicates whether the image is bootable or not. @@ -98,6 +99,7 @@ class Manifest(Content): digest = models.TextField(db_index=True) schema_version = models.IntegerField() media_type = models.TextField(choices=MANIFEST_CHOICES) + data = models.TextField(default="") annotations = models.JSONField(default=dict) labels = models.JSONField(default=dict) diff --git a/pulp_container/app/redirects.py b/pulp_container/app/redirects.py index d88cbe093..b938a562a 100644 --- a/pulp_container/app/redirects.py +++ b/pulp_container/app/redirects.py @@ -1,7 +1,7 @@ from django.conf import settings from django.core.exceptions import ObjectDoesNotExist -from django.http import Http404 from django.shortcuts import redirect +from django.http import Http404 from pulp_container.app.exceptions import ManifestNotFound from pulp_container.app.utils import get_accepted_media_types @@ -29,11 +29,11 @@ def redirect_to_content_app(self, content_type, content_id): f"{settings.CONTENT_ORIGIN}/pulp/container/{self.path}/{content_type}/{content_id}" ) - -class FileStorageRedirects(CommonRedirects): - """ - A class which contains methods used for redirecting to the default django's file storage. - """ + def issue_manifest_redirect(self, manifest): + """ + Issue a redirect for the passed manifest. + """ + return self.redirect_to_content_app("manifests", manifest.digest) def issue_tag_redirect(self, tag): """ @@ -48,11 +48,11 @@ def issue_tag_redirect(self, tag): return self.redirect_to_content_app("manifests", tag.name) - def issue_manifest_redirect(self, manifest): - """ - Issue a redirect for the passed manifest. - """ - return self.redirect_to_content_app("manifests", manifest.digest) + +class FileStorageRedirects(CommonRedirects): + """ + A class which contains methods used for redirecting to the default django's file storage. + """ def issue_blob_redirect(self, blob): """ @@ -66,38 +66,6 @@ class S3StorageRedirects(CommonRedirects): A class that implements methods for the direct retrieval of manifest objects. """ - def issue_tag_redirect(self, tag): - """ - Issue a redirect if an accepted media type requires it or return not found if manifest - version is not supported. - """ - manifest_media_type = tag.tagged_manifest.media_type - if manifest_media_type == MEDIA_TYPE.MANIFEST_V1: - return self.redirect_to_artifact( - tag.name, tag.tagged_manifest, MEDIA_TYPE.MANIFEST_V1_SIGNED - ) - elif manifest_media_type in get_accepted_media_types(self.request.headers): - return self.redirect_to_artifact(tag.name, tag.tagged_manifest, manifest_media_type) - else: - raise ManifestNotFound(reference=tag.name) - - def issue_manifest_redirect(self, manifest): - """ - Directly redirect to an associated manifest's artifact. - """ - return self.redirect_to_artifact(manifest.digest, manifest, manifest.media_type) - - def redirect_to_artifact(self, content_name, manifest, manifest_media_type): - """ - Search for the passed manifest's artifact and issue a redirect. - """ - try: - artifact = manifest._artifacts.get() - except ObjectDoesNotExist: - raise Http404(f"An artifact for '{content_name}' was not found") - - return self.redirect_to_object_storage(artifact, manifest_media_type) - def issue_blob_redirect(self, blob): """ Redirect to the passed blob or stream content when an associated artifact is not present. @@ -123,6 +91,47 @@ def redirect_to_object_storage(self, artifact, return_media_type): ) return redirect(content_url) + # TODO: BACKWARD COMPATIBILITY - remove after fully migrating to artifactless manifests + def redirect_to_artifact(self, content_name, manifest, manifest_media_type): + """ + Search for the passed manifest's artifact and issue a redirect. + """ + try: + artifact = manifest._artifacts.get() + except ObjectDoesNotExist: + raise Http404(f"An artifact for '{content_name}' was not found") + + return self.redirect_to_object_storage(artifact, manifest_media_type) + + def issue_tag_redirect(self, tag): + """ + Issue a redirect if an accepted media type requires it or return not found if manifest + version is not supported. + """ + if tag.tagged_manifest.data: + return super().issue_tag_redirect(tag) + + manifest_media_type = tag.tagged_manifest.media_type + if manifest_media_type == MEDIA_TYPE.MANIFEST_V1: + return self.redirect_to_artifact( + tag.name, tag.tagged_manifest, MEDIA_TYPE.MANIFEST_V1_SIGNED + ) + elif manifest_media_type in get_accepted_media_types(self.request.headers): + return self.redirect_to_artifact(tag.name, tag.tagged_manifest, manifest_media_type) + else: + raise ManifestNotFound(reference=tag.name) + + def issue_manifest_redirect(self, manifest): + """ + Directly redirect to an associated manifest's artifact. + """ + if manifest.data: + return super().issue_manifest_redirect(manifest) + + return self.redirect_to_artifact(manifest.digest, manifest, manifest.media_type) + + # END OF BACKWARD COMPATIBILITY + class AzureStorageRedirects(S3StorageRedirects): """ diff --git a/pulp_container/app/registry.py b/pulp_container/app/registry.py index 007b789a4..26bc3dc47 100644 --- a/pulp_container/app/registry.py +++ b/pulp_container/app/registry.py @@ -132,7 +132,7 @@ async def get_tag(self, request): pull_downloader = await PullThroughDownloader.create( distribution, repository_version, path, tag_name ) - raw_manifest, digest, media_type = await pull_downloader.download_manifest( + raw_text_manifest, digest, media_type = await pull_downloader.download_manifest( run_pipeline=True ) headers = { @@ -140,7 +140,7 @@ async def get_tag(self, request): "Docker-Content-Digest": digest, "Docker-Distribution-API-Version": "registry/2.0", } - return web.Response(text=raw_manifest, headers=headers) + return web.Response(text=raw_text_manifest, headers=headers) else: raise PathNotResolved(tag_name) @@ -168,7 +168,7 @@ async def get_tag(self, request): distribution, repository_version, path, tag_name ) pull_downloader.downloader = downloader - raw_manifest, digest, media_type = await pull_downloader.download_manifest( + raw_text_manifest, digest, media_type = await pull_downloader.download_manifest( run_pipeline=True ) headers = { @@ -176,7 +176,7 @@ async def get_tag(self, request): "Docker-Content-Digest": digest, "Docker-Distribution-API-Version": "registry/2.0", } - return web.Response(text=raw_manifest, headers=headers) + return web.Response(text=raw_text_manifest, headers=headers) accepted_media_types = get_accepted_media_types(request.headers) @@ -200,7 +200,12 @@ async def get_tag(self, request): "Content-Type": return_media_type, "Docker-Content-Digest": tag.tagged_manifest.digest, } - return await self.dispatch_tag(request, tag, response_headers) + # TODO: BACKWARD COMPATIBILITY - remove after fully migrating to artifactless manifest + if not tag.tagged_manifest.data: + saved_artifact = await tag.tagged_manifest._artifacts.aget() + return await Registry._dispatch(saved_artifact, response_headers) + # END OF BACKWARD COMPATIBILITY + return web.Response(text=tag.tagged_manifest.data, headers=response_headers) # return what was found in case media_type is accepted header (docker, oci) if tag.tagged_manifest.media_type in accepted_media_types: @@ -209,35 +214,16 @@ async def get_tag(self, request): "Content-Type": return_media_type, "Docker-Content-Digest": tag.tagged_manifest.digest, } - return await self.dispatch_tag(request, tag, response_headers) + # TODO: BACKWARD COMPATIBILITY - remove after fully migrating to artifactless manifest + if not tag.tagged_manifest.data: + saved_artifact = await tag.tagged_manifest._artifacts.aget() + return await Registry._dispatch(saved_artifact, response_headers) + # END OF BACKWARD COMPATIBILITY + return web.Response(text=tag.tagged_manifest.data, headers=response_headers) # return 404 in case the client is requesting docker manifest v2 schema 1 raise PathNotResolved(tag_name) - async def dispatch_tag(self, request, tag, response_headers): - """ - Finds an artifact associated with a Tag and sends it to the client, otherwise tries - to stream it. - - Args: - request(:class:`~aiohttp.web.Request`): The request to prepare a response for. - tag: Tag - response_headers (dict): dictionary that contains the 'Content-Type' header to send - with the response - - Returns: - :class:`aiohttp.web.StreamResponse` or :class:`aiohttp.web.FileResponse`: The response - streamed back to the client. - - """ - try: - artifact = await tag.tagged_manifest._artifacts.aget() - except ObjectDoesNotExist: - ca = await sync_to_async(lambda x: x[0])(tag.tagged_manifest.contentartifact_set.all()) - return await self._stream_content_artifact(request, web.StreamResponse(), ca) - else: - return await Registry._dispatch(artifact, response_headers) - @RegistryContentCache( base_key=lambda req, cac: Registry.find_base_path_cached(req, cac), auth=lambda req, cac, bk: Registry.auth_cached(req, cac, bk), @@ -261,20 +247,35 @@ async def get_by_digest(self, request): pending_manifests = repository.pending_manifests.values_list("pk") pending_content = pending_blobs.union(pending_manifests) content = repository_version.content | Content.objects.filter(pk__in=pending_content) + # "/pulp/container/{path:.+}/{content:(blobs|manifests)}/sha256:{digest:.+}" + content_type = request.match_info["content"] try: - ca = await ContentArtifact.objects.select_related("artifact", "content").aget( - content__in=content, relative_path=digest - ) - ca_content = await sync_to_async(ca.content.cast)() - if isinstance(ca_content, Blob): - media_type = BLOB_CONTENT_TYPE - else: - media_type = ca_content.media_type - headers = { - "Content-Type": media_type, - "Docker-Content-Digest": ca_content.digest, - } + if content_type == "manifests": + manifest = await Manifest.objects.aget(digest=digest) + headers = { + "Content-Type": manifest.media_type, + "Docker-Content-Digest": manifest.digest, + } + # TODO: BACKWARD COMPATIBILITY - remove after migrating to artifactless manifest + if not manifest.data: + saved_artifact = await manifest._artifacts.aget() + return await Registry._dispatch(saved_artifact, headers) + # END OF BACKWARD COMPATIBILITY + return web.Response(text=manifest.data, headers=headers) + elif content_type == "blobs": + ca = await ContentArtifact.objects.select_related("artifact", "content").aget( + content__in=content, relative_path=digest + ) + ca_content = await sync_to_async(ca.content.cast)() + if isinstance(ca_content, Blob): + media_type = BLOB_CONTENT_TYPE + else: + media_type = ca_content.media_type + headers = { + "Content-Type": media_type, + "Docker-Content-Digest": ca_content.digest, + } except ObjectDoesNotExist: distribution = await distribution.acast() if distribution.remote_id and distribution.pull_through_distribution_id: @@ -282,16 +283,18 @@ async def get_by_digest(self, request): distribution, repository_version, path, digest ) - # "/pulp/container/{path:.+}/{content:(blobs|manifests)}/sha256:{digest:.+}" - content_type = request.match_info["content"] if content_type == "manifests": - raw_manifest, digest, media_type = await pull_downloader.download_manifest() + ( + raw_text_manifest, + digest, + media_type, + ) = await pull_downloader.download_manifest() headers = { "Content-Type": media_type, "Docker-Content-Digest": digest, "Docker-Distribution-API-Version": "registry/2.0", } - return web.Response(text=raw_manifest, headers=headers) + return web.Response(text=raw_text_manifest, headers=headers) elif content_type == "blobs": # there might be a case where the client has all the manifest data in place # and tries to download only missing blobs; because of that, only the reference @@ -304,6 +307,7 @@ async def get_by_digest(self, request): else: raise PathNotResolved(path) else: + # else branch can be reached only for blob artifact = ca.artifact if artifact: return await Registry._dispatch(artifact, headers) @@ -349,31 +353,28 @@ async def init_remote_blob(self): async def download_manifest(self, run_pipeline=False): response = await self.run_manifest_downloader() - with open(response.path) as f: - raw_data = f.read() - - response.artifact_attributes["file"] = response.path - saved_artifact = await save_artifact(response.artifact_attributes) + with open(response.path, mode="r") as f: + raw_text_data = f.read() if run_pipeline: - await self.run_pipeline(saved_artifact) + await self.run_pipeline(raw_text_data) try: - manifest_data = json.loads(raw_data) + manifest_data = json.loads(raw_text_data) except json.decoder.JSONDecodeError: raise PathNotResolved(self.identifier) media_type = determine_media_type(manifest_data, response) if media_type in (MEDIA_TYPE.MANIFEST_V1_SIGNED, MEDIA_TYPE.MANIFEST_V1): - digest = calculate_digest(raw_data) + digest = calculate_digest(raw_text_data) else: digest = f"sha256:{response.artifact_attributes['sha256']}" if media_type not in (MEDIA_TYPE.MANIFEST_LIST, MEDIA_TYPE.INDEX_OCI): # add the manifest and blobs to the repository to be able to stream it # in the next round when a client approaches the registry - await self.init_pending_content(digest, manifest_data, media_type, saved_artifact) + await self.init_pending_content(digest, manifest_data, raw_text_data, media_type) - return raw_data, digest, media_type + return raw_text_data, digest, media_type async def run_manifest_downloader(self): if self.downloader is None: @@ -396,7 +397,7 @@ async def run_manifest_downloader(self): return response - async def run_pipeline(self, saved_artifact): + async def run_pipeline(self, raw_text_manifest_data): set_guid(generate_guid()) await sync_to_async(dispatch)( download_image_data, @@ -404,12 +405,12 @@ async def run_pipeline(self, saved_artifact): kwargs={ "repository_pk": self.repository_version.repository.pk, "remote_pk": self.remote.pk, - "manifest_artifact_pk": saved_artifact.pk, + "raw_text_manifest_data": raw_text_manifest_data, "tag_name": self.identifier, }, ) - async def init_pending_content(self, digest, manifest_data, media_type, artifact): + async def init_pending_content(self, digest, manifest_data, raw_text_data, media_type): if config := manifest_data.get("config", None): config_digest = config["digest"] config_blob = await self.save_config_blob(config_digest) @@ -419,11 +420,14 @@ async def init_pending_content(self, digest, manifest_data, media_type, artifact manifest = Manifest( digest=digest, - schema_version=2 - if manifest_data["mediaType"] in (MEDIA_TYPE.MANIFEST_V2, MEDIA_TYPE.MANIFEST_OCI) - else 1, + schema_version=( + 2 + if manifest_data["mediaType"] in (MEDIA_TYPE.MANIFEST_V2, MEDIA_TYPE.MANIFEST_OCI) + else 1 + ), media_type=media_type, config_blob=config_blob, + data=raw_text_data, ) # skip if media_type of schema1 @@ -441,12 +445,6 @@ async def init_pending_content(self, digest, manifest_data, media_type, artifact blob = await self.save_blob(layer["digest"], manifest) await sync_to_async(self.repository.pending_blobs.add)(blob) - content_artifact = ContentArtifact( - artifact=artifact, content=manifest, relative_path=manifest.digest - ) - with suppress(IntegrityError): - await content_artifact.asave() - async def save_blob(self, digest, manifest): blob = Blob(digest=digest) try: diff --git a/pulp_container/app/registry_api.py b/pulp_container/app/registry_api.py index 365aa7a0f..e4533a14b 100644 --- a/pulp_container/app/registry_api.py +++ b/pulp_container/app/registry_api.py @@ -4,6 +4,7 @@ . _Plugin Writer's Guide: http://docs.pulpproject.org/plugins/plugin-writer/index.html """ + import base64 import binascii import json @@ -1133,9 +1134,10 @@ def put(self, request, path, pk=None): manifest_digest = "sha256:{id}".format(id=artifact.sha256) with storage.open(artifact.file.name) as artifact_file: - raw_data = artifact_file.read() + raw_bytes_data = artifact_file.read() - content_data = json.loads(raw_data) + raw_text_data = raw_bytes_data.decode("utf-8") + content_data = json.loads(raw_text_data) media_type = determine_media_type(content_data, request) validate_manifest(content_data, media_type, manifest_digest) @@ -1168,8 +1170,8 @@ def put(self, request, path, pk=None): if (len(manifests) - found_manifests.count()) != 0: ManifestInvalid(digest=manifest_digest) - manifest_list = self._init_manifest(manifest_digest, media_type) - manifest_list = self._save_manifest(manifest_list, artifact) + manifest_list = self._init_manifest(manifest_digest, media_type, raw_text_data) + manifest_list = self._save_manifest(manifest_list) manifests_to_list = [] for manifest in found_manifests: @@ -1249,10 +1251,10 @@ def put(self, request, path, pk=None): raise ManifestInvalid(digest=manifest_digest) config_blob = found_config_blobs.first() - manifest = self._init_manifest(manifest_digest, media_type, config_blob) + manifest = self._init_manifest(manifest_digest, media_type, raw_text_data, config_blob) manifest.init_metadata(manifest_data=content_data) - manifest = self._save_manifest(manifest, artifact) + manifest = self._save_manifest(manifest) thru = [] for blob in found_blobs: @@ -1306,29 +1308,22 @@ def put(self, request, path, pk=None): repository.pending_manifests.add(manifest) return ManifestResponse(manifest, path, request, status=201) - def _init_manifest(self, manifest_digest, media_type, config_blob=None): + def _init_manifest(self, manifest_digest, media_type, raw_text_data, config_blob=None): return models.Manifest( digest=manifest_digest, schema_version=2, media_type=media_type, config_blob=config_blob, + data=raw_text_data, ) - def _save_manifest(self, manifest, artifact): + def _save_manifest(self, manifest): try: manifest.save() except IntegrityError: manifest = models.Manifest.objects.get(digest=manifest.digest) manifest.touch() - ca = ContentArtifact(artifact=artifact, content=manifest, relative_path=manifest.digest) - try: - ca.save() - except IntegrityError: - # re-upload artifact in case it was previously removed. - ca = ContentArtifact.objects.get(content=manifest, relative_path=manifest.digest) - if not ca.artifact: - ca.artifact = artifact - ca.save(update_fields=["artifact"]) + return manifest def receive_artifact(self, chunk): diff --git a/pulp_container/app/serializers.py b/pulp_container/app/serializers.py index a1a5f5150..3cac522bd 100644 --- a/pulp_container/app/serializers.py +++ b/pulp_container/app/serializers.py @@ -56,7 +56,7 @@ class Meta: model = models.Tag -class ManifestSerializer(SingleArtifactContentSerializer): +class ManifestSerializer(NoArtifactContentSerializer): """ Serializer for Manifests. """ @@ -105,7 +105,7 @@ class ManifestSerializer(SingleArtifactContentSerializer): ) class Meta: - fields = SingleArtifactContentSerializer.Meta.fields + ( + fields = NoArtifactContentSerializer.Meta.fields + ( "digest", "schema_version", "media_type", diff --git a/pulp_container/app/tasks/builder.py b/pulp_container/app/tasks/builder.py index f18d574ac..58daa3796 100644 --- a/pulp_container/app/tasks/builder.py +++ b/pulp_container/app/tasks/builder.py @@ -13,6 +13,7 @@ Tag, ) from pulp_container.constants import MEDIA_TYPE +from pulp_container.app.utils import calculate_digest from pulpcore.plugin.models import Artifact, ContentArtifact, Content @@ -61,22 +62,24 @@ def add_image_from_directory_to_repository(path, repository, tag): """ manifest_path = os.path.join(path, "manifest.json") - manifest_artifact = Artifact.init_and_validate(manifest_path) - manifest_artifact.save() - manifest_digest = "sha256:{}".format(manifest_artifact.sha256) + + with open(manifest_path, "rb") as f: + bytes_data = f.read() + manifest_digest = calculate_digest(bytes_data) + manifest_text_data = bytes_data.decode("utf-8") + manifest = Manifest( - digest=manifest_digest, schema_version=2, media_type=MEDIA_TYPE.MANIFEST_OCI + digest=manifest_digest, + schema_version=2, + media_type=MEDIA_TYPE.MANIFEST_OCI, + data=manifest_text_data, ) manifest.save() - ContentArtifact( - artifact=manifest_artifact, content=manifest, relative_path=manifest_digest - ).save() tag = Tag(name=tag, tagged_manifest=manifest) tag.save() with repository.new_version() as new_repo_version: - manifest_json = json.load(manifest_artifact.file) - manifest_artifact.file.close() + manifest_json = json.loads(manifest_text_data) config_blob = get_or_create_blob(manifest_json["config"], manifest, path) manifest.config_blob = config_blob diff --git a/pulp_container/app/tasks/download_image_data.py b/pulp_container/app/tasks/download_image_data.py index 6c62c5501..6a741462e 100644 --- a/pulp_container/app/tasks/download_image_data.py +++ b/pulp_container/app/tasks/download_image_data.py @@ -1,7 +1,6 @@ import json import logging -from pulpcore.plugin.models import Artifact from pulpcore.plugin.stages import DeclarativeContent from pulp_container.app.models import ContainerRemote, ContainerRepository, Tag @@ -14,12 +13,11 @@ log = logging.getLogger(__name__) -def download_image_data(repository_pk, remote_pk, manifest_artifact_pk, tag_name): +def download_image_data(repository_pk, remote_pk, raw_text_manifest_data, tag_name): repository = ContainerRepository.objects.get(pk=repository_pk) remote = ContainerRemote.objects.get(pk=remote_pk) - manifest_artifact = Artifact.objects.get(pk=manifest_artifact_pk) log.info("Pulling cache: repository={r} remote={p}".format(r=repository.name, p=remote.name)) - first_stage = ContainerPullThroughFirstStage(remote, manifest_artifact, tag_name) + first_stage = ContainerPullThroughFirstStage(remote, raw_text_manifest_data, tag_name) dv = ContainerDeclarativeVersion(first_stage, repository) return dv.create() @@ -27,11 +25,11 @@ def download_image_data(repository_pk, remote_pk, manifest_artifact_pk, tag_name class ContainerPullThroughFirstStage(ContainerFirstStage): """The stage that prepares the pipeline for downloading a single tag and its related data.""" - def __init__(self, remote, manifest_artifact, tag_name): + def __init__(self, remote, raw_text_manifest_data, tag_name): """Initialize the stage with the artifact defined in content-app.""" super().__init__(remote, signed_only=False) self.tag_name = tag_name - self.manifest_artifact = manifest_artifact + self.raw_text_manifest_data = raw_text_manifest_data async def run(self): """Run the stage and create declarative content for one tag, its manifest, and blobs. @@ -42,14 +40,12 @@ async def run(self): tag_dc = DeclarativeContent(Tag(name=self.tag_name)) self.tag_dcs.append(tag_dc) - raw_data = self.manifest_artifact.file.read() - content_data = json.loads(raw_data) - self.manifest_artifact.file.close() + content_data = json.loads(self.raw_text_manifest_data) media_type = determine_media_type_from_json(content_data) if media_type in (MEDIA_TYPE.MANIFEST_LIST, MEDIA_TYPE.INDEX_OCI): list_dc = self.create_tagged_manifest_list( - self.tag_name, self.manifest_artifact, content_data, media_type + self.tag_name, content_data, self.raw_text_manifest_data, media_type ) for manifest_data in content_data.get("manifests"): listed_manifest = await self.create_listed_manifest(manifest_data) @@ -65,7 +61,7 @@ async def run(self): else: # Simple tagged manifest man_dc = self.create_tagged_manifest( - self.tag_name, self.manifest_artifact, content_data, raw_data, media_type + self.tag_name, content_data, self.raw_text_manifest_data, media_type ) tag_dc.extra_data["tagged_manifest_dc"] = man_dc await self.handle_blobs(man_dc, content_data) diff --git a/pulp_container/app/tasks/sign.py b/pulp_container/app/tasks/sign.py index 38329419a..e37dfb49b 100644 --- a/pulp_container/app/tasks/sign.py +++ b/pulp_container/app/tasks/sign.py @@ -99,16 +99,23 @@ async def create_signature(manifest, reference, signing_service): """ async with semaphore: # download and write file for object storage - artifact = await manifest._artifacts.aget() - if settings.DEFAULT_FILE_STORAGE != "pulpcore.app.models.storage.FileSystem": + if not manifest.data: + # TODO: BACKWARD COMPATIBILITY - remove after fully migrating to artifactless manifest + artifact = await manifest._artifacts.aget() + if settings.DEFAULT_FILE_STORAGE != "pulpcore.app.models.storage.FileSystem": + async with tempfile.NamedTemporaryFile(dir=".", mode="wb", delete=False) as tf: + await tf.write(await sync_to_async(artifact.file.read)()) + await tf.flush() + artifact.file.close() + manifest_path = tf.name + else: + manifest_path = artifact.file.path + # END OF BACKWARD COMPATIBILITY + else: async with tempfile.NamedTemporaryFile(dir=".", mode="wb", delete=False) as tf: - await tf.write(await sync_to_async(artifact.file.read)()) + await tf.write(manifest.data.encode("utf-8")) await tf.flush() - - artifact.file.close() manifest_path = tf.name - else: - manifest_path = artifact.file.path async with tempfile.NamedTemporaryFile(dir=".", prefix="signature") as tf: sig_path = tf.name diff --git a/pulp_container/app/tasks/sync_stages.py b/pulp_container/app/tasks/sync_stages.py index 000a13b0e..cb287b9ac 100644 --- a/pulp_container/app/tasks/sync_stages.py +++ b/pulp_container/app/tasks/sync_stages.py @@ -29,7 +29,6 @@ Tag, ) from pulp_container.app.utils import ( - save_artifact, extract_data_from_signature, urlpath_sanitize, determine_media_type, @@ -61,40 +60,39 @@ def __init__(self, remote, signed_only): self.manifest_dcs = [] self.signature_dcs = [] - async def _download_and_save_artifact_data(self, manifest_url): + async def _download_manifest_data(self, manifest_url): downloader = self.remote.get_downloader(url=manifest_url) response = await downloader.run(extra_data={"headers": V2_ACCEPT_HEADERS}) with open(response.path, "rb") as content_file: - raw_data = content_file.read() + raw_bytes_data = content_file.read() response.artifact_attributes["file"] = response.path - saved_artifact = await save_artifact(response.artifact_attributes) - content_data = json.loads(raw_data) + raw_text_data = raw_bytes_data.decode("utf-8") + content_data = json.loads(raw_bytes_data) - return saved_artifact, content_data, raw_data, response + return content_data, raw_text_data, response async def _check_for_existing_manifest(self, download_tag): response = await download_tag digest = response.headers.get("docker-content-digest") - if digest and ( - manifest := await Manifest.objects.prefetch_related("contentartifact_set") - .filter(digest=digest, _artifacts__isnull=False) - .afirst() - ): - saved_artifact = await manifest._artifacts.aget() - content_data, raw_data = await sync_to_async(get_content_data)(saved_artifact) + if digest and (manifest := await Manifest.objects.filter(digest=digest).afirst()): + raw_text_data = manifest.data + + # TODO: BACKWARD COMPATIBILITY - remove after fully migrating to artifactless manifest + if not raw_text_data: + saved_artifact = await manifest._artifacts.aget() + content_data, raw_bytes_data = await sync_to_async(get_content_data)(saved_artifact) + raw_text_data = raw_bytes_data.decode("utf-8") + # END OF BACKWARD COMPATIBILITY + else: + content_data = json.loads(raw_text_data) else: - ( - saved_artifact, - content_data, - raw_data, - response, - ) = await self._download_and_save_artifact_data(response.url) + content_data, raw_text_data, response = await self._download_manifest_data(response.url) - return saved_artifact, content_data, raw_data, response + return content_data, raw_text_data, response async def run(self): """ @@ -137,9 +135,9 @@ async def run(self): ] for artifact in asyncio.as_completed(to_download_artifact): - saved_artifact, content_data, raw_data, response = await artifact + content_data, raw_text_data, response = await artifact - digest = saved_artifact.sha256 + digest = calculate_digest(raw_text_data) # Look for cosign signatures # cosign signature has a tag convention 'sha256-1234.sig' @@ -165,8 +163,8 @@ async def run(self): tag_dc = DeclarativeContent(Tag(name=tag_name)) if media_type in (MEDIA_TYPE.MANIFEST_LIST, MEDIA_TYPE.INDEX_OCI): - list_dc = self.create_tagged_manifest_list( - tag_name, saved_artifact, content_data, media_type + list_dc = self.create_manifest_list( + content_data, raw_text_data, media_type, digest=digest ) for listed_manifest_task in asyncio.as_completed( [ @@ -215,8 +213,8 @@ async def run(self): else: # Simple tagged manifest - man_dc = self.create_tagged_manifest( - tag_name, saved_artifact, content_data, raw_data, media_type + man_dc = self.create_manifest( + content_data, raw_text_data, media_type, digest=digest ) if signature_source is not None: man_sig_dcs = await self.create_signatures(man_dc, signature_source) @@ -359,79 +357,56 @@ async def handle_blobs(self, manifest_dc, content_data): manifest_dc.extra_data["config_blob_dc"] = blob_dc await self.put(blob_dc) - def create_tagged_manifest_list(self, tag_name, saved_artifact, manifest_list_data, media_type): + def create_manifest_list(self, manifest_list_data, raw_text_data, media_type, digest=None): """ Create a ManifestList. Args: - tag_name (str): A name of a tag - saved_artifact (pulpcore.plugin.models.Artifact): A saved manifest's Artifact manifest_list_data (dict): Data about a ManifestList - media_type (str): The type of manifest + raw_text_data: (str): The raw JSON representation of the ManifestList + media_type (str): The type of the ManifestList + digest (str): The digest of the ManifestList """ - digest = f"sha256:{saved_artifact.sha256}" + if digest is None: + digest = calculate_digest(raw_text_data) + manifest_list = Manifest( digest=digest, schema_version=manifest_list_data["schemaVersion"], media_type=media_type, annotations=manifest_list_data.get("annotations", {}), + data=raw_text_data, ) - manifest_list_dc = self._create_manifest_declarative_content( - manifest_list, saved_artifact, tag_name, digest - ) + manifest_list_dc = DeclarativeContent(content=manifest_list) manifest_list_dc.extra_data["listed_manifests"] = [] return manifest_list_dc - def create_tagged_manifest(self, tag_name, saved_artifact, manifest_data, raw_data, media_type): + def create_manifest(self, manifest_data, raw_text_data, media_type, digest=None): """ Create an Image Manifest. Args: - tag_name (str): A name of a tag - saved_artifact (pulpcore.plugin.models.Artifact): A saved manifest's Artifact - manifest_data (dict): Data about a single new ImageManifest. - raw_data: (str): The raw JSON representation of the ImageManifest. - media_type (str): The type of a manifest + manifest_data (dict): Data about a single new ImageManifest + raw_text_data: (str): The raw JSON representation of the ImageManifest + media_type (str): The type of the ImageManifest + digest(str): THe digest of the ImageManifest """ - if media_type in (MEDIA_TYPE.MANIFEST_V2, MEDIA_TYPE.MANIFEST_OCI): - digest = f"sha256:{saved_artifact.sha256}" - else: - digest = calculate_digest(raw_data) + if digest is None: + digest = calculate_digest(raw_text_data) manifest = Manifest( digest=digest, schema_version=manifest_data["schemaVersion"], media_type=media_type, + data=raw_text_data, annotations=manifest_data.get("annotations", {}), ) - return self._create_manifest_declarative_content(manifest, saved_artifact, tag_name, digest) - - def _create_manifest_declarative_content(self, manifest, saved_artifact, tag_name, digest): - relative_url = f"/v2/{self.remote.namespaced_upstream_name}/manifests/" - da_digest = self._create_manifest_declarative_artifact( - relative_url + digest, saved_artifact, digest - ) - da_tag = self._create_manifest_declarative_artifact( - relative_url + tag_name, saved_artifact, digest - ) - - man_dc = DeclarativeContent(content=manifest, d_artifacts=[da_digest, da_tag]) - return man_dc - - def _create_manifest_declarative_artifact(self, relative_url, saved_artifact, digest): - url = urljoin(self.remote.url, relative_url) - da = DeclarativeArtifact( - artifact=saved_artifact, - url=url, - relative_path=digest, - remote=self.remote, - extra_data={"headers": V2_ACCEPT_HEADERS}, - ) - return da + manifest_dc = DeclarativeContent(content=manifest) + return manifest_dc def _create_signature_declarative_content( self, signature_raw, man_dc, name=None, signature_b64=None @@ -470,37 +445,33 @@ async def create_listed_manifest(self, manifest_data): ) manifest_url = urljoin(self.remote.url, relative_url) - if ( - manifest := await Manifest.objects.prefetch_related("contentartifact_set") - .filter(digest=digest, _artifacts__isnull=False) - .afirst() - ): - saved_artifact = await manifest._artifacts.aget() - content_data, _ = await sync_to_async(get_content_data)(saved_artifact) + if manifest := await Manifest.objects.filter(digest=digest).afirst(): + # TODO: BACKWARD COMPATIBILITY - remove after fully migrating to artifactless manifest + if not manifest.data: + saved_artifact = await manifest._artifacts.aget() + content_data, _ = await sync_to_async(get_content_data)(saved_artifact) + # END OF BACKWARD COMPATIBILITY + else: + content_data = json.loads(manifest.data) else: - saved_artifact, content_data, _, response = await self._download_and_save_artifact_data( - manifest_url - ) + content_data, raw_text_data, response = await self._download_manifest_data(manifest_url) media_type = determine_media_type(content_data, response) validate_manifest(content_data, media_type, digest) manifest = Manifest( digest=digest, - schema_version=2 - if content_data["mediaType"] in (MEDIA_TYPE.MANIFEST_V2, MEDIA_TYPE.MANIFEST_OCI) - else 1, + schema_version=( + 2 + if content_data["mediaType"] + in (MEDIA_TYPE.MANIFEST_V2, MEDIA_TYPE.MANIFEST_OCI) + else 1 + ), media_type=content_data["mediaType"], + data=raw_text_data, annotations=content_data.get("annotations", {}), ) - da = DeclarativeArtifact( - artifact=saved_artifact, - url=manifest_url, - relative_path=digest, - remote=self.remote, - extra_data={"headers": V2_ACCEPT_HEADERS}, - ) platform = {} p = manifest_data["platform"] platform["architecture"] = p["architecture"] @@ -509,10 +480,7 @@ async def create_listed_manifest(self, manifest_data): platform["variant"] = p.get("variant", "") platform["os.version"] = p.get("os.version", "") platform["os.features"] = p.get("os.features", "") - man_dc = DeclarativeContent( - content=manifest, - d_artifacts=[da], - ) + man_dc = DeclarativeContent(content=manifest) return {"manifest_dc": man_dc, "platform": platform, "content_data": content_data} def create_blob(self, blob_data, deferred_download=True): diff --git a/pulp_container/app/utils.py b/pulp_container/app/utils.py index 06834c02a..71aaa35be 100644 --- a/pulp_container/app/utils.py +++ b/pulp_container/app/utils.py @@ -225,13 +225,16 @@ def calculate_digest(manifest): Calculate the requested digest of the ImageManifest, given in JSON. Args: - manifest (str): The raw JSON representation of the Manifest. + manifest (str | bytes): The raw JSON representation of the Manifest. Returns: str: The digest of the given ImageManifest """ decoded_manifest = json.loads(manifest) + if isinstance(manifest, str): + manifest = manifest.encode("utf-8") + if "signatures" in decoded_manifest: # This manifest contains signatures. Unfortunately, the Docker manifest digest # is calculated on the unsigned version of the Manifest so we need to remove the