From 7c1c92366652794b5e3f34124f4c3ca78b5937ff Mon Sep 17 00:00:00 2001 From: Stefan Heinemann Date: Mon, 20 Jan 2025 17:05:06 +0100 Subject: [PATCH] PB-1091 Flag the inexistent files in the file size updater For some assets the file on the bucket is missing. This causes the file_size_upgrade cronjob to get stuck in an infinite loop, as it'll always try to update the next N assets. Therefore we flag these assets with a `file_size` of `None`, to avoid repeating the update and to be able to produce a list of missing files. --- .../commands/update_asset_file_size.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/app/stac_api/management/commands/update_asset_file_size.py b/app/stac_api/management/commands/update_asset_file_size.py index 8e2b03d3..a4591c99 100644 --- a/app/stac_api/management/commands/update_asset_file_size.py +++ b/app/stac_api/management/commands/update_asset_file_size.py @@ -45,6 +45,13 @@ def update(self): asset.save() print(".", end="", flush=True) except ClientError: + # We set file_size to None to indicate that this asset couldn't be + # found on the bucket. That way the script won't get stuck with the + # same 100 inexistent assets on one hand and we'll be able to + # produce a list of missing files on the other hand + asset.file_size = None + asset.save() + print("_", end="", flush=True) logger.error( 'file size could not be read from s3 bucket [%s] for asset %s', bucket, key ) @@ -56,7 +63,7 @@ def update(self): self.print_success( f"Update file size for {len(collection_assets)} collection assets out of " - "{total_asset_count}" + f"{total_asset_count}" ) for collection_asset in collection_assets: @@ -71,6 +78,13 @@ def update(self): collection_asset.save() print(".", end="", flush=True) except ClientError: + # We set file_size to None to indicate that this asset couldn't be + # found on the bucket. 
That way the script won't get stuck with the + # same 100 inexistent assets on one hand and we'll be able to + # produce a list of missing files on the other hand + collection_asset.file_size = None + collection_asset.save() + print("_", end="", flush=True) logger.error( 'file size could not be read from s3 bucket [%s] for collection asset %s' )