From b45bd71e50c4559ff7ce2405d1dc72e63bdfe66e Mon Sep 17 00:00:00 2001
From: Glen Robson
Date: Fri, 26 Apr 2024 12:55:09 +0100
Subject: [PATCH] Adding support for autogenerated vtt files

---
Review notes (text in this section is ignored by `git am`):
 * Language-tagged caption files (e.g. cruz-test.en.vtt) are now matched to
   their video. The previous str.replace('[a-z][a-z].vtt', '') call compared
   the pattern literally and therefore never stripped a language code.
 * Caption files that belong to the same video are appended to a list instead
   of overwriting one another.
 * Line breaks and indentation were reconstructed from a whitespace-collapsed
   copy of this patch; confirm the context lines against the target tree.
 * The blob ids on the "index" lines are those of the original commit.

 iiify/resolver.py   | 45 +++++++++++++++++++++++++++++++++++++++++++++
 tests/test_video.py | 26 ++++++++++++++++++++++++++
 2 files changed, 71 insertions(+)

diff --git a/iiify/resolver.py b/iiify/resolver.py
index 986c538..8208438 100644
--- a/iiify/resolver.py
+++ b/iiify/resolver.py
@@ -506,6 +506,7 @@ def create_manifest3(identifier, domain=None, page=None):
         # sort the files into originals and derivatives, splitting the derivatives into buckets based on the original
         originals = []
         derivatives = {}
+        vttfiles = {}
         for f in metadata['files']:
             if f['source'] == 'derivative':
                 if f['original'] in derivatives:
@@ -514,6 +515,24 @@ def create_manifest3(identifier, domain=None, page=None):
                     derivatives[f['original']] = {f['format']: f}
             elif f['source'] == 'original':
                 originals.append(f)
+
+            if f['format'] == 'Web Video Text Tracks':
+                # Key each caption file by the video it accompanies: the file name minus
+                # the ".vtt" extension and the track qualifier in front of it.
+                # Example: 34C3_-_International_Image_Interoperability_Framework_IIIF_Kulturinstitutionen_schaffen_interop-SvH4fbjOT0A.autogenerated.vtt
+                # Example: cruz-test.en.vtt
+                # str.replace() matches its argument literally, so the two-letter language
+                # code has to be checked explicitly rather than with a '[a-z][a-z]' pattern.
+                sourceFilename = f['name']
+                nameParts = sourceFilename.rsplit('.', 2)
+                if len(nameParts) == 3 and nameParts[2] == 'vtt':
+                    qualifier = nameParts[1]
+                    isLangCode = len(qualifier) == 2 and qualifier.isalpha() and qualifier.islower()
+                    if qualifier == 'autogenerated' or isLangCode:
+                        sourceFilename = nameParts[0]
+
+                # Append instead of assigning so a video keeps every caption track it has.
+                vttfiles.setdefault(sourceFilename, []).append(f)
 
         # create the canvases for each original
         for file in [f for f in originals if f['format'] in ['MPEG4', 'h.264 MPEG4', '512Kb MPEG4', 'HiRes MPEG4', 'MPEG2', 'h.264', 'Matroska', 'Ogg Video', 'Ogg Theora', 'WebM', 'Windows Media', 'Cinepack']]:
@@ -522,6 +541,32 @@ def create_manifest3(identifier, domain=None, page=None):
             c_id = f"{URI_PRIFIX}/{identifier}/{slugged_id}/canvas"
             c = Canvas(id=c_id, label=normalised_id, duration=float(file['length']), height=int(file['height']), width=int(file['width']))
 
+            # Add vtt if present
+            if vttfiles and normalised_id in vttfiles:
+                vttAPId = f"{URI_PRIFIX}/{identifier}/{slugged_id}/vtt"
+
+                vttNo = 1
+                for vttFile in vttfiles[normalised_id]:
+                    vtAnno = c.make_annotation(id=f"{URI_PRIFIX}/{identifier}/{slugged_id}/annotation/vtt/{vttNo}",
+                                               motivation="supplementing",
+                                               target=c.id,
+                                               anno_page_id=vttAPId,
+                                               body={"id": f"https://archive.org/download/{identifier}/{vttFile['name']}",
+                                                     "type": "Text",
+                                                     "format": "text/vtt",
+                                                     })
+                    # add label and language
+                    if vttFile['name'].endswith("autogenerated.vtt"):
+                        vtAnno.body.label = { 'en': ['autogenerated']}
+                    else:
+                        # Assume language
+                        splitName = vttFile['name'].split(".")
+                        lang = splitName[-2]
+                        vtAnno.body.add_label(lang, language=lang)
+                        vtAnno.body.language = lang
+
+                    vttNo += 1
+
             # create intermediary objects
             ap = AnnotationPage(id=f"{URI_PRIFIX}/{identifier}/{slugged_id}/page")
             anno = Annotation(id=f"{URI_PRIFIX}/{identifier}/{slugged_id}/annotation", motivation="painting", target=c.id)
diff --git a/tests/test_video.py b/tests/test_video.py
index 869ccc6..bfd4e45 100644
--- a/tests/test_video.py
+++ b/tests/test_video.py
@@ -22,5 +22,31 @@ def test_v3_h264_MPEG4_OGG_Theora(self):
         self.assertEqual("h.264 MPEG4".lower() in resp.text.lower(), True, f"Expected the string 'h.264 MPEG4'")
         self.assertEqual("OGG Theora".lower() in resp.text.lower(), True, f"Expected the string 'OGG Theora'")
 
+    def test_vtt_autogenerated(self):
+        """An autogenerated VTT file is exposed as one supplementing annotation on the canvas."""
+        resp = self.test_app.get("/iiif/3/youtube-SvH4fbjOT0A/manifest.json?recache=true")
+        self.assertEqual(resp.status_code, 200)
+        manifest = resp.json
+
+        self.assertEqual(len(manifest['items']),1,f"Expected 1 canvas but got: {len(manifest['items'])}")
+        self.assertTrue('annotations' in manifest['items'][0], "Expected annotations in manifest")
+        self.assertTrue(isinstance(manifest['items'][0]['annotations'], list), "Expected annotations to be a list")
+        self.assertEqual(len(manifest['items'][0]['annotations']), 1, "Expected 1 item in annotations")
+        annotationPage = manifest['items'][0]['annotations'][0]
+        self.assertEqual(annotationPage['type'], 'AnnotationPage', "Expected annotations to contain annotation page")
+
+        self.assertTrue('items' in annotationPage and isinstance(annotationPage['items'],list) and len(annotationPage['items']) == 1, f"Expected annotation page to contain a list of items which contains 1 item. Found {annotationPage['items']}")
+        annotation = annotationPage['items'][0]
+        self.assertEqual(annotation['type'], 'Annotation', "Expected annotationPage to contain annotations")
+        self.assertEqual(annotation['motivation'], 'supplementing', "Expected annotation to have the supplementing annotation")
+        self.assertTrue('body' in annotation, "Expected annotation to have a body")
+        body = annotation['body']
+        self.assertEqual(body['type'],'Text', "Expected body to have a type text")
+        self.assertEqual(body['format'],'text/vtt', "Expected body to have the format text/vtt")
+        self.assertEqual(body['label']['en'][0], "autogenerated", "Expected VTT file to have the label autogenerated")
+        self.assertFalse("language" in body, "We don't know the language for this item so there shouldn't be a language specified")
+        self.assertEqual(body['id'], "https://archive.org/download/youtube-SvH4fbjOT0A/34C3_-_International_Image_Interoperability_Framework_IIIF_Kulturinstitutionen_schaffen_interop-SvH4fbjOT0A.autogenerated.vtt","Unexpected URL for the VTT file")
+
+
 if __name__ == '__main__':
     unittest.main()
\ No newline at end of file