From 0cc45f709bdb493db0216d26ef670ff2f2d00c61 Mon Sep 17 00:00:00 2001 From: Glen Robson Date: Fri, 26 Apr 2024 11:37:09 +0100 Subject: [PATCH 1/8] Extracting video tests --- tests/test_manifests.py | 20 +------------------- tests/test_video.py | 26 ++++++++++++++++++++++++++ 2 files changed, 27 insertions(+), 19 deletions(-) create mode 100644 tests/test_video.py diff --git a/tests/test_manifests.py b/tests/test_manifests.py index 3fa25ba..3e82cfc 100644 --- a/tests/test_manifests.py +++ b/tests/test_manifests.py @@ -42,21 +42,13 @@ def test_v3_single_text_manifest(self): self.assertEqual(manifest['type'], "Manifest", f"Unexpected type. Expected Manifest go {manifest['type']}") self.assertEqual(len(manifest['items']),1,f"Expected 1 canvas but got: {len(manifest['items'])}") - def test_v3_vermont_Life_Magazine(self): resp = self.test_app.get("/iiif/3/rbmsbk_ap2-v4_2001_V55N4/manifest.json") self.assertEqual(resp.status_code, 200) manifest = resp.json self.assertEqual(len(manifest['items']),116,f"Expected 116 canvas but got: {len(manifest['items'])}") - - def test_v3_single_video_manifest(self): - resp = self.test_app.get("/iiif/3/youtube-7w8F2Xi3vFw/manifest.json") - self.assertEqual(resp.status_code, 200) - manifest = resp.json - - self.assertEqual(len(manifest['items']),1,f"Expected 1 canvas but got: {len(manifest['items'])}") - + #logic to cover etree mediatype github issue #123 def test_v3_etree_mediatype(self): resp = self.test_app.get("/iiif/3/gd72-04-14.aud.vernon.23662.sbeok.shnf/manifest.json") @@ -66,7 +58,6 @@ def test_v3_etree_mediatype(self): self.assertEqual(len(manifest['items']),36,f"Expected 36 canvases but got: {len(manifest['items'])}") self.assertEqual(manifest['items'][0]['items'][0]['items'][0]['body']['items'][0]['type'],"Sound",f"Expected 'Sound' but got: {manifest['items'][0]['items'][0]['items'][0]['body']['items'][0]['type']}") - def test_v3_64Kbps_MP3(self): resp = self.test_app.get("/iiif/3/TvQuran.com__Alafasi/manifest.json") 
self.assertEqual(resp.status_code, 200) @@ -74,7 +65,6 @@ def test_v3_64Kbps_MP3(self): self.assertEqual(len(manifest['items']),114,f"Expected 114 canvases but got: {len(manifest['items'])}") self.assertEqual("64Kbps MP3".lower() in resp.text.lower(), True, f"Expected the string '64Kbps MP3'") - def test_v3_128Kbps_MP3(self): resp = self.test_app.get("/iiif/3/alice_in_wonderland_librivox/manifest.json") self.assertEqual(resp.status_code, 200) @@ -82,14 +72,6 @@ def test_v3_128Kbps_MP3(self): self.assertEqual(len(manifest['items']),12,f"Expected 12 canvases but got: {len(manifest['items'])}") self.assertEqual("128kbps mp3".lower() in resp.text.lower(), True, f"Expected the string '128kbps mp3'") - def test_v3_h264_MPEG4_OGG_Theora(self): - resp = self.test_app.get("/iiif/3/taboca_201002_03/manifest.json") - self.assertEqual(resp.status_code, 200) - manifest = resp.json - self.assertEqual(len(manifest['items']),251,f"Expected 251 canvases but got: {len(manifest['items'])}") - self.assertEqual("h.264 MPEG4".lower() in resp.text.lower(), True, f"Expected the string 'h.264 MPEG4'") - self.assertEqual("OGG Theora".lower() in resp.text.lower(), True, f"Expected the string 'OGG Theora'") - def test_v3_aiff(self): resp = self.test_app.get("/iiif/3/PDextend_AIFF/manifest.json") self.assertEqual(resp.status_code, 200) diff --git a/tests/test_video.py b/tests/test_video.py new file mode 100644 index 0000000..869ccc6 --- /dev/null +++ b/tests/test_video.py @@ -0,0 +1,26 @@ +import unittest +from flask.testing import FlaskClient +from iiify.app import app + +class TestVideo(unittest.TestCase): + + def setUp(self) -> None: + self.test_app = FlaskClient(app) + + def test_v3_single_video_manifest(self): + resp = self.test_app.get("/iiif/3/youtube-7w8F2Xi3vFw/manifest.json") + self.assertEqual(resp.status_code, 200) + manifest = resp.json + + self.assertEqual(len(manifest['items']),1,f"Expected 1 canvas but got: {len(manifest['items'])}") + + def test_v3_h264_MPEG4_OGG_Theora(self): 
+ resp = self.test_app.get("/iiif/3/taboca_201002_03/manifest.json") + self.assertEqual(resp.status_code, 200) + manifest = resp.json + self.assertEqual(len(manifest['items']),251,f"Expected 251 canvases but got: {len(manifest['items'])}") + self.assertEqual("h.264 MPEG4".lower() in resp.text.lower(), True, f"Expected the string 'h.264 MPEG4'") + self.assertEqual("OGG Theora".lower() in resp.text.lower(), True, f"Expected the string 'OGG Theora'") + +if __name__ == '__main__': + unittest.main() \ No newline at end of file From 23e70c406595d4b3da0d6a6634c8fe9832ec1866 Mon Sep 17 00:00:00 2001 From: Glen Robson Date: Fri, 26 Apr 2024 12:54:28 +0100 Subject: [PATCH 2/8] Adding example for single test --- README.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/README.md b/README.md index ee45d44..757ce1d 100644 --- a/README.md +++ b/README.md @@ -39,6 +39,11 @@ Unit tests are in the `tests` folder and can be run with: python -m unittest discover -s tests ``` +Run single test: +``` +python -m unittest tests.test_video.TestVideo.test_vtt_autogenerated +``` + Retrieve large.jpg as 800px wide JPEG * http://127.0.0.1:8080/iiif/large.jpg/full/800,/0/default.jpg From b45bd71e50c4559ff7ce2405d1dc72e63bdfe66e Mon Sep 17 00:00:00 2001 From: Glen Robson Date: Fri, 26 Apr 2024 12:55:09 +0100 Subject: [PATCH 3/8] Adding support for autogenerated vtt files --- iiify/resolver.py | 35 +++++++++++++++++++++++++++++++++++ tests/test_video.py | 25 +++++++++++++++++++++++++ 2 files changed, 60 insertions(+) diff --git a/iiify/resolver.py b/iiify/resolver.py index 986c538..8208438 100644 --- a/iiify/resolver.py +++ b/iiify/resolver.py @@ -506,6 +506,7 @@ def create_manifest3(identifier, domain=None, page=None): # sort the files into originals and derivatives, splitting the derivatives into buckets based on the original originals = [] derivatives = {} + vttfiles = {} for f in metadata['files']: if f['source'] == 'derivative': if f['original'] in derivatives: @@ -514,6 +515,14 @@ 
def create_manifest3(identifier, domain=None, page=None): derivatives[f['original']] = {f['format']: f} elif f['source'] == 'original': originals.append(f) + + if f['format'] == 'Web Video Text Tracks': + # Example: 34C3_-_International_Image_Interoperability_Framework_IIIF_Kulturinstitutionen_schaffen_interop-SvH4fbjOT0A.autogenerated.vtt + sourceFilename = f['name'].replace('.autogenerated.vtt', '') + # Example: cruz-test.en.vtt + sourceFilename = sourceFilename.replace('[a-z][a-z].vtt', '') + + vttfiles[sourceFilename] = [f] # create the canvases for each original for file in [f for f in originals if f['format'] in ['MPEG4', 'h.264 MPEG4', '512Kb MPEG4', 'HiRes MPEG4', 'MPEG2', 'h.264', 'Matroska', 'Ogg Video', 'Ogg Theora', 'WebM', 'Windows Media', 'Cinepack']]: @@ -522,6 +531,32 @@ def create_manifest3(identifier, domain=None, page=None): c_id = f"{URI_PRIFIX}/{identifier}/{slugged_id}/canvas" c = Canvas(id=c_id, label=normalised_id, duration=float(file['length']), height=int(file['height']), width=int(file['width'])) + # Add vtt if present + if vttfiles and normalised_id in vttfiles: + vttAPId = f"{URI_PRIFIX}/{identifier}/{slugged_id}/vtt" + + vttNo = 1 + for vttFile in vttfiles[normalised_id]: + vtAnno = c.make_annotation(id=f"{URI_PRIFIX}/{identifier}/{slugged_id}/annotation/vtt/{vttNo}", + motivation="supplementing", + target=c.id, + anno_page_id=vttAPId, + body={"id": f"https://archive.org/download/{identifier}/{vttFile['name']}", + "type": "Text", + "format": "text/vtt", + }) + # add label and language + if vttFile['name'].endswith("autogenerated.vtt"): + vtAnno.body.label = { 'en': ['autogenerated']} + else: + # Assume langauge + splitName = vttFile['name'].split(".") + lang = splitName[-2] + vtAnno.body.add_label(lang, language=lang) + vtAnno.body.language = lang + + vttNo += 1 + # create intermediary objects ap = AnnotationPage(id=f"{URI_PRIFIX}/{identifier}/{slugged_id}/page") anno = 
Annotation(id=f"{URI_PRIFIX}/{identifier}/{slugged_id}/annotation", motivation="painting", target=c.id) diff --git a/tests/test_video.py b/tests/test_video.py index 869ccc6..bfd4e45 100644 --- a/tests/test_video.py +++ b/tests/test_video.py @@ -22,5 +22,30 @@ def test_v3_h264_MPEG4_OGG_Theora(self): self.assertEqual("h.264 MPEG4".lower() in resp.text.lower(), True, f"Expected the string 'h.264 MPEG4'") self.assertEqual("OGG Theora".lower() in resp.text.lower(), True, f"Expected the string 'OGG Theora'") + def test_vtt_autogenerated(self): + resp = self.test_app.get("/iiif/3/youtube-SvH4fbjOT0A/manifest.json?recache=true") + self.assertEqual(resp.status_code, 200) + manifest = resp.json + + self.assertEqual(len(manifest['items']),1,f"Expected 1 canvas but got: {len(manifest['items'])}") + self.assertTrue('annotations' in manifest['items'][0], "Expected annotations in manifest") + self.assertTrue(isinstance(manifest['items'][0]['annotations'], list), "Expected annotations to be a list") + self.assertEqual(len(manifest['items'][0]['annotations']), 1, "Expected 1 item in annotations") + annotationPage = manifest['items'][0]['annotations'][0] + self.assertEqual(annotationPage['type'], 'AnnotationPage', "Expected annotations to contain annotation page") + + self.assertTrue('items' in annotationPage and isinstance(annotationPage['items'],list) and len(annotationPage['items']) == 1, f"Expected annotation page to contain a list of items which contains 1 item. 
Found {annotationPage['items']}") + annotation = annotationPage['items'][0] + self.assertEqual(annotation['type'], 'Annotation', "Expected annotationPage to contain annotations") + self.assertEqual(annotation['motivation'], 'supplementing', "Expected annotation to have the supplementing annotation") + self.assertTrue('body' in annotation, "Expected annotation to have a body") + body = annotation['body'] + self.assertEqual(body['type'],'Text', "Expected body to have a type text") + self.assertEqual(body['format'],'text/vtt', "Expected body to have a type text") + self.assertEqual(body['label']['en'][0], "autogenerated", "Expected VTT file to have the label autogenerated") + self.assertFalse("language" in body, "We don't know the language for this item so there shouldn't be a language specified") + self.assertEqual(body['id'], "https://archive.org/download/youtube-SvH4fbjOT0A/34C3_-_International_Image_Interoperability_Framework_IIIF_Kulturinstitutionen_schaffen_interop-SvH4fbjOT0A.autogenerated.vtt","Unexpected URL for the VTT file") + + if __name__ == '__main__': unittest.main() \ No newline at end of file From f5df7febf50a02ca15eb8c5b83cb0fa67c116d96 Mon Sep 17 00:00:00 2001 From: Glen Robson Date: Thu, 16 May 2024 21:56:25 +0100 Subject: [PATCH 4/8] Adding support for multilingual vtt files --- iiify/resolver.py | 14 +++++++------- tests/test_video.py | 17 +++++++++++++++++ 2 files changed, 24 insertions(+), 7 deletions(-) diff --git a/iiify/resolver.py b/iiify/resolver.py index 8208438..ec31098 100644 --- a/iiify/resolver.py +++ b/iiify/resolver.py @@ -517,12 +517,12 @@ def create_manifest3(identifier, domain=None, page=None): originals.append(f) if f['format'] == 'Web Video Text Tracks': - # Example: 34C3_-_International_Image_Interoperability_Framework_IIIF_Kulturinstitutionen_schaffen_interop-SvH4fbjOT0A.autogenerated.vtt - sourceFilename = f['name'].replace('.autogenerated.vtt', '') - # Example: cruz-test.en.vtt - sourceFilename = 
sourceFilename.replace('[a-z][a-z].vtt', '') - - vttfiles[sourceFilename] = [f] + # Strip the trailing ".LANG.vtt" or ".autogenerated.vtt" to recover the source video name. Examples: cruz-test.en.vtt and 34C3_-_International_Image_Interoperability_Framework_IIIF_Kulturinstitutionen_schaffen_interop-SvH4fbjOT0A.autogenerated.vtt + sourceFilename = re.sub(r'\.[a-zA-Z-]*\.vtt$', '', f['name']) + if sourceFilename not in vttfiles: + vttfiles[sourceFilename] = [] + + vttfiles[sourceFilename].append(f) # create the canvases for each original for file in [f for f in originals if f['format'] in ['MPEG4', 'h.264 MPEG4', '512Kb MPEG4', 'HiRes MPEG4', 'MPEG2', 'h.264', 'Matroska', 'Ogg Video', 'Ogg Theora', 'WebM', 'Windows Media', 'Cinepack']]: @@ -552,7 +552,7 @@ def create_manifest3(identifier, domain=None, page=None): # Assume langauge splitName = vttFile['name'].split(".") lang = splitName[-2] - vtAnno.body.add_label(lang, language=lang) + vtAnno.body.add_label(lang, language="none") vtAnno.body.language = lang vttNo += 1 diff --git a/tests/test_video.py b/tests/test_video.py index bfd4e45..3ade5bb 100644 --- a/tests/test_video.py +++ b/tests/test_video.py @@ -46,6 +46,23 @@ def test_vtt_autogenerated(self): self.assertFalse("language" in body, "We don't know the language for this item so there shouldn't be a language specified") self.assertEqual(body['id'], "https://archive.org/download/youtube-SvH4fbjOT0A/34C3_-_International_Image_Interoperability_Framework_IIIF_Kulturinstitutionen_schaffen_interop-SvH4fbjOT0A.autogenerated.vtt","Unexpected URL for the VTT file") + def test_vtt_multilingual(self): + resp = self.test_app.get("/iiif/3/cruz-test/manifest.json?recache=true") + self.assertEqual(resp.status_code, 200) + manifest = resp.json + + canvas = manifest['items'][0] + self.assertTrue('annotations' in canvas, 'Expected annotations in Canvas') + self.assertEqual(len(canvas['annotations']), 1, 'Expected one AnnotationPage') + annotations = canvas['annotations'][0]['items'] + self.assertEqual(len(annotations), 104, 'Expected all 104 langues') + + # Check Welsh
+ for item in annotations: + self.assertTrue('language' in item['body'], f"All vtt files should have a language: {item}") + if item['body']['language'] == 'cy': + self.assertEqual(item['body']['id'], 'https://archive.org/download/cruz-test/cruz-test.cy.vtt', 'Unexpected link for the Welsh vtt file') + if __name__ == '__main__': unittest.main() \ No newline at end of file From 9682df7d0a494f7e12fcccc4ed31ba5e36628dbc Mon Sep 17 00:00:00 2001 From: Glen Robson Date: Fri, 17 May 2024 12:58:26 +0100 Subject: [PATCH 5/8] Adding proxy for IA download resources --- nginx-vhost.conf | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/nginx-vhost.conf b/nginx-vhost.conf index bf7645e..f9ca165 100644 --- a/nginx-vhost.conf +++ b/nginx-vhost.conf @@ -37,4 +37,9 @@ server { # Reverse proxy with the variables captured above proxy_pass https://cantaloupe.prod.archive.org/iiif/$1/$2; } + + location /iiif/resource/ { + # Example: /iiif/resource/cruz-test/cruz-test.af.vtt is proxied to https://archive.org/download/cruz-test/cruz-test.af.vtt + proxy_pass https://archive.org/download/; + } } From cd36dfef0905f2c67b1cc635d2fc784300fd2208 Mon Sep 17 00:00:00 2001 From: Glen Robson Date: Fri, 17 May 2024 13:10:09 +0100 Subject: [PATCH 6/8] Adding CORS headers --- nginx-vhost.conf | 2 ++ 1 file changed, 2 insertions(+) diff --git a/nginx-vhost.conf b/nginx-vhost.conf index f9ca165..0819c31 100644 --- a/nginx-vhost.conf +++ b/nginx-vhost.conf @@ -39,6 +39,8 @@ server { } location /iiif/resource/ { + add_header 'Access-Control-Allow-Origin' '*' always; + add_header 'Access-Control-Allow-Methods' 'GET, HEAD, OPTIONS' always; # Example: /iiif/resource/cruz-test/cruz-test.af.vtt is proxied to https://archive.org/download/cruz-test/cruz-test.af.vtt proxy_pass https://archive.org/download/; } From 12ace1bcf8710f9bdaa28af1176d03bd4de12ddf Mon Sep 17 00:00:00 2001 From: Glen Robson Date: Fri, 17 May 2024 16:06:32 +0100 Subject: [PATCH 7/8] Using proxy for vtt file --- iiify/resolver.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/iiify/resolver.py b/iiify/resolver.py index
ec31098..e12a3b5 100644 --- a/iiify/resolver.py +++ b/iiify/resolver.py @@ -541,7 +541,7 @@ def create_manifest3(identifier, domain=None, page=None): motivation="supplementing", target=c.id, anno_page_id=vttAPId, - body={"id": f"https://archive.org/download/{identifier}/{vttFile['name']}", + body={"id": f"{domain}resource/{identifier}/{vttFile['name']}", "type": "Text", "format": "text/vtt", }) @@ -549,7 +549,7 @@ def create_manifest3(identifier, domain=None, page=None): if vttFile['name'].endswith("autogenerated.vtt"): vtAnno.body.label = { 'en': ['autogenerated']} else: - # Assume langauge + # Assume language splitName = vttFile['name'].split(".") lang = splitName[-2] vtAnno.body.add_label(lang, language="none") From 65c97f909d16af65ffb82bf374816df5532b9189 Mon Sep 17 00:00:00 2001 From: Glen Robson Date: Fri, 17 May 2024 16:08:41 +0100 Subject: [PATCH 8/8] Proxying vtt files --- tests/test_video.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_video.py b/tests/test_video.py index 3ade5bb..7644f81 100644 --- a/tests/test_video.py +++ b/tests/test_video.py @@ -44,7 +44,7 @@ def test_vtt_autogenerated(self): self.assertEqual(body['format'],'text/vtt', "Expected body to have a type text") self.assertEqual(body['label']['en'][0], "autogenerated", "Expected VTT file to have the label autogenerated") self.assertFalse("language" in body, "We don't know the language for this item so there shouldn't be a language specified") - self.assertEqual(body['id'], "https://archive.org/download/youtube-SvH4fbjOT0A/34C3_-_International_Image_Interoperability_Framework_IIIF_Kulturinstitutionen_schaffen_interop-SvH4fbjOT0A.autogenerated.vtt","Unexpected URL for the VTT file") + self.assertEqual(body['id'], "https://localhost/iiif/resource/youtube-SvH4fbjOT0A/34C3_-_International_Image_Interoperability_Framework_IIIF_Kulturinstitutionen_schaffen_interop-SvH4fbjOT0A.autogenerated.vtt","Unexpected URL for the VTT file") def test_vtt_multilingual(self): 
resp = self.test_app.get("/iiif/3/cruz-test/manifest.json?recache=true") @@ -61,7 +61,7 @@ def test_vtt_multilingual(self): for item in annotations: self.assertTrue('language' in item['body'], f"All vtt files should have a language: {item}") if item['body']['language'] == 'cy': - self.assertEqual(item['body']['id'], 'https://archive.org/download/cruz-test/cruz-test.cy.vtt', 'Unexpected link for the Welsh vtt file') + self.assertEqual(item['body']['id'], 'https://localhost/iiif/resource/cruz-test/cruz-test.cy.vtt', 'Unexpected link for the Welsh vtt file') if __name__ == '__main__':