diff --git a/README.md b/README.md index ee45d44..757ce1d 100644 --- a/README.md +++ b/README.md @@ -39,6 +39,11 @@ Unit tests are in the `tests` folder and can be run with: python -m unittest discover -s tests ``` +Run single test: +``` +python -m unittest tests.test_video.TestVideo.test_vtt_autogenerated +``` + Retrieve large.jpg as 800px wide JPEG * http://127.0.0.1:8080/iiif/large.jpg/full/800,/0/default.jpg diff --git a/iiify/resolver.py b/iiify/resolver.py index 986c538..e12a3b5 100644 --- a/iiify/resolver.py +++ b/iiify/resolver.py @@ -506,6 +506,7 @@ def create_manifest3(identifier, domain=None, page=None): # sort the files into originals and derivatives, splitting the derivatives into buckets based on the original originals = [] derivatives = {} + vttfiles = {} for f in metadata['files']: if f['source'] == 'derivative': if f['original'] in derivatives: @@ -514,6 +515,14 @@ def create_manifest3(identifier, domain=None, page=None): derivatives[f['original']] = {f['format']: f} elif f['source'] == 'original': originals.append(f) + + if f['format'] == 'Web Video Text Tracks': + # Strip the trailing '.<tag>.vtt' (tag = ASCII letters/hyphens) to get the grouping key. Example: cruz-test.en.vtt and 34C3_-_International_Image_Interoperability_Framework_IIIF_Kulturinstitutionen_schaffen_interop-SvH4fbjOT0A.autogenerated.vtt + sourceFilename = re.sub(r'\.[a-zA-Z-]*\.vtt$', '', f['name']) + if sourceFilename not in vttfiles: + vttfiles[sourceFilename] = [] + + vttfiles[sourceFilename].append(f) # create the canvases for each original for file in [f for f in originals if f['format'] in ['MPEG4', 'h.264 MPEG4', '512Kb MPEG4', 'HiRes MPEG4', 'MPEG2', 'h.264', 'Matroska', 'Ogg Video', 'Ogg Theora', 'WebM', 'Windows Media', 'Cinepack']]: @@ -522,6 +531,32 @@ def create_manifest3(identifier, domain=None, page=None): c_id = f"{URI_PRIFIX}/{identifier}/{slugged_id}/canvas" c = Canvas(id=c_id, label=normalised_id, duration=float(file['length']), height=int(file['height']), width=int(file['width'])) + # Add vtt if present + if vttfiles and normalised_id in 
vttfiles: + vttAPId = f"{URI_PRIFIX}/{identifier}/{slugged_id}/vtt" + + vttNo = 1 + for vttFile in vttfiles[normalised_id]: + vtAnno = c.make_annotation(id=f"{URI_PRIFIX}/{identifier}/{slugged_id}/annotation/vtt/{vttNo}", + motivation="supplementing", + target=c.id, + anno_page_id=vttAPId, + body={"id": f"{domain}resource/{identifier}/{vttFile['name']}", + "type": "Text", + "format": "text/vtt", + }) + # add label and language + if vttFile['name'].endswith("autogenerated.vtt"): + vtAnno.body.label = { 'en': ['autogenerated']} + else: + # Assume language + splitName = vttFile['name'].split(".") + lang = splitName[-2] + vtAnno.body.add_label(lang, language="none") + vtAnno.body.language = lang + + vttNo += 1 + # create intermediary objects ap = AnnotationPage(id=f"{URI_PRIFIX}/{identifier}/{slugged_id}/page") anno = Annotation(id=f"{URI_PRIFIX}/{identifier}/{slugged_id}/annotation", motivation="painting", target=c.id) diff --git a/nginx-vhost.conf b/nginx-vhost.conf index bf7645e..0819c31 100644 --- a/nginx-vhost.conf +++ b/nginx-vhost.conf @@ -37,4 +37,11 @@ server { # Reverse proxy with the variables captured above proxy_pass https://cantaloupe.prod.archive.org/iiif/$1/$2; } + + location /iiif/resource/ { + add_header 'Access-Control-Allow-Origin' '*' always; + add_header 'Access-Control-Allow-Methods' 'GET, HEAD, POST, PUT, PATCH, DELETE' always; + # https://archive.org/download/cruz-test/cruz-test.af.vtt + proxy_pass https://archive.org/download/; + } } diff --git a/tests/test_manifests.py b/tests/test_manifests.py index 3fa25ba..3e82cfc 100644 --- a/tests/test_manifests.py +++ b/tests/test_manifests.py @@ -42,21 +42,13 @@ def test_v3_single_text_manifest(self): self.assertEqual(manifest['type'], "Manifest", f"Unexpected type. 
Expected Manifest go {manifest['type']}") self.assertEqual(len(manifest['items']),1,f"Expected 1 canvas but got: {len(manifest['items'])}") - def test_v3_vermont_Life_Magazine(self): resp = self.test_app.get("/iiif/3/rbmsbk_ap2-v4_2001_V55N4/manifest.json") self.assertEqual(resp.status_code, 200) manifest = resp.json self.assertEqual(len(manifest['items']),116,f"Expected 116 canvas but got: {len(manifest['items'])}") - - def test_v3_single_video_manifest(self): - resp = self.test_app.get("/iiif/3/youtube-7w8F2Xi3vFw/manifest.json") - self.assertEqual(resp.status_code, 200) - manifest = resp.json - - self.assertEqual(len(manifest['items']),1,f"Expected 1 canvas but got: {len(manifest['items'])}") - + #logic to cover etree mediatype github issue #123 def test_v3_etree_mediatype(self): resp = self.test_app.get("/iiif/3/gd72-04-14.aud.vernon.23662.sbeok.shnf/manifest.json") @@ -66,7 +58,6 @@ def test_v3_etree_mediatype(self): self.assertEqual(len(manifest['items']),36,f"Expected 36 canvases but got: {len(manifest['items'])}") self.assertEqual(manifest['items'][0]['items'][0]['items'][0]['body']['items'][0]['type'],"Sound",f"Expected 'Sound' but got: {manifest['items'][0]['items'][0]['items'][0]['body']['items'][0]['type']}") - def test_v3_64Kbps_MP3(self): resp = self.test_app.get("/iiif/3/TvQuran.com__Alafasi/manifest.json") self.assertEqual(resp.status_code, 200) @@ -74,7 +65,6 @@ def test_v3_64Kbps_MP3(self): self.assertEqual(len(manifest['items']),114,f"Expected 114 canvases but got: {len(manifest['items'])}") self.assertEqual("64Kbps MP3".lower() in resp.text.lower(), True, f"Expected the string '64Kbps MP3'") - def test_v3_128Kbps_MP3(self): resp = self.test_app.get("/iiif/3/alice_in_wonderland_librivox/manifest.json") self.assertEqual(resp.status_code, 200) @@ -82,14 +72,6 @@ def test_v3_128Kbps_MP3(self): self.assertEqual(len(manifest['items']),12,f"Expected 12 canvases but got: {len(manifest['items'])}") self.assertEqual("128kbps mp3".lower() in 
resp.text.lower(), True, f"Expected the string '128kbps mp3'") - def test_v3_h264_MPEG4_OGG_Theora(self): - resp = self.test_app.get("/iiif/3/taboca_201002_03/manifest.json") - self.assertEqual(resp.status_code, 200) - manifest = resp.json - self.assertEqual(len(manifest['items']),251,f"Expected 251 canvases but got: {len(manifest['items'])}") - self.assertEqual("h.264 MPEG4".lower() in resp.text.lower(), True, f"Expected the string 'h.264 MPEG4'") - self.assertEqual("OGG Theora".lower() in resp.text.lower(), True, f"Expected the string 'OGG Theora'") - def test_v3_aiff(self): resp = self.test_app.get("/iiif/3/PDextend_AIFF/manifest.json") self.assertEqual(resp.status_code, 200) diff --git a/tests/test_video.py b/tests/test_video.py new file mode 100644 index 0000000..7644f81 --- /dev/null +++ b/tests/test_video.py @@ -0,0 +1,68 @@ +import unittest +from flask.testing import FlaskClient +from iiify.app import app + +class TestVideo(unittest.TestCase): + + def setUp(self) -> None: + self.test_app = FlaskClient(app) + + def test_v3_single_video_manifest(self): + resp = self.test_app.get("/iiif/3/youtube-7w8F2Xi3vFw/manifest.json") + self.assertEqual(resp.status_code, 200) + manifest = resp.json + + self.assertEqual(len(manifest['items']),1,f"Expected 1 canvas but got: {len(manifest['items'])}") + + def test_v3_h264_MPEG4_OGG_Theora(self): + resp = self.test_app.get("/iiif/3/taboca_201002_03/manifest.json") + self.assertEqual(resp.status_code, 200) + manifest = resp.json + self.assertEqual(len(manifest['items']),251,f"Expected 251 canvases but got: {len(manifest['items'])}") + self.assertEqual("h.264 MPEG4".lower() in resp.text.lower(), True, f"Expected the string 'h.264 MPEG4'") + self.assertEqual("OGG Theora".lower() in resp.text.lower(), True, f"Expected the string 'OGG Theora'") + + def test_vtt_autogenerated(self): + resp = self.test_app.get("/iiif/3/youtube-SvH4fbjOT0A/manifest.json?recache=true") + self.assertEqual(resp.status_code, 200) + manifest = resp.json 
+ + self.assertEqual(len(manifest['items']),1,f"Expected 1 canvas but got: {len(manifest['items'])}") + self.assertTrue('annotations' in manifest['items'][0], "Expected annotations in manifest") + self.assertTrue(isinstance(manifest['items'][0]['annotations'], list), "Expected annotations to be a list") + self.assertEqual(len(manifest['items'][0]['annotations']), 1, "Expected 1 item in annotations") + annotationPage = manifest['items'][0]['annotations'][0] + self.assertEqual(annotationPage['type'], 'AnnotationPage', "Expected annotations to contain annotation page") + + self.assertTrue('items' in annotationPage and isinstance(annotationPage['items'],list) and len(annotationPage['items']) == 1, f"Expected annotation page to contain a list of items which contains 1 item. Found {annotationPage['items']}") + annotation = annotationPage['items'][0] + self.assertEqual(annotation['type'], 'Annotation', "Expected annotationPage to contain annotations") + self.assertEqual(annotation['motivation'], 'supplementing', "Expected annotation to have the supplementing annotation") + self.assertTrue('body' in annotation, "Expected annotation to have a body") + body = annotation['body'] + self.assertEqual(body['type'],'Text', "Expected body to have a type text") + self.assertEqual(body['format'],'text/vtt', "Expected body to have a type text") + self.assertEqual(body['label']['en'][0], "autogenerated", "Expected VTT file to have the label autogenerated") + self.assertFalse("language" in body, "We don't know the language for this item so there shouldn't be a language specified") + self.assertEqual(body['id'], "https://localhost/iiif/resource/youtube-SvH4fbjOT0A/34C3_-_International_Image_Interoperability_Framework_IIIF_Kulturinstitutionen_schaffen_interop-SvH4fbjOT0A.autogenerated.vtt","Unexpected URL for the VTT file") + + def test_vtt_multilingual(self): + resp = self.test_app.get("/iiif/3/cruz-test/manifest.json?recache=true") + self.assertEqual(resp.status_code, 200) + manifest = 
resp.json + + canvas = manifest['items'][0] + self.assertTrue('annotations' in canvas, 'Expected annotations in Canvas') + self.assertEqual(len(canvas['annotations']), 1, 'Expected one AnnotationPage') + annotations = canvas['annotations'][0]['items'] + self.assertEqual(len(annotations), 104, 'Expected all 104 languages') + + # Check Welsh + for item in annotations: + self.assertTrue('language' in item['body'], f"All vtt files should have a language: {item}") + if item['body']['language'] == 'cy': + self.assertEqual(item['body']['id'], 'https://localhost/iiif/resource/cruz-test/cruz-test.cy.vtt', 'Unexpected link for the Welsh vtt file') + + +if __name__ == '__main__': + unittest.main() \ No newline at end of file