diff --git a/README.md b/README.md index ee45d44..757ce1d 100644 --- a/README.md +++ b/README.md @@ -39,6 +39,11 @@ Unit tests are in the `tests` folder and can be run with: python -m unittest discover -s tests ``` +Run single test: +``` +python -m unittest tests.test_video.TestVideo.test_vtt_autogenerated +``` + Retrieve large.jpg as 800px wide JPEG * http://127.0.0.1:8080/iiif/large.jpg/full/800,/0/default.jpg diff --git a/iiify/configs/__init__.py b/iiify/configs/__init__.py index ef4bce7..9bee0ff 100644 --- a/iiify/configs/__init__.py +++ b/iiify/configs/__init__.py @@ -14,6 +14,7 @@ import sys import types import configparser +import json path = os.path.dirname(os.path.realpath(__file__)) approot = os.path.abspath(os.path.join(path, os.pardir)) @@ -75,3 +76,6 @@ def getdef(self, section, option, default_value): "long": 432000, # 5 days "longest": 2592000 # 30 days } + +with open('%s/links.json' % path, 'r') as file: + LINKS = json.load(file) \ No newline at end of file diff --git a/iiify/configs/links.json b/iiify/configs/links.json new file mode 100644 index 0000000..58f2174 --- /dev/null +++ b/iiify/configs/links.json @@ -0,0 +1,137 @@ +{ + "Animated GIF": { + "field": "rendering", + "type": "Image", + "format": "image/gif" + }, + "Text PDF": { + "field": "rendering", + "type": "Text", + "format": "application/pdf" + }, + "Abbyy GZ": { + "field": "rendering", + "type": "Dataset", + "format": "application/gzip" + }, + "Archive BitTorrent": { + "field": "rendering", + "type": "Dataset", + "format": "application/x-bittorrent" + }, + "Grayscale PDF": { + "field": "rendering", + "type": "Text", + "format": "application/pdf" + }, + "chOCR": { + "field": "rendering", + "type": "Text", + "format": "application/gzip" + }, + "DjVuTXT": { + "field": "rendering", + "type": "Text", + "format": "text/plain" + }, + "Djvu XML": { + "field": "rendering", + "type": "Dataset", + "format": "application/xml" + }, + "hOCR": { + "field": "rendering", + "type": "Text", + "format": 
"text/html" + }, + "Single Page Processed JP2 ZIP": { + "field": "rendering", + "type": "Image", + "format": "application/zip" + }, + "OCR Search Text": { + "field": "rendering", + "type": "Text", + "format": "application/gzip" + }, + "Single Page Original JP2 Tar": { + "field": "rendering", + "type": "Image", + "format": "application/x-tar" + }, + "DjVu": { + "field": "rendering", + "type": "Image", + "format": "image/vnd.djvu" + }, + "Cloth Cover Detection Log": { + "field": "seeAlso", + "type": "Text", + "format": "text/plain" + }, + "Dublin Core": { + "field": "seeAlso", + "type": "Dataset", + "format": "application/xml" + }, + "OCR Page Index": { + "field": "seeAlso", + "type": "Dataset", + "format": "application/json" + }, + "MARC": { + "field": "seeAlso", + "type": "Dataset", + "format": "application/xml" + }, + "MARC Binary": { + "field": "seeAlso", + "type": "Dataset", + "format": "application/marc" + }, + "MARC Source": { + "field": "seeAlso", + "type": "Dataset", + "format": "application/xml" + }, + "Page Numbers JSON": { + "field": "seeAlso", + "type": "Dataset", + "format": "application/json" + }, + "Scandata": { + "field": "seeAlso", + "type": "Dataset", + "format": "application/xml" + }, + "SubRip": { + "field": "rendering", + "type": "Text", + "format": "text/plain" + }, + "Web Video Text Tracks": { + "field": "rendering", + "type": "Text", + "format": "text/vtt" + }, + "Intermediate ASR JSON": { + "field": "rendering", + "type": "Text", + "format": "application/json" + }, + "Whisper ASR JSON": { + "field": "rendering", + "type": "Text", + "format": "application/json" + }, + "Storj Upload Log": { + "field": "seeAlso", + "type": "Text", + "format": "text/plain" + }, + "Storj Upload Trigger": { + "field": "seeAlso", + "type": "Text", + "format": "text/plain" + } +} \ No newline at end of file diff --git a/iiify/resolver.py b/iiify/resolver.py index 53afdfd..a972e00 100644 --- a/iiify/resolver.py +++ b/iiify/resolver.py @@ -3,8 +3,9 @@ import os import 
def _download_url(identifier, name):
    """Return the archive.org download URL for file ``name`` of ``identifier``."""
    # Percent-encode the file name so names containing spaces, '#', '?' and
    # similar characters still produce a valid URL.  '/' is left alone
    # (quote() treats it as safe by default) so sub-directory paths survive.
    return f"{ARCHIVE}/download/{identifier}/{quote(name)}"


def _links_for_field(identifier, files, field):
    """Build the IIIF link dicts for every file that LINKS maps to ``field``.

    Args:
        identifier: item identifier used to build the download URL.
        files: iterable of file dicts; each is read for its ``name`` and
            ``format`` keys.
        field: the manifest property to select, e.g. ``'seeAlso'`` or
            ``'rendering'``; compared against the ``field`` value in LINKS.

    Returns:
        A list of dicts with ``id``, ``type``, ``label`` and ``format`` keys,
        in the same order as ``files``.  Files whose format is not listed in
        LINKS, or is listed for a different field, are left out.
    """
    entries = []
    for file in files:
        link = LINKS.get(file.get('format'))
        if link is None or link['field'] != field:
            continue
        entries.append({
            "id": _download_url(identifier, file['name']),
            "type": link['type'],
            # The file's format name doubles as the human readable label.
            "label": {"en": [file['format']]},
            "format": link['format'],
        })
    return entries


def addSeeAlso(manifest, identifier, files):
    """Set ``manifest.seeAlso`` for the item.

    The list always starts with a link to the item's metadata record and is
    followed by one entry per file whose format LINKS assigns to ``seeAlso``.

    Args:
        manifest: object that receives the ``seeAlso`` attribute.
        identifier: item identifier.
        files: iterable of file dicts with ``name`` and ``format`` keys.
    """
    manifest.seeAlso = [
        {"id": f"{ARCHIVE}/metadata/{identifier}",
         "type": "Metadata",
         "label": {"en": ["Item Metadata"]},
         "format": "application/json"},
    ] + _links_for_field(identifier, files, 'seeAlso')


def addRendering(manifest, identifier, files):
    """Set ``manifest.rendering`` to the files LINKS assigns to ``rendering``.

    The attribute is always assigned; it is an empty list when no file
    matches.

    Args:
        manifest: object that receives the ``rendering`` attribute.
        identifier: item identifier.
        files: iterable of file dicts with ``name`` and ``format`` keys.
    """
    manifest.rendering = _links_for_field(identifier, files, 'rendering')


def addThumbnails(manifest, identifier, files):
    """Set ``manifest.thumbnail`` from the files whose format is ``Thumbnail``.

    Nothing is assigned when the item has no thumbnail files, so the
    attribute is left untouched in that case.

    Args:
        manifest: object that receives the ``thumbnail`` attribute.
        identifier: item identifier.
        files: iterable of file dicts with ``name`` and ``format`` keys.
    """
    thumbnails = []
    for file in files:
        if file.get('format') != "Thumbnail":
            continue
        # Anything that is not a PNG is reported as JPEG; the extension check
        # ignores case so "cover.PNG" is not mislabelled.
        is_png = file['name'].lower().endswith('.png')
        thumbnails.append({
            "id": _download_url(identifier, file['name']),
            "type": "Image",
            "format": "image/png" if is_png else "image/jpeg",
        })

    if thumbnails:
        manifest.thumbnail = thumbnails
vttfiles[sourceFilename].append(f) # create the canvases for each original for file in [f for f in originals if f['format'] in ['MPEG4', 'h.264 MPEG4', '512Kb MPEG4', 'HiRes MPEG4', 'MPEG2', 'h.264', 'Matroska', 'Ogg Video', 'Ogg Theora', 'WebM', 'Windows Media', 'Cinepack']]: @@ -586,6 +658,32 @@ def create_manifest3(identifier, domain=None, page=None): c_id = f"{URI_PRIFIX}/{identifier}/{slugged_id}/canvas" c = Canvas(id=c_id, label=normalised_id, duration=float(file['length']), height=int(file['height']), width=int(file['width'])) + # Add vtt if present + if vttfiles and normalised_id in vttfiles: + vttAPId = f"{URI_PRIFIX}/{identifier}/{slugged_id}/vtt" + + vttNo = 1 + for vttFile in vttfiles[normalised_id]: + vtAnno = c.make_annotation(id=f"{URI_PRIFIX}/{identifier}/{slugged_id}/annotation/vtt/{vttNo}", + motivation="supplementing", + target=c.id, + anno_page_id=vttAPId, + body={"id": f"{domain}resource/{identifier}/{vttFile['name']}", + "type": "Text", + "format": "text/vtt", + }) + # add label and language + if vttFile['name'].endswith("autogenerated.vtt"): + vtAnno.body.label = { 'en': ['autogenerated']} + else: + # Assume language + splitName = vttFile['name'].split(".") + lang = splitName[-2] + vtAnno.body.add_label(lang, language="none") + vtAnno.body.language = lang + + vttNo += 1 + # create intermediary objects ap = AnnotationPage(id=f"{URI_PRIFIX}/{identifier}/{slugged_id}/page") anno = Annotation(id=f"{URI_PRIFIX}/{identifier}/{slugged_id}/annotation", motivation="painting", target=c.id) @@ -745,7 +843,6 @@ def ia_resolver(identifier): def cantaloupe_resolver(identifier): """Resolves an existing Image Service identifier to what it should be with the new Cantaloupe setup""" - leaf = None if "$" in identifier: identifier, leaf = identifier.split("$", 1) @@ -757,7 +854,6 @@ def cantaloupe_resolver(identifier): mediatype = metadata['metadata']['mediatype'].lower() files = metadata['files'] - if mediatype == "image": # single image file - find the 
class TestLinking(unittest.TestCase):
    """Check the rendering, seeAlso and thumbnail links of v3 manifests.

    Every test requests a manifest through the Flask test client and compares
    the link ids in it with the expected archive.org download URLs.
    """

    DOWNLOAD = "https://archive.org/download"

    # Expected links, keyed by label (the file format name) -> file name.
    IMAGE_RENDERINGS = {
        "Animated GIF": "journalofexpedit00ford.gif",
        "Text PDF": "journalofexpedit00ford.pdf",
        "Abbyy GZ": "journalofexpedit00ford_abbyy.gz",
        "Archive BitTorrent": "journalofexpedit00ford_archive.torrent",
        "Grayscale PDF": "journalofexpedit00ford_bw.pdf",
        "chOCR": "journalofexpedit00ford_chocr.html.gz",
        "DjVuTXT": "journalofexpedit00ford_djvu.txt",
        "Djvu XML": "journalofexpedit00ford_djvu.xml",
        "hOCR": "journalofexpedit00ford_hocr.html",
        "Single Page Processed JP2 ZIP": "journalofexpedit00ford_jp2.zip",
        "OCR Search Text": "journalofexpedit00ford_hocr_searchtext.txt.gz",
        "Single Page Original JP2 Tar": "journalofexpedit00ford_orig_jp2.tar",
        "DjVu": "journalofexpedit00ford.djvu",
    }
    IMAGE_SEEALSO = {
        "Cloth Cover Detection Log": "journalofexpedit00ford_cloth_detection.log",
        "Dublin Core": "journalofexpedit00ford_dc.xml",
        "OCR Page Index": "journalofexpedit00ford_hocr_pageindex.json.gz",
        "MARC": "journalofexpedit00ford_marc.xml",
        "MARC Binary": "journalofexpedit00ford_meta.mrc",
        "MARC Source": "journalofexpedit00ford_metasource.xml",
        "Page Numbers JSON": "journalofexpedit00ford_page_numbers.json",
        "Scandata": "journalofexpedit00ford_scandata.xml",
    }
    VIDEO_RENDERINGS = {
        "SubRip": "DuckandC1951.asr.srt",
        "Web Video Text Tracks": "DuckandC1951.asr.vtt",
        "Archive BitTorrent": "DuckandC1951_archive.torrent",
        "Intermediate ASR JSON": "DuckandC1951_intermediate_asr.json",
        "Whisper ASR JSON": "DuckandC1951_whisper_asr.json",
    }
    VIDEO_SEEALSO = {
        "Storj Upload Log": "DuckandC1951.storj-store.log",
        "Storj Upload Trigger": "DuckandC1951.storj-store.trigger",
    }

    def setUp(self) -> None:
        self.test_app = FlaskClient(app)

    def convertListToHash(self, items):
        """Index a list of link dicts by their first English label."""
        return {item['label']['en'][0]: item for item in items}

    def checkLink(self, links, field, name, value):
        """Assert that ``links`` has an entry ``name`` whose id is ``value``.

        ``field`` is only used to make the failure message readable.
        """
        self.assertIn(name, links, f"Expected to find {name} in {field}")
        self.assertEqual(links[name]['id'], value, f"Expected {value} in {links[name]}")

    def _get_manifest(self, identifier):
        """Fetch the v3 manifest for ``identifier`` and return the parsed JSON."""
        resp = self.test_app.get(f"/iiif/3/{identifier}/manifest.json?recache=true")
        self.assertEqual(resp.status_code, 200)
        return resp.json

    def _check_links(self, manifest, identifier, field, expected):
        """Check every expected link of one manifest field; return the label map."""
        self.assertIn(field, manifest, f"Expected {field} in Manifest")
        links = self.convertListToHash(manifest[field])
        for label, filename in expected.items():
            # subTest keeps going after a failure so one missing link does
            # not hide problems with the remaining ones.
            with self.subTest(field=field, label=label):
                self.checkLink(links, field, label, f"{self.DOWNLOAD}/{identifier}/{filename}")
        return links

    def test_v3_image_links(self):
        identifier = "journalofexpedit00ford"
        manifest = self._get_manifest(identifier)

        self._check_links(manifest, identifier, "rendering", self.IMAGE_RENDERINGS)
        self._check_links(manifest, identifier, "seeAlso", self.IMAGE_SEEALSO)

    def test_v3_video_links(self):
        identifier = "DuckandC1951"
        manifest = self._get_manifest(identifier)

        renderingMap = self._check_links(manifest, identifier, "rendering", self.VIDEO_RENDERINGS)
        seeAlsoMap = self._check_links(manifest, identifier, "seeAlso", self.VIDEO_SEEALSO)

        # Files with an unrecognised format must not leak into either list.
        unknown_msg = "Found Unknown in rendering or seeAlso where it shouldn't be."
        self.assertNotIn("Unknown", renderingMap, unknown_msg)
        self.assertNotIn("Unknown", seeAlsoMap, unknown_msg)

        # Thumbnail - thumbnail
        # 19 thumbs
        self.assertIn('thumbnail', manifest, "Expected thumbnail in Manifest")
        self.assertEqual(len(manifest['thumbnail']), 19, f"Expected 19 thumbnails: {manifest['thumbnail']}")
class TestVideo(unittest.TestCase):
    """Check v3 manifests of video items, including their VTT annotations.

    Every test requests a manifest through the Flask test client.
    """

    def setUp(self) -> None:
        self.test_app = FlaskClient(app)

    def _get(self, path):
        """Request ``path`` from the app, assert a 200 and return the response."""
        resp = self.test_app.get(path)
        self.assertEqual(resp.status_code, 200)
        return resp

    def test_v3_single_video_manifest(self):
        manifest = self._get("/iiif/3/youtube-7w8F2Xi3vFw/manifest.json").json

        self.assertEqual(len(manifest['items']),1,f"Expected 1 canvas but got: {len(manifest['items'])}")

    def test_v3_h264_MPEG4_OGG_Theora(self):
        resp = self._get("/iiif/3/taboca_201002_03/manifest.json")
        manifest = resp.json
        self.assertEqual(len(manifest['items']),251,f"Expected 251 canvases but got: {len(manifest['items'])}")

        # The format names are matched case-insensitively anywhere in the body.
        text = resp.text.lower()
        self.assertIn("h.264 MPEG4".lower(), text, "Expected the string 'h.264 MPEG4'")
        self.assertIn("OGG Theora".lower(), text, "Expected the string 'OGG Theora'")

    def test_vtt_autogenerated(self):
        manifest = self._get("/iiif/3/youtube-SvH4fbjOT0A/manifest.json?recache=true").json

        self.assertEqual(len(manifest['items']),1,f"Expected 1 canvas but got: {len(manifest['items'])}")
        canvas = manifest['items'][0]
        self.assertIn('annotations', canvas, "Expected annotations in manifest")
        self.assertIsInstance(canvas['annotations'], list, "Expected annotations to be a list")
        self.assertEqual(len(canvas['annotations']), 1, "Expected 1 item in annotations")
        annotationPage = canvas['annotations'][0]
        self.assertEqual(annotationPage['type'], 'AnnotationPage', "Expected annotations to contain annotation page")

        # Checked step by step so a missing 'items' key is reported as a test
        # failure instead of a KeyError raised while building the message.
        self.assertIn('items', annotationPage, "Expected annotation page to contain a list of items")
        self.assertIsInstance(annotationPage['items'], list, "Expected annotation page items to be a list")
        self.assertEqual(len(annotationPage['items']), 1, f"Expected annotation page to contain a list of items which contains 1 item. Found {annotationPage['items']}")

        annotation = annotationPage['items'][0]
        self.assertEqual(annotation['type'], 'Annotation', "Expected annotationPage to contain annotations")
        self.assertEqual(annotation['motivation'], 'supplementing', "Expected annotation to have the supplementing motivation")
        self.assertIn('body', annotation, "Expected annotation to have a body")
        body = annotation['body']
        self.assertEqual(body['type'],'Text', "Expected body to have a type text")
        self.assertEqual(body['format'],'text/vtt', "Expected body to have the format text/vtt")
        self.assertEqual(body['label']['en'][0], "autogenerated", "Expected VTT file to have the label autogenerated")
        self.assertNotIn("language", body, "We don't know the language for this item so there shouldn't be a language specified")
        self.assertEqual(body['id'], "https://localhost/iiif/resource/youtube-SvH4fbjOT0A/34C3_-_International_Image_Interoperability_Framework_IIIF_Kulturinstitutionen_schaffen_interop-SvH4fbjOT0A.autogenerated.vtt","Unexpected URL for the VTT file")

    def test_vtt_multilingual(self):
        manifest = self._get("/iiif/3/cruz-test/manifest.json?recache=true").json

        canvas = manifest['items'][0]
        self.assertIn('annotations', canvas, 'Expected annotations in Canvas')
        self.assertEqual(len(canvas['annotations']), 1, 'Expected one AnnotationPage')
        annotations = canvas['annotations'][0]['items']
        self.assertEqual(len(annotations), 104, 'Expected all 104 languages')

        # Check welsh
        welsh = []
        for item in annotations:
            self.assertIn('language', item['body'], f"All vtt files should have a language: {item}")
            if item['body']['language'] == 'cy':
                welsh.append(item['body'])

        # Without this the check below would pass silently when the Welsh
        # track is missing altogether.
        self.assertTrue(welsh, 'Expected a Welsh (cy) vtt annotation')
        for body in welsh:
            self.assertEqual(body['id'], 'https://localhost/iiif/resource/cruz-test/cruz-test.cy.vtt', 'Unexpected link for the Welsh vtt file')


if __name__ == '__main__':
    unittest.main()