Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding VTT support #68

Merged
merged 8 commits into from
May 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,11 @@ Unit tests are in the `tests` folder and can be run with:
python -m unittest discover -s tests
```

Run a single test:
```
python -m unittest tests.test_video.TestVideo.test_vtt_autogenerated
```

Retrieve large.jpg as 800px wide JPEG
* http://127.0.0.1:8080/iiif/large.jpg/full/800,/0/default.jpg

Expand Down
35 changes: 35 additions & 0 deletions iiify/resolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -506,6 +506,7 @@ def create_manifest3(identifier, domain=None, page=None):
# sort the files into originals and derivatives, splitting the derivatives into buckets based on the original
originals = []
derivatives = {}
vttfiles = {}
for f in metadata['files']:
if f['source'] == 'derivative':
if f['original'] in derivatives:
Expand All @@ -514,6 +515,14 @@ def create_manifest3(identifier, domain=None, page=None):
derivatives[f['original']] = {f['format']: f}
elif f['source'] == 'original':
originals.append(f)

if f['format'] == 'Web Video Text Tracks':
# Example: cruz-test.en.vtt and 34C3_-_International_Image_Interoperability_Framework_IIIF_Kulturinstitutionen_schaffen_interop-SvH4fbjOT0A.autogenerated.vtt
sourceFilename = re.sub('\.[a-zA-H-]*\.vtt', '', f['name'])
if sourceFilename not in vttfiles:
vttfiles[sourceFilename] = []

vttfiles[sourceFilename].append(f)

# create the canvases for each original
for file in [f for f in originals if f['format'] in ['MPEG4', 'h.264 MPEG4', '512Kb MPEG4', 'HiRes MPEG4', 'MPEG2', 'h.264', 'Matroska', 'Ogg Video', 'Ogg Theora', 'WebM', 'Windows Media', 'Cinepack']]:
Expand All @@ -522,6 +531,32 @@ def create_manifest3(identifier, domain=None, page=None):
c_id = f"{URI_PRIFIX}/{identifier}/{slugged_id}/canvas"
c = Canvas(id=c_id, label=normalised_id, duration=float(file['length']), height=int(file['height']), width=int(file['width']))

# Add vtt if present
if vttfiles and normalised_id in vttfiles:
vttAPId = f"{URI_PRIFIX}/{identifier}/{slugged_id}/vtt"

vttNo = 1
for vttFile in vttfiles[normalised_id]:
vtAnno = c.make_annotation(id=f"{URI_PRIFIX}/{identifier}/{slugged_id}/annotation/vtt/{vttNo}",
motivation="supplementing",
target=c.id,
anno_page_id=vttAPId,
body={"id": f"{domain}resource/{identifier}/{vttFile['name']}",
"type": "Text",
"format": "text/vtt",
})
# add label and language
if vttFile['name'].endswith("autogenerated.vtt"):
vtAnno.body.label = { 'en': ['autogenerated']}
else:
# Assume language
splitName = vttFile['name'].split(".")
lang = splitName[-2]
vtAnno.body.add_label(lang, language="none")
vtAnno.body.language = lang

vttNo += 1

# create intermediary objects
ap = AnnotationPage(id=f"{URI_PRIFIX}/{identifier}/{slugged_id}/page")
anno = Annotation(id=f"{URI_PRIFIX}/{identifier}/{slugged_id}/annotation", motivation="painting", target=c.id)
Expand Down
7 changes: 7 additions & 0 deletions nginx-vhost.conf
Original file line number Diff line number Diff line change
Expand Up @@ -37,4 +37,11 @@ server {
# Reverse proxy with the variables captured above
proxy_pass https://cantaloupe.prod.archive.org/iiif/$1/$2;
}

# Serve item resource files (e.g. WebVTT caption tracks) by proxying requests
# under /iiif/resource/ to the archive.org download endpoint.
location /iiif/resource/ {
# CORS headers; `always` attaches them regardless of the response status code.
add_header 'Access-Control-Allow-Origin' '*' always;
add_header 'Access-Control-Allow-Methods' 'GET, HEAD, POST, PUT, PATCH, DELETE' always;
# Example upstream URL after rewriting:
# https://archive.org/download/cruz-test/cruz-test.af.vtt
# Because proxy_pass carries a URI, the matched /iiif/resource/ prefix is
# replaced by /download/ when the request is forwarded.
proxy_pass https://archive.org/download/;
}
}
20 changes: 1 addition & 19 deletions tests/test_manifests.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,21 +42,13 @@ def test_v3_single_text_manifest(self):
self.assertEqual(manifest['type'], "Manifest", f"Unexpected type. Expected Manifest go {manifest['type']}")
self.assertEqual(len(manifest['items']),1,f"Expected 1 canvas but got: {len(manifest['items'])}")


def test_v3_vermont_Life_Magazine(self):
    """The rbmsbk_ap2-v4_2001_V55N4 manifest is expected to hold 116 canvases."""
    response = self.test_app.get("/iiif/3/rbmsbk_ap2-v4_2001_V55N4/manifest.json")
    self.assertEqual(response.status_code, 200)

    canvas_count = len(response.json['items'])
    self.assertEqual(canvas_count, 116, f"Expected 116 canvas but got: {canvas_count}")

def test_v3_single_video_manifest(self):
    """The youtube-7w8F2Xi3vFw manifest is expected to hold exactly one canvas."""
    response = self.test_app.get("/iiif/3/youtube-7w8F2Xi3vFw/manifest.json")
    self.assertEqual(response.status_code, 200)

    canvas_count = len(response.json['items'])
    self.assertEqual(canvas_count, 1, f"Expected 1 canvas but got: {canvas_count}")


#logic to cover etree mediatype github issue #123
def test_v3_etree_mediatype(self):
resp = self.test_app.get("/iiif/3/gd72-04-14.aud.vernon.23662.sbeok.shnf/manifest.json")
Expand All @@ -66,30 +58,20 @@ def test_v3_etree_mediatype(self):
self.assertEqual(len(manifest['items']),36,f"Expected 36 canvases but got: {len(manifest['items'])}")
self.assertEqual(manifest['items'][0]['items'][0]['items'][0]['body']['items'][0]['type'],"Sound",f"Expected 'Sound' but got: {manifest['items'][0]['items'][0]['items'][0]['body']['items'][0]['type']}")


def test_v3_64Kbps_MP3(self):
    """The TvQuran.com__Alafasi manifest has 114 canvases and mentions '64Kbps MP3'."""
    resp = self.test_app.get("/iiif/3/TvQuran.com__Alafasi/manifest.json")
    self.assertEqual(resp.status_code, 200)
    manifest = resp.json
    self.assertEqual(len(manifest['items']), 114, f"Expected 114 canvases but got: {len(manifest['items'])}")
    # Case-insensitive containment check: assertIn states the intent directly
    # instead of comparing a boolean against True.
    self.assertIn("64Kbps MP3".lower(), resp.text.lower(), "Expected the string '64Kbps MP3'")


def test_v3_128Kbps_MP3(self):
    """The alice_in_wonderland_librivox manifest has 12 canvases and mentions '128kbps mp3'."""
    resp = self.test_app.get("/iiif/3/alice_in_wonderland_librivox/manifest.json")
    self.assertEqual(resp.status_code, 200)
    manifest = resp.json
    self.assertEqual(len(manifest['items']), 12, f"Expected 12 canvases but got: {len(manifest['items'])}")
    # Case-insensitive containment check: assertIn states the intent directly
    # instead of comparing a boolean against True.
    self.assertIn("128kbps mp3".lower(), resp.text.lower(), "Expected the string '128kbps mp3'")

def test_v3_h264_MPEG4_OGG_Theora(self):
    """The taboca_201002_03 manifest has 251 canvases and mentions both video formats."""
    resp = self.test_app.get("/iiif/3/taboca_201002_03/manifest.json")
    self.assertEqual(resp.status_code, 200)
    manifest = resp.json
    self.assertEqual(len(manifest['items']), 251, f"Expected 251 canvases but got: {len(manifest['items'])}")
    # Lower-case the response once and reuse it for both case-insensitive checks.
    manifest_text = resp.text.lower()
    self.assertIn("h.264 MPEG4".lower(), manifest_text, "Expected the string 'h.264 MPEG4'")
    self.assertIn("OGG Theora".lower(), manifest_text, "Expected the string 'OGG Theora'")

def test_v3_aiff(self):
resp = self.test_app.get("/iiif/3/PDextend_AIFF/manifest.json")
self.assertEqual(resp.status_code, 200)
Expand Down
68 changes: 68 additions & 0 deletions tests/test_video.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
import unittest
from flask.testing import FlaskClient
from iiify.app import app

class TestVideo(unittest.TestCase):
    """IIIF v3 manifest tests for video items, including WebVTT caption annotations."""

    def setUp(self) -> None:
        """Create a fresh Flask test client for every test."""
        self.test_app = FlaskClient(app)

    def test_v3_single_video_manifest(self):
        """The youtube-7w8F2Xi3vFw manifest is expected to hold exactly one canvas."""
        resp = self.test_app.get("/iiif/3/youtube-7w8F2Xi3vFw/manifest.json")
        self.assertEqual(resp.status_code, 200)
        manifest = resp.json

        self.assertEqual(len(manifest['items']), 1, f"Expected 1 canvas but got: {len(manifest['items'])}")

    def test_v3_h264_MPEG4_OGG_Theora(self):
        """The taboca_201002_03 manifest has 251 canvases and mentions both video formats."""
        resp = self.test_app.get("/iiif/3/taboca_201002_03/manifest.json")
        self.assertEqual(resp.status_code, 200)
        manifest = resp.json
        self.assertEqual(len(manifest['items']), 251, f"Expected 251 canvases but got: {len(manifest['items'])}")
        # Lower-case the response once and reuse it for both case-insensitive checks.
        manifest_text = resp.text.lower()
        self.assertIn("h.264 MPEG4".lower(), manifest_text, "Expected the string 'h.264 MPEG4'")
        self.assertIn("OGG Theora".lower(), manifest_text, "Expected the string 'OGG Theora'")

    def test_vtt_autogenerated(self):
        """An autogenerated VTT file becomes one supplementing annotation without a language."""
        resp = self.test_app.get("/iiif/3/youtube-SvH4fbjOT0A/manifest.json?recache=true")
        self.assertEqual(resp.status_code, 200)
        manifest = resp.json

        self.assertEqual(len(manifest['items']), 1, f"Expected 1 canvas but got: {len(manifest['items'])}")
        canvas = manifest['items'][0]
        self.assertIn('annotations', canvas, "Expected annotations in manifest")
        self.assertIsInstance(canvas['annotations'], list, "Expected annotations to be a list")
        self.assertEqual(len(canvas['annotations']), 1, "Expected 1 item in annotations")
        annotation_page = canvas['annotations'][0]
        self.assertEqual(annotation_page['type'], 'AnnotationPage', "Expected annotations to contain annotation page")

        # Use .get() so a missing 'items' key produces an assertion failure with
        # a useful message rather than a KeyError while building that message.
        page_items = annotation_page.get('items')
        self.assertIsInstance(page_items, list, f"Expected annotation page to contain a list of items. Found {page_items}")
        self.assertEqual(len(page_items), 1, f"Expected annotation page to contain 1 item. Found {page_items}")
        annotation = page_items[0]
        self.assertEqual(annotation['type'], 'Annotation', "Expected annotationPage to contain annotations")
        self.assertEqual(annotation['motivation'], 'supplementing', "Expected annotation to have the supplementing motivation")
        self.assertIn('body', annotation, "Expected annotation to have a body")
        body = annotation['body']
        self.assertEqual(body['type'], 'Text', "Expected body to have a type text")
        self.assertEqual(body['format'], 'text/vtt', "Expected body to have the format text/vtt")
        self.assertEqual(body['label']['en'][0], "autogenerated", "Expected VTT file to have the label autogenerated")
        self.assertNotIn("language", body, "We don't know the language for this item so there shouldn't be a language specified")
        self.assertEqual(body['id'], "https://localhost/iiif/resource/youtube-SvH4fbjOT0A/34C3_-_International_Image_Interoperability_Framework_IIIF_Kulturinstitutionen_schaffen_interop-SvH4fbjOT0A.autogenerated.vtt", "Unexpected URL for the VTT file")

    def test_vtt_multilingual(self):
        """Every per-language VTT file is listed with a language; the Welsh link is checked."""
        resp = self.test_app.get("/iiif/3/cruz-test/manifest.json?recache=true")
        self.assertEqual(resp.status_code, 200)
        manifest = resp.json

        canvas = manifest['items'][0]
        self.assertIn('annotations', canvas, 'Expected annotations in Canvas')
        self.assertEqual(len(canvas['annotations']), 1, 'Expected one AnnotationPage')
        annotations = canvas['annotations'][0]['items']
        self.assertEqual(len(annotations), 104, 'Expected all 104 languages')

        # Check Welsh
        found_welsh = False
        for item in annotations:
            self.assertIn('language', item['body'], f"All vtt files should have a language: {item}")
            if item['body']['language'] == 'cy':
                found_welsh = True
                self.assertEqual(item['body']['id'], 'https://localhost/iiif/resource/cruz-test/cruz-test.cy.vtt', 'Unexpected link for the Welsh vtt file')
        # Without this, the Welsh check would pass silently if no 'cy' entry existed.
        self.assertTrue(found_welsh, "Expected a Welsh (cy) vtt file among the annotations")

# Run the tests in this module when the file is executed directly as a script.
if __name__ == '__main__':
    unittest.main()
Loading