Skip to content

Commit

Permalink
Adding support for autogenerated vtt files
Browse files Browse the repository at this point in the history
  • Loading branch information
glenrobson committed Apr 26, 2024
1 parent 23e70c4 commit b45bd71
Show file tree
Hide file tree
Showing 2 changed files with 60 additions and 0 deletions.
35 changes: 35 additions & 0 deletions iiify/resolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -506,6 +506,7 @@ def create_manifest3(identifier, domain=None, page=None):
# sort the files into originals and derivatives, splitting the derivatives into buckets based on the original
originals = []
derivatives = {}
vttfiles = {}
for f in metadata['files']:
if f['source'] == 'derivative':
if f['original'] in derivatives:
Expand All @@ -514,6 +515,14 @@ def create_manifest3(identifier, domain=None, page=None):
derivatives[f['original']] = {f['format']: f}
elif f['source'] == 'original':
originals.append(f)

if f['format'] == 'Web Video Text Tracks':
# Example: 34C3_-_International_Image_Interoperability_Framework_IIIF_Kulturinstitutionen_schaffen_interop-SvH4fbjOT0A.autogenerated.vtt
sourceFilename = f['name'].replace('.autogenerated.vtt', '')
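# Example: cruz-test.en.vtt (NOTE(review): str.replace() below matches its first argument as literal text, not as a regex, so this example name is left unchanged - re.sub() is probably what was intended; confirm)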
sourceFilename = sourceFilename.replace('[a-z][a-z].vtt', '')

vttfiles[sourceFilename] = [f]

# create the canvases for each original
for file in [f for f in originals if f['format'] in ['MPEG4', 'h.264 MPEG4', '512Kb MPEG4', 'HiRes MPEG4', 'MPEG2', 'h.264', 'Matroska', 'Ogg Video', 'Ogg Theora', 'WebM', 'Windows Media', 'Cinepack']]:
Expand All @@ -522,6 +531,32 @@ def create_manifest3(identifier, domain=None, page=None):
c_id = f"{URI_PRIFIX}/{identifier}/{slugged_id}/canvas"
c = Canvas(id=c_id, label=normalised_id, duration=float(file['length']), height=int(file['height']), width=int(file['width']))

# Add vtt if present
if vttfiles and normalised_id in vttfiles:
vttAPId = f"{URI_PRIFIX}/{identifier}/{slugged_id}/vtt"

vttNo = 1
for vttFile in vttfiles[normalised_id]:
vtAnno = c.make_annotation(id=f"{URI_PRIFIX}/{identifier}/{slugged_id}/annotation/vtt/{vttNo}",
motivation="supplementing",
target=c.id,
anno_page_id=vttAPId,
body={"id": f"https://archive.org/download/{identifier}/{vttFile['name']}",
"type": "Text",
"format": "text/vtt",
})
# add label and language
if vttFile['name'].endswith("autogenerated.vtt"):
vtAnno.body.label = { 'en': ['autogenerated']}
else:
# Assume the second-to-last dot-separated part of the file name is the language code
splitName = vttFile['name'].split(".")
lang = splitName[-2]
vtAnno.body.add_label(lang, language=lang)
vtAnno.body.language = lang

vttNo += 1

# create intermediary objects
ap = AnnotationPage(id=f"{URI_PRIFIX}/{identifier}/{slugged_id}/page")
anno = Annotation(id=f"{URI_PRIFIX}/{identifier}/{slugged_id}/annotation", motivation="painting", target=c.id)
Expand Down
25 changes: 25 additions & 0 deletions tests/test_video.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,5 +22,30 @@ def test_v3_h264_MPEG4_OGG_Theora(self):
self.assertEqual("h.264 MPEG4".lower() in resp.text.lower(), True, f"Expected the string 'h.264 MPEG4'")
self.assertEqual("OGG Theora".lower() in resp.text.lower(), True, f"Expected the string 'OGG Theora'")

def test_vtt_autogenerated(self):
    """Check that an autogenerated VTT file is exposed as a supplementing annotation.

    Requests the v3 manifest for ``youtube-SvH4fbjOT0A`` and verifies that its
    single canvas carries exactly one annotation page holding exactly one
    ``supplementing`` annotation whose body points at the
    ``.autogenerated.vtt`` file, is labelled "autogenerated" and has no
    ``language`` property.
    """
    resp = self.test_app.get("/iiif/3/youtube-SvH4fbjOT0A/manifest.json?recache=true")
    self.assertEqual(resp.status_code, 200)
    manifest = resp.json

    canvases = manifest['items']
    self.assertEqual(len(canvases), 1, f"Expected 1 canvas but got: {len(canvases)}")
    canvas = canvases[0]

    self.assertIn('annotations', canvas, "Expected annotations in manifest")
    self.assertIsInstance(canvas['annotations'], list, "Expected annotations to be a list")
    self.assertEqual(len(canvas['annotations']), 1, "Expected 1 item in annotations")
    annotationPage = canvas['annotations'][0]
    self.assertEqual(annotationPage['type'], 'AnnotationPage', "Expected annotations to contain annotation page")

    # Use .get() so that a missing 'items' key produces an assertion failure
    # rather than a KeyError raised while the failure message is being built.
    pageItems = annotationPage.get('items')
    self.assertTrue(
        isinstance(pageItems, list) and len(pageItems) == 1,
        f"Expected annotation page to contain a list of items which contains 1 item. Found {pageItems}",
    )
    annotation = pageItems[0]
    self.assertEqual(annotation['type'], 'Annotation', "Expected annotationPage to contain annotations")
    self.assertEqual(annotation['motivation'], 'supplementing', "Expected annotation to have the supplementing motivation")
    self.assertIn('body', annotation, "Expected annotation to have a body")

    body = annotation['body']
    self.assertEqual(body['type'], 'Text', "Expected body to have a type text")
    self.assertEqual(body['format'], 'text/vtt', "Expected body to have the format text/vtt")
    self.assertEqual(body['label']['en'][0], "autogenerated", "Expected VTT file to have the label autogenerated")
    self.assertNotIn("language", body, "We don't know the language for this item so there shouldn't be a language specified")
    self.assertEqual(
        body['id'],
        "https://archive.org/download/youtube-SvH4fbjOT0A/34C3_-_International_Image_Interoperability_Framework_IIIF_Kulturinstitutionen_schaffen_interop-SvH4fbjOT0A.autogenerated.vtt",
        "Unexpected URL for the VTT file",
    )


# Run the unittest test runner when this file is executed directly as a script.
if __name__ == '__main__':
unittest.main()

0 comments on commit b45bd71

Please sign in to comment.