Skip to content

Commit

Permalink
Merge branch 'spring-2024' into issue-48
Browse files Browse the repository at this point in the history
  • Loading branch information
glenrobson authored May 23, 2024
2 parents 62f4107 + f814775 commit 37587f4
Show file tree
Hide file tree
Showing 8 changed files with 428 additions and 26 deletions.
5 changes: 5 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,11 @@ Unit tests are in the `tests` folder and can be run with:
python -m unittest discover -s tests
```

Run a single test:
```
python -m unittest tests.test_video.TestVideo.test_vtt_autogenerated
```

Retrieve large.jpg as 800px wide JPEG
* http://127.0.0.1:8080/iiif/large.jpg/full/800,/0/default.jpg

Expand Down
4 changes: 4 additions & 0 deletions iiify/configs/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import sys
import types
import configparser
import json

path = os.path.dirname(os.path.realpath(__file__))
approot = os.path.abspath(os.path.join(path, os.pardir))
Expand Down Expand Up @@ -75,3 +76,6 @@ def getdef(self, section, option, default_value):
"long": 432000, # 5 days
"longest": 2592000 # 30 days
}

# Load the table that maps a file "format" name to the manifest property
# ("field"), "type" and media "format" used when linking to that file.
# links.json lives in the same directory as this module. The encoding is
# given explicitly so parsing does not depend on the host's locale default.
with open(os.path.join(path, 'links.json'), encoding='utf-8') as file:
    LINKS = json.load(file)
137 changes: 137 additions & 0 deletions iiify/configs/links.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
{
"Animated GIF": {
"field": "rendering",
"type": "Image",
"format": "image/gif"
},
"Text PDF": {
"field": "rendering",
"type": "Text",
"format": "application/pdf"
},
"Abbyy GZ": {
"field": "rendering",
"type": "Dataset",
"format": "application/gzip"
},
"Archive BitTorrent": {
"field": "rendering",
"type": "Dataset",
"format": "application/x-bittorrent"
},
"Grayscale PDF": {
"field": "rendering",
"type": "Text",
"format": "application/pdf"
},
"chOCR": {
"field": "rendering",
"type": "Text",
"format": "application/gzip"
},
"DjVuTXT": {
"field": "rendering",
"type": "Text",
"format": "text/plain"
},
"Djvu XML": {
"field": "rendering",
"type": "Dataset",
"format": "application/xml"
},
"hOCR": {
"field": "rendering",
"type": "Text",
"format": "text/html"
},
"Single Page Processed JP2 ZIP": {
"field": "rendering",
"type": "Image",
"format": "application/zip"
},
"OCR Search Text": {
"field": "rendering",
"type": "Text",
"format": "application/gzip"
},
"Single Page Original JP2 Tar": {
"field": "rendering",
"type": "Image",
"format": "application/x-tar"
},
"DjVu": {
"field": "rendering",
"type": "Image",
"format": "image/vnd.djvu"
},
"Cloth Cover Detection Log": {
"field": "seeAlso",
"type": "Text",
"format": "text/plain"
},
"Dublin Core": {
"field": "seeAlso",
"type": "Dataset",
"format": "application/xml"
},
"OCR Page Index": {
"field": "seeAlso",
"type": "Dataset",
"format": "application/json"
},
"MARC": {
"field": "seeAlso",
"type": "Dataset",
"format": "application/xml"
},
"MARC Binary": {
"field": "seeAlso",
"type": "Dataset",
"format": "application/marc"
},
"MARC Source": {
"field": "seeAlso",
"type": "Dataset",
"format": "application/xml"
},
"Page Numbers JSON": {
"field": "seeAlso",
"type": "Dataset",
"format": "application/json"
},
"Scandata": {
"field": "seeAlso",
"type": "Dataset",
"format": "application/xml"
},
"SubRip": {
"field": "rendering",
"type": "Text",
"format": "text/plain"
},
"Web Video Text Tracks": {
"field": "rendering",
"type": "Text",
"format": "text/vtt"
},
"Intermediate ASR JSON": {
"field": "rendering",
"type": "Text",
"format": "application/json"
},
"Whisper ASR JSON": {
"field": "rendering",
"type": "Text",
"format": "application/json"
},
"Storj Upload Log": {
"field": "seeAlso",
"type": "Text",
"format": "text/plain"
},
"Storj Upload Trigger": {
"field": "seeAlso",
"type": "Text",
"format": "text/plain"
}
}
111 changes: 104 additions & 7 deletions iiify/resolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,9 @@
import os
import requests
from iiif2 import iiif, web
from .configs import options, cors, approot, cache_root, media_root, apiurl
from .configs import options, cors, approot, cache_root, media_root, apiurl, LINKS
from iiif_prezi3 import Manifest, config, Annotation, AnnotationPage,AnnotationPageRef, Canvas, Manifest, ResourceItem, ServiceItem, Choice, Collection, ManifestRef, CollectionRef

from urllib.parse import urlparse, parse_qs, quote
import json
import math
Expand All @@ -13,7 +14,7 @@

IMG_CTX = 'http://iiif.io/api/image/2/context.json'
PRZ_CTX = 'http://iiif.io/api/presentation/2/context.json'
ARCHIVE = 'http://archive.org'
ARCHIVE = 'https://archive.org'
IMG_SRV = 'https://iiif.archive.org/image/iiif'
METADATA_FIELDS = ("title", "volume", "publisher", "subject", "date", "contributor", "creator")
bookdata = 'http://%s/BookReader/BookReaderJSON.php'
Expand Down Expand Up @@ -385,7 +386,66 @@ def addMetadata(item, identifier, metadata, collection=False):

item.metadata = manifest_metadata

def addSeeAlso(manifest, identifier, files):
    """Replace ``manifest.seeAlso`` with links for the given item.

    The list always starts with a link to the item's metadata record. One
    download link is then appended for every entry of ``files`` whose
    ``format`` is a key of ``LINKS`` mapped to the ``seeAlso`` field.

    Args:
        manifest: Object whose ``seeAlso`` attribute is overwritten.
        identifier: Item identifier interpolated into the link URLs.
        files: Iterable of file dicts. ``format`` is read from every entry
            and ``name`` from every matching entry.

    Raises:
        KeyError: If an entry lacks ``format``, or a matching entry lacks
            ``name``.
    """
    manifest.seeAlso = [
        {"id": f"{ARCHIVE}/metadata/{identifier}",
         "type": "Metadata",
         "label": {"en": ["Item Metadata"]},
         "format": "application/json"}
    ]

    for file in files:
        # The IIIF type and media format come from LINKS; the label is the
        # file's own format name.
        if file['format'] in LINKS and LINKS[file['format']]['field'] == 'seeAlso':
            seeAlso = LINKS[file['format']]
            manifest.seeAlso.append(
                {"id": f"{ARCHIVE}/download/{identifier}/{file['name']}",
                 "type": seeAlso['type'],
                 "label": {"en": [file["format"]]},
                 "format": seeAlso['format']
                 })


def addRendering(manifest, identifier, files):
    """Reset ``manifest.rendering`` and fill it from the item's files.

    An entry of ``files`` contributes a download link when its ``format``
    is a key of ``LINKS`` and that mapping's ``field`` is ``rendering``.
    The link's type and media format are taken from the mapping, and its
    label is the file's format name.
    """
    manifest.rendering = []

    for entry in files:
        fmt = entry['format']
        if fmt not in LINKS:
            continue

        spec = LINKS[fmt]
        if spec['field'] != 'rendering':
            continue

        manifest.rendering.append({
            "id": f"{ARCHIVE}/download/{identifier}/{entry['name']}",
            "type": spec['type'],
            "label": {"en": [fmt]},
            "format": spec['format'],
        })

def addThumbnails(manifest, identifier, files):
    """Set ``manifest.thumbnail`` from the files whose format is "Thumbnail".

    Each matching file becomes an Image entry pointing at its download URL.
    The media type is ``image/png`` when the file name ends with ``.png``
    and ``image/jpeg`` otherwise. ``manifest.thumbnail`` is left untouched
    when no file matches.
    """
    images = [
        {
            "id": f"{ARCHIVE}/download/{identifier}/{entry['name']}",
            "type": "Image",
            "format": "image/png" if entry['name'].endswith('.png') else "image/jpeg",
        }
        for entry in files
        if entry['format'] == "Thumbnail"
    ]

    if images:
        manifest.thumbnail = images

def create_manifest3(identifier, domain=None, page=None):
# Get item metadata
Expand All @@ -401,6 +461,9 @@ def create_manifest3(identifier, domain=None, page=None):
manifest = Manifest(id=f"{uri}/manifest.json", label=metadata["metadata"]["title"])

addMetadata(manifest, identifier, metadata['metadata'])
addSeeAlso(manifest, identifier, metadata['files'])
addRendering(manifest, identifier, metadata['files'])
addThumbnails(manifest, identifier, metadata['files'])

if mediatype == 'texts':
# Get bookreader metadata (mostly for filenames and height / width of image)
Expand Down Expand Up @@ -570,6 +633,7 @@ def create_manifest3(identifier, domain=None, page=None):
# sort the files into originals and derivatives, splitting the derivatives into buckets based on the original
originals = []
derivatives = {}
vttfiles = {}
for f in metadata['files']:
if f['source'] == 'derivative':
if f['original'] in derivatives:
Expand All @@ -578,6 +642,14 @@ def create_manifest3(identifier, domain=None, page=None):
derivatives[f['original']] = {f['format']: f}
elif f['source'] == 'original':
originals.append(f)

if f['format'] == 'Web Video Text Tracks':
# Example: cruz-test.en.vtt and 34C3_-_International_Image_Interoperability_Framework_IIIF_Kulturinstitutionen_schaffen_interop-SvH4fbjOT0A.autogenerated.vtt
sourceFilename = re.sub('\.[a-zA-H-]*\.vtt', '', f['name'])
if sourceFilename not in vttfiles:
vttfiles[sourceFilename] = []

vttfiles[sourceFilename].append(f)

# create the canvases for each original
for file in [f for f in originals if f['format'] in ['MPEG4', 'h.264 MPEG4', '512Kb MPEG4', 'HiRes MPEG4', 'MPEG2', 'h.264', 'Matroska', 'Ogg Video', 'Ogg Theora', 'WebM', 'Windows Media', 'Cinepack']]:
Expand All @@ -586,6 +658,32 @@ def create_manifest3(identifier, domain=None, page=None):
c_id = f"{URI_PRIFIX}/{identifier}/{slugged_id}/canvas"
c = Canvas(id=c_id, label=normalised_id, duration=float(file['length']), height=int(file['height']), width=int(file['width']))

# Add vtt if present
if vttfiles and normalised_id in vttfiles:
vttAPId = f"{URI_PRIFIX}/{identifier}/{slugged_id}/vtt"

vttNo = 1
for vttFile in vttfiles[normalised_id]:
vtAnno = c.make_annotation(id=f"{URI_PRIFIX}/{identifier}/{slugged_id}/annotation/vtt/{vttNo}",
motivation="supplementing",
target=c.id,
anno_page_id=vttAPId,
body={"id": f"{domain}resource/{identifier}/{vttFile['name']}",
"type": "Text",
"format": "text/vtt",
})
# add label and language
if vttFile['name'].endswith("autogenerated.vtt"):
vtAnno.body.label = { 'en': ['autogenerated']}
else:
# Assume language
splitName = vttFile['name'].split(".")
lang = splitName[-2]
vtAnno.body.add_label(lang, language="none")
vtAnno.body.language = lang

vttNo += 1

# create intermediary objects
ap = AnnotationPage(id=f"{URI_PRIFIX}/{identifier}/{slugged_id}/page")
anno = Annotation(id=f"{URI_PRIFIX}/{identifier}/{slugged_id}/annotation", motivation="painting", target=c.id)
Expand Down Expand Up @@ -745,7 +843,6 @@ def ia_resolver(identifier):

def cantaloupe_resolver(identifier):
"""Resolves an existing Image Service identifier to what it should be with the new Cantaloupe setup"""

leaf = None
if "$" in identifier:
identifier, leaf = identifier.split("$", 1)
Expand All @@ -757,7 +854,6 @@ def cantaloupe_resolver(identifier):

mediatype = metadata['metadata']['mediatype'].lower()
files = metadata['files']

if mediatype == "image":
# single image file - find the filename

Expand Down Expand Up @@ -808,11 +904,12 @@ def cantaloupe_resolver(identifier):

#filename = next(f for f in files if f['source'].lower() == 'derivative' \
# and f['name'].endswith('_jp2.zip'))['name']
print("end of logic - filename:", filename)
if filename:
dirpath = filename[:-4]
filepath = f"{fileIdentifier}_{leaf.zfill(4)}{extension}"
return f"{identifier}%2f{filename}%2f{dirpath}%2f{filepath}"

# print (f'images not found for {identifier}')
# for f in files:
# print (f"source: {f['source'].lower()} name: {f['name']} and {f['source'].lower() == 'derivative'} {f['name'].endswith('_jp2.zip')}")
# print (f'images not found for {identifier}')
# for f in files:
# print (f"source: {f['source'].lower()} name: {f['name']} and {f['source'].lower() == 'derivative'} {f['name'].endswith('_jp2.zip')}")
7 changes: 7 additions & 0 deletions nginx-vhost.conf
Original file line number Diff line number Diff line change
Expand Up @@ -37,4 +37,11 @@ server {
# Reverse proxy with the variables captured above
proxy_pass https://cantaloupe.prod.archive.org/iiif/$1/$2;
}

# Reverse-proxy requests under /iiif/resource/ to archive.org's download
# endpoint, adding CORS headers to the proxied responses.
# NOTE(review): presumably this is the path used for the VTT annotation
# bodies emitted in the manifests; confirm against the resolver.
location /iiif/resource/ {
# "always" makes nginx add the header whatever the response status code is.
add_header 'Access-Control-Allow-Origin' '*' always;
add_header 'Access-Control-Allow-Methods' 'GET, HEAD, POST, PUT, PATCH, DELETE' always;
# https://archive.org/download/cruz-test/cruz-test.af.vtt
# proxy_pass carries a URI, so the matched /iiif/resource/ prefix is
# replaced by /download/ in the upstream request.
proxy_pass https://archive.org/download/;
}
}
Loading

0 comments on commit 37587f4

Please sign in to comment.