Skip to content

Commit

Permalink
Adding list of renderings and seeAlsos
Browse files Browse the repository at this point in the history
  • Loading branch information
glenrobson committed May 23, 2024
1 parent 6238b9b commit a1900d9
Show file tree
Hide file tree
Showing 4 changed files with 272 additions and 24 deletions.
4 changes: 4 additions & 0 deletions iiify/configs/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import sys
import types
import configparser
import json

path = os.path.dirname(os.path.realpath(__file__))
approot = os.path.abspath(os.path.join(path, os.pardir))
Expand Down Expand Up @@ -75,3 +76,6 @@ def getdef(self, section, option, default_value):
"long": 432000, # 5 days
"longest": 2592000 # 30 days
}

# Load the table that maps a file "format" name to the IIIF link it should
# produce: each entry carries the target "field" ("rendering" or "seeAlso"),
# the resource "type" and the media-type "format".
# JSON text is UTF-8, so decode it explicitly instead of relying on the
# platform's default locale encoding.
with open(os.path.join(path, 'links.json'), 'r', encoding='utf-8') as file:
    LINKS = json.load(file)
137 changes: 137 additions & 0 deletions iiify/configs/links.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
{
"Animated GIF": {
"field": "rendering",
"type": "Image",
"format": "image/gif"
},
"Text PDF": {
"field": "rendering",
"type": "Text",
"format": "application/pdf"
},
"Abbyy GZ": {
"field": "rendering",
"type": "Dataset",
"format": "application/gzip"
},
"Archive BitTorrent": {
"field": "rendering",
"type": "Dataset",
"format": "application/x-bittorrent"
},
"Grayscale PDF": {
"field": "rendering",
"type": "Text",
"format": "application/pdf"
},
"chOCR": {
"field": "rendering",
"type": "Text",
"format": "application/gzip"
},
"DjVuTXT": {
"field": "rendering",
"type": "Text",
"format": "text/plain"
},
"Djvu XML": {
"field": "rendering",
"type": "Dataset",
"format": "application/xml"
},
"hOCR": {
"field": "rendering",
"type": "Text",
"format": "text/html"
},
"Single Page Processed JP2 ZIP": {
"field": "rendering",
"type": "Image",
"format": "application/zip"
},
"OCR Search Text": {
"field": "rendering",
"type": "Text",
"format": "application/gzip"
},
"Single Page Original JP2 Tar": {
"field": "rendering",
"type": "Image",
"format": "application/x-tar"
},
"DjVu": {
"field": "rendering",
"type": "Image",
"format": "image/vnd.djvu"
},
"Cloth Cover Detection Log": {
"field": "seeAlso",
"type": "Text",
"format": "text/plain"
},
"Dublin Core": {
"field": "seeAlso",
"type": "Dataset",
"format": "application/xml"
},
"OCR Page Index": {
"field": "seeAlso",
"type": "Dataset",
"format": "application/json"
},
"MARC": {
"field": "seeAlso",
"type": "Dataset",
"format": "application/xml"
},
"MARC Binary": {
"field": "seeAlso",
"type": "Dataset",
"format": "application/marc"
},
"MARC Source": {
"field": "seeAlso",
"type": "Dataset",
"format": "application/xml"
},
"Page Numbers JSON": {
"field": "seeAlso",
"type": "Dataset",
"format": "application/json"
},
"Scandata": {
"field": "seeAlso",
"type": "Dataset",
"format": "application/xml"
},
"SubRip": {
"field": "rendering",
"type": "Text",
"format": "text/plain"
},
"Web Video Text Tracks": {
"field": "rendering",
"type": "Text",
"format": "text/vtt"
},
"Intermediate ASR JSON": {
"field": "rendering",
"type": "Text",
"format": "application/json"
},
"Whisper ASR JSON": {
"field": "rendering",
"type": "Text",
"format": "application/json"
},
"Storj Upload Log": {
"field": "seeAlso",
"type": "Text",
"format": "text/plain"
},
"Storj Upload Trigger": {
"field": "seeAlso",
"type": "Text",
"format": "text/plain"
}
}
53 changes: 29 additions & 24 deletions iiify/resolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,16 @@
import os
import requests
from iiif2 import iiif, web
from .configs import options, cors, approot, cache_root, media_root, apiurl
from .configs import options, cors, approot, cache_root, media_root, apiurl, LINKS
from iiif_prezi3 import Manifest, config, Annotation, AnnotationPage, Canvas, Manifest, ResourceItem, ServiceItem, Choice, Collection, ManifestRef, CollectionRef
from urllib.parse import urlparse, parse_qs, quote
import json
import math
import re
import mimetypes

IMG_CTX = 'http://iiif.io/api/image/2/context.json'
PRZ_CTX = 'http://iiif.io/api/presentation/2/context.json'
ARCHIVE = 'http://archive.org'
ARCHIVE = 'https://archive.org'
IMG_SRV = 'https://iiif.archive.org/image/iiif'
METADATA_FIELDS = ("title", "volume", "publisher", "subject", "date", "contributor", "creator")
bookdata = 'http://%s/BookReader/BookReaderJSON.php'
Expand Down Expand Up @@ -387,8 +386,6 @@ def addMetadata(item, identifier, metadata, collection=False):

def addSeeAlso(manifest, identifier, files):

mimetypes.add_type("application/gzip", ".gz")

manifest.seeAlso = [
{"id": f"{ARCHIVE}/metadata/{identifier}",
"type": "Metadata",
Expand All @@ -407,36 +404,47 @@ def addSeeAlso(manifest, identifier, files):
}

for file in files:
if file['format'] in SEEALSO_TYPES:
if file['format'] in LINKS and LINKS[file['format']]['field'] == 'seeAlso':
seeAlso = LINKS[file['format']]
manifest.seeAlso.append(
{"id": f"{ARCHIVE}/download/{identifier}/{file['name']}",
"type": SEEALSO_TYPES[file["format"]],
"type": seeAlso['type'],
"label": {"en": [file["format"]]},
"format": mimetypes.types_map.get(f".{file['name'].rsplit('.', 1)[1]}", "application/octet-stream")
"format": seeAlso['format']
})


def addRendering(manifest, identifier, files):
RENDERING_TYPES = {
"Item Tile": "Image",
"Text PDF": "PDF",
"Animated GIF": "Image",
"DjVuTXT": "Text",
"Generic Raw Book Zip": "Images",
"Single Page Processed JP2 Zip": "Images",
}

manifest.rendering = []

for file in files:
if file['format'] in RENDERING_TYPES:
if file['format'] in LINKS and LINKS[file['format']]['field'] == 'rendering':
rendering = LINKS[file['format']]
manifest.rendering.append(
{"id": f"{ARCHIVE}/download/{identifier}/{file['name']}",
"type": RENDERING_TYPES[file["format"]],
"type": rendering['type'],
"label": {"en": [file["format"]]},
"format": mimetypes.guess_type(file["name"])[0]
"format": rendering['format']
})

def addThumbnails(manifest, identifier, files):
    """Attach the item's thumbnail images to ``manifest``.

    Every entry of ``files`` whose ``format`` is ``"Thumbnail"`` becomes an
    ``Image`` resource pointing at the item's download URL. The media type is
    ``image/png`` when the file name ends in ``.png`` and ``image/jpeg``
    otherwise. ``manifest.thumbnail`` is only assigned when at least one
    thumbnail file was found.
    """
    found = [
        {
            "id": f"{ARCHIVE}/download/{identifier}/{entry['name']}",
            "type": "Image",
            "format": "image/png" if entry['name'].endswith('.png') else "image/jpeg",
        }
        for entry in files
        if entry['format'] == "Thumbnail"
    ]

    # Leave the manifest untouched when the item has no thumbnail files.
    if not found:
        return

    manifest.thumbnail = found

def create_manifest3(identifier, domain=None, page=None):
# Get item metadata
metadata = requests.get('%s/metadata/%s' % (ARCHIVE, identifier)).json()
Expand All @@ -453,6 +461,7 @@ def create_manifest3(identifier, domain=None, page=None):
addMetadata(manifest, identifier, metadata['metadata'])
addSeeAlso(manifest, identifier, metadata['files'])
addRendering(manifest, identifier, metadata['files'])
addThumbnails(manifest, identifier, metadata['files'])

if mediatype == 'texts':
# Get bookreader metadata (mostly for filenames and height / width of image)
Expand Down Expand Up @@ -743,10 +752,8 @@ def ia_resolver(identifier):

def cantaloupe_resolver(identifier):
"""Resolves an existing Image Service identifier to what it should be with the new Cantaloupe setup"""
print("called with identifier:", identifier)
leaf = None
if "$" in identifier:
print("$ in identifier")
identifier, leaf = identifier.split("$", 1)

metadata = requests.get('%s/metadata/%s' % (ARCHIVE, identifier)).json()
Expand All @@ -756,8 +763,6 @@ def cantaloupe_resolver(identifier):

mediatype = metadata['metadata']['mediatype'].lower()
files = metadata['files']
print("mediatype:", mediatype)
print("leaf:", leaf)
if mediatype == "image":
# single image file - find the filename

Expand Down
102 changes: 102 additions & 0 deletions tests/test_linking.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
import unittest
from flask.testing import FlaskClient
from iiify.app import app

class TestLinking(unittest.TestCase):
    """Check the rendering, seeAlso and thumbnail links of v3 manifests.

    Each test requests a manifest through the Flask test client (with
    ``recache=true``) and compares the link ids found in the response with
    the expected archive.org download URLs.
    """

    DOWNLOAD_URL = "https://archive.org/download/{identifier}/{filename}"

    # Expected links for the book item: label -> file name.
    IMAGE_IDENTIFIER = "journalofexpedit00ford"
    IMAGE_RENDERINGS = {
        "Animated GIF": "journalofexpedit00ford.gif",
        "Text PDF": "journalofexpedit00ford.pdf",
        "Abbyy GZ": "journalofexpedit00ford_abbyy.gz",
        "Archive BitTorrent": "journalofexpedit00ford_archive.torrent",
        "Grayscale PDF": "journalofexpedit00ford_bw.pdf",
        "chOCR": "journalofexpedit00ford_chocr.html.gz",
        "DjVuTXT": "journalofexpedit00ford_djvu.txt",
        "Djvu XML": "journalofexpedit00ford_djvu.xml",
        "hOCR": "journalofexpedit00ford_hocr.html",
        "Single Page Processed JP2 ZIP": "journalofexpedit00ford_jp2.zip",
        "OCR Search Text": "journalofexpedit00ford_hocr_searchtext.txt.gz",
        "Single Page Original JP2 Tar": "journalofexpedit00ford_orig_jp2.tar",
        "DjVu": "journalofexpedit00ford.djvu",
    }
    IMAGE_SEE_ALSOS = {
        "Cloth Cover Detection Log": "journalofexpedit00ford_cloth_detection.log",
        "Dublin Core": "journalofexpedit00ford_dc.xml",
        "OCR Page Index": "journalofexpedit00ford_hocr_pageindex.json.gz",
        "MARC": "journalofexpedit00ford_marc.xml",
        "MARC Binary": "journalofexpedit00ford_meta.mrc",
        "MARC Source": "journalofexpedit00ford_metasource.xml",
        "Page Numbers JSON": "journalofexpedit00ford_page_numbers.json",
        "Scandata": "journalofexpedit00ford_scandata.xml",
    }

    # Expected links for the video item: label -> file name.
    VIDEO_IDENTIFIER = "DuckandC1951"
    VIDEO_RENDERINGS = {
        "SubRip": "DuckandC1951.asr.srt",
        "Web Video Text Tracks": "DuckandC1951.asr.vtt",
        "Archive BitTorrent": "DuckandC1951_archive.torrent",
        "Intermediate ASR JSON": "DuckandC1951_intermediate_asr.json",
        "Whisper ASR JSON": "DuckandC1951_whisper_asr.json",
    }
    VIDEO_SEE_ALSOS = {
        "Storj Upload Log": "DuckandC1951.storj-store.log",
        "Storj Upload Trigger": "DuckandC1951.storj-store.trigger",
    }
    VIDEO_THUMBNAIL_COUNT = 19

    def setUp(self) -> None:
        self.test_app = FlaskClient(app)

    def convertListToHash(self, items):
        """Index a list of link entries by their first English label."""
        return {item['label']['en'][0]: item for item in items}

    def checkLink(self, map, field, name, value):
        """Assert that ``map`` has an entry ``name`` whose ``id`` is ``value``.

        ``field`` is only used to make the failure message readable.
        """
        # assertIn prints the available labels on failure, unlike assertTrue.
        self.assertIn(name, map, f"Expected to find {name} in {field}")

        self.assertEqual(map[name]['id'], value, f"Expected {value} in {map[name]}")

    def _getManifest(self, identifier):
        """Fetch the freshly regenerated v3 manifest for ``identifier``."""
        resp = self.test_app.get(f"/iiif/3/{identifier}/manifest.json?recache=true")
        self.assertEqual(resp.status_code, 200)
        return resp.json

    def _linkMap(self, manifest, field):
        """Return the label-indexed entries of ``manifest[field]``.

        Fails with a readable message, rather than a KeyError, when the
        field is missing from the manifest.
        """
        self.assertIn(field, manifest, f"Expected {field} in Manifest")
        return self.convertListToHash(manifest[field])

    def _checkLinks(self, linkMap, field, identifier, expected):
        """Check every ``label -> file name`` pair of ``expected``.

        Each label runs in its own subTest so that one missing link does not
        hide the others.
        """
        for name, filename in expected.items():
            url = self.DOWNLOAD_URL.format(identifier=identifier, filename=filename)
            with self.subTest(field=field, name=name):
                self.checkLink(linkMap, field, name, url)

    def test_v3_image_links(self):
        manifest = self._getManifest(self.IMAGE_IDENTIFIER)

        renderingMap = self._linkMap(manifest, 'rendering')
        self._checkLinks(renderingMap, "rendering", self.IMAGE_IDENTIFIER, self.IMAGE_RENDERINGS)

        seeAlsoMap = self._linkMap(manifest, 'seeAlso')
        self._checkLinks(seeAlsoMap, "seeAlso", self.IMAGE_IDENTIFIER, self.IMAGE_SEE_ALSOS)

    def test_v3_video_links(self):
        manifest = self._getManifest(self.VIDEO_IDENTIFIER)

        renderingMap = self._linkMap(manifest, 'rendering')
        seeAlsoMap = self._linkMap(manifest, 'seeAlso')

        # Files whose format is "Unknown" must never be exposed as links.
        unknownMessage = "Found Unknown in rendering or seeAlso where it shouldn't be."
        self.assertNotIn("Unknown", renderingMap, unknownMessage)
        self.assertNotIn("Unknown", seeAlsoMap, unknownMessage)

        self._checkLinks(renderingMap, "rendering", self.VIDEO_IDENTIFIER, self.VIDEO_RENDERINGS)
        self._checkLinks(seeAlsoMap, "seeAlso", self.VIDEO_IDENTIFIER, self.VIDEO_SEE_ALSOS)

        # Thumbnail - thumbnail
        self.assertIn('thumbnail', manifest, "Expected thumbnail in Manifest")
        self.assertEqual(
            len(manifest['thumbnail']),
            self.VIDEO_THUMBNAIL_COUNT,
            f"Expected {self.VIDEO_THUMBNAIL_COUNT} thumbnails: {manifest['thumbnail']}",
        )

0 comments on commit a1900d9

Please sign in to comment.