Skip to content

Commit

Permalink
Adding annotations
Browse files Browse the repository at this point in the history
  • Loading branch information
glenrobson committed May 23, 2024
1 parent e3ef404 commit 43c8939
Show file tree
Hide file tree
Showing 3 changed files with 115 additions and 2 deletions.
7 changes: 6 additions & 1 deletion iiify/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from flask_caching import Cache
from iiif2 import iiif, web
from .resolver import ia_resolver, create_manifest, create_manifest3, getids, collection, \
purify_domain, cantaloupe_resolver, create_collection3, IsCollection
purify_domain, cantaloupe_resolver, create_collection3, IsCollection, create_annotations
from .configs import options, cors, approot, cache_root, media_root, \
cache_expr, version, image_server, cache_timeouts
from urllib.parse import quote
Expand Down Expand Up @@ -191,6 +191,11 @@ def manifest3(identifier):
raise excpt
# abort(404)

@app.route('/iiif/<version>/annotations/<identifier>/<fileName>/<canvas_no>.json')
@cache.cached(timeout=cache_timeouts["long"], forced_update=cache_bust)
def annnotations(version, identifier, fileName, canvas_no):
    """Serve the annotations generated for one canvas of an item.

    The URL supplies the API ``version``, the item ``identifier``, the
    ``fileName`` of the source file within the item and the ``canvas_no``
    of the requested canvas. The response is whatever ``ldjsonify``
    produces from the result of ``create_annotations``.
    """
    # NOTE(review): the function name is spelled with three n's. Flask uses
    # the function name as the default endpoint name, so it is left as is.
    # A ``domain`` query parameter overrides the request's own root URL.
    requested_domain = request.args.get('domain', request.url_root)
    domain = purify_domain(requested_domain)
    annotation_page = create_annotations(
        version, identifier, fileName, canvas_no, domain=domain
    )
    return ldjsonify(annotation_page)

@app.route('/iiif/<identifier>/manifest.json')
@cache.cached(timeout=cache_timeouts["long"], forced_update=cache_bust)
Expand Down
58 changes: 57 additions & 1 deletion iiify/resolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import json
import math
import re
import xml.etree.ElementTree as ET

IMG_CTX = 'http://iiif.io/api/image/2/context.json'
PRZ_CTX = 'http://iiif.io/api/presentation/2/context.json'
Expand Down Expand Up @@ -406,9 +407,12 @@ def create_manifest3(identifier, domain=None, page=None):
# subprefix can be different from the identifier use the scandata filename to find the correct prefix
# if not present fall back to identifier
subprefix = identifier
djvuFile = ""
for fileMd in metadata['files']:
if fileMd['name'].endswith('_scandata.xml'):
subprefix = fileMd['name'].replace('_scandata.xml', '')
if fileMd['format'] == 'Djvu XML':
djvuFile = fileMd['name']

bookReaderURL = f"https://{metadata.get('server')}/BookReader/BookReaderJSIA.php?id={identifier}&itemPath={metadata.get('dir')}&server={metadata.get('server')}&format=jsonp&subPrefix={subprefix}"

Expand Down Expand Up @@ -469,7 +473,21 @@ def create_manifest3(identifier, domain=None, page=None):
except:
pass


# Add annotations if djvu file is present
if djvuFile:
count = 1
for canvas in manifest.items:
if 'annotations' in canvas:
annotations = canvas.annotations
else:
annotations = []

annotations.append({
"id": f"{domain}3/annotations/{identifier}/{djvuFile}/{count}.json",
"type": "AnnotationPage"
})
canvas.annotations = annotations
count += 1
elif mediatype == 'image':
(multiFile, format) = checkMultiItem(metadata)
print (f"Checking multiFile {multiFile} {format}")
Expand Down Expand Up @@ -613,6 +631,44 @@ def create_manifest3(identifier, domain=None, page=None):

return json.loads(manifest.jsonld())

def create_annotations(version, identifier, fileName, canvas_no, domain=None):
    """Build an AnnotationPage with one annotation per word of a page.

    The XML file ``fileName`` is downloaded from the item ``identifier``,
    the ``canvas_no``-th ``OBJECT`` element is selected and every ``WORD``
    element beneath it becomes a ``supplementing`` annotation whose body is
    the word text and whose target is the canvas region given by the word's
    ``coords`` attribute.

    Args:
        version: API version segment used in the annotation page id.
        identifier: Item identifier.
        fileName: Name of the XML file inside the item.
        canvas_no: 1-based canvas number (string or int).
        domain: Base URL prefixed to the annotation page id.

    Returns:
        The annotation page as a plain ``dict`` (parsed from its JSON-LD).

    Raises:
        ValueError: if ``canvas_no`` is not an integer of at least 1, if the
            file cannot be fetched or parsed, or if it has no such page.
    """
    annotationPage = AnnotationPage(id=f"{domain}{version}/annotations/{identifier}/{fileName}/{canvas_no}.json")
    annotationPage.items = []
    # Canvases are numbered from 1 in the URL but from 0 in canvas ids.
    index = int(canvas_no) - 1
    if index < 0:
        # ElementTree rejects position predicates below 1 with a SyntaxError;
        # report it as the same error type as other bad input instead.
        raise ValueError(f"Canvas number must be 1 or greater, got {canvas_no}")
    url = f"{ARCHIVE}/download/{identifier}/{fileName}"
    try:
        # Fetch the remote XML file
        response = requests.get(url)
        response.raise_for_status()  # Raise an error for bad status codes

        # Parse the XML content
        djfu = ET.fromstring(response.content)
        pages = djfu.findall(f".//OBJECT[{canvas_no}]")
        if not pages:
            raise ValueError(f"No page {canvas_no} found in {url}")
        page = pages[0]
        for count, word in enumerate(page.findall(".//WORD"), start=1):
            annotationPage.items.append({
                "id": f"https://iiif.archive.org/iiif/{identifier}/canvas/{index}/anno/{count}",
                "type": "Annotation",
                "motivation": "supplementing",
                "body": {
                    "type": "TextualBody",
                    "format": "text/plain",
                    "value": word.text
                },
                "target": f"https://iiif.archive.org/iiif/{identifier}${index}/canvas#xywh={word.attrib['coords']}"
            })

    except requests.exceptions.RequestException as e:
        print(f"Error fetching the XML file: {e}")
        raise ValueError(f"Failed to retrieve {url}") from e
    except ET.ParseError as e:
        print(f"Error parsing the XML content: {e}")
        raise ValueError(f"Failed to process {url}") from e

    return json.loads(annotationPage.jsonld())

def coerce_list(value):
if isinstance(value, list):
return ". ".join(value)
Expand Down
52 changes: 52 additions & 0 deletions tests/test_annotations.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
import unittest
from flask.testing import FlaskClient
from iiify.app import app

class TestAnnotations(unittest.TestCase):
    """Checks the annotation links in v3 manifests and the annotation pages."""

    def setUp(self) -> None:
        self.test_app = FlaskClient(app)

    def test_v3_manifest_has_annotations(self):
        """Every canvas of the manifest links to its own annotation page."""
        resp = self.test_app.get("/iiif/3/journalofexpedit00ford/manifest.json?recache=true")
        self.assertEqual(resp.status_code, 200)
        manifest = resp.json

        # Annotation pages are numbered from 1, in canvas order.
        for count, canvas in enumerate(manifest['items'], start=1):
            self.assertIn('annotations', canvas, f"Expected annotations in canvas {canvas['id']}")
            annotations_url = f"https://localhost/iiif/3/annotations/journalofexpedit00ford/journalofexpedit00ford_djvu.xml/{count}.json"
            found = any(anno['id'] == annotations_url for anno in canvas['annotations'])
            self.assertTrue(found, f"Expected to find {annotations_url} in {canvas['annotations']}")

    def test_v3_annotations(self):
        """The first annotation page has the expected shape and content."""
        resp = self.test_app.get("/iiif/3/annotations/journalofexpedit00ford/journalofexpedit00ford_djvu.xml/1.json?recache=true")
        self.assertEqual(resp.status_code, 200)
        annotations = resp.json

        self.assertEqual(annotations['id'], "https://localhost/iiif/3/annotations/journalofexpedit00ford/journalofexpedit00ford_djvu.xml/1.json", "Unexpected id")
        self.assertEqual(annotations['@context'], "http://iiif.io/api/presentation/3/context.json", "Unexpected context")
        self.assertEqual(annotations['type'], "AnnotationPage", "Unexpected type, expected AnnotationPage")
        annotationList = annotations['items']
        self.assertEqual(len(annotationList), 6, "Unexpected number of annotations")

        ids = []
        for position, anno in enumerate(annotationList):
            self.assertNotIn(anno['id'], ids, f"Duplicate ID: {anno['id']}")
            ids.append(anno['id'])
            self.assertEqual(anno['type'], "Annotation", "Expected type of Annotation")
            self.assertEqual(anno['motivation'], "supplementing", "Expected motivation of supplementing")
            self.assertTrue("body" in anno and "target" in anno, f"Body or target missing from annotation {anno}")
            self.assertEqual(anno['body']['type'], "TextualBody", "Expected body to be a TextualBody")
            self.assertEqual(anno['body']['format'], "text/plain", "Expected format to be a text/plain")
            self.assertEqual(anno['target'].split('#')[0], "https://iiif.archive.org/iiif/journalofexpedit00ford$0/canvas")
            # Only the first annotation's region and text are pinned exactly.
            if position == 0:
                self.assertEqual(anno['target'].split('#')[1], "xywh=592,1860,1052,1742")
                self.assertEqual(anno['body']['value'], "JOURNAL ")

0 comments on commit 43c8939

Please sign in to comment.