internetarchive · glenrobson · Feb 9, 2024 · Feb 9, 2024 · Feb 9, 2024 · Mar 8, 2024
diff --git a/iiify/app.py b/iiify/app.py
@@ -8,7 +8,7 @@
 from flask_caching import Cache
 from iiif2 import iiif, web
 from .resolver import ia_resolver, create_manifest, create_manifest3, getids, collection, \
-    purify_domain, cantaloupe_resolver, create_collection3, IsCollection
+    purify_domain, cantaloupe_resolver, create_collection3, IsCollection, create_annotations
 from .configs import options, cors, approot, cache_root, media_root, \
     cache_expr, version, image_server, cache_timeouts
 from urllib.parse import quote
@@ -191,6 +191,11 @@ def manifest3(identifier):
         raise excpt
         # abort(404)
 
+@app.route('/iiif/<version>/annotations/<identifier>/<fileName>/<canvas_no>.json')
+@cache.cached(timeout=cache_timeouts["long"], forced_update=cache_bust)
+def annnotations(version, identifier, fileName, canvas_no):
+    """Return create_annotations' result via ldjsonify; domain is the 'domain' query arg or the request root."""
+    return ldjsonify(create_annotations(version, identifier, fileName, canvas_no, domain=purify_domain(request.args.get('domain', request.url_root))))
 
 @app.route('/iiif/<identifier>/manifest.json')
 @cache.cached(timeout=cache_timeouts["long"], forced_update=cache_bust)

diff --git a/iiify/resolver.py b/iiify/resolver.py
@@ -9,6 +9,7 @@
 import json
 import math 
 import re
+import xml.etree.ElementTree as ET
 
 IMG_CTX = 'http://iiif.io/api/image/2/context.json'
 PRZ_CTX = 'http://iiif.io/api/presentation/2/context.json'
@@ -37,6 +38,28 @@ def getids(q, limit=1000, cursor=''):
     }, allow_redirects=True, timeout=None)
     return r.json()
 
+def checkMultiItem(metadata):
+    """Report whether an item holds several original files sharing one valid format.
+
+    Counts the entries of metadata['files'] whose 'source' is "original", grouped by
+    their 'format' value, then walks the formats in first-seen order.
+
+    Returns (True, format) for the first format that occurs more than once and whose
+    lower-cased name is in valid_filetypes; otherwise (False, None).
+    """
+    # Maybe add call to book stack to see if that works first
+    tally = {}
+    originals = (entry for entry in metadata['files'] if entry['source'] == "original")
+    for entry in originals:
+        tally[entry['format']] = tally.get(entry['format'], 0) + 1
+
+    # Will have to see if there are objects with multiple images and formats
+    for name, total in tally.items():
+        if total > 1 and name.lower() in valid_filetypes:
+            return (True, name)
+    return (False, None)
+
+
 def to_mimetype(format):
     formats = {
         "VBR MP3": "audio/mp3",
@@ -363,6 +386,7 @@ def addMetadata(item, identifier, metadata, collection=False):
     item.metadata = manifest_metadata
 
 
+
 def create_manifest3(identifier, domain=None, page=None):
     # Get item metadata
     metadata = requests.get('%s/metadata/%s' % (ARCHIVE, identifier)).json()
@@ -383,9 +407,12 @@ def create_manifest3(identifier, domain=None, page=None):
         # subprefix can be different from the identifier use the scandata filename to find the correct prefix
         # if not present fall back to identifier
         subprefix = identifier
+        djvuFile = ""
         for fileMd in metadata['files']:
             if fileMd['name'].endswith('_scandata.xml'):
                 subprefix = fileMd['name'].replace('_scandata.xml', '')
+            if fileMd['format'] == 'Djvu XML':    
+                djvuFile = fileMd['name']
 
         bookReaderURL = f"https://{metadata.get('server')}/BookReader/BookReaderJSIA.php?id={identifier}&itemPath={metadata.get('dir')}&server={metadata.get('server')}&format=jsonp&subPrefix={subprefix}"
 
@@ -446,9 +473,47 @@ def create_manifest3(identifier, domain=None, page=None):
             except:
                 pass
 
-
+        # Add annotations if djvu file is present
+        if djvuFile:
+            count = 1
+            for canvas in manifest.items:
+                if 'annotations' in canvas:
+                    annotations = canvas.annotations
+                else:
+                    annotations = []
+
+                annotations.append({
+                    "id": f"{domain}3/annotations/{identifier}/{djvuFile}/{count}.json",
+                    "type": "AnnotationPage"
+                })         
+                canvas.annotations = annotations
+                count += 1
     elif mediatype == 'image':
-        singleImage(metadata, identifier, manifest, uri)
+        (multiFile, format) = checkMultiItem(metadata)
+        print (f"Checking multiFile {multiFile} {format}")
+        if multiFile:
+            # Create multi file manifest
+            pageCount = 0
+            for file in metadata['files']:
+                if file['source'] == "original" and file['format'] == format:
+                    imgId = f"{identifier}/{file['name']}".replace('/','%2f')
+                    imgURL = f"{IMG_SRV}/3/{imgId}"
+                    pageCount += 1
+
+                    try:
+                        manifest.make_canvas_from_iiif(url=imgURL,
+                                                    id=f"{URI_PRIFIX}/{identifier}${pageCount}/canvas",
+                                                    label=f"{file['name']}",
+                                                    anno_page_id=f"{uri}/annotationPage/1",
+                                                    anno_id=f"{uri}/annotation/1")
+                    except requests.exceptions.HTTPError as error:
+                        print (f'Failed to get {imgURL}')
+                        manifest.make_canvas(label=f"Failed to load {file['name']} from Image Server",
+                                             summary=f"Got {error}",
+                                            id=f"{URI_PRIFIX}/{identifier}/canvas",
+                                            height=1800, width=1200)
+        else:
+            singleImage(metadata, identifier, manifest, uri)
     elif mediatype == 'audio' or mediatype == 'etree':
         # sort the files into originals and derivatives, splitting the derivatives into buckets based on the original
         originals = []
@@ -566,6 +631,44 @@ def create_manifest3(identifier, domain=None, page=None):
 
     return json.loads(manifest.jsonld())
 
+def create_annotations(version, identifier, fileName, canvas_no, domain=None):
+    """Build an AnnotationPage with one annotation per WORD element of one page of an XML file.
+
+    The file is fetched from {ARCHIVE}/download/{identifier}/{fileName}; canvas_no is the 1-based
+    position of the page's OBJECT element. Returns the page's JSON-LD parsed into a dict.
+    Raises ValueError for a non-integer canvas_no or a failed fetch/parse, IndexError if no page matches.
+    """
+    annotationPage = AnnotationPage(id=f"{domain}{version}/annotations/{identifier}/{fileName}/{canvas_no}.json")
+    annotationPage.items = []
+    index = int(canvas_no) - 1  # zero-based number used in the annotation ids and targets
+    url = f"{ARCHIVE}/download/{identifier}/{fileName}"
+    try:
+        response = requests.get(url)
+        response.raise_for_status()  # Raise an error for bad status codes
+        djfu = ET.fromstring(response.content)
+        page = djfu.findall(f".//OBJECT[{canvas_no}]")[0]
+        # NOTE(review): coords is copied verbatim into xywh=; confirm its value order really is x,y,w,h
+        for count, word in enumerate(page.findall(".//WORD"), start=1):
+            annotationPage.items.append({
+                "id": f"https://iiif.archive.org/iiif/{identifier}/canvas/{index}/anno/{count}",
+                "type": "Annotation",
+                "motivation": "supplementing",
+                "body": {
+                    "type": "TextualBody",
+                    "format": "text/plain",
+                    "value": word.text
+                },
+                "target": f"https://iiif.archive.org/iiif/{identifier}${index}/canvas#xywh={word.attrib['coords']}"
+            })
+    except requests.exceptions.RequestException as e:
+        print(f"Error fetching the XML file: {e}")
+        raise ValueError(f"Failed to retrieve {url}") from e
+    except ET.ParseError as e:
+        print(f"Error parsing the XML content: {e}")
+        raise ValueError(f"Failed to process {url}") from e
+
+    return json.loads(annotationPage.jsonld())
+
 def coerce_list(value):
     if isinstance(value, list):
         return ". ".join(value)

diff --git a/tests/test_annotations.py b/tests/test_annotations.py
@@ -0,0 +1,52 @@
+import unittest
+from flask.testing import FlaskClient
+from iiify.app import app
+
+class TestAnnotations(unittest.TestCase):
+    """Exercise the v3 annotation links in a manifest and the annotation page endpoint.
+
+    Requests go through a FlaskClient wrapped around the iiify app.
+    """
+
+    def setUp(self) -> None:
+        self.test_app = FlaskClient(app)
+
+    def test_v3_manifest_has_annotations(self):
+        """Each canvas must reference the annotation page numbered after its 1-based position."""
+        resp = self.test_app.get("/iiif/3/journalofexpedit00ford/manifest.json?recache=true")
+        self.assertEqual(resp.status_code, 200)
+        manifest = resp.json
+
+        for count, canvas in enumerate(manifest['items'], start=1):
+            self.assertTrue('annotations' in canvas, f"Expected annotations in canvas {canvas['id']}")
+            annotations_url = f"https://localhost/iiif/3/annotations/journalofexpedit00ford/journalofexpedit00ford_djvu.xml/{count}.json"
+            found = any(anno['id'] == annotations_url for anno in canvas['annotations'])
+            self.assertTrue(found, f"Expected to find {annotations_url} in {canvas['annotations']}")
+
+    def test_v3_annotations(self):
+        """The first annotation page must be a well-formed AnnotationPage holding six annotations."""
+        resp = self.test_app.get("/iiif/3/annotations/journalofexpedit00ford/journalofexpedit00ford_djvu.xml/1.json?recache=true")
+        self.assertEqual(resp.status_code, 200)
+        annotations = resp.json
+
+        self.assertEqual(annotations['id'], "https://localhost/iiif/3/annotations/journalofexpedit00ford/journalofexpedit00ford_djvu.xml/1.json", "Unexpected id")
+        self.assertEqual(annotations['@context'], "http://iiif.io/api/presentation/3/context.json", "Unexpected context")
+        self.assertEqual(annotations['type'], "AnnotationPage", "Unexpected type, expected AnnotationPage")
+        annotationList = annotations['items']
+        self.assertEqual(len(annotationList), 6, "Unexpected number of annotations")
+
+        # Annotation ids must not repeat within the page.
+        ids = []
+        for position, anno in enumerate(annotationList):
+            self.assertTrue(anno['id'] not in ids, f"Duplicate ID: {anno['id']}")
+            ids.append(anno['id'])
+            self.assertEqual(anno['type'], "Annotation", "Expected type of Annotation")
+            self.assertEqual(anno['motivation'], "supplementing", "Expected motivation of supplementing")
+            self.assertTrue("body" in anno and "target" in anno, f"Body or target missing from annotation {anno}")
+            self.assertEqual(anno['body']['type'], "TextualBody", "Expected body to be a TextualBody")
+            self.assertEqual(anno['body']['format'], "text/plain", "Expected format to be a text/plain")
+            self.assertEqual(anno['target'].split('#')[0], "https://iiif.archive.org/iiif/journalofexpedit00ford$0/canvas")
+            if position == 0:
+                # Only the first annotation's region and text are pinned to exact values.
+                self.assertEqual(anno['target'].split('#')[1],"xywh=592,1860,1052,1742")
+                self.assertEqual(anno['body']['value'],"JOURNAL ")
diff --git a/tests/test_manifests.py b/tests/test_manifests.py
@@ -144,6 +144,22 @@ def test_metadata_array(self):
         manifest = resp.json
         self.assertTrue(len(manifest['summary']['none']) > 1, f"Expected multiple summary values, but got {manifest['summary']['none']}")
 
+    def test_multi_file_image(self):
+        """The manifest must have three canvases, none of the later ones reusing the first canvas's id."""
+        resp = self.test_app.get("/iiif/3/arkivkopia.se-lms-G70-48.3/manifest.json")
+        self.assertEqual(resp.status_code, 200)
+        manifest = resp.json
+        self.assertEqual(len(manifest['items']),3, f"Expected three canvases, but got {len(manifest['items'])}")
+        firstCanvasId = manifest['items'][0]['id']
+        for later_canvas in manifest['items'][1:]:
+            self.assertNotEqual(later_canvas['id'], firstCanvasId, 'Canvas Ids need to be unique')
+
+    def test_multi_file(self):
+        """The v3 manifest for st-anthony-relics-01 must list six canvases."""
+        resp = self.test_app.get("/iiif/3/st-anthony-relics-01/manifest.json")
+        self.assertEqual(resp.status_code, 200)
+        self.assertEqual(len(resp.json['items']), 6, f"Expected six canvases, but got {len(resp.json['items'])}")
+
 
 ''' to test:
 kaled_jalil (no derivatives)