ollama · lifeart · Dec 5, 2024 · Dec 6, 2024
diff --git a/package-lock.json b/package-lock.json
diff --git a/package.json b/package.json
@@ -38,12 +38,14 @@
     "@typescript-eslint/eslint-plugin": "^5.42.1",
     "@typescript-eslint/parser": "^5.42.1",
     "eslint": "^8.29.0",
-    "vitest": "^2.1.6",
     "prettier": "^3.2.4",
     "typescript": "^5.3.2",
-    "unbuild": "^2.0.0"
+    "unbuild": "^2.0.0",
+    "vitest": "^2.1.6",
+    "file-type": "^19.6.0"
   },
   "dependencies": {
+    "sharp": "^0.33.5",
     "whatwg-fetch": "^3.6.20"
   }
 }
diff --git a/src/index.ts b/src/index.ts
@@ -6,9 +6,13 @@ import { dirname, join, resolve } from 'path'
 import { createHash } from 'crypto'
 import { homedir } from 'os'
 import { Ollama as OllamaBrowser } from './browser.js'
+import * as _sharp from 'sharp'
 
 import type { CreateRequest, ProgressResponse } from './interfaces.js'
 
+const sharp = _sharp.default // callable used below as sharp(buffer); taken from the namespace import
+// Lower-case extensions (no dot) that encodeImage re-encodes to JPEG before base64.
+const IMAGE_EXTENSIONS_TO_CONVERT = ['webp', 'avif', 'gif', 'svg', 'tiff', 'tif']
 export class Ollama extends OllamaBrowser {
   async encodeImage(image: Uint8Array | Buffer | string): Promise<string> {
     if (typeof image !== 'string') {
@@ -19,6 +23,11 @@ export class Ollama extends OllamaBrowser {
       if (fs.existsSync(image)) {
         // this is a filepath, read the file and convert it to base64
         const fileBuffer = await promises.readFile(resolve(image))
+        const imageExtension = image.split('.').pop()?.toLowerCase()
+        if (imageExtension && IMAGE_EXTENSIONS_TO_CONVERT.includes(imageExtension)) {
+          const convertedImage = await sharp(fileBuffer).jpeg().toBuffer()
+          return Buffer.from(convertedImage).toString('base64')
+        }
         return Buffer.from(fileBuffer).toString('base64')
       }
     } catch {

diff --git a/test/e2e-image-formats.spec.ts b/test/e2e-image-formats.spec.ts
@@ -0,0 +1,50 @@
+import { describe, it, expect } from 'vitest'
+import { Ollama } from '../src/index'
+import path from 'path'
+import { fileURLToPath } from 'url'
+import { dirname } from 'path'
+
+
+async function describeImage(imageName: string) {
+  // Sends ./mocks/images/<imageName> (resolved next to this spec) to chat() and returns the reply text.
+  const specDir = dirname(fileURLToPath(import.meta.url))
+  const imagePath = path.resolve(specDir, `./mocks/images/${imageName}`)
+  const client = new Ollama()
+  const { message } = await client.chat({
+    model: 'llama3.2-vision',
+    messages: [{ role: 'user', content: 'what is this?', images: [imagePath] }],
+  })
+  return message.content
+}
+
+const testConfig = {
+  retry: 3, // passed, together with the timeout, as the second argument to each `it` below
+  timeout: 1000 * 60 * 5, // 5 minutes, in milliseconds
+}
+
+// End-to-end cases: each sends one fixture image through describeImage() and
+// expects the lower-cased reply to contain a keyword for that image.
+describe('Ollama | Nodejs | Vision image formats', () => {
+  // Describes the fixture and asserts the keyword appears, case-insensitively
+  // (the reply is lower-cased; the keywords below are already lower-case).
+  const expectReplyToMention = async (fixture: string, keyword: string) => {
+    const reply = await describeImage(fixture)
+    expect(reply.toLowerCase()).toContain(keyword)
+  }
+
+  it('support ".webp" image recognition', testConfig, () =>
+    expectReplyToMention('WebP-Gradient.webp', 'gradient'))
+
+  it('support ".gif" image recognition', testConfig, () =>
+    expectReplyToMention('200w.gif', 'cat'))
+
+  it('support ".avif" image recognition', testConfig, () =>
+    expectReplyToMention('fox.profile0.8bpc.yuv420.avif', 'fox'))
+
+  // NOTE(review): the title mentions .tiff, but only a .tif fixture is exercised.
+  it('support ".tiff/.tif" image recognition', testConfig, () =>
+    expectReplyToMention('julia.tif', 'julia'))
+
+  it('support ".svg" image recognition', testConfig, () =>
+    expectReplyToMention('house.svg', 'house'))
+})
diff --git a/test/mocks/images/200w.gif b/test/mocks/images/200w.gif
diff --git a/test/mocks/images/WebP-Gradient.webp b/test/mocks/images/WebP-Gradient.webp
diff --git a/test/mocks/images/fox.profile0.8bpc.yuv420.avif b/test/mocks/images/fox.profile0.8bpc.yuv420.avif
diff --git a/test/mocks/images/house.svg b/test/mocks/images/house.svg
diff --git a/test/mocks/images/julia.tif b/test/mocks/images/julia.tif
diff --git a/test/server.spec.ts b/test/server.spec.ts
@@ -0,0 +1,26 @@
+import { describe, it, expect } from 'vitest'
+import { Ollama } from '../src/index'
+import path from 'path'
+import fs from 'fs'
+import { fileTypeFromBuffer } from 'file-type'
+import { fileURLToPath } from 'url'
+import { dirname } from 'path'
+
+// ES modules have no built-in __dirname, so derive it from this file's URL.
+const __dirname = dirname(fileURLToPath(import.meta.url))
+
+describe('Ollama | Nodejs', () => {
+  it('support webp images conversion', async () => {
+    // Sanity check: the fixture really is WebP before it goes through encodeImage.
+    const imagePath = path.resolve(__dirname, './mocks/images/WebP-Gradient.webp')
+    const sourceType = await fileTypeFromBuffer(await fs.promises.readFile(imagePath))
+    expect(sourceType?.ext).toBe('webp')
+    expect(sourceType?.mime).toBe('image/webp')
+
+    // encodeImage returns base64; decode it and sniff the bytes to confirm JPEG output.
+    const base64img = await new Ollama().encodeImage(imagePath)
+    const encodedType = await fileTypeFromBuffer(Buffer.from(base64img, 'base64'))
+    expect(encodedType?.ext).toBe('jpg')
+    expect(encodedType?.mime).toBe('image/jpeg')
+  })
+})