Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feature: WebP images support #174

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
639 changes: 616 additions & 23 deletions package-lock.json

Large diffs are not rendered by default.

6 changes: 4 additions & 2 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -38,12 +38,14 @@
"@typescript-eslint/eslint-plugin": "^5.42.1",
"@typescript-eslint/parser": "^5.42.1",
"eslint": "^8.29.0",
"vitest": "^2.1.6",
"prettier": "^3.2.4",
"typescript": "^5.3.2",
"unbuild": "^2.0.0"
"unbuild": "^2.0.0",
"vitest": "^2.1.6",
"file-type": "^19.6.0"
},
"dependencies": {
"sharp": "^0.33.5",
"whatwg-fetch": "^3.6.20"
}
}
9 changes: 9 additions & 0 deletions src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,13 @@ import { dirname, join, resolve } from 'path'
import { createHash } from 'crypto'
import { homedir } from 'os'
import { Ollama as OllamaBrowser } from './browser.js'
import * as _sharp from 'sharp'

import type { CreateRequest, ProgressResponse } from './interfaces.js'

// `sharp` is pulled in as a namespace import (`_sharp`); the function that is
// actually called on image buffers is that namespace's default export.
const { default: sharp } = _sharp

/**
 * Lower-case file extensions (without the leading dot) that `encodeImage`
 * re-encodes to JPEG through sharp before base64-encoding the file.
 */
const IMAGE_EXTENSIONS_TO_CONVERT = [
  'webp',
  'avif',
  'gif',
  'svg',
  'tiff',
  'tif',
]
export class Ollama extends OllamaBrowser {
async encodeImage(image: Uint8Array | Buffer | string): Promise<string> {
if (typeof image !== 'string') {
Expand All @@ -19,6 +23,11 @@ export class Ollama extends OllamaBrowser {
if (fs.existsSync(image)) {
// this is a filepath, read the file and convert it to base64
const fileBuffer = await promises.readFile(resolve(image))
const imageExtension = image.split('.').pop()?.toLowerCase()
if (imageExtension && IMAGE_EXTENSIONS_TO_CONVERT.includes(imageExtension)) {
const convertedImage = await sharp(fileBuffer).jpeg().toBuffer()
return Buffer.from(convertedImage).toString('base64')
}
return Buffer.from(fileBuffer).toString('base64')
}
} catch {
Expand Down
50 changes: 50 additions & 0 deletions test/e2e-image-formats.spec.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
import { describe, it, expect } from 'vitest'
import { Ollama } from '../src/index'
import path from 'path'
import { fileURLToPath } from 'url'
import { dirname } from 'path'


/**
 * Sends one fixture image to the `llama3.2-vision` model with the prompt
 * "what is this?" and returns the text content of the reply.
 *
 * @param imageName - File name of a fixture inside `./mocks/images`, resolved
 *   relative to this spec file.
 * @returns The `message.content` of the chat response.
 */
async function describeImage(imageName: string) {
  // Resolve the fixture against this module's own directory so the result
  // does not depend on the process working directory.
  const specDir = dirname(fileURLToPath(import.meta.url))
  const fixturePath = path.resolve(specDir, `./mocks/images/${imageName}`)

  const client = new Ollama()
  const { message } = await client.chat({
    model: 'llama3.2-vision',
    messages: [{ role: 'user', content: 'what is this?', images: [fixturePath] }],
  })
  return message.content
}

/**
 * Options handed to every `it(...)` in this file as its second argument:
 * a five-minute timeout and a retry count of 3.
 */
const testConfig = {
  retry: 3,
  timeout: 1000 * 60 * 5, // 5 minutes, in milliseconds
}

describe('Ollama | Nodejs | Vision image formats', () => {
  // One row per fixture: the label shown in the test title, the fixture file
  // under ./mocks/images, and a word the model's answer must contain.
  const cases = [
    { label: '.webp', file: 'WebP-Gradient.webp', keyword: 'gradient' },
    { label: '.gif', file: '200w.gif', keyword: 'cat' },
    { label: '.avif', file: 'fox.profile0.8bpc.yuv420.avif', keyword: 'fox' },
    { label: '.tiff/.tif', file: 'julia.tif', keyword: 'julia' },
    { label: '.svg', file: 'house.svg', keyword: 'house' },
  ]

  // Register the cases in table order; each one asks the model to describe
  // the fixture and checks the lower-cased answer for the expected keyword.
  for (const { label, file, keyword } of cases) {
    it(`support "${label}" image recognition`, testConfig, async () => {
      const answer = await describeImage(file)
      expect(answer.toLowerCase()).toContain(keyword)
    })
  }
})
Binary file added test/mocks/images/200w.gif
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added test/mocks/images/WebP-Gradient.webp
Binary file not shown.
Binary file added test/mocks/images/fox.profile0.8bpc.yuv420.avif
Binary file not shown.
37 changes: 37 additions & 0 deletions test/mocks/images/house.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added test/mocks/images/julia.tif
Binary file not shown.
26 changes: 26 additions & 0 deletions test/server.spec.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
import { describe, it, expect } from 'vitest'
import { Ollama } from '../src/index'
import path from 'path'
import fs from 'fs'
import { fileTypeFromBuffer } from 'file-type'
import { fileURLToPath } from 'url'
import { dirname } from 'path'

// Rebuild `__filename` / `__dirname` from this module's own URL so fixture
// paths can be resolved relative to the spec file.
const __filename = fileURLToPath(new URL(import.meta.url))
const __dirname = dirname(__filename)

describe('Ollama | Nodejs', () => {
  // Verifies that `encodeImage` given a path to a WebP file returns base64
  // whose decoded bytes are detected as JPEG rather than WebP.
  it('support webp images conversion', async () => {
    const imagePath = path.resolve(__dirname, './mocks/images/WebP-Gradient.webp')

    // Sanity-check the fixture first: if it were not really WebP, the JPEG
    // assertions below would not prove that any conversion happened.
    const fixtureBytes = await fs.promises.readFile(imagePath)
    const fixtureType = await fileTypeFromBuffer(fixtureBytes)
    expect(fixtureType?.ext).toBe('webp')
    expect(fixtureType?.mime).toBe('image/webp')

    // Encode via the file-path branch, then sniff the decoded bytes.
    const instance = new Ollama()
    const base64img = await instance.encodeImage(imagePath)
    const encodedType = await fileTypeFromBuffer(Buffer.from(base64img, 'base64'))
    expect(encodedType?.ext).toBe('jpg')
    expect(encodedType?.mime).toBe('image/jpeg')
  })
})
Loading