-
-
Notifications
You must be signed in to change notification settings - Fork 10
/
Copy pathllama.go
150 lines (129 loc) · 3.55 KB
/
llama.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
// Copyright (c) Roman Atachiants and contributors. All rights reserved.
// Licensed under the MIT license. See LICENSE file in the project root for details.
package search
import (
"fmt"
"io"
"sync/atomic"
)
// Vectorizer represents a loaded LLM/Embedding model. Instances are created
// with NewVectorizer and released with Close.
type Vectorizer struct {
// handle is the value returned by load_model; Close resets it to zero.
handle uintptr
// n_embd is the value reported by embed_size for the model. It is used as the
// length of the vectors produced by Context.EmbedText.
n_embd int32
// pool holds the contexts that Vectorizer.EmbedText borrows and returns.
pool *pool[*Context]
}
// NewVectorizer loads the model stored at modelPath and wraps it in a
// Vectorizer. gpuLayers is converted to uint32 and forwarded to load_model.
// An error is returned when load_model yields a zero handle.
func NewVectorizer(modelPath string, gpuLayers int) (*Vectorizer, error) {
	h := load_model(modelPath, uint32(gpuLayers))
	if h == 0 {
		return nil, fmt.Errorf("failed to load model (%s)", modelPath)
	}

	v := new(Vectorizer)
	v.handle = h
	v.n_embd = embed_size(h)

	// Contexts are pooled so that repeated EmbedText calls can reuse them. The
	// factory captures v and builds every context with a size of 0.
	v.pool = newPool(16, func() *Context { return v.Context(0) })
	return v, nil
}
// Close releases the pooled contexts and then the model itself. Calling Close
// on a model that is already closed (or was never loaded) is a no-op. It
// always returns nil.
func (m *Vectorizer) Close() error {
	if m.handle == 0 {
		return nil // nothing left to release
	}

	// Every pooled context was created from m.handle, so the contexts are
	// released while the model is still alive.
	// NOTE(review): assumes the native library expects contexts to be freed
	// before the model they belong to - confirm against its API.
	if m.pool != nil {
		m.pool.Close()
	}

	free_model(m.handle)
	m.handle = 0
	return nil
}
// Context creates a new context of the given size that is bound to this model.
// The size is converted to uint32 and handed to load_context together with the
// model handle. The returned context records m as its parent.
func (m *Vectorizer) Context(size int) *Context {
	h := load_context(m.handle, uint32(size), true)
	return &Context{parent: m, handle: h}
}
// EmbedText embeds the given text using the model and returns the resulting
// vector. A context is borrowed from the pool for the duration of the call and
// handed back afterwards.
//
// An error is returned when the model has been closed (or was never
// initialized) or when the borrowed context fails to embed the text.
func (m *Vectorizer) EmbedText(text string) ([]float32, error) {
	// After Close the pool can no longer hand out usable contexts, so fail
	// fast with an error instead of borrowing one.
	if m.handle == 0 || m.pool == nil {
		return nil, fmt.Errorf("model is closed or not initialized")
	}

	ctx := m.pool.Get()
	defer m.pool.Put(ctx)
	return ctx.EmbedText(text)
}
// --------------------------------- Context ---------------------------------
// Context represents a context for embedding text using the model. Contexts
// are created by Vectorizer.Context and released with Close.
type Context struct {
// parent is the Vectorizer this context was created from.
parent *Vectorizer
// handle is the value returned by load_context; Close resets it to zero.
handle uintptr
// tokens accumulates the token counts reported by embed_text across calls.
tokens atomic.Uint64
}
// Close releases the native context behind ctx and marks it as closed.
// Closing a nil, never-initialized or already-closed context is a no-op, so
// free_context is never handed a zero handle. It always returns nil.
func (ctx *Context) Close() error {
	if ctx == nil || ctx.handle == 0 {
		return nil
	}
	free_context(ctx.handle)
	ctx.handle = 0
	return nil
}
// Tokens reports how many tokens this context has processed so far, i.e. the
// running total accumulated by EmbedText.
func (ctx *Context) Tokens() uint {
	total := ctx.tokens.Load()
	return uint(total)
}
// EmbedText embeds the given text using the model and returns a vector whose
// length is the parent model's n_embd.
//
// It fails when the context or its parent model has a zero handle, when the
// model reports a non-positive embedding size, or when embed_text returns a
// non-zero status. The token count reported by embed_text is added to the
// context's running total regardless of the status.
func (ctx *Context) EmbedText(text string) ([]float32, error) {
	if ctx.handle == 0 || ctx.parent.handle == 0 {
		return nil, fmt.Errorf("context is not initialized")
	}
	if ctx.parent.n_embd <= 0 {
		return nil, fmt.Errorf("model does not support embedding")
	}

	var count uint32
	vector := make([]float32, ctx.parent.n_embd)
	code := embed_text(ctx.handle, text, vector, &count)
	ctx.tokens.Add(uint64(count))

	if code == 0 {
		return vector, nil
	}

	// Translate the non-zero status into a descriptive error.
	var err error
	switch code {
	case 1:
		err = fmt.Errorf("number of tokens (%d) exceeds batch size", count)
	case 2:
		err = fmt.Errorf("last token in the prompt is not SEP")
	case 3:
		err = fmt.Errorf("failed to decode/encode text")
	default:
		err = fmt.Errorf("failed to embed text (code=%d)", code)
	}
	return nil, err
}
// --------------------------------- Resource Pool ---------------------------------
// pool is a generic, fixed-capacity pool of reusable resources.
//
// The underlying channel is never closed. Shutdown is tracked with the closed
// flag instead, so a Put that arrives after (or races with) Close releases its
// resource rather than panicking on a closed channel, and Close itself can be
// called any number of times.
type pool[T io.Closer] struct {
	pool   chan T      // idle resources waiting to be reused
	make   func() T    // factory used when no idle resource is available
	closed atomic.Bool // set once by Close
}

// newPool creates a new pool that keeps at most size idle resources and uses
// new to create a resource whenever none is idle.
func newPool[T io.Closer](size int, new func() T) *pool[T] {
	return &pool[T]{
		pool: make(chan T, size),
		make: new,
	}
}

// Get returns an idle resource from the pool or creates a new one when the
// pool is empty. After Close it returns the zero value of T, because a closed
// pool must not create resources it can no longer own.
func (p *pool[T]) Get() T {
	if p.closed.Load() {
		var zero T
		return zero
	}

	select {
	case x := <-p.pool:
		return x
	default:
		return p.make()
	}
}

// Put returns the resource to the pool. The resource is closed instead when
// the pool is full or has already been closed.
func (p *pool[T]) Put(x T) {
	if p.closed.Load() {
		_ = x.Close() // best effort: Put has no way to report the error
		return
	}

	select {
	case p.pool <- x:
		// Close may have run between the check above and the send. Drain
		// again so the resource is not stranded in a pool nobody will empty.
		if p.closed.Load() {
			p.drain()
		}
	default:
		_ = x.Close() // pool is full; best effort as above
	}
}

// Close closes the pool and releases every idle resource. Only the first call
// has an effect; later calls are no-ops.
func (p *pool[T]) Close() {
	if p.closed.CompareAndSwap(false, true) {
		p.drain()
	}
}

// drain closes every resource currently idle in the pool without blocking.
func (p *pool[T]) drain() {
	for {
		select {
		case x := <-p.pool:
			_ = x.Close() // best effort: keep releasing the remaining resources
		default:
			return
		}
	}
}