Skip to content

Commit

Permalink
feat: added token limits to the listing (#43) (#34)
Browse files Browse the repository at this point in the history
  • Loading branch information
adubovik authored Nov 21, 2023
1 parent 157164a commit 5d7c839
Show file tree
Hide file tree
Showing 9 changed files with 60 additions and 10 deletions.
5 changes: 2 additions & 3 deletions config/aidial.config.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,8 @@
},
"assistant": {
"endpoint": "http://localhost:7001/openai/deployments/assistant/chat/completions",

"assistants": {
"ass": {
"search_assistant": {
"prompt": "Commands: sit_down, get_up, run_away",
"addons": ["search"]
}
Expand Down Expand Up @@ -56,7 +55,7 @@
"search": {},
"forecast": {},
"calculator": {},
"ass": {},
"search_assistant": {},
"app": {}
}
}
Expand Down
1 change: 1 addition & 0 deletions src/main/java/com/epam/aidial/core/config/Model.java
Original file line number Diff line number Diff line change
Expand Up @@ -11,5 +11,6 @@
@EqualsAndHashCode(callSuper = true)
public class Model extends Deployment {
// Kind of model; the bundled config uses values such as "chat" and "embedding".
private ModelType type;
// Optional token limits for this model; null when the config has no "limits" section.
private TokenLimits limits;
// Upstream endpoints for the model; defaults to an empty immutable list.
private List<Upstream> upstreams = List.of();
}
10 changes: 10 additions & 0 deletions src/main/java/com/epam/aidial/core/config/TokenLimits.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
package com.epam.aidial.core.config;

import lombok.Data;

/**
 * Token limits declared for a model in the configuration (the model's {@code limits} section).
 *
 * <p>Every field is a boxed {@link Integer}, so a limit that is not configured stays {@code null}.
 */
@Data
public class TokenLimits {
// Configured via the "maxTotalTokens" key, e.g. 4096 in the bundled sample config.
private Integer maxTotalTokens;
// Presumably the cap on prompt (input) tokens -- not exercised by the sample config; confirm.
private Integer maxPromptTokens;
// Presumably the cap on completion (output) tokens -- not exercised by the sample config; confirm.
private Integer maxCompletionTokens;
}
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,7 @@ private static Controller selectPost(Proxy proxy, ProxyContext context, String p
DeploymentPostController controller = new DeploymentPostController(proxy, context);
return () -> controller.handle(deploymentId, deploymentApi);
}

match = match(PATTERN_FILES, path);
if (match != null) {
String relativeFilePath = match.group(1);
Expand Down
13 changes: 13 additions & 0 deletions src/main/java/com/epam/aidial/core/controller/ModelController.java
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,10 @@
import com.epam.aidial.core.config.Config;
import com.epam.aidial.core.config.Model;
import com.epam.aidial.core.config.ModelType;
import com.epam.aidial.core.config.TokenLimits;
import com.epam.aidial.core.data.ListData;
import com.epam.aidial.core.data.ModelData;
import com.epam.aidial.core.data.TokenLimitsData;
import com.epam.aidial.core.util.HttpStatus;
import io.vertx.core.Future;
import lombok.RequiredArgsConstructor;
Expand Down Expand Up @@ -67,6 +69,17 @@ private static ModelData createModel(Model model) {
data.getCapabilities().setChatCompletion(true);
}

data.setLimits(createLimits(model.getLimits()));
return data;
}

/**
 * Builds the listing representation of a model's token limits.
 *
 * @param limits limits taken from the model configuration; may be {@code null}
 * @return a new {@link TokenLimitsData}; all of its values are left unset when
 *         {@code limits} is {@code null}, otherwise each value is copied from {@code limits}
 */
private static TokenLimitsData createLimits(TokenLimits limits) {
    TokenLimitsData result = new TokenLimitsData();

    // Nothing configured: hand back the empty holder rather than null.
    if (limits == null) {
        return result;
    }

    result.setMaxPromptTokens(limits.getMaxPromptTokens());
    result.setMaxCompletionTokens(limits.getMaxCompletionTokens());
    result.setMaxTotalTokens(limits.getMaxTotalTokens());
    return result;
}
}
1 change: 1 addition & 0 deletions src/main/java/com/epam/aidial/core/data/ModelData.java
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ public class ModelData extends DeploymentData {

private String lifecycleStatus = "generally-available";
private CapabilitiesData capabilities = new CapabilitiesData();
private TokenLimitsData limits = new TokenLimitsData();

{
setObject("model");
Expand Down
15 changes: 15 additions & 0 deletions src/main/java/com/epam/aidial/core/data/TokenLimitsData.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
package com.epam.aidial.core.data;

import com.fasterxml.jackson.annotation.JsonInclude;
import com.fasterxml.jackson.databind.PropertyNamingStrategies;
import com.fasterxml.jackson.databind.annotation.JsonNaming;
import lombok.Data;

/**
 * Serialised view of a model's token limits, attached to the model data returned by the listing.
 *
 * <p>{@code JsonInclude.Include.NON_NULL} leaves out properties whose value is {@code null}, and
 * the snake-case naming strategy renders the property names as {@code max_total_tokens},
 * {@code max_prompt_tokens} and {@code max_completion_tokens}.
 */
@Data
@JsonInclude(JsonInclude.Include.NON_NULL)
@JsonNaming(PropertyNamingStrategies.SnakeCaseStrategy.class)
public class TokenLimitsData {
// Each value is null until set; null values are omitted from the JSON output.
private Integer maxTotalTokens;
private Integer maxPromptTokens;
private Integer maxCompletionTokens;
}
22 changes: 16 additions & 6 deletions src/main/resources/aidial.config.json
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,8 @@
},
"assistant": {
"endpoint": "http://localhost:7001/openai/deployments/assistant/chat/completions",

"assistants": {
"ass": {
"search_assistant": {
"prompt": "Commands: sit_down, get_up, run_away",
"addons": ["search"],
"displayName": "Search Assistant",
Expand All @@ -45,19 +44,29 @@
"displayName": "GPT 3.5",
"iconUrl": "http://localhost:7001/logo.png",
"description": "Some description of the model for testing",
"endpoint" : "http://localhost:7001/openai/deployments/gpt-35-turbo/chat/completions",
"endpoint" : "http://localhost:7001/v1/openai/deployments/gpt-35-turbo/chat/completions",
"upstreams": [
{"endpoint": "http://localhost:7001", "key": "modelKey1"},
{"endpoint": "http://localhost:7002", "key": "modelKey2"},
{"endpoint": "http://localhost:7003", "key": "modelKey3"}
]
],
"limits": {
"maxTotalTokens": 4096
}
},
"embedding-ada": {
"type": "embedding",
"endpoint" : "http://localhost:7001/openai/deployments/ada/embeddings",
"upstreams": [
{"endpoint": "http://localhost:7001", "key": "modelKey4"}
]
],
"limits": {
"maxTotalTokens": 8192
}
},
"exotic-model": {
"type": "chat",
"endpoint" : "http://localhost:7001/openai/deployments/exotic-model/chat/completions"
}
},
"keys": {
Expand All @@ -75,10 +84,11 @@
"limits": {
"chat-gpt-35-turbo": {"minute": "100000", "day": "10000000"},
"embedding-ada": {"minute": "100000", "day": "10000000"},
"exotic-model": {},
"search": {},
"forecast": {},
"calculator": {},
"ass": {},
"search_assistant": {},
"app": {}
}
}
Expand Down
2 changes: 1 addition & 1 deletion src/main/resources/aidial.settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
"reload": 60000
},
"identityProvider": {
"jwksUrl": null,
"jwksUrl": "http://fakeJwksUrl:8080",
"appName": "dial"
},
"storage": {
Expand Down

0 comments on commit 5d7c839

Please sign in to comment.