Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Adjust user limits #278 #279

Merged
merged 4 commits into from
Mar 14, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,6 @@ Dynamic settings include:
| models.<model_name>.pricing | `unit`: the pricing units (currently `token` and `char_without_whitespace` are supported).<br />`prompt`: per-unit price for the completion request in USD.<br />`completion`: per-unit price for the completion response in USD. |
| models.<model_name>.features | `rateEndpoint`: endpoint for rate requests *(exposed by core as `<deployment name>/rate`)*.<br />`tokenizeEndpoint`: endpoint for requests to the model tokenizer *(exposed by core as `<deployment name>/tokenize`)*.<br />`truncatePromptEndpoint`: endpoint for truncating prompt requests *(exposed by core as `<deployment name>/truncate_prompt`)*.<br />`systemPromptSupported`: does the model support system prompt (default is `true`).<br />`toolsSupported`: does the model support tools (default is `false`).<br />`seedSupported`: does the model support `seed` request parameter (default is `false`).<br />`urlAttachmentsSupported`: does the model/application support attachments with URLs (default is `false`) |
| models.<model_name>.upstreams | `endpoint`: Model endpoint.<br />`key`: Your API key. |
| models.<model_name>.defaultUserLimit | Default user limit for the given model.<br /> `minute`: Total tokens per minute limit sent to the model, managed via a floating-window approach for well-distributed rate limiting.<br />`day`: Total tokens per day limit sent to the model, managed via a floating-window approach for balanced rate limiting.|
| keys | API Keys parameters:<br />`<core_key>`: Your API key. |
| keys.<core_key> | `project`: Project name assigned to this key.<br />`role`: A configured role name that defines key permissions. |
| roles | API key roles `<role_name>` with associated limits. Each API key has one role defined in the list of roles. Roles are associated with models, applications, assistants, and defined limits. |
Expand Down
6 changes: 1 addition & 5 deletions sample/aidial.config.json
Original file line number Diff line number Diff line change
Expand Up @@ -47,11 +47,7 @@
"key": "modelKey3"
}
],
"userRoles": ["role1", "role2"],
"defaultUserLimit": {
"minute": "100000",
"day": "10000000"
}
"userRoles": ["role1", "role2"]
},
"embedding-ada": {
"type": "embedding",
Expand Down
1 change: 0 additions & 1 deletion src/main/java/com/epam/aidial/core/config/Model.java
Original file line number Diff line number Diff line change
Expand Up @@ -17,5 +17,4 @@ public class Model extends Deployment {
private List<Upstream> upstreams = List.of();
// if it's set then the model name is overridden with that name in the request body to the model adapter
private String overrideName;
private Limit defaultUserLimit;
}
40 changes: 20 additions & 20 deletions src/main/java/com/epam/aidial/core/limiter/RateLimiter.java
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import com.epam.aidial.core.config.Deployment;
import com.epam.aidial.core.config.Key;
import com.epam.aidial.core.config.Limit;
import com.epam.aidial.core.config.Model;
import com.epam.aidial.core.config.Role;
import com.epam.aidial.core.data.LimitStats;
import com.epam.aidial.core.data.ResourceType;
Expand All @@ -30,6 +29,7 @@
public class RateLimiter {

private static final Limit DEFAULT_LIMIT = new Limit();
private static final String DEFAULT_USER_ROLE = "default";

private final Vertx vertx;

Expand Down Expand Up @@ -181,36 +181,36 @@ private Limit getLimitByApiKey(ProxyContext context, String deploymentName) {

private Limit getLimitByUser(ProxyContext context) {
List<String> userRoles = context.getUserRoles();
Limit defaultUserLimit = getDefaultUserLimit(context.getDeployment());
String deploymentName = context.getDeployment().getName();
Map<String, Role> roles = context.getConfig().getRoles();
Limit defaultUserLimit = getLimit(roles, DEFAULT_USER_ROLE, deploymentName, DEFAULT_LIMIT);
if (userRoles.isEmpty()) {
return defaultUserLimit;
}
String deploymentName = context.getDeployment().getName();
Map<String, Role> userRoleToDeploymentLimits = context.getConfig().getRoles();
long minuteLimit = 0;
long dayLimit = 0;
Limit limit = null;
for (String userRole : userRoles) {
Limit limit = Optional.ofNullable(userRoleToDeploymentLimits.get(userRole))
.map(role -> role.getLimits().get(deploymentName))
.orElse(defaultUserLimit);
minuteLimit = Math.max(minuteLimit, limit.getMinute());
dayLimit = Math.max(dayLimit, limit.getDay());
Limit candidate = getLimit(roles, userRole, deploymentName, null);
if (candidate != null) {
if (limit == null) {
limit = new Limit();
limit.setMinute(0);
limit.setDay(0);
}
limit.setMinute(Math.max(candidate.getMinute(), limit.getMinute()));
limit.setDay(Math.max(candidate.getDay(), limit.getDay()));
}
}
Limit limit = new Limit();
limit.setMinute(minuteLimit);
limit.setDay(dayLimit);
return limit;
return limit == null ? defaultUserLimit : limit;
}

/**
 * Builds the token path for a deployment.
 *
 * @param deploymentName name of the deployment
 * @return the deployment name followed by {@code /tokens}
 */
private static String getPath(String deploymentName) {
    final String suffix = "/tokens";
    return deploymentName + suffix;
}

private static Limit getDefaultUserLimit(Deployment deployment) {
if (deployment instanceof Model model) {
return model.getDefaultUserLimit() == null ? DEFAULT_LIMIT : model.getDefaultUserLimit();
}
return DEFAULT_LIMIT;
/**
 * Resolves the limit configured for a deployment under a given role.
 *
 * @param roles          configured roles keyed by role name
 * @param userRole       name of the role to look up
 * @param deploymentName name of the deployment whose limit is requested
 * @param defaultLimit   value returned when no limit is found; may be {@code null}
 * @return the role's limit for the deployment, or {@code defaultLimit} when the role is
 *         not present in {@code roles} or has no limit entry for the deployment
 */
private static Limit getLimit(Map<String, Role> roles, String userRole, String deploymentName, Limit defaultLimit) {
    Role matchedRole = roles.get(userRole);
    if (matchedRole == null) {
        // Unknown role: nothing to look up.
        return defaultLimit;
    }
    Limit configured = matchedRole.getLimits().get(deploymentName);
    if (configured == null) {
        // The role exists but carries no limit for this deployment.
        return defaultLimit;
    }
    return configured;
}

}
Loading