Skip to content

Commit

Permalink
feat: Adjust user limits #278
Browse files Browse the repository at this point in the history
  • Loading branch information
astsiapanay committed Mar 13, 2024
1 parent ca0be9e commit fe3fad2
Show file tree
Hide file tree
Showing 4 changed files with 20 additions and 27 deletions.
1 change: 0 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,6 @@ Dynamic settings include:
| models.<model_name>.pricing | `unit`: the pricing units (currently `token` and `char_without_whitespace` are supported).<br />`prompt`: per-unit price for the completion request in USD.<br />`completion`: per-unit price for the completion response in USD. |
| models.<model_name>.features | `rateEndpoint`: endpoint for rate requests *(exposed by core as `<deployment name>/rate`)*.<br />`tokenizeEndpoint`: endpoint for requests to the model tokenizer *(exposed by core as `<deployment name>/tokenize`)*.<br />`truncatePromptEndpoint`: endpoint for truncating prompt requests *(exposed by core as `<deployment name>/truncate_prompt`)*.<br />`systemPromptSupported`: does the model support system prompt (default is `true`).<br />`toolsSupported`: does the model support tools (default is `false`).<br />`seedSupported`: does the model support `seed` request parameter (default is `false`).<br />`urlAttachmentsSupported`: does the model/application support attachments with URLs (default is `false`) |
| models.<model_name>.upstreams | `endpoint`: Model endpoint.<br />`key`: Your API key. |
| models.<model_name>.defaultUserLimit | Default user limit for the given model.<br /> `minute`: Total tokens per minute limit sent to the model, managed via floating window approach for well-distributed rate limiting.<br />`day`: Total tokens per day limit sent to the model, managed via floating window approach for balanced rate limiting.|
| keys | API Keys parameters:<br />`<core_key>`: Your API key. |
| keys.<core_key> | `project`: Project name assigned to this key.<br />`role`: A configured role name that defines key permissions. |
| roles | API key roles `<role_name>` with associated limits. Each API key has one role defined in the list of roles. Roles are associated with models, applications, assistants, and defined limits. |
Expand Down
6 changes: 1 addition & 5 deletions sample/aidial.config.json
Original file line number Diff line number Diff line change
Expand Up @@ -47,11 +47,7 @@
"key": "modelKey3"
}
],
"userRoles": ["role1", "role2"],
"defaultUserLimit": {
"minute": "100000",
"day": "10000000"
}
"userRoles": ["role1", "role2"]
},
"embedding-ada": {
"type": "embedding",
Expand Down
1 change: 0 additions & 1 deletion src/main/java/com/epam/aidial/core/config/Model.java
Original file line number Diff line number Diff line change
Expand Up @@ -17,5 +17,4 @@ public class Model extends Deployment {
private List<Upstream> upstreams = List.of();
// if it's set then the model name is overridden with that name in the request body to the model adapter
private String overrideName;
private Limit defaultUserLimit;
}
39 changes: 19 additions & 20 deletions src/main/java/com/epam/aidial/core/limiter/RateLimiter.java
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import com.epam.aidial.core.config.Deployment;
import com.epam.aidial.core.config.Key;
import com.epam.aidial.core.config.Limit;
import com.epam.aidial.core.config.Model;
import com.epam.aidial.core.config.Role;
import com.epam.aidial.core.data.LimitStats;
import com.epam.aidial.core.data.ResourceType;
Expand All @@ -30,6 +29,7 @@
public class RateLimiter {

// Fallback limit handed to getLimit(...) when the default user role defines no limit for the deployment.
private static final Limit DEFAULT_LIMIT = new Limit();
// Name of the configured role whose per-deployment limits act as the default user limit.
private static final String DEFAULT_USER_ROLE = "default";

private final Vertx vertx;

Expand Down Expand Up @@ -181,36 +181,35 @@ private Limit getLimitByApiKey(ProxyContext context, String deploymentName) {

/**
 * Resolves the token limit that applies to the current user for the requested deployment.
 *
 * <p>The limit configured for the {@code "default"} role (or {@code DEFAULT_LIMIT} when that role
 * has no entry for the deployment) is returned when the user has no roles, or when none of the
 * user's roles defines a limit for the deployment. Otherwise the result carries, independently for
 * the minute and the day window, the largest value found among the user's roles.
 *
 * @param context proxy context supplying the user roles, the target deployment and the config
 * @return the effective limit; when it is merged from roles it is a fresh instance, so configured
 *         limits are never modified
 */
private Limit getLimitByUser(ProxyContext context) {
    List<String> userRoles = context.getUserRoles();
    String deploymentName = context.getDeployment().getName();
    Map<String, Role> roles = context.getConfig().getRoles();
    Limit defaultUserLimit = getLimit(roles, DEFAULT_USER_ROLE, deploymentName, DEFAULT_LIMIT);
    if (userRoles.isEmpty()) {
        return defaultUserLimit;
    }
    Limit limit = null;
    for (String userRole : userRoles) {
        Limit candidate = getLimit(roles, userRole, deploymentName, null);
        if (candidate == null) {
            // this role is unknown or defines no limit for the deployment
            continue;
        }
        if (limit == null) {
            // Copy instead of aliasing: the candidate is the instance held by the configuration,
            // and the setters below must not leak merged values back into it.
            limit = new Limit();
            limit.setMinute(candidate.getMinute());
            limit.setDay(candidate.getDay());
        } else {
            limit.setMinute(Math.max(candidate.getMinute(), limit.getMinute()));
            limit.setDay(Math.max(candidate.getDay(), limit.getDay()));
        }
    }
    return limit == null ? defaultUserLimit : limit;
}

/**
 * Builds the path of the token-usage resource for a deployment.
 *
 * @param deploymentName name of the deployment
 * @return the deployment name followed by {@code /tokens}
 */
private static String getPath(String deploymentName) {
    final String path = "%s/tokens".formatted(deploymentName);
    return path;
}

private static Limit getDefaultUserLimit(Deployment deployment) {
if (deployment instanceof Model model) {
return model.getDefaultUserLimit() == null ? DEFAULT_LIMIT : model.getDefaultUserLimit();
}
return DEFAULT_LIMIT;
/**
 * Looks up the limit a role defines for a deployment.
 *
 * @param roles          configured roles keyed by role name
 * @param userRole       name of the role to look up
 * @param deploymentName name of the deployment whose limit is requested
 * @param defaultLimit   value returned when the role is absent or has no limit for the deployment;
 *                       may be {@code null}
 * @return the role's limit for the deployment, or {@code defaultLimit} when there is none
 */
private static Limit getLimit(Map<String, Role> roles, String userRole, String deploymentName, Limit defaultLimit) {
    Role role = roles.get(userRole);
    if (role == null) {
        return defaultLimit;
    }
    Limit roleLimit = role.getLimits().get(deploymentName);
    if (roleLimit == null) {
        return defaultLimit;
    }
    return roleLimit;
}

}

0 comments on commit fe3fad2

Please sign in to comment.