From fe3fad2ee3b3900fbc4ddfd4d184d2327b317d19 Mon Sep 17 00:00:00 2001 From: Aliaksandr Stsiapanay Date: Wed, 13 Mar 2024 20:04:01 +0300 Subject: [PATCH 1/3] feat: Adjust user limits #278 --- README.md | 1 - sample/aidial.config.json | 6 +-- .../com/epam/aidial/core/config/Model.java | 1 - .../epam/aidial/core/limiter/RateLimiter.java | 39 +++++++++---------- 4 files changed, 20 insertions(+), 27 deletions(-) diff --git a/README.md b/README.md index ec9c6c81d..e79f5f4c5 100644 --- a/README.md +++ b/README.md @@ -175,7 +175,6 @@ Dynamic settings include: | models..pricing | `unit`: the pricing units (currently `token` and `char_without_whitespace` are supported).
`prompt`: per-unit price for the completion request in USD.
`completion`: per-unit price for the completion response in USD. | | models..features | `rateEndpoint`: endpoint for rate requests *(exposed by core as `/rate`)*.
`tokenizeEndpoint`: endpoint for requests to the model tokenizer *(exposed by core as `/tokenize`)*.
`truncatePromptEndpoint`: endpoint for truncating prompt requests *(exposed by core as `/truncate_prompt`)*.
`systemPromptSupported`: does the model support system prompt (default is `true`).
`toolsSupported`: does the model support tools (default is `false`).
`seedSupported`: does the model support `seed` request parameter (default is `false`).
`urlAttachmentsSupported`: does the model/application support attachments with URLs (default is `false`) | | models..upstreams | `endpoint`: Model endpoint.
`key`: Your API key. | -| models.<model_name>.defaultUserLimit | Default user limit for the given model.
`minute`: Total tokens per minute limit sent to the model, managed via floating window approach for well-distributed rate limiting.
`day`: Total tokens per day limit sent to the model, managed via floating window approach for balanced rate limiting.| | keys | API Keys parameters:
`<core_key>`: Your API key. | | keys.<core_key> | `project`: Project name assigned to this key.
`role`: A configured role name that defines key permissions. | | roles | API key roles `` with associated limits. Each API key has one role defined in the list of roles. Roles are associated with models, applications, assistants, and defined limits. | diff --git a/sample/aidial.config.json b/sample/aidial.config.json index 0bd2c3ac1..f25a05e2c 100644 --- a/sample/aidial.config.json +++ b/sample/aidial.config.json @@ -47,11 +47,7 @@ "key": "modelKey3" } ], - "userRoles": ["role1", "role2"], - "defaultUserLimit": { - "minute": "100000", - "day": "10000000" - } + "userRoles": ["role1", "role2"] }, "embedding-ada": { "type": "embedding", diff --git a/src/main/java/com/epam/aidial/core/config/Model.java b/src/main/java/com/epam/aidial/core/config/Model.java index ccb30e279..a95fa70f2 100644 --- a/src/main/java/com/epam/aidial/core/config/Model.java +++ b/src/main/java/com/epam/aidial/core/config/Model.java @@ -17,5 +17,4 @@ public class Model extends Deployment { private List upstreams = List.of(); // if it's set then the model name is overridden with that name in the request body to the model adapter private String overrideName; - private Limit defaultUserLimit; } \ No newline at end of file diff --git a/src/main/java/com/epam/aidial/core/limiter/RateLimiter.java b/src/main/java/com/epam/aidial/core/limiter/RateLimiter.java index e32f3fa6b..b017f9078 100644 --- a/src/main/java/com/epam/aidial/core/limiter/RateLimiter.java +++ b/src/main/java/com/epam/aidial/core/limiter/RateLimiter.java @@ -4,7 +4,6 @@ import com.epam.aidial.core.config.Deployment; import com.epam.aidial.core.config.Key; import com.epam.aidial.core.config.Limit; -import com.epam.aidial.core.config.Model; import com.epam.aidial.core.config.Role; import com.epam.aidial.core.data.LimitStats; import com.epam.aidial.core.data.ResourceType; @@ -30,6 +29,7 @@ public class RateLimiter { private static final Limit DEFAULT_LIMIT = new Limit(); + private static final String DEFAULT_USER_ROLE = "default"; private 
final Vertx vertx; @@ -181,36 +181,35 @@ private Limit getLimitByApiKey(ProxyContext context, String deploymentName) { private Limit getLimitByUser(ProxyContext context) { List userRoles = context.getUserRoles(); - Limit defaultUserLimit = getDefaultUserLimit(context.getDeployment()); + String deploymentName = context.getDeployment().getName(); + Map roles = context.getConfig().getRoles(); + Limit defaultUserLimit = getLimit(roles, DEFAULT_USER_ROLE, deploymentName, DEFAULT_LIMIT); if (userRoles.isEmpty()) { return defaultUserLimit; } - String deploymentName = context.getDeployment().getName(); - Map userRoleToDeploymentLimits = context.getConfig().getRoles(); - long minuteLimit = 0; - long dayLimit = 0; + Limit limit = null; for (String userRole : userRoles) { - Limit limit = Optional.ofNullable(userRoleToDeploymentLimits.get(userRole)) - .map(role -> role.getLimits().get(deploymentName)) - .orElse(defaultUserLimit); - minuteLimit = Math.max(minuteLimit, limit.getMinute()); - dayLimit = Math.max(dayLimit, limit.getDay()); + Limit candidate = getLimit(roles, userRole, deploymentName, null); + if (candidate != null) { + if (limit == null) { + limit = candidate; + } else { + limit.setMinute(Math.max(candidate.getMinute(), limit.getMinute())); + limit.setDay(Math.max(candidate.getDay(), limit.getDay())); + } + } } - Limit limit = new Limit(); - limit.setMinute(minuteLimit); - limit.setDay(dayLimit); - return limit; + return limit == null ? defaultUserLimit : limit; } private static String getPath(String deploymentName) { return String.format("%s/tokens", deploymentName); } - private static Limit getDefaultUserLimit(Deployment deployment) { - if (deployment instanceof Model model) { - return model.getDefaultUserLimit() == null ? 
DEFAULT_LIMIT : model.getDefaultUserLimit(); - } - return DEFAULT_LIMIT; + private static Limit getLimit(Map roles, String userRole, String deploymentName, Limit defaultLimit) { + return Optional.ofNullable(roles.get(userRole)) + .map(role -> role.getLimits().get(deploymentName)) + .orElse(defaultLimit); } } From 95c336a2ea68d06212865ecbd2e950baa760510c Mon Sep 17 00:00:00 2001 From: Aliaksandr Stsiapanay Date: Wed, 13 Mar 2024 20:16:27 +0300 Subject: [PATCH 2/3] fix: don't reuse limit from config --- .../java/com/epam/aidial/core/limiter/RateLimiter.java | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/main/java/com/epam/aidial/core/limiter/RateLimiter.java b/src/main/java/com/epam/aidial/core/limiter/RateLimiter.java index b017f9078..e31447968 100644 --- a/src/main/java/com/epam/aidial/core/limiter/RateLimiter.java +++ b/src/main/java/com/epam/aidial/core/limiter/RateLimiter.java @@ -192,11 +192,12 @@ private Limit getLimitByUser(ProxyContext context) { Limit candidate = getLimit(roles, userRole, deploymentName, null); if (candidate != null) { if (limit == null) { - limit = candidate; - } else { - limit.setMinute(Math.max(candidate.getMinute(), limit.getMinute())); - limit.setDay(Math.max(candidate.getDay(), limit.getDay())); + limit = new Limit(); + limit.setMinute(0); + limit.setDay(0); } + limit.setMinute(Math.max(candidate.getMinute(), limit.getMinute())); + limit.setDay(Math.max(candidate.getDay(), limit.getDay())); } } return limit == null ? 
defaultUserLimit : limit; From 8f25ada7026fd26360bfb32aa43bd0390603db0f Mon Sep 17 00:00:00 2001 From: Aliaksandr Stsiapanay Date: Thu, 14 Mar 2024 10:24:31 +0300 Subject: [PATCH 3/3] chore: code re-factoring --- .../java/com/epam/aidial/core/limiter/RateLimiter.java | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/main/java/com/epam/aidial/core/limiter/RateLimiter.java b/src/main/java/com/epam/aidial/core/limiter/RateLimiter.java index e31447968..901b47e88 100644 --- a/src/main/java/com/epam/aidial/core/limiter/RateLimiter.java +++ b/src/main/java/com/epam/aidial/core/limiter/RateLimiter.java @@ -193,11 +193,12 @@ private Limit getLimitByUser(ProxyContext context) { if (candidate != null) { if (limit == null) { limit = new Limit(); - limit.setMinute(0); - limit.setDay(0); + limit.setMinute(candidate.getMinute()); + limit.setDay(candidate.getDay()); + } else { + limit.setMinute(Math.max(candidate.getMinute(), limit.getMinute())); + limit.setDay(Math.max(candidate.getDay(), limit.getDay())); } - limit.setMinute(Math.max(candidate.getMinute(), limit.getMinute())); - limit.setDay(Math.max(candidate.getDay(), limit.getDay())); } } return limit == null ? defaultUserLimit : limit;