epam · astsiapanay · Mar 14, 2024 · Mar 13, 2024 · Mar 13, 2024 · Mar 14, 2024
@@ -175,7 +175,6 @@ Dynamic settings include:
 | models.<model_name>.pricing           | `unit`: the pricing units (currently `token` and `char_without_whitespace` are supported).<br />`prompt`: per-unit price for the completion request in USD.<br />`completion`: per-unit price for the completion response in USD. |
 | models.<model_name>.features          | `rateEndpoint`: endpoint for rate requests *(exposed by core as `<deployment name>/rate`)*.<br />`tokenizeEndpoint`: endpoint for requests to the model tokenizer *(exposed by core as `<deployment name>/tokenize`)*.<br />`truncatePromptEndpoint`: endpoint for prompt truncation requests *(exposed by core as `<deployment name>/truncate_prompt`)*.<br />`systemPromptSupported`: whether the model supports a system prompt (default is `true`).<br />`toolsSupported`: whether the model supports tools (default is `false`).<br />`seedSupported`: whether the model supports the `seed` request parameter (default is `false`).<br />`urlAttachmentsSupported`: whether the model/application supports attachments with URLs (default is `false`). |
 | models.<model_name>.upstreams         | `endpoint`: Model endpoint.<br />`key`: Your API key. |
-| models.<model_name>.defaultUserLimit  | Default user limit for the given model.<br /> `minute`: Total tokens per minute limit sent to the model, managed via floating window approach for well-distributed rate limiting.<br />`day`: Total tokens per day limit sent to the model, managed via floating window approach for balanced rate limiting.|
 | keys                                  | API Keys parameters:<br />`<core_key>`: Your API key. |
 | keys.<core_key>                       | `project`: Project name assigned to this key.<br />`role`: A configured role name that defines key permissions. |
 | roles                                 | API key roles `<role_name>` with associated limits. Each API key has one role defined in the list of roles. Roles are associated with models, applications, assistants, and defined limits. |

@@ -47,11 +47,7 @@
                     "key": "modelKey3"
                 }
             ],
-            "userRoles": ["role1", "role2"],
-            "defaultUserLimit": {
-                "minute": "100000",
-                "day": "10000000"
-            }
+            "userRoles": ["role1", "role2"]
         },
         "embedding-ada": {
             "type": "embedding",

@@ -17,5 +17,4 @@ public class Model extends Deployment {
     private List<Upstream> upstreams = List.of();
     // If set, this name replaces the model name in the request body sent to the model adapter.
     private String overrideName;
-    private Limit defaultUserLimit;
 }
@@ -4,7 +4,6 @@
 import com.epam.aidial.core.config.Deployment;
 import com.epam.aidial.core.config.Key;
 import com.epam.aidial.core.config.Limit;
-import com.epam.aidial.core.config.Model;
 import com.epam.aidial.core.config.Role;
 import com.epam.aidial.core.data.LimitStats;
 import com.epam.aidial.core.data.ResourceType;
@@ -30,6 +29,7 @@
 public class RateLimiter {
 
     private static final Limit DEFAULT_LIMIT = new Limit();
+    private static final String DEFAULT_USER_ROLE = "default";
 
     private final Vertx vertx;
 
@@ -181,36 +181,36 @@ private Limit getLimitByApiKey(ProxyContext context, String deploymentName) {
 
     private Limit getLimitByUser(ProxyContext context) {
         List<String> userRoles = context.getUserRoles();
-        Limit defaultUserLimit = getDefaultUserLimit(context.getDeployment());
+        String deploymentName = context.getDeployment().getName();
+        Map<String, Role> roles = context.getConfig().getRoles();
+        Limit defaultUserLimit = getLimit(roles, DEFAULT_USER_ROLE, deploymentName, DEFAULT_LIMIT);
         if (userRoles.isEmpty()) {
             return defaultUserLimit;
         }
-        String deploymentName = context.getDeployment().getName();
-        Map<String, Role> userRoleToDeploymentLimits = context.getConfig().getRoles();
-        long minuteLimit = 0;
-        long dayLimit = 0;
+        Limit limit = null;
         for (String userRole : userRoles) {
-            Limit limit = Optional.ofNullable(userRoleToDeploymentLimits.get(userRole))
-                    .map(role -> role.getLimits().get(deploymentName))
-                    .orElse(defaultUserLimit);
-            minuteLimit = Math.max(minuteLimit, limit.getMinute());
-            dayLimit = Math.max(dayLimit, limit.getDay());
+            Limit candidate = getLimit(roles, userRole, deploymentName, null);
+            if (candidate != null) {
+                if (limit == null) {
+                    limit = new Limit();
+                    limit.setMinute(0);
+                    limit.setDay(0);
+                }
+                limit.setMinute(Math.max(candidate.getMinute(), limit.getMinute()));
+                limit.setDay(Math.max(candidate.getDay(), limit.getDay()));
+            }
         }
-        Limit limit = new Limit();
-        limit.setMinute(minuteLimit);
-        limit.setDay(dayLimit);
-        return limit;
+        return limit == null ? defaultUserLimit : limit;
     }
 
     private static String getPath(String deploymentName) {
         return String.format("%s/tokens", deploymentName);
     }
 
-    private static Limit getDefaultUserLimit(Deployment deployment) {
-        if (deployment instanceof Model model) {
-            return model.getDefaultUserLimit() == null ? DEFAULT_LIMIT : model.getDefaultUserLimit();
-        }
-        return DEFAULT_LIMIT;
+    private static Limit getLimit(Map<String, Role> roles, String userRole, String deploymentName, Limit defaultLimit) {
+        return Optional.ofNullable(roles.get(userRole))
+                .map(role -> role.getLimits().get(deploymentName))
+                .orElse(defaultLimit);
     }
 
 }