From 670d9108bff3ea10271e7865a116866fb9279736 Mon Sep 17 00:00:00 2001 From: Aliaksandr Stsiapanay Date: Wed, 17 Jul 2024 14:58:20 +0300 Subject: [PATCH] feat: Add support of interceptors for models, applications #379 (#392) --- README.md | 54 +++-- sample/aidial.config.json | 29 ++- .../com/epam/aidial/core/ProxyContext.java | 25 ++ .../epam/aidial/core/config/ApiKeyData.java | 14 +- .../com/epam/aidial/core/config/Config.java | 1 + .../epam/aidial/core/config/Deployment.java | 4 + .../aidial/core/config/FileConfigStore.java | 6 + .../epam/aidial/core/config/Interceptor.java | 4 + .../controller/DeploymentPostController.java | 44 +++- .../controller/InterceptorController.java | 218 ++++++++++++++++++ .../aidial/core/CustomApplicationApiTest.java | 6 +- 11 files changed, 363 insertions(+), 42 deletions(-) create mode 100644 src/main/java/com/epam/aidial/core/config/Interceptor.java create mode 100644 src/main/java/com/epam/aidial/core/controller/InterceptorController.java diff --git a/README.md b/README.md index f91a4b7bb..8606f0fc5 100644 --- a/README.md +++ b/README.md @@ -201,31 +201,35 @@ specified via "config.reload" static setting. Refer to [example](sample/aidial.c Dynamic settings can include the following parameters: -| Parameter | Description | -|---------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| routes | Path(s) for specific upstream routing or to respond with a configured body. | -| addons | A list of deployed AI DIAL Addons and their parameters:
``: Unique addon name. | -| addons. | `endpoint`: AI DIAL Addon API for chat completions.
`iconUrl`: Icon path for the AI DIAL addon on UI.
`description`: Brief AI DIAL addon description.
`displayName`: AI DIAL addon name on UI.
`inputAttachmentTypes`: A list of allowed MIME types for the input attachments.
`maxInputAttachments`: Maximum number of input attachments (default is zero when `inputAttachmentTypes` is unset, otherwise, infinity)
`forwardAuthToken`: If flag is set to `true` forward Http header with authorization token to chat completion endpoint of the addon.
`userRoles`: a specific claim value provided by a specific IDP. Refer to [IDP Configuration](https://github.com/epam/ai-dial/blob/main/docs/Deployment/idp-configuration/auth0.md) to view examples. | -| assistant | A list of deployed AI DIAL Assistants and their parameters:
``: Unique assistan name. | -| assistant.endpoint | Assistant main endpoint | -| assistant.assistants. | `iconUrl`: Icon path for the AI DIAL assistant on UI.
`description`: Brief AI DIAL assistant description.
`displayName`: AI DIAL assistant name on UI.
`inputAttachmentTypes`: A list of allowed MIME types for the input attachments.
`maxInputAttachments`: Maximum number of input attachments (default is zero when `inputAttachmentTypes` is unset, otherwise, infinity)
`forwardAuthToken`: If flag is set to `true` forward Http header with authorization token to chat completion endpoint of the assistant.
`userRoles`: a specific claim value provided by a specific IDP. Refer to [IDP Configuration](https://github.com/epam/ai-dial/blob/main/docs/Deployment/idp-configuration/auth0.md) to view examples. | -| assistant.assistants..defaults | default parameters are applied if a request doesn't contain them in OpenAI `chat/completions` API call | -| applications | A list of deployed AI DIAL Applications and their parameters:
``: Unique application name. | -| applications. | `endpoint`: AI DIAL Application API for chat completions.
`iconUrl`: Icon path for the AI DIAL Application on UI.
`description`: Brief AI DIAL Application description.
`displayName`: AI DIAL Application name on UI.
`inputAttachmentTypes`: A list of allowed MIME types for the input attachments.
`maxInputAttachments`: Maximum number of input attachments (default is zero when `inputAttachmentTypes` is unset, otherwise, infinity)
`forwardAuthToken`: If flag is set to `true` forward Http header with authorization token to chat completion endpoint of the application.
`userRoles`: a specific claim value provided by a specific IDP. Refer to [IDP Configuration](https://github.com/epam/ai-dial/blob/main/docs/Deployment/idp-configuration/auth0.md) to view examples. | -| applications..defaults | default parameters are applied if a request doesn't contain them in OpenAI `chat/completions` API call | -| models | A list of deployed models and their parameters:
``: Unique model name. | -| models. | `type`: Model type—`chat` or `embedding`.
`iconUrl`: Icon path for the model on UI.
`description`: Brief model description.
`displayName`: Model name on UI.
`displayVersion`: Model version on UI.
`endpoint`: Model API for chat completions or embeddings.
`tokenizerModel`: Identifies the specific model whose tokenization algorithm exactly matches that of the referenced model. This is typically the name of the earliest-released model in a series of models sharing an identical tokenization algorithm (e.g. `gpt-3.5-turbo-0301`, `gpt-4-0314`, or `gpt-4-1106-vision-preview`). This parameter is essential for DIAL clients that reimplement tokenization algorithms on their side, instead of utilizing the `tokenizeEndpoint` provided by the model.
`features`: Model features.
`limits`: Model token limits.
`pricing`: Model pricing.
`upstreams`: Used for load-balancing—request is sent to model endpoint containing X-UPSTREAM-ENDPOINT and X-UPSTREAM-KEY headers.
`userRoles`: a specific claim value provided by a specific IDP. Refer to [IDP Configuration](https://github.com/epam/ai-dial/blob/main/docs/Deployment/idp-configuration/auth0.md) to view examples.| -| models..limits | `maxPromptTokens`: maximum number of tokens in a completion request.
`maxCompletionTokens`: maximum number of tokens in a completion response.
`maxTotalTokens`: maximum number of tokens in completion request and response combined.
Typically either `maxTotalTokens` is specified or `maxPromptTokens` and `maxCompletionTokens`. | -| models..pricing | `unit`: the pricing units (currently `token` and `char_without_whitespace` are supported).
`prompt`: per-unit price for the completion request in USD.
`completion`: per-unit price for the completion response in USD. | -| models..features | `rateEndpoint`: endpoint for rate requests *(exposed by core as `/rate`)*.
`tokenizeEndpoint`: endpoint for requests to the model tokenizer *(exposed by core as `/tokenize`)*.
`truncatePromptEndpoint`: endpoint for truncating prompt requests *(exposed by core as `/truncate_prompt`)*.
`systemPromptSupported`: does the model support system prompt (default is `true`).
`toolsSupported`: does the model support tools (default is `false`).
`seedSupported`: does the model support `seed` request parameter (default is `false`).
`urlAttachmentsSupported`: does the model/application support attachments with URLs (default is `false`).
`folderAttachmentsSupported`: does the model/application support folder attachments (default is `false`) | -| models..upstreams | `endpoint`: Model endpoint.
`key`: Your API key. | -| models..defaults | default parameters are applied if a request doesn't contain them in OpenAI `chat/completions` API call | -| keys | API Keys parameters:
``: Your API key. Refer to [API Keys Roles and Limits](https://github.com/epam/ai-dial/blob/main/docs/tutorials/roles-management.md) to learn more. | -| keys. | `project`: Project name is assigned to this key.
`role`: A configured role name to be defined in the section `roles`. | -| roles | API key or user roles. Each role may have limits to be associated with applications, models, assistants or addons. Refer to [API Keys Roles and Limits](https://github.com/epam/ai-dial/blob/main/docs/tutorials/roles-management.md) to learn more. | -| roles. | `limits`: Limits for models, applications, or assistants. | -| roles..limits | `minute`: Total tokens per minute limit sent to the model, managed via floating window approach for well-distributed rate limiting. If it's not set the default value is unlimited
`day`: Total tokens per day limit sent to the model, managed via floating window approach for balanced rate limiting. If it's not set the default value is unlimited. | -| retriableErrorCodes | List of retriable error codes for handling outages at LLM providers. | +| Parameter | Description | +|-----------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| routes | Path(s) for specific upstream routing or to respond with a configured body. | +| addons | A list of deployed AI DIAL Addons and their parameters:
``: Unique addon name. | +| addons. | `endpoint`: AI DIAL Addon API for chat completions.
`iconUrl`: Icon path for the AI DIAL addon on UI.
`description`: Brief AI DIAL addon description.
`displayName`: AI DIAL addon name on UI.
`inputAttachmentTypes`: A list of allowed MIME types for the input attachments.
`maxInputAttachments`: Maximum number of input attachments (default is zero when `inputAttachmentTypes` is unset, otherwise, infinity)
`forwardAuthToken`: If flag is set to `true` forward Http header with authorization token to chat completion endpoint of the addon.
`userRoles`: a specific claim value provided by a specific IDP. Refer to [IDP Configuration](https://github.com/epam/ai-dial/blob/main/docs/Deployment/idp-configuration/auth0.md) to view examples. | +| interceptors | A list of deployed AI DIAL Interceptors and their parameters:
``: Unique interceptor name. | +| interceptors. | `endpoint`: AI DIAL Interceptor API for chat completions.
`iconUrl`: Icon path for the AI DIAL Interceptor on UI.
`description`: Brief AI DIAL interceptor description.
`displayName`: AI DIAL interceptor name on UI.
`forwardAuthToken`: If flag is set to `true` forward Http header with authorization token to chat completion endpoint of the interceptor. | +| assistant | A list of deployed AI DIAL Assistants and their parameters:
``: Unique assistan name. | +| assistant.endpoint | Assistant main endpoint | +| assistant.assistants. | `iconUrl`: Icon path for the AI DIAL assistant on UI.
`description`: Brief AI DIAL assistant description.
`displayName`: AI DIAL assistant name on UI.
`inputAttachmentTypes`: A list of allowed MIME types for the input attachments.
`maxInputAttachments`: Maximum number of input attachments (default is zero when `inputAttachmentTypes` is unset, otherwise, infinity)
`forwardAuthToken`: If flag is set to `true` forward Http header with authorization token to chat completion endpoint of the assistant.
`userRoles`: a specific claim value provided by a specific IDP. Refer to [IDP Configuration](https://github.com/epam/ai-dial/blob/main/docs/Deployment/idp-configuration/auth0.md) to view examples. | +| assistant.assistants..defaults | default parameters are applied if a request doesn't contain them in OpenAI `chat/completions` API call | +| applications | A list of deployed AI DIAL Applications and their parameters:
``: Unique application name. | +| applications. | `endpoint`: AI DIAL Application API for chat completions.
`iconUrl`: Icon path for the AI DIAL Application on UI.
`description`: Brief AI DIAL Application description.
`displayName`: AI DIAL Application name on UI.
`inputAttachmentTypes`: A list of allowed MIME types for the input attachments.
`maxInputAttachments`: Maximum number of input attachments (default is zero when `inputAttachmentTypes` is unset, otherwise, infinity)
`forwardAuthToken`: If flag is set to `true` forward Http header with authorization token to chat completion endpoint of the application.
`userRoles`: a specific claim value provided by a specific IDP. Refer to [IDP Configuration](https://github.com/epam/ai-dial/blob/main/docs/Deployment/idp-configuration/auth0.md) to view examples. | +| applications..defaults | default parameters are applied if a request doesn't contain them in OpenAI `chat/completions` API call | +| models..interceptors | list of interceptors to be triggered for the given application | +| models | A list of deployed models and their parameters:
``: Unique model name. | +| models. | `type`: Model type—`chat` or `embedding`.
`iconUrl`: Icon path for the model on UI.
`description`: Brief model description.
`displayName`: Model name on UI.
`displayVersion`: Model version on UI.
`endpoint`: Model API for chat completions or embeddings.
`tokenizerModel`: Identifies the specific model whose tokenization algorithm exactly matches that of the referenced model. This is typically the name of the earliest-released model in a series of models sharing an identical tokenization algorithm (e.g. `gpt-3.5-turbo-0301`, `gpt-4-0314`, or `gpt-4-1106-vision-preview`). This parameter is essential for DIAL clients that reimplement tokenization algorithms on their side, instead of utilizing the `tokenizeEndpoint` provided by the model.
`features`: Model features.
`limits`: Model token limits.
`pricing`: Model pricing.
`upstreams`: Used for load-balancing—request is sent to model endpoint containing X-UPSTREAM-ENDPOINT and X-UPSTREAM-KEY headers.
`userRoles`: a specific claim value provided by a specific IDP. Refer to [IDP Configuration](https://github.com/epam/ai-dial/blob/main/docs/Deployment/idp-configuration/auth0.md) to view examples. | +| models..limits | `maxPromptTokens`: maximum number of tokens in a completion request.
`maxCompletionTokens`: maximum number of tokens in a completion response.
`maxTotalTokens`: maximum number of tokens in completion request and response combined.
Typically either `maxTotalTokens` is specified or `maxPromptTokens` and `maxCompletionTokens`. | +| models..pricing | `unit`: the pricing units (currently `token` and `char_without_whitespace` are supported).
`prompt`: per-unit price for the completion request in USD.
`completion`: per-unit price for the completion response in USD. | +| models..features | `rateEndpoint`: endpoint for rate requests *(exposed by core as `/rate`)*.
`tokenizeEndpoint`: endpoint for requests to the model tokenizer *(exposed by core as `/tokenize`)*.
`truncatePromptEndpoint`: endpoint for truncating prompt requests *(exposed by core as `/truncate_prompt`)*.
`systemPromptSupported`: does the model support system prompt (default is `true`).
`toolsSupported`: does the model support tools (default is `false`).
`seedSupported`: does the model support `seed` request parameter (default is `false`).
`urlAttachmentsSupported`: does the model/application support attachments with URLs (default is `false`).
`folderAttachmentsSupported`: does the model/application support folder attachments (default is `false`) | +| models..upstreams | `endpoint`: Model endpoint.
`key`: Your API key. | +| models..defaults | default parameters are applied if a request doesn't contain them in OpenAI `chat/completions` API call | +| models..interceptors | list of interceptors to be triggered for the given model | +| keys | API Keys parameters:
``: Your API key. Refer to [API Keys Roles and Limits](https://github.com/epam/ai-dial/blob/main/docs/tutorials/roles-management.md) to learn more. | +| keys. | `project`: Project name is assigned to this key.
`role`: A configured role name to be defined in the section `roles`. | +| roles | API key or user roles. Each role may have limits to be associated with applications, models, assistants or addons. Refer to [API Keys Roles and Limits](https://github.com/epam/ai-dial/blob/main/docs/tutorials/roles-management.md) to learn more. | +| roles. | `limits`: Limits for models, applications, or assistants. | +| roles..limits | `minute`: Total tokens per minute limit sent to the model, managed via floating window approach for well-distributed rate limiting. If it's not set the default value is unlimited
`day`: Total tokens per day limit sent to the model, managed via floating window approach for balanced rate limiting. If it's not set the default value is unlimited. | +| retriableErrorCodes | List of retriable error codes for handling outages at LLM providers. | ## License diff --git a/sample/aidial.config.json b/sample/aidial.config.json index 3e074217f..418b2ba44 100644 --- a/sample/aidial.config.json +++ b/sample/aidial.config.json @@ -1,12 +1,22 @@ { "routes": {}, + "interceptors": { + "interceptor1": { + "endpoint": "http://localhost:4088/api/v1/interceptor/handle" + }, + "interceptor2": { + "endpoint": "http://localhost:4089/api/v1/interceptor/handle" + }, + "interceptor3": { + "endpoint": "http://localhost:4090/api/v1/interceptor/handle" + } + }, "addons": { "search": { "endpoint": "http://localhost:7010/search" }, "forecast": { "endpoint": "http://localhost:7010/forecast", - "token": "token", "displayName": "Forecast", "iconUrl": "https://host/forecast.svg", "description": "Addon that provides forecast", @@ -72,7 +82,6 @@ "applications": { "app": { "endpoint": "http://localhost:7001/openai/deployments/10k/chat/completions", - "token": "token", "displayName": "Forecast", "iconUrl": "https://host/app.svg", "description": "Addon that provides forecast", @@ -98,7 +107,8 @@ "paramBool": true, "paramInt": 123, "paramFloat": 0.25 - } + }, + "interceptors": ["interceptor1", "interceptor2", "interceptor3"] } }, "models": { @@ -112,8 +122,8 @@ }, "pricing": { "unit": "token", - "prompt": 0.56, - "completion": 0.67 + "prompt": "0.56", + "completion": "0.67" }, "overrideName": "/some[!exotic?]/model/name", "displayName": "GPT-3.5", @@ -153,7 +163,8 @@ "paramBool": true, "paramInt": 123, "paramFloat": 0.25 - } + }, + "interceptors": ["interceptor1"] }, "embedding-ada": { "type": "embedding", @@ -163,9 +174,9 @@ "endpoint": "http://localhost:7001", "key": "modelKey4" } - ] - }, - "userRoles": ["role3"] + ], + "userRoles": ["role3"] + } }, "keys": { "proxyKey1": { diff --git a/src/main/java/com/epam/aidial/core/ProxyContext.java b/src/main/java/com/epam/aidial/core/ProxyContext.java index 6dd0051e2..380bb45fe 100644 --- a/src/main/java/com/epam/aidial/core/ProxyContext.java +++ b/src/main/java/com/epam/aidial/core/ProxyContext.java @@ -67,6 +67,11 @@ public class ProxyContext { private long responseBodyTimestamp; private ExtractedClaims extractedClaims; private ApiKeyData proxyApiKeyData; + // deployment triggers interceptors + private String initialDeployment; + private String initialDeploymentApi; + // List of interceptors copied from the deployment config + private List interceptors; public ProxyContext(Config config, HttpServerRequest request, ApiKeyData apiKeyData, ExtractedClaims extractedClaims, String traceId, String spanId) { this.config = config; @@ -138,4 +143,24 @@ public List getExecutionPath() { public boolean getBooleanRequestQueryParam(String name) { return Boolean.parseBoolean(request.getParam(name, "false")); } + + public List getInterceptors() { + return interceptors == null ? apiKeyData.getInterceptors() : interceptors; + } + + public boolean hasNextInterceptor() { + if (apiKeyData.getInterceptors() == null) { // initial call to the deployment + return !deployment.getInterceptors().isEmpty(); + } else { // make sure if a next interceptor is available from the list + return apiKeyData.getInterceptorIndex() + 1 < apiKeyData.getInterceptors().size(); + } + } + + public String getInitialDeployment() { + return initialDeployment == null ? apiKeyData.getInitialDeployment() : initialDeployment; + } + + public String getInitialDeploymentApi() { + return initialDeploymentApi == null ? apiKeyData.getInitialDeploymentApi() : initialDeploymentApi; + } } \ No newline at end of file diff --git a/src/main/java/com/epam/aidial/core/config/ApiKeyData.java b/src/main/java/com/epam/aidial/core/config/ApiKeyData.java index 0dfa5fa66..b4a0c617d 100644 --- a/src/main/java/com/epam/aidial/core/config/ApiKeyData.java +++ b/src/main/java/com/epam/aidial/core/config/ApiKeyData.java @@ -34,10 +34,17 @@ public class ApiKeyData { // list of attached file URLs collected from conversation history of the current request private Set attachedFiles = new HashSet<>(); private List attachedFolders = new ArrayList<>(); - // deployment name of the source(application/assistant/model) associated with the current request + // deployment name of the source(application/assistant/model/interceptor) associated with the current request private String sourceDeployment; // Execution path of the root request private List executionPath; + // List of interceptors copied from the deployment config + private List interceptors; + // Index to track which interceptor is called next + private int interceptorIndex = -1; + // deployment triggers interceptors + private String initialDeployment; + private String initialDeploymentApi; public ApiKeyData() { } @@ -45,6 +52,11 @@ public ApiKeyData() { public static void initFromContext(ApiKeyData proxyApiKeyData, ProxyContext context) { ApiKeyData apiKeyData = context.getApiKeyData(); List currentPath; + proxyApiKeyData.setInterceptors(context.getInterceptors()); + proxyApiKeyData.setInterceptorIndex(apiKeyData.getInterceptorIndex() + 1); // move to next interceptor + proxyApiKeyData.setInitialDeployment(context.getInitialDeployment()); + proxyApiKeyData.setInitialDeploymentApi(context.getInitialDeploymentApi()); + if (apiKeyData.getPerRequestKey() == null) { proxyApiKeyData.setOriginalKey(context.getKey()); proxyApiKeyData.setExtractedClaims(context.getExtractedClaims()); diff --git a/src/main/java/com/epam/aidial/core/config/Config.java b/src/main/java/com/epam/aidial/core/config/Config.java index 5934592e6..4d6ee8b8b 100644 --- a/src/main/java/com/epam/aidial/core/config/Config.java +++ b/src/main/java/com/epam/aidial/core/config/Config.java @@ -22,6 +22,7 @@ public class Config { private Map keys = new HashMap<>(); private Map roles = new HashMap<>(); private Set retriableErrorCodes = Set.of(); + private Map interceptors = Map.of(); public Deployment selectDeployment(String deploymentId) { diff --git a/src/main/java/com/epam/aidial/core/config/Deployment.java b/src/main/java/com/epam/aidial/core/config/Deployment.java index 2acc05402..22d73ff81 100644 --- a/src/main/java/com/epam/aidial/core/config/Deployment.java +++ b/src/main/java/com/epam/aidial/core/config/Deployment.java @@ -27,4 +27,8 @@ public abstract class Deployment { * Default parameters are applied if a request doesn't contain them in OpenAI chat/completions API call. */ private Map defaults = Map.of(); + /** + * List of interceptors to be called for the deployment + */ + private List interceptors = List.of(); } \ No newline at end of file diff --git a/src/main/java/com/epam/aidial/core/config/FileConfigStore.java b/src/main/java/com/epam/aidial/core/config/FileConfigStore.java index d990effb9..1926cdb31 100644 --- a/src/main/java/com/epam/aidial/core/config/FileConfigStore.java +++ b/src/main/java/com/epam/aidial/core/config/FileConfigStore.java @@ -97,6 +97,12 @@ private void load(boolean fail) { role.setName(name); } + for (Map.Entry entry : config.getInterceptors().entrySet()) { + String name = entry.getKey(); + Interceptor interceptor = entry.getValue(); + interceptor.setName(name); + } + this.config = config; } catch (Throwable e) { if (fail) { diff --git a/src/main/java/com/epam/aidial/core/config/Interceptor.java b/src/main/java/com/epam/aidial/core/config/Interceptor.java new file mode 100644 index 000000000..cc70f322d --- /dev/null +++ b/src/main/java/com/epam/aidial/core/config/Interceptor.java @@ -0,0 +1,4 @@ +package com.epam.aidial.core.config; + +public class Interceptor extends Deployment { +} diff --git a/src/main/java/com/epam/aidial/core/controller/DeploymentPostController.java b/src/main/java/com/epam/aidial/core/controller/DeploymentPostController.java index 93d697071..adc60b364 100644 --- a/src/main/java/com/epam/aidial/core/controller/DeploymentPostController.java +++ b/src/main/java/com/epam/aidial/core/controller/DeploymentPostController.java @@ -5,6 +5,7 @@ import com.epam.aidial.core.config.ApiKeyData; import com.epam.aidial.core.config.Config; import com.epam.aidial.core.config.Deployment; +import com.epam.aidial.core.config.Interceptor; import com.epam.aidial.core.config.Model; import com.epam.aidial.core.config.ModelType; import com.epam.aidial.core.config.Pricing; @@ -78,7 +79,14 @@ public Future handle(String deploymentId, String deploymentApi) { if (!StringUtils.containsIgnoreCase(contentType, Proxy.HEADER_CONTENT_TYPE_APPLICATION_JSON)) { return respond(HttpStatus.UNSUPPORTED_MEDIA_TYPE, "Only application/json is supported"); } + // handle a special deployment `interceptor` + if ("interceptor".equals(deploymentId)) { + return handleInterceptor(); + } + return handleDeployment(deploymentId, deploymentApi); + } + private Future handleDeployment(String deploymentId, String deploymentApi) { Deployment deployment = context.getConfig().selectDeployment(deploymentId); boolean isValidDeployment = isValidDeploymentApi(deployment, deploymentApi); @@ -109,7 +117,7 @@ public Future handle(String deploymentId, String deploymentApi) { return dep; }) .compose(dep -> { - if (dep instanceof Model) { + if (dep instanceof Model && !context.hasNextInterceptor()) { return proxy.getRateLimiter().limit(context); } else { return Future.succeededFuture(RateLimitResult.SUCCESS); @@ -117,7 +125,14 @@ public Future handle(String deploymentId, String deploymentApi) { }) .map(rateLimitResult -> { if (rateLimitResult.status() == HttpStatus.OK) { - handleRateLimitSuccess(deploymentId); + if (context.hasNextInterceptor()) { + context.setInitialDeployment(deploymentId); + context.setInitialDeploymentApi(deploymentApi); + context.setInterceptors(context.getDeployment().getInterceptors()); + handleInterceptor(); + } else { + handleRateLimitSuccess(deploymentId); + } } else { handleRateLimitHit(deploymentId, rateLimitResult); } @@ -129,6 +144,28 @@ public Future handle(String deploymentId, String deploymentApi) { }); } + private Future handleInterceptor() { + ApiKeyData apiKeyData = context.getApiKeyData(); + List interceptors = context.getInterceptors(); + int nextIndex = apiKeyData.getInterceptorIndex() + 1; + if (nextIndex < interceptors.size()) { + String interceptorName = interceptors.get(nextIndex); + Interceptor interceptor = context.getConfig().getInterceptors().get(interceptorName); + if (interceptor == null) { + log.warn("Interceptor is not found for the given name: {}", interceptorName); + return respond(HttpStatus.NOT_FOUND, "Interceptor is not found"); + } + context.setDeployment(interceptor); + + setupProxyApiKeyData(); + + InterceptorController controller = new InterceptorController(proxy, context); + return controller.handle(); + } else { // all interceptors are completed we should call the initial deployment + return handleDeployment(apiKeyData.getInitialDeployment(), apiKeyData.getInitialDeploymentApi()); + } + } + private void handleRequestError(String deploymentId, Throwable error) { if (error instanceof PermissionDeniedException) { log.error("Forbidden deployment {}. Key: {}. User sub: {}", deploymentId, context.getProject(), context.getUserSub()); @@ -172,9 +209,6 @@ private void handleRateLimitSuccess(String deploymentId) { .onFailure(this::handleRequestBodyError); } - /** - * The method uses blocking calls and should not be used in the event loop thread. - */ private void setupProxyApiKeyData() { ApiKeyData proxyApiKeyData = new ApiKeyData(); context.setProxyApiKeyData(proxyApiKeyData); diff --git a/src/main/java/com/epam/aidial/core/controller/InterceptorController.java b/src/main/java/com/epam/aidial/core/controller/InterceptorController.java new file mode 100644 index 000000000..73a8592c1 --- /dev/null +++ b/src/main/java/com/epam/aidial/core/controller/InterceptorController.java @@ -0,0 +1,218 @@ +package com.epam.aidial.core.controller; + +import com.epam.aidial.core.Proxy; +import com.epam.aidial.core.ProxyContext; +import com.epam.aidial.core.config.ApiKeyData; +import com.epam.aidial.core.function.BaseFunction; +import com.epam.aidial.core.function.CollectAttachmentsFn; +import com.epam.aidial.core.function.enhancement.ApplyDefaultDeploymentSettingsFn; +import com.epam.aidial.core.function.enhancement.EnhanceAssistantRequestFn; +import com.epam.aidial.core.function.enhancement.EnhanceModelRequestFn; +import com.epam.aidial.core.util.BufferingReadStream; +import com.epam.aidial.core.util.HttpStatus; +import com.epam.aidial.core.util.ProxyUtil; +import com.fasterxml.jackson.databind.node.ObjectNode; +import io.netty.buffer.ByteBufInputStream; +import io.vertx.core.Future; +import io.vertx.core.MultiMap; +import io.vertx.core.buffer.Buffer; +import io.vertx.core.http.HttpClientRequest; +import io.vertx.core.http.HttpClientResponse; +import io.vertx.core.http.HttpHeaders; +import io.vertx.core.http.HttpServerRequest; +import io.vertx.core.http.HttpServerResponse; +import io.vertx.core.http.RequestOptions; +import lombok.extern.slf4j.Slf4j; + +import java.io.IOException; +import java.io.InputStream; +import java.util.List; + +@Slf4j +public class InterceptorController { + + private final Proxy proxy; + private final ProxyContext context; + + private final List> enhancementFunctions; + + public InterceptorController(Proxy proxy, ProxyContext context) { + this.proxy = proxy; + this.context = context; + this.enhancementFunctions = List.of(new CollectAttachmentsFn(proxy, context)); + } + + public Future handle() { + log.info("Received request from client. Trace: {}. Span: {}. Key: {}. Deployment: {}. Headers: {}", + context.getTraceId(), context.getSpanId(), + context.getProject(), context.getDeployment().getName(), + context.getRequest().headers().size()); + + context.getRequest().body() + .onSuccess(body -> proxy.getVertx().executeBlocking(() -> { + handleRequestBody(body); + return null; + }, false).onFailure(this::handleError)) + .onFailure(this::handleRequestBodyError); + return Future.succeededFuture(); + } + + private void handleError(Throwable error) { + log.error("Can't handle request. Key: {}. User sub: {}. Trace: {}. Span: {}. Error: {}", + context.getProject(), context.getUserSub(), context.getTraceId(), context.getSpanId(), error.getMessage()); + respond(HttpStatus.INTERNAL_SERVER_ERROR); + } + + private void handleRequestBody(Buffer requestBody) { + context.setRequestBody(requestBody); + context.setRequestBodyTimestamp(System.currentTimeMillis()); + try (InputStream stream = new ByteBufInputStream(requestBody.getByteBuf())) { + ObjectNode tree = (ObjectNode) ProxyUtil.MAPPER.readTree(stream); + Throwable error = ProxyUtil.processChain(tree, enhancementFunctions); + if (error != null) { + finalizeRequest(); + return; + } + } catch (IOException e) { + respond(HttpStatus.BAD_REQUEST); + log.warn("Can't parse JSON request body. Trace: {}. Span: {}. Error:", + context.getTraceId(), context.getSpanId(), e); + return; + } + sendRequest(); + } + + private void sendRequest() { + String uri = context.getDeployment().getEndpoint(); + RequestOptions options = new RequestOptions() + .setAbsoluteURI(uri) + .setMethod(context.getRequest().method()); + + proxy.getClient().request(options) + .onSuccess(this::handleProxyRequest) + .onFailure(this::handleProxyConnectionError); + } + + private void handleRequestBodyError(Throwable error) { + log.warn("Failed to receive client body. Trace: {}. Span: {}. Error: {}", + context.getTraceId(), context.getSpanId(), error.getMessage()); + + respond(HttpStatus.UNPROCESSABLE_ENTITY, "Failed to receive body"); + } + + /** + * Called when proxy failed to connect to the origin. + */ + private void handleProxyConnectionError(Throwable error) { + log.warn("Can't connect to origin. Trace: {}. Span: {}. Key: {}. Deployment: {}. Address: {}. Error: {}", + context.getTraceId(), context.getSpanId(), + context.getProject(), context.getDeployment().getName(), + context.getDeployment().getEndpoint(), error.getMessage()); + + respond(HttpStatus.BAD_GATEWAY, "Failed to connect to origin"); + } + + + void handleProxyRequest(HttpClientRequest proxyRequest) { + log.info("Connected to interceptor. Trace: {}. Span: {}. Key: {}. Deployment: {}. Address: {}", + context.getTraceId(), context.getSpanId(), + context.getProject(), context.getDeployment().getName(), + proxyRequest.connection().remoteAddress()); + + HttpServerRequest request = context.getRequest(); + context.setProxyRequest(proxyRequest); + context.setProxyConnectTimestamp(System.currentTimeMillis()); + + MultiMap excludeHeaders = MultiMap.caseInsensitiveMultiMap(); + if (!context.getDeployment().isForwardAuthToken()) { + excludeHeaders.add(HttpHeaders.AUTHORIZATION, "whatever"); + } + + ProxyUtil.copyHeaders(request.headers(), proxyRequest.headers(), excludeHeaders); + + ApiKeyData proxyApiKeyData = context.getProxyApiKeyData(); + proxyRequest.headers().add(Proxy.HEADER_API_KEY, proxyApiKeyData.getPerRequestKey()); + + + Buffer requestBody = context.getRequestBody(); + proxyRequest.putHeader(HttpHeaders.CONTENT_LENGTH, Integer.toString(requestBody.length())); + + proxyRequest.send(requestBody) + .onSuccess(this::handleProxyResponse) + .onFailure(this::handleProxyResponseError); + } + + /** + * Called when proxy failed to receive response header from origin. + */ + private void handleProxyResponseError(Throwable error) { + log.warn("Proxy failed to receive response header from origin. Trace: {}. Span: {}. Key: {}. Deployment: {}. Address: {}. Error:", + context.getTraceId(), context.getSpanId(), + context.getProject(), context.getDeployment().getName(), + context.getProxyRequest().connection().remoteAddress(), + error); + } + + private void handleProxyResponse(HttpClientResponse proxyResponse) { + log.info("Received header from origin. Trace: {}. Span: {}. Key: {}. Deployment: {}. Endpoint: {}. Status: {}. Headers: {}", + context.getTraceId(), context.getSpanId(), + context.getProject(), context.getDeployment().getName(), + context.getDeployment().getEndpoint(), + proxyResponse.statusCode(), proxyResponse.headers().size()); + + BufferingReadStream responseStream = new BufferingReadStream(proxyResponse, + ProxyUtil.contentLength(proxyResponse, 1024)); + + context.setProxyResponse(proxyResponse); + context.setProxyResponseTimestamp(System.currentTimeMillis()); + context.setResponseStream(responseStream); + + HttpServerResponse response = context.getResponse(); + + response.setChunked(true); + response.setStatusCode(proxyResponse.statusCode()); + + ProxyUtil.copyHeaders(proxyResponse.headers(), response.headers()); + + responseStream.pipe() + .endOnFailure(false) + .to(response) + .onSuccess(ignore -> finalizeRequest()) + .onFailure(this::handleResponseError); + } + + /** + * Called when proxy failed to send response to the client. + */ + private void handleResponseError(Throwable error) { + log.warn("Can't send response to client. Trace: {}. Span: {}. Error:", + context.getTraceId(), context.getSpanId(), error); + + context.getProxyRequest().reset(); // drop connection to stop origin response + context.getResponse().reset(); // drop connection, so that partial client response won't seem complete + finalizeRequest(); + } + + private void respond(HttpStatus status) { + finalizeRequest(); + context.respond(status); + } + + private void respond(HttpStatus status, Object result) { + finalizeRequest(); + context.respond(status, result); + } + + private void finalizeRequest() { + ApiKeyData proxyApiKeyData = context.getProxyApiKeyData(); + if (proxyApiKeyData != null) { + proxy.getApiKeyStore().invalidatePerRequestApiKey(proxyApiKeyData) + .onSuccess(invalidated -> { + if (!invalidated) { + log.warn("Per request is not removed: {}", proxyApiKeyData.getPerRequestKey()); + } + }).onFailure(error -> log.error("error occurred on invalidating per-request key", error)); + } + } + +} diff --git a/src/test/java/com/epam/aidial/core/CustomApplicationApiTest.java b/src/test/java/com/epam/aidial/core/CustomApplicationApiTest.java index b5cdda362..f8ef2bf73 100644 --- a/src/test/java/com/epam/aidial/core/CustomApplicationApiTest.java +++ b/src/test/java/com/epam/aidial/core/CustomApplicationApiTest.java @@ -43,7 +43,8 @@ void testApplicationCreation() { "icon_url":"http://application1/icon.svg", "description":"My Custom Application Description", "forward_auth_token":false, - "defaults": {} + "defaults": {}, + "interceptors": [] } """); @@ -175,7 +176,8 @@ void testApplicationDeletion() { "rate_endpoint": "http://application1/rate", "configuration_endpoint": "http://application1/configuration" }, - "defaults": {} + "defaults": {}, + "interceptors": [] } """);