From 670d9108bff3ea10271e7865a116866fb9279736 Mon Sep 17 00:00:00 2001
From: Aliaksandr Stsiapanay <aliaksandr_stsiapanay@epam.com>
Date: Wed, 17 Jul 2024 14:58:20 +0300
Subject: [PATCH] feat: Add support of interceptors for models, applications
 #379 (#392)

---
 README.md                                     |  54 +++--
 sample/aidial.config.json                     |  29 ++-
 .../com/epam/aidial/core/ProxyContext.java    |  25 ++
 .../epam/aidial/core/config/ApiKeyData.java   |  14 +-
 .../com/epam/aidial/core/config/Config.java   |   1 +
 .../epam/aidial/core/config/Deployment.java   |   4 +
 .../aidial/core/config/FileConfigStore.java   |   6 +
 .../epam/aidial/core/config/Interceptor.java  |   4 +
 .../controller/DeploymentPostController.java  |  44 +++-
 .../controller/InterceptorController.java     | 218 ++++++++++++++++++
 .../aidial/core/CustomApplicationApiTest.java |   6 +-
 11 files changed, 363 insertions(+), 42 deletions(-)
 create mode 100644 src/main/java/com/epam/aidial/core/config/Interceptor.java
 create mode 100644 src/main/java/com/epam/aidial/core/controller/InterceptorController.java

diff --git a/README.md b/README.md
index f91a4b7bb..8606f0fc5 100644
--- a/README.md
+++ b/README.md
@@ -201,31 +201,35 @@ specified via "config.reload" static setting. Refer to [example](sample/aidial.c
 
 Dynamic settings can include the following parameters:
 
-| Parameter                       | Description                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                     |
-|---------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
-| routes                          | Path(s) for specific upstream routing or to respond with a configured body.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                     |
-| addons                          | A list of deployed AI DIAL Addons and their parameters:<br />`<addon_name>`: Unique addon name.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                 |
-| addons.<addon_name>             | `endpoint`: AI DIAL Addon API for chat completions.<br />`iconUrl`: Icon path for the AI DIAL addon on UI.<br />`description`: Brief AI DIAL addon description.<br />`displayName`: AI DIAL addon name on UI.<br />`inputAttachmentTypes`: A list of allowed MIME types for the input attachments.<br />`maxInputAttachments`: Maximum number of input attachments (default is zero when `inputAttachmentTypes` is unset, otherwise, infinity) <br/> `forwardAuthToken`: If flag is set to `true` forward Http header with authorization token to chat completion endpoint of the addon. <br />`userRoles`: a specific claim value provided by a specific IDP. Refer to [IDP Configuration](https://github.com/epam/ai-dial/blob/main/docs/Deployment/idp-configuration/auth0.md) to view examples.                                                                                                                                                                                                                                                                                                                                                                                                                                                        |
-| assistant                          | A list of deployed AI DIAL Assistants and their parameters:<br />`<assistant_name>`: Unique assistan name.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                      |
-| assistant.endpoint                          | Assistant main endpoint                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                         |
-| assistant.assistants.<asistant_name>             | `iconUrl`: Icon path for the AI DIAL assistant on UI.<br />`description`: Brief AI DIAL assistant description.<br />`displayName`: AI DIAL assistant name on UI.<br />`inputAttachmentTypes`: A list of allowed MIME types for the input attachments.<br />`maxInputAttachments`: Maximum number of input attachments (default is zero when `inputAttachmentTypes` is unset, otherwise, infinity) <br/> `forwardAuthToken`: If flag is set to `true` forward Http header with authorization token to chat completion endpoint of the assistant. <br />`userRoles`: a specific claim value provided by a specific IDP. Refer to [IDP Configuration](https://github.com/epam/ai-dial/blob/main/docs/Deployment/idp-configuration/auth0.md) to view examples.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                |
-| assistant.assistants.<asistant_name>.defaults         | default parameters are applied if a request doesn't contain them in OpenAI `chat/completions` API call                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            |
-| applications                    | A list of deployed AI DIAL Applications and their parameters:<br />`<application_name>`: Unique application name.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                               |
-| applications.<application_name> | `endpoint`: AI DIAL Application API for chat completions.<br />`iconUrl`: Icon path for the AI DIAL Application on UI.<br />`description`: Brief AI DIAL Application description.<br />`displayName`: AI DIAL Application name on UI.<br />`inputAttachmentTypes`: A list of allowed MIME types for the input attachments.<br />`maxInputAttachments`: Maximum number of input attachments (default is zero when `inputAttachmentTypes` is unset, otherwise, infinity) <br/> `forwardAuthToken`: If flag is set to `true` forward Http header with authorization token to chat completion endpoint of the application. <br />`userRoles`: a specific claim value provided by a specific IDP. Refer to [IDP Configuration](https://github.com/epam/ai-dial/blob/main/docs/Deployment/idp-configuration/auth0.md) to view examples.                                                                                                                                                                                                                                                                                                                                                                                                                          |
-| applications.<application_name>.defaults         | default parameters are applied if a request doesn't contain them in OpenAI `chat/completions` API call                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            |
-| models                          | A list of deployed models and their parameters:<br />`<model_name>`: Unique model name.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                         |
-| models.<model_name>             | `type`: Model type—`chat` or `embedding`.<br />`iconUrl`: Icon path for the model on UI.<br />`description`: Brief model description.<br />`displayName`: Model name on UI.<br />`displayVersion`: Model version on UI.<br />`endpoint`: Model API for chat completions or embeddings.<br />`tokenizerModel`: Identifies the specific model whose tokenization algorithm exactly matches that of the referenced model. This is typically the name of the earliest-released model in a series of models sharing an identical tokenization algorithm (e.g. `gpt-3.5-turbo-0301`, `gpt-4-0314`, or `gpt-4-1106-vision-preview`). This parameter is essential for DIAL clients that reimplement tokenization algorithms on their side, instead of utilizing the `tokenizeEndpoint` provided by the model.<br />`features`: Model features.<br />`limits`: Model token limits.<br />`pricing`: Model pricing.<br />`upstreams`: Used for load-balancing—request is sent to model endpoint containing X-UPSTREAM-ENDPOINT and X-UPSTREAM-KEY headers.<br />`userRoles`: a specific claim value provided by a specific IDP. Refer to [IDP Configuration](https://github.com/epam/ai-dial/blob/main/docs/Deployment/idp-configuration/auth0.md) to view examples.|
-| models.<model_name>.limits      | `maxPromptTokens`: maximum number of tokens in a completion request.<br />`maxCompletionTokens`: maximum number of tokens in a completion response.<br />`maxTotalTokens`: maximum number of tokens in completion request and response combined.<br />Typically either `maxTotalTokens` is specified or `maxPromptTokens` and `maxCompletionTokens`.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            |
-| models.<model_name>.pricing     | `unit`: the pricing units (currently `token` and `char_without_whitespace` are supported).<br />`prompt`: per-unit price for the completion request in USD.<br />`completion`: per-unit price for the completion response in USD.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                               |
-| models.<model_name>.features    | `rateEndpoint`: endpoint for rate requests *(exposed by core as `<deployment name>/rate`)*.<br />`tokenizeEndpoint`: endpoint for requests to the model tokenizer *(exposed by core as `<deployment name>/tokenize`)*.<br />`truncatePromptEndpoint`: endpoint for truncating prompt requests *(exposed by core as `<deployment name>/truncate_prompt`)*.<br />`systemPromptSupported`: does the model support system prompt (default is `true`).<br />`toolsSupported`: does the model support tools (default is `false`).<br />`seedSupported`: does the model support `seed` request parameter (default is `false`).<br />`urlAttachmentsSupported`: does the model/application support attachments with URLs (default is `false`).<br />`folderAttachmentsSupported`: does the model/application support folder attachments (default is `false`)                                                                                                                                                                                            |
-| models.<model_name>.upstreams   | `endpoint`: Model endpoint.<br />`key`: Your API key.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                           |
-| models.<model_name>.defaults         | default parameters are applied if a request doesn't contain them in OpenAI `chat/completions` API call                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            |
-| keys                            | API Keys parameters:<br />`<core_key>`: Your API key. Refer to [API Keys Roles and Limits](https://github.com/epam/ai-dial/blob/main/docs/tutorials/roles-management.md) to learn more.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                           |
-| keys.<core_key>                 | `project`: Project name is assigned to this key.<br />`role`: A configured role name to be defined in the section `roles`.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                     |
-| roles                           | API key or user roles. Each role may have limits to be associated with applications, models, assistants or addons. Refer to [API Keys Roles and Limits](https://github.com/epam/ai-dial/blob/main/docs/tutorials/roles-management.md) to learn more.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                             |
-| roles.<role_name>               | `limits`: Limits for models, applications, or assistants.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                       |
-| roles.<role_name>.limits        | `minute`: Total tokens per minute limit sent to the model, managed via floating window approach for well-distributed rate limiting. If it's not set the default value is unlimited<br />`day`: Total tokens per day limit sent to the model, managed via floating window approach for balanced rate limiting. If it's not set the default value is unlimited.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    |
-| retriableErrorCodes        | List of retriable error codes for handling outages at LLM providers.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    |
+| Parameter                                     | Description                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                               |
+|-----------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| routes                                        | Path(s) for specific upstream routing or to respond with a configured body.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                               |
+| addons                                        | A list of deployed AI DIAL Addons and their parameters:<br />`<addon_name>`: Unique addon name.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                           |
+| addons.<addon_name>                           | `endpoint`: AI DIAL Addon API for chat completions.<br />`iconUrl`: Icon path for the AI DIAL addon on UI.<br />`description`: Brief AI DIAL addon description.<br />`displayName`: AI DIAL addon name on UI.<br />`inputAttachmentTypes`: A list of allowed MIME types for the input attachments.<br />`maxInputAttachments`: Maximum number of input attachments (default is zero when `inputAttachmentTypes` is unset, otherwise, infinity) <br/> `forwardAuthToken`: If flag is set to `true` forward Http header with authorization token to chat completion endpoint of the addon. <br />`userRoles`: a specific claim value provided by a specific IDP. Refer to [IDP Configuration](https://github.com/epam/ai-dial/blob/main/docs/Deployment/idp-configuration/auth0.md) to view examples.                                                                                                                                                                                                                                                                                                                                                                                                                                                       |
+| interceptors                                  | A list of deployed AI DIAL Interceptors and their parameters:<br />`<interceptor_name>`: Unique interceptor name.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                         |
+| interceptors.<interceptor_name>               | `endpoint`: AI DIAL Interceptor API for chat completions.<br />`iconUrl`: Icon path for the AI DIAL Interceptor on UI.<br />`description`: Brief AI DIAL interceptor description.<br />`displayName`: AI DIAL interceptor name on UI.<br/> `forwardAuthToken`: If flag is set to `true` forward Http header with authorization token to chat completion endpoint of the interceptor.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                      |
+| assistant                                     | A list of deployed AI DIAL Assistants and their parameters:<br />`<assistant_name>`: Unique assistan name.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                |
+| assistant.endpoint                            | Assistant main endpoint                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   |
+| assistant.assistants.<asistant_name>          | `iconUrl`: Icon path for the AI DIAL assistant on UI.<br />`description`: Brief AI DIAL assistant description.<br />`displayName`: AI DIAL assistant name on UI.<br />`inputAttachmentTypes`: A list of allowed MIME types for the input attachments.<br />`maxInputAttachments`: Maximum number of input attachments (default is zero when `inputAttachmentTypes` is unset, otherwise, infinity) <br/> `forwardAuthToken`: If flag is set to `true` forward Http header with authorization token to chat completion endpoint of the assistant. <br />`userRoles`: a specific claim value provided by a specific IDP. Refer to [IDP Configuration](https://github.com/epam/ai-dial/blob/main/docs/Deployment/idp-configuration/auth0.md) to view examples.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                |
+| assistant.assistants.<asistant_name>.defaults | default parameters are applied if a request doesn't contain them in OpenAI `chat/completions` API call                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    |
+| applications                                  | A list of deployed AI DIAL Applications and their parameters:<br />`<application_name>`: Unique application name.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                         |
+| applications.<application_name>               | `endpoint`: AI DIAL Application API for chat completions.<br />`iconUrl`: Icon path for the AI DIAL Application on UI.<br />`description`: Brief AI DIAL Application description.<br />`displayName`: AI DIAL Application name on UI.<br />`inputAttachmentTypes`: A list of allowed MIME types for the input attachments.<br />`maxInputAttachments`: Maximum number of input attachments (default is zero when `inputAttachmentTypes` is unset, otherwise, infinity) <br/> `forwardAuthToken`: If flag is set to `true` forward Http header with authorization token to chat completion endpoint of the application. <br />`userRoles`: a specific claim value provided by a specific IDP. Refer to [IDP Configuration](https://github.com/epam/ai-dial/blob/main/docs/Deployment/idp-configuration/auth0.md) to view examples.                                                                                                                                                                                                                                                                                                                                                                                                                         |
+| applications.<application_name>.defaults      | default parameters are applied if a request doesn't contain them in OpenAI `chat/completions` API call                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    |
+| models.<application_name>.interceptors              | list of interceptors to be triggered for the given application                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            |
+| models                                        | A list of deployed models and their parameters:<br />`<model_name>`: Unique model name.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   |
+| models.<model_name>                           | `type`: Model type—`chat` or `embedding`.<br />`iconUrl`: Icon path for the model on UI.<br />`description`: Brief model description.<br />`displayName`: Model name on UI.<br />`displayVersion`: Model version on UI.<br />`endpoint`: Model API for chat completions or embeddings.<br />`tokenizerModel`: Identifies the specific model whose tokenization algorithm exactly matches that of the referenced model. This is typically the name of the earliest-released model in a series of models sharing an identical tokenization algorithm (e.g. `gpt-3.5-turbo-0301`, `gpt-4-0314`, or `gpt-4-1106-vision-preview`). This parameter is essential for DIAL clients that reimplement tokenization algorithms on their side, instead of utilizing the `tokenizeEndpoint` provided by the model.<br />`features`: Model features.<br />`limits`: Model token limits.<br />`pricing`: Model pricing.<br />`upstreams`: Used for load-balancing—request is sent to model endpoint containing X-UPSTREAM-ENDPOINT and X-UPSTREAM-KEY headers.<br />`userRoles`: a specific claim value provided by a specific IDP. Refer to [IDP Configuration](https://github.com/epam/ai-dial/blob/main/docs/Deployment/idp-configuration/auth0.md) to view examples. |
+| models.<model_name>.limits                    | `maxPromptTokens`: maximum number of tokens in a completion request.<br />`maxCompletionTokens`: maximum number of tokens in a completion response.<br />`maxTotalTokens`: maximum number of tokens in completion request and response combined.<br />Typically either `maxTotalTokens` is specified or `maxPromptTokens` and `maxCompletionTokens`.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                      |
+| models.<model_name>.pricing                   | `unit`: the pricing units (currently `token` and `char_without_whitespace` are supported).<br />`prompt`: per-unit price for the completion request in USD.<br />`completion`: per-unit price for the completion response in USD.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                         |
+| models.<model_name>.features                  | `rateEndpoint`: endpoint for rate requests *(exposed by core as `<deployment name>/rate`)*.<br />`tokenizeEndpoint`: endpoint for requests to the model tokenizer *(exposed by core as `<deployment name>/tokenize`)*.<br />`truncatePromptEndpoint`: endpoint for truncating prompt requests *(exposed by core as `<deployment name>/truncate_prompt`)*.<br />`systemPromptSupported`: does the model support system prompt (default is `true`).<br />`toolsSupported`: does the model support tools (default is `false`).<br />`seedSupported`: does the model support `seed` request parameter (default is `false`).<br />`urlAttachmentsSupported`: does the model/application support attachments with URLs (default is `false`).<br />`folderAttachmentsSupported`: does the model/application support folder attachments (default is `false`)                                                                                                                                                                                                                                                                                                                                                                                                      |
+| models.<model_name>.upstreams                 | `endpoint`: Model endpoint.<br />`key`: Your API key.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                     |
+| models.<model_name>.defaults                  | default parameters are applied if a request doesn't contain them in OpenAI `chat/completions` API call                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    |
+| models.<model_name>.interceptors              | list of interceptors to be triggered for the given model                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  |
+| keys                                          | API Keys parameters:<br />`<core_key>`: Your API key. Refer to [API Keys Roles and Limits](https://github.com/epam/ai-dial/blob/main/docs/tutorials/roles-management.md) to learn more.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   |
+| keys.<core_key>                               | `project`: Project name is assigned to this key.<br />`role`: A configured role name to be defined in the section `roles`.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                |
+| roles                                         | API key or user roles. Each role may have limits to be associated with applications, models, assistants or addons. Refer to [API Keys Roles and Limits](https://github.com/epam/ai-dial/blob/main/docs/tutorials/roles-management.md) to learn more.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                      |
+| roles.<role_name>                             | `limits`: Limits for models, applications, or assistants.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                 |
+| roles.<role_name>.limits                      | `minute`: Total tokens per minute limit sent to the model, managed via floating window approach for well-distributed rate limiting. If it's not set the default value is unlimited<br />`day`: Total tokens per day limit sent to the model, managed via floating window approach for balanced rate limiting. If it's not set the default value is unlimited.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                             |
+| retriableErrorCodes                           | List of retriable error codes for handling outages at LLM providers.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                      |
 
 ## License
 
diff --git a/sample/aidial.config.json b/sample/aidial.config.json
index 3e074217f..418b2ba44 100644
--- a/sample/aidial.config.json
+++ b/sample/aidial.config.json
@@ -1,12 +1,22 @@
 {
     "routes": {},
+    "interceptors": {
+        "interceptor1": {
+            "endpoint": "http://localhost:4088/api/v1/interceptor/handle"
+        },
+        "interceptor2": {
+            "endpoint": "http://localhost:4089/api/v1/interceptor/handle"
+        },
+        "interceptor3": {
+            "endpoint": "http://localhost:4090/api/v1/interceptor/handle"
+        }
+    },
     "addons": {
         "search": {
             "endpoint": "http://localhost:7010/search"
         },
         "forecast": {
             "endpoint": "http://localhost:7010/forecast",
-            "token": "token",
             "displayName": "Forecast",
             "iconUrl": "https://host/forecast.svg",
             "description": "Addon that provides forecast",
@@ -72,7 +82,6 @@
     "applications": {
         "app": {
             "endpoint": "http://localhost:7001/openai/deployments/10k/chat/completions",
-            "token": "token",
             "displayName": "Forecast",
             "iconUrl": "https://host/app.svg",
             "description": "Addon that provides forecast",
@@ -98,7 +107,8 @@
                 "paramBool": true,
                 "paramInt": 123,
                 "paramFloat": 0.25
-            }
+            },
+            "interceptors": ["interceptor1", "interceptor2", "interceptor3"]
         }
     },
     "models": {
@@ -112,8 +122,8 @@
             },
             "pricing": {
                 "unit": "token",
-                "prompt": 0.56,
-                "completion": 0.67
+                "prompt": "0.56",
+                "completion": "0.67"
             },
             "overrideName": "/some[!exotic?]/model/name",
             "displayName": "GPT-3.5",
@@ -153,7 +163,8 @@
                 "paramBool": true,
                 "paramInt": 123,
                 "paramFloat": 0.25
-            }
+            },
+            "interceptors": ["interceptor1"]
         },
         "embedding-ada": {
             "type": "embedding",
@@ -163,9 +174,9 @@
                     "endpoint": "http://localhost:7001",
                     "key": "modelKey4"
                 }
-            ]
-        },
-        "userRoles": ["role3"]
+            ],
+            "userRoles": ["role3"]
+        }
     },
     "keys": {
         "proxyKey1": {
diff --git a/src/main/java/com/epam/aidial/core/ProxyContext.java b/src/main/java/com/epam/aidial/core/ProxyContext.java
index 6dd0051e2..380bb45fe 100644
--- a/src/main/java/com/epam/aidial/core/ProxyContext.java
+++ b/src/main/java/com/epam/aidial/core/ProxyContext.java
@@ -67,6 +67,11 @@ public class ProxyContext {
     private long responseBodyTimestamp;
     private ExtractedClaims extractedClaims;
     private ApiKeyData proxyApiKeyData;
+    // deployment triggers interceptors
+    private String initialDeployment;
+    private String initialDeploymentApi;
+    // List of interceptors copied from the deployment config
+    private List<String> interceptors;
 
     public ProxyContext(Config config, HttpServerRequest request, ApiKeyData apiKeyData, ExtractedClaims extractedClaims, String traceId, String spanId) {
         this.config = config;
@@ -138,4 +143,24 @@ public List<String> getExecutionPath() {
     public boolean getBooleanRequestQueryParam(String name) {
         return Boolean.parseBoolean(request.getParam(name, "false"));
     }
+
+    public List<String> getInterceptors() {
+        return interceptors == null ? apiKeyData.getInterceptors() : interceptors;
+    }
+
+    public boolean hasNextInterceptor() {
+        if (apiKeyData.getInterceptors() == null) { // initial call to the deployment
+            return !deployment.getInterceptors().isEmpty();
+        } else { // make sure if a next interceptor is available from the list
+            return apiKeyData.getInterceptorIndex() + 1 < apiKeyData.getInterceptors().size();
+        }
+    }
+
+    public String getInitialDeployment() {
+        return initialDeployment == null ? apiKeyData.getInitialDeployment() : initialDeployment;
+    }
+
+    public String getInitialDeploymentApi() {
+        return initialDeploymentApi == null ? apiKeyData.getInitialDeploymentApi() : initialDeploymentApi;
+    }
 }
\ No newline at end of file
diff --git a/src/main/java/com/epam/aidial/core/config/ApiKeyData.java b/src/main/java/com/epam/aidial/core/config/ApiKeyData.java
index 0dfa5fa66..b4a0c617d 100644
--- a/src/main/java/com/epam/aidial/core/config/ApiKeyData.java
+++ b/src/main/java/com/epam/aidial/core/config/ApiKeyData.java
@@ -34,10 +34,17 @@ public class ApiKeyData {
     // list of attached file URLs collected from conversation history of the current request
     private Set<String> attachedFiles = new HashSet<>();
     private List<String> attachedFolders = new ArrayList<>();
-    // deployment name of the source(application/assistant/model) associated with the current request
+    // deployment name of the source(application/assistant/model/interceptor) associated with the current request
     private String sourceDeployment;
     // Execution path of the root request
     private List<String> executionPath;
+    // List of interceptors copied from the deployment config
+    private List<String> interceptors;
+    // Index to track which interceptor is called next
+    private int interceptorIndex = -1;
+    // deployment triggers interceptors
+    private String initialDeployment;
+    private String initialDeploymentApi;
 
     public ApiKeyData() {
     }
@@ -45,6 +52,11 @@ public ApiKeyData() {
     public static void initFromContext(ApiKeyData proxyApiKeyData, ProxyContext context) {
         ApiKeyData apiKeyData = context.getApiKeyData();
         List<String> currentPath;
+        proxyApiKeyData.setInterceptors(context.getInterceptors());
+        proxyApiKeyData.setInterceptorIndex(apiKeyData.getInterceptorIndex() + 1); // move to next interceptor
+        proxyApiKeyData.setInitialDeployment(context.getInitialDeployment());
+        proxyApiKeyData.setInitialDeploymentApi(context.getInitialDeploymentApi());
+
         if (apiKeyData.getPerRequestKey() == null) {
             proxyApiKeyData.setOriginalKey(context.getKey());
             proxyApiKeyData.setExtractedClaims(context.getExtractedClaims());
diff --git a/src/main/java/com/epam/aidial/core/config/Config.java b/src/main/java/com/epam/aidial/core/config/Config.java
index 5934592e6..4d6ee8b8b 100644
--- a/src/main/java/com/epam/aidial/core/config/Config.java
+++ b/src/main/java/com/epam/aidial/core/config/Config.java
@@ -22,6 +22,7 @@ public class Config {
     private Map<String, Key> keys = new HashMap<>();
     private Map<String, Role> roles = new HashMap<>();
     private Set<Integer> retriableErrorCodes = Set.of();
+    private Map<String, Interceptor> interceptors = Map.of();
 
 
     public Deployment selectDeployment(String deploymentId) {
diff --git a/src/main/java/com/epam/aidial/core/config/Deployment.java b/src/main/java/com/epam/aidial/core/config/Deployment.java
index 2acc05402..22d73ff81 100644
--- a/src/main/java/com/epam/aidial/core/config/Deployment.java
+++ b/src/main/java/com/epam/aidial/core/config/Deployment.java
@@ -27,4 +27,8 @@ public abstract class Deployment {
      * Default parameters are applied if a request doesn't contain them in OpenAI chat/completions API call.
      */
     private Map<String, Object> defaults = Map.of();
+    /**
+     * List of interceptors to be called for the deployment
+     */
+    private List<String> interceptors = List.of();
 }
\ No newline at end of file
diff --git a/src/main/java/com/epam/aidial/core/config/FileConfigStore.java b/src/main/java/com/epam/aidial/core/config/FileConfigStore.java
index d990effb9..1926cdb31 100644
--- a/src/main/java/com/epam/aidial/core/config/FileConfigStore.java
+++ b/src/main/java/com/epam/aidial/core/config/FileConfigStore.java
@@ -97,6 +97,12 @@ private void load(boolean fail) {
                 role.setName(name);
             }
 
+            for (Map.Entry<String, Interceptor> entry : config.getInterceptors().entrySet()) {
+                String name = entry.getKey();
+                Interceptor interceptor = entry.getValue();
+                interceptor.setName(name);
+            }
+
             this.config = config;
         } catch (Throwable e) {
             if (fail) {
diff --git a/src/main/java/com/epam/aidial/core/config/Interceptor.java b/src/main/java/com/epam/aidial/core/config/Interceptor.java
new file mode 100644
index 000000000..cc70f322d
--- /dev/null
+++ b/src/main/java/com/epam/aidial/core/config/Interceptor.java
@@ -0,0 +1,4 @@
+package com.epam.aidial.core.config;
+
+public class Interceptor extends Deployment {
+}
diff --git a/src/main/java/com/epam/aidial/core/controller/DeploymentPostController.java b/src/main/java/com/epam/aidial/core/controller/DeploymentPostController.java
index 93d697071..adc60b364 100644
--- a/src/main/java/com/epam/aidial/core/controller/DeploymentPostController.java
+++ b/src/main/java/com/epam/aidial/core/controller/DeploymentPostController.java
@@ -5,6 +5,7 @@
 import com.epam.aidial.core.config.ApiKeyData;
 import com.epam.aidial.core.config.Config;
 import com.epam.aidial.core.config.Deployment;
+import com.epam.aidial.core.config.Interceptor;
 import com.epam.aidial.core.config.Model;
 import com.epam.aidial.core.config.ModelType;
 import com.epam.aidial.core.config.Pricing;
@@ -78,7 +79,14 @@ public Future<?> handle(String deploymentId, String deploymentApi) {
         if (!StringUtils.containsIgnoreCase(contentType, Proxy.HEADER_CONTENT_TYPE_APPLICATION_JSON)) {
             return respond(HttpStatus.UNSUPPORTED_MEDIA_TYPE, "Only application/json is supported");
         }
+        // handle a special deployment `interceptor`
+        if ("interceptor".equals(deploymentId)) {
+            return handleInterceptor();
+        }
+        return handleDeployment(deploymentId, deploymentApi);
+    }
 
+    private Future<?> handleDeployment(String deploymentId, String deploymentApi) {
         Deployment deployment = context.getConfig().selectDeployment(deploymentId);
         boolean isValidDeployment = isValidDeploymentApi(deployment, deploymentApi);
 
@@ -109,7 +117,7 @@ public Future<?> handle(String deploymentId, String deploymentApi) {
                     return dep;
                 })
                 .compose(dep -> {
-                    if (dep instanceof Model) {
+                    if (dep instanceof Model && !context.hasNextInterceptor()) {
                         return proxy.getRateLimiter().limit(context);
                     } else {
                         return Future.succeededFuture(RateLimitResult.SUCCESS);
@@ -117,7 +125,14 @@ public Future<?> handle(String deploymentId, String deploymentApi) {
                 })
                 .map(rateLimitResult -> {
                     if (rateLimitResult.status() == HttpStatus.OK) {
-                        handleRateLimitSuccess(deploymentId);
+                        if (context.hasNextInterceptor()) {
+                            context.setInitialDeployment(deploymentId);
+                            context.setInitialDeploymentApi(deploymentApi);
+                            context.setInterceptors(context.getDeployment().getInterceptors());
+                            handleInterceptor();
+                        } else {
+                            handleRateLimitSuccess(deploymentId);
+                        }
                     } else {
                         handleRateLimitHit(deploymentId, rateLimitResult);
                     }
@@ -129,6 +144,28 @@ public Future<?> handle(String deploymentId, String deploymentApi) {
                 });
     }
 
+    private Future<?> handleInterceptor() {
+        ApiKeyData apiKeyData = context.getApiKeyData();
+        List<String> interceptors = context.getInterceptors();
+        int nextIndex = apiKeyData.getInterceptorIndex() + 1;
+        if (nextIndex < interceptors.size()) {
+            String interceptorName = interceptors.get(nextIndex);
+            Interceptor interceptor = context.getConfig().getInterceptors().get(interceptorName);
+            if (interceptor == null) {
+                log.warn("Interceptor is not found for the given name: {}", interceptorName);
+                return respond(HttpStatus.NOT_FOUND, "Interceptor is not found");
+            }
+            context.setDeployment(interceptor);
+
+            setupProxyApiKeyData();
+
+            InterceptorController controller = new InterceptorController(proxy, context);
+            return controller.handle();
+        } else { // all interceptors are completed we should call the initial deployment
+            return handleDeployment(apiKeyData.getInitialDeployment(), apiKeyData.getInitialDeploymentApi());
+        }
+    }
+
     private void handleRequestError(String deploymentId, Throwable error) {
         if (error instanceof PermissionDeniedException) {
             log.error("Forbidden deployment {}. Key: {}. User sub: {}", deploymentId, context.getProject(), context.getUserSub());
@@ -172,9 +209,6 @@ private void handleRateLimitSuccess(String deploymentId) {
                 .onFailure(this::handleRequestBodyError);
     }
 
-    /**
-     * The method uses blocking calls and should not be used in the event loop thread.
-     */
     private void setupProxyApiKeyData() {
         ApiKeyData proxyApiKeyData = new ApiKeyData();
         context.setProxyApiKeyData(proxyApiKeyData);
diff --git a/src/main/java/com/epam/aidial/core/controller/InterceptorController.java b/src/main/java/com/epam/aidial/core/controller/InterceptorController.java
new file mode 100644
index 000000000..73a8592c1
--- /dev/null
+++ b/src/main/java/com/epam/aidial/core/controller/InterceptorController.java
@@ -0,0 +1,218 @@
+package com.epam.aidial.core.controller;
+
+import com.epam.aidial.core.Proxy;
+import com.epam.aidial.core.ProxyContext;
+import com.epam.aidial.core.config.ApiKeyData;
+import com.epam.aidial.core.function.BaseFunction;
+import com.epam.aidial.core.function.CollectAttachmentsFn;
+import com.epam.aidial.core.function.enhancement.ApplyDefaultDeploymentSettingsFn;
+import com.epam.aidial.core.function.enhancement.EnhanceAssistantRequestFn;
+import com.epam.aidial.core.function.enhancement.EnhanceModelRequestFn;
+import com.epam.aidial.core.util.BufferingReadStream;
+import com.epam.aidial.core.util.HttpStatus;
+import com.epam.aidial.core.util.ProxyUtil;
+import com.fasterxml.jackson.databind.node.ObjectNode;
+import io.netty.buffer.ByteBufInputStream;
+import io.vertx.core.Future;
+import io.vertx.core.MultiMap;
+import io.vertx.core.buffer.Buffer;
+import io.vertx.core.http.HttpClientRequest;
+import io.vertx.core.http.HttpClientResponse;
+import io.vertx.core.http.HttpHeaders;
+import io.vertx.core.http.HttpServerRequest;
+import io.vertx.core.http.HttpServerResponse;
+import io.vertx.core.http.RequestOptions;
+import lombok.extern.slf4j.Slf4j;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.List;
+
+@Slf4j
+public class InterceptorController {
+
+    private final Proxy proxy;
+    private final ProxyContext context;
+
+    private final List<BaseFunction<ObjectNode>> enhancementFunctions;
+
+    public InterceptorController(Proxy proxy, ProxyContext context) {
+        this.proxy = proxy;
+        this.context = context;
+        this.enhancementFunctions = List.of(new CollectAttachmentsFn(proxy, context));
+    }
+
+    public Future<?> handle() {
+        log.info("Received request from client. Trace: {}. Span: {}. Key: {}. Deployment: {}. Headers: {}",
+                context.getTraceId(), context.getSpanId(),
+                context.getProject(), context.getDeployment().getName(),
+                context.getRequest().headers().size());
+
+        context.getRequest().body()
+                .onSuccess(body -> proxy.getVertx().executeBlocking(() -> {
+                    handleRequestBody(body);
+                    return null;
+                }, false).onFailure(this::handleError))
+                .onFailure(this::handleRequestBodyError);
+        return Future.succeededFuture();
+    }
+
+    private void handleError(Throwable error) {
+        log.error("Can't handle request. Key: {}. User sub: {}. Trace: {}. Span: {}. Error: {}",
+                context.getProject(), context.getUserSub(), context.getTraceId(), context.getSpanId(), error.getMessage());
+        respond(HttpStatus.INTERNAL_SERVER_ERROR);
+    }
+
+    private void handleRequestBody(Buffer requestBody) {
+        context.setRequestBody(requestBody);
+        context.setRequestBodyTimestamp(System.currentTimeMillis());
+        try (InputStream stream = new ByteBufInputStream(requestBody.getByteBuf())) {
+            ObjectNode tree = (ObjectNode) ProxyUtil.MAPPER.readTree(stream);
+            Throwable error = ProxyUtil.processChain(tree, enhancementFunctions);
+            if (error != null) {
+                finalizeRequest();
+                return;
+            }
+        } catch (IOException e) {
+            respond(HttpStatus.BAD_REQUEST);
+            log.warn("Can't parse JSON request body. Trace: {}. Span: {}. Error:",
+                    context.getTraceId(), context.getSpanId(), e);
+            return;
+        }
+        sendRequest();
+    }
+
+    private void sendRequest() {
+        String uri = context.getDeployment().getEndpoint();
+        RequestOptions options = new RequestOptions()
+                .setAbsoluteURI(uri)
+                .setMethod(context.getRequest().method());
+
+        proxy.getClient().request(options)
+                .onSuccess(this::handleProxyRequest)
+                .onFailure(this::handleProxyConnectionError);
+    }
+
+    private void handleRequestBodyError(Throwable error) {
+        log.warn("Failed to receive client body. Trace: {}. Span: {}. Error: {}",
+                context.getTraceId(), context.getSpanId(), error.getMessage());
+
+        respond(HttpStatus.UNPROCESSABLE_ENTITY, "Failed to receive body");
+    }
+
+    /**
+     * Called when proxy failed to connect to the origin.
+     */
+    private void handleProxyConnectionError(Throwable error) {
+        log.warn("Can't connect to origin. Trace: {}. Span: {}. Key: {}. Deployment: {}. Address: {}. Error: {}",
+                context.getTraceId(), context.getSpanId(),
+                context.getProject(), context.getDeployment().getName(),
+                context.getDeployment().getEndpoint(), error.getMessage());
+
+        respond(HttpStatus.BAD_GATEWAY, "Failed to connect to origin");
+    }
+
+
+    void handleProxyRequest(HttpClientRequest proxyRequest) {
+        log.info("Connected to interceptor. Trace: {}. Span: {}. Key: {}. Deployment: {}. Address: {}",
+                context.getTraceId(), context.getSpanId(),
+                context.getProject(), context.getDeployment().getName(),
+                proxyRequest.connection().remoteAddress());
+
+        HttpServerRequest request = context.getRequest();
+        context.setProxyRequest(proxyRequest);
+        context.setProxyConnectTimestamp(System.currentTimeMillis());
+
+        MultiMap excludeHeaders = MultiMap.caseInsensitiveMultiMap();
+        if (!context.getDeployment().isForwardAuthToken()) {
+            excludeHeaders.add(HttpHeaders.AUTHORIZATION, "whatever");
+        }
+
+        ProxyUtil.copyHeaders(request.headers(), proxyRequest.headers(), excludeHeaders);
+
+        ApiKeyData proxyApiKeyData = context.getProxyApiKeyData();
+        proxyRequest.headers().add(Proxy.HEADER_API_KEY, proxyApiKeyData.getPerRequestKey());
+
+
+        Buffer requestBody = context.getRequestBody();
+        proxyRequest.putHeader(HttpHeaders.CONTENT_LENGTH, Integer.toString(requestBody.length()));
+
+        proxyRequest.send(requestBody)
+                .onSuccess(this::handleProxyResponse)
+                .onFailure(this::handleProxyResponseError);
+    }
+
+    /**
+     * Called when proxy failed to receive response header from origin.
+     */
+    private void handleProxyResponseError(Throwable error) {
+        log.warn("Proxy failed to receive response header from origin. Trace: {}. Span: {}. Key: {}. Deployment: {}. Address: {}. Error:",
+                context.getTraceId(), context.getSpanId(),
+                context.getProject(), context.getDeployment().getName(),
+                context.getProxyRequest().connection().remoteAddress(),
+                error);
+    }
+
+    private void handleProxyResponse(HttpClientResponse proxyResponse) {
+        log.info("Received header from origin. Trace: {}. Span: {}. Key: {}. Deployment: {}. Endpoint: {}. Status: {}. Headers: {}",
+                context.getTraceId(), context.getSpanId(),
+                context.getProject(), context.getDeployment().getName(),
+                context.getDeployment().getEndpoint(),
+                proxyResponse.statusCode(), proxyResponse.headers().size());
+
+        BufferingReadStream responseStream = new BufferingReadStream(proxyResponse,
+                ProxyUtil.contentLength(proxyResponse, 1024));
+
+        context.setProxyResponse(proxyResponse);
+        context.setProxyResponseTimestamp(System.currentTimeMillis());
+        context.setResponseStream(responseStream);
+
+        HttpServerResponse response = context.getResponse();
+
+        response.setChunked(true);
+        response.setStatusCode(proxyResponse.statusCode());
+
+        ProxyUtil.copyHeaders(proxyResponse.headers(), response.headers());
+
+        responseStream.pipe()
+                .endOnFailure(false)
+                .to(response)
+                .onSuccess(ignore -> finalizeRequest())
+                .onFailure(this::handleResponseError);
+    }
+
+    /**
+     * Called when proxy failed to send response to the client.
+     */
+    private void handleResponseError(Throwable error) {
+        log.warn("Can't send response to client. Trace: {}. Span: {}. Error:",
+                context.getTraceId(), context.getSpanId(), error);
+
+        context.getProxyRequest().reset(); // drop connection to stop origin response
+        context.getResponse().reset();     // drop connection, so that partial client response won't seem complete
+        finalizeRequest();
+    }
+
+    private void respond(HttpStatus status) {
+        finalizeRequest();
+        context.respond(status);
+    }
+
+    private void respond(HttpStatus status, Object result) {
+        finalizeRequest();
+        context.respond(status, result);
+    }
+
+    private void finalizeRequest() {
+        ApiKeyData proxyApiKeyData = context.getProxyApiKeyData();
+        if (proxyApiKeyData != null) {
+            proxy.getApiKeyStore().invalidatePerRequestApiKey(proxyApiKeyData)
+                    .onSuccess(invalidated -> {
+                        if (!invalidated) {
+                            log.warn("Per request is not removed: {}", proxyApiKeyData.getPerRequestKey());
+                        }
+                    }).onFailure(error -> log.error("error occurred on invalidating per-request key", error));
+        }
+    }
+
+}
diff --git a/src/test/java/com/epam/aidial/core/CustomApplicationApiTest.java b/src/test/java/com/epam/aidial/core/CustomApplicationApiTest.java
index b5cdda362..f8ef2bf73 100644
--- a/src/test/java/com/epam/aidial/core/CustomApplicationApiTest.java
+++ b/src/test/java/com/epam/aidial/core/CustomApplicationApiTest.java
@@ -43,7 +43,8 @@ void testApplicationCreation() {
                 "icon_url":"http://application1/icon.svg",
                 "description":"My Custom Application Description",
                 "forward_auth_token":false,
-                "defaults": {}
+                "defaults": {},
+                "interceptors": []
                 }
                 """);
 
@@ -175,7 +176,8 @@ void testApplicationDeletion() {
                  "rate_endpoint": "http://application1/rate",
                  "configuration_endpoint": "http://application1/configuration"
                  },
-                "defaults": {}
+                "defaults": {},
+                "interceptors": []
                 }
                 """);