feat: Streaming azure openai (#244)
* debug code

* Make streaming work

* fix: remove await

* fix: await again

* small changes

* chore: add missing javadoc

* wip

* feat: pipe streams

* feat: wrap chunk to see usage and finish reason

* refactor: pipe streams

* refactor

* refactor: change streamString to streamContent

* fix: lint

* refactor

* feat: demo streaming in sample-code

* fix: end res in sample code when finish

* fix: lint

* refactor

* fix: check public-api

* chore: add tests for stream chunk response

* fix: Changes from lint

* fix: chunk type inference

* refactor: change some types

* wip

* fix: internal.js.map issue

* chore: add tests for chat completion stream

* refactor: move stream files

* fix: remove duplicated file

* refactor: rename stream

* refactor: openai stream

* chore: add tests for sse-stream (copied from openai)

* refactor: rename test responses

* refactor: replace streamContent with a method

* feat: support multiple choices

* fix: Changes from lint

* fix: add AbortController and fix sample code

* fix: add controller signal to axios

* fix: Changes from lint

* chore: add unit test for stream()

* fix: Changes from lint

* fix: stream finish reason index 0

* lint

* fix: type test

* fix: make toContentStream return AzureOpenAiChatCompletionStream

* fix: lint

* feat: throw if sse payload invalid

* fix: Changes from lint

* refactor: interface

* refactor

* chore: add changeset

* chore: improve sample code for streaming

* fix: Changes from lint

* docs

* refactor: get by index

* fix: lint

* chore: small changes

* fix: Changes from lint

* fix: type of finish reason

* Update packages/foundation-models/src/azure-openai/stream/line-decoder.ts

Co-authored-by: KavithaSiva <[email protected]>

* Update .changeset/seven-chairs-change.md

Co-authored-by: KavithaSiva <[email protected]>

* Update packages/foundation-models/src/azure-openai/azure-openai-chat-client.ts

* fix: get token usage return type

* chore: comment finish reasons map

* refactor: code review

* fix: ignore _ in eslint

* Update packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.ts

Co-authored-by: Tom Frenken <[email protected]>

---------

Co-authored-by: cloud-sdk-js <[email protected]>
Co-authored-by: Junjie Tang <[email protected]>
Co-authored-by: KavithaSiva <[email protected]>
Co-authored-by: Tom Frenken <[email protected]>
5 people authored Nov 18, 2024
1 parent b900c46 commit 71b1a52
Showing 25 changed files with 1,501 additions and 6 deletions.
5 changes: 5 additions & 0 deletions .changeset/seven-chairs-change.md
@@ -0,0 +1,5 @@
---
'@sap-ai-sdk/foundation-models': minor
---

[New Functionality] Support streaming chat completion in the Azure OpenAI client in `foundation-models`.
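
A minimal consumption sketch of the new functionality, assuming a deployed Azure OpenAI chat model (the model name `gpt-4o` is only an example) and the package named in the changeset above:

import { AzureOpenAiChatClient } from '@sap-ai-sdk/foundation-models';

// Model name is illustrative; any supported Azure OpenAI chat model deployment works.
const client = new AzureOpenAiChatClient('gpt-4o');

const response = await client.stream({
  messages: [
    { role: 'user', content: 'Where is the deepest place on earth located' }
  ]
});

// stream() wraps each SSE event in an AzureOpenAiChatCompletionStreamChunkResponse.
for await (const chunk of response.stream) {
  process.stdout.write(chunk.getDeltaContent() ?? '');
}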
6 changes: 6 additions & 0 deletions eslint.config.js
@@ -23,6 +23,12 @@ export default [
        {
          noUselessIndex: false
        }
      ],
      '@typescript-eslint/no-unused-vars': [
        'error',
        {
          'varsIgnorePattern': '^_'
        }
      ]
    }
  },
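
The new `no-unused-vars` entry stops the linter from flagging bindings whose names start with an underscore; a small sketch of the pattern this allows:

// With varsIgnorePattern '^_', an intentionally unused binding passes lint
// when it is prefixed with an underscore, e.g. while destructuring:
const [_first, second] = ['skipped', 'used'];
console.log(second);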
packages/foundation-models/src/azure-openai/azure-openai-chat-client.test.ts
@@ -3,11 +3,12 @@ import {
  mockClientCredentialsGrantCall,
  mockDeploymentsList,
  mockInference,
  parseFileToString,
  parseMockResponse
} from '../../../../test-util/mock-http.js';
import { AzureOpenAiChatClient } from './azure-openai-chat-client.js';
import { apiVersion } from './model-types.js';
import type { AzureOpenAiCreateChatCompletionResponse } from './client/inference/schema/index.js';

describe('Azure OpenAI chat client', () => {
  const chatCompletionEndpoint = {
@@ -159,4 +160,46 @@ describe('Azure OpenAI chat client', () => {
    const response = await clientWithResourceGroup.run(prompt);
    expect(response.data).toEqual(mockResponse);
  });

  it('executes a streaming request with correct chunk response', async () => {
    const prompt = {
      messages: [
        {
          role: 'user' as const,
          content: 'Where is the deepest place on earth located'
        }
      ],
      stream: true,
      stream_options: {
        include_usage: true
      }
    };

    const mockResponse = await parseFileToString(
      'foundation-models',
      'azure-openai-chat-completion-stream-chunks.txt'
    );

    mockInference(
      {
        data: prompt
      },
      {
        data: mockResponse,
        status: 200
      },
      chatCompletionEndpoint
    );

    const initialResponse = await parseFileToString(
      'foundation-models',
      'azure-openai-chat-completion-stream-chunk-response-initial.json'
    );

    const response = await client.stream(prompt);
    for await (const chunk of response.stream) {
      expect(JSON.stringify(chunk.data)).toEqual(initialResponse);
      break;
    }
  });
});
packages/foundation-models/src/azure-openai/azure-openai-chat-client.ts
@@ -6,6 +6,10 @@ import {
} from '@sap-ai-sdk/ai-api/internal.js';
import { apiVersion, type AzureOpenAiChatModel } from './model-types.js';
import { AzureOpenAiChatCompletionResponse } from './azure-openai-chat-completion-response.js';
import { AzureOpenAiChatCompletionStreamResponse } from './azure-openai-chat-completion-stream-response.js';
import { AzureOpenAiChatCompletionStream } from './azure-openai-chat-completion-stream.js';
import type { AzureOpenAiChatCompletionStreamChunkResponse } from './azure-openai-chat-completion-stream-chunk-response.js';
import type { HttpResponse } from '@sap-cloud-sdk/http-client';
import type { AzureOpenAiCreateChatCompletionRequest } from './client/inference/schema/index.js';

/**
@@ -28,12 +32,43 @@ export class AzureOpenAiChatClient {
    data: AzureOpenAiCreateChatCompletionRequest,
    requestConfig?: CustomRequestConfig
  ): Promise<AzureOpenAiChatCompletionResponse> {
    const response = await this.executeRequest(data, requestConfig);
    return new AzureOpenAiChatCompletionResponse(response);
  }

  /**
   * Creates a completion stream for the chat messages.
   * @param data - The input parameters for the chat completion.
   * @param controller - The abort controller.
   * @param requestConfig - The request configuration.
   * @returns A response containing the chat completion stream.
   */
  async stream(
    data: AzureOpenAiCreateChatCompletionRequest,
    controller = new AbortController(),
    requestConfig?: CustomRequestConfig
  ): Promise<
    AzureOpenAiChatCompletionStreamResponse<AzureOpenAiChatCompletionStreamChunkResponse>
  > {
    const response =
      new AzureOpenAiChatCompletionStreamResponse<AzureOpenAiChatCompletionStreamChunkResponse>();
    response.stream = (await this.createStream(data, controller, requestConfig))
      ._pipe(AzureOpenAiChatCompletionStream._processChunk)
      ._pipe(AzureOpenAiChatCompletionStream._processFinishReason, response)
      ._pipe(AzureOpenAiChatCompletionStream._processTokenUsage, response);
    return response;
  }

  private async executeRequest(
    data: AzureOpenAiCreateChatCompletionRequest,
    requestConfig?: CustomRequestConfig
  ): Promise<HttpResponse> {
    const deploymentId = await getDeploymentId(
      this.modelDeployment,
      'azure-openai'
    );
    const resourceGroup = getResourceGroup(this.modelDeployment);
    return executeRequest(
      {
        url: `/inference/deployments/${deploymentId}/chat/completions`,
        apiVersion,
@@ -42,6 +77,27 @@
      data,
      requestConfig
    );
  }

  private async createStream(
    data: AzureOpenAiCreateChatCompletionRequest,
    controller: AbortController,
    requestConfig?: CustomRequestConfig
  ): Promise<AzureOpenAiChatCompletionStream<any>> {
    const response = await this.executeRequest(
      {
        ...data,
        stream: true,
        stream_options: {
          include_usage: true
        }
      },
      {
        ...requestConfig,
        responseType: 'stream',
        signal: controller.signal
      }
    );
    return AzureOpenAiChatCompletionStream._create(response, controller);
  }
}
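
Since `stream()` forwards `controller.signal` to the underlying request, a caller can cancel a stream mid-flight; a hedged sketch (the timeout value is illustrative):

const controller = new AbortController();
const response = await client.stream(
  { messages: [{ role: 'user', content: 'Write a very long story.' }] },
  controller
);

// Abort the underlying HTTP request after ~5 seconds.
setTimeout(() => controller.abort(), 5000);

try {
  for await (const chunk of response.stream) {
    process.stdout.write(chunk.getDeltaContent() ?? '');
  }
} catch {
  // Aborting while the stream is still being consumed surfaces as an error here.
}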
packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-chunk-response.test.ts
@@ -0,0 +1,77 @@
import { parseMockResponse } from '../../../../test-util/mock-http.js';
import { AzureOpenAiChatCompletionStreamChunkResponse } from './azure-openai-chat-completion-stream-chunk-response.js';

describe('OpenAI chat completion stream chunk response', () => {
  let mockResponses: {
    tokenUsageResponse: any;
    finishReasonResponse: any;
    deltaContentResponse: any;
  };
  let azureOpenAiChatCompletionStreamChunkResponses: {
    tokenUsageResponse: AzureOpenAiChatCompletionStreamChunkResponse;
    finishReasonResponse: AzureOpenAiChatCompletionStreamChunkResponse;
    deltaContentResponse: AzureOpenAiChatCompletionStreamChunkResponse;
  };

  beforeAll(async () => {
    mockResponses = {
      tokenUsageResponse: await parseMockResponse<any>(
        'foundation-models',
        'azure-openai-chat-completion-stream-chunk-response-token-usage.json'
      ),
      finishReasonResponse: await parseMockResponse<any>(
        'foundation-models',
        'azure-openai-chat-completion-stream-chunk-response-finish-reason.json'
      ),
      deltaContentResponse: await parseMockResponse<any>(
        'foundation-models',
        'azure-openai-chat-completion-stream-chunk-response-delta-content.json'
      )
    };
    azureOpenAiChatCompletionStreamChunkResponses = {
      tokenUsageResponse: new AzureOpenAiChatCompletionStreamChunkResponse(
        mockResponses.tokenUsageResponse
      ),
      finishReasonResponse: new AzureOpenAiChatCompletionStreamChunkResponse(
        mockResponses.finishReasonResponse
      ),
      deltaContentResponse: new AzureOpenAiChatCompletionStreamChunkResponse(
        mockResponses.deltaContentResponse
      )
    };
  });

  it('should return the chat completion stream chunk response', () => {
    expect(
      azureOpenAiChatCompletionStreamChunkResponses.tokenUsageResponse.data
    ).toStrictEqual(mockResponses.tokenUsageResponse);
    expect(
      azureOpenAiChatCompletionStreamChunkResponses.finishReasonResponse.data
    ).toStrictEqual(mockResponses.finishReasonResponse);
    expect(
      azureOpenAiChatCompletionStreamChunkResponses.deltaContentResponse.data
    ).toStrictEqual(mockResponses.deltaContentResponse);
  });

  it('should get token usage', () => {
    expect(
      azureOpenAiChatCompletionStreamChunkResponses.tokenUsageResponse.getTokenUsage()
    ).toMatchObject({
      completion_tokens: expect.any(Number),
      prompt_tokens: expect.any(Number),
      total_tokens: expect.any(Number)
    });
  });

  it('should return finish reason', () => {
    expect(
      azureOpenAiChatCompletionStreamChunkResponses.finishReasonResponse.getFinishReason()
    ).toBe('stop');
  });

  it('should return delta content with default index 0', () => {
    expect(
      azureOpenAiChatCompletionStreamChunkResponses.deltaContentResponse.getDeltaContent()
    ).toBe(' is');
  });
});
packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-chunk-response.ts
@@ -0,0 +1,39 @@
import type { AzureOpenAiCompletionUsage } from './client/inference/schema/index.js';

/**
 * Azure OpenAI chat completion stream chunk response.
 */
export class AzureOpenAiChatCompletionStreamChunkResponse {
  constructor(public readonly data: any) {
    // TODO: Change `any` to `CreateChatCompletionStreamResponse` once the preview spec becomes stable.
    this.data = data;
  }

  /**
   * Usage of tokens in the chunk response.
   * @returns Token usage.
   */
  getTokenUsage(): AzureOpenAiCompletionUsage | null {
    return this.data.usage;
  }

  /**
   * Reason for stopping the completion stream chunk.
   * @param choiceIndex - The index of the choice to parse.
   * @returns The finish reason.
   */
  getFinishReason(choiceIndex = 0): string | undefined {
    return this.data.choices.find((c: any) => c.index === choiceIndex)
      ?.finish_reason;
  }

  /**
   * Parses the chunk response and returns the delta content.
   * @param choiceIndex - The index of the choice to parse.
   * @returns The message delta content.
   */
  getDeltaContent(choiceIndex = 0): string | undefined | null {
    return this.data.choices.find((c: any) => c.index === choiceIndex)?.delta
      .content;
  }
}
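
As the tests above suggest, a chunk typically carries only one of these signals: delta chunks hold incremental content, the last content chunk a finish reason, and (with `include_usage`) a trailing chunk the token usage. A sketch of a consumer inspecting all three:

for await (const chunk of response.stream) {
  const delta = chunk.getDeltaContent(); // incremental message text, if any
  if (delta) {
    process.stdout.write(delta);
  }
  const finishReason = chunk.getFinishReason(); // e.g. 'stop' on the last content chunk
  if (finishReason) {
    console.log(`\nFinish reason: ${finishReason}`);
  }
  const usage = chunk.getTokenUsage(); // only set on the trailing usage chunk
  if (usage) {
    console.log(`Total tokens: ${usage.total_tokens}`);
  }
}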
packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-response.ts
@@ -0,0 +1,57 @@
import type { AzureOpenAiCompletionUsage } from './client/inference/schema/index.js';
import type { AzureOpenAiChatCompletionStream } from './azure-openai-chat-completion-stream.js';

/**
 * Azure OpenAI chat completion stream response.
 */
export class AzureOpenAiChatCompletionStreamResponse<T> {
  private _usage: AzureOpenAiCompletionUsage | undefined;
  /**
   * Finish reasons for all choices.
   */
  private _finishReasons: Map<number, string> = new Map();
  private _stream: AzureOpenAiChatCompletionStream<T> | undefined;

  public getTokenUsage(): AzureOpenAiCompletionUsage | undefined {
    return this._usage;
  }

  /**
   * @internal
   */
  _setTokenUsage(usage: AzureOpenAiCompletionUsage): void {
    this._usage = usage;
  }

  public getFinishReason(choiceIndex = 0): string | undefined | null {
    return this._finishReasons.get(choiceIndex);
  }

  /**
   * @internal
   */
  _getFinishReasons(): Map<number, string> {
    return this._finishReasons;
  }

  /**
   * @internal
   */
  _setFinishReasons(finishReasons: Map<number, string>): void {
    this._finishReasons = finishReasons;
  }

  get stream(): AzureOpenAiChatCompletionStream<T> {
    if (!this._stream) {
      throw new Error('Response stream is undefined.');
    }
    return this._stream;
  }

  /**
   * @internal
   */
  set stream(stream: AzureOpenAiChatCompletionStream<T>) {
    this._stream = stream;
  }
}
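
Because the `_processFinishReason` and `_processTokenUsage` pipeline stages populate this response object while the stream is being read, the aggregate getters only return values once the stream has been fully consumed; a sketch:

for await (const chunk of response.stream) {
  // ... consume chunks ...
}
// Populated during consumption by the pipeline stages above.
console.log(response.getFinishReason()); // finish reason of choice index 0
console.log(response.getTokenUsage()?.total_tokens);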
