feat: Streaming azure openai (#244)
* debug code

* Make streaming work

* fix: remove await

* fix: await again

* small changes

* chore: add missing javadoc

* wip

* feat: pipe streams

* feat: wrap chunk to see usage and finish reason

* refactor: pipe streams

* refactor

* refactor: change streamString to streamContent

* fix: lint

* refactor

* feat: demo streaming in sample-code

* fix: end res in sample code when finish

* fix: lint

* refactor

* fix: check public-api

* chore: add tests for stream chunk response

* fix: Changes from lint

* fix: chunk type inference

* refactor: change some types

* wip

* fix: internal.js.map issue

* chore: add tests for chat completion stream

* refactor: move stream files

* fix: remove duplicated file

* refactor: rename stream

* refactor: openai stream

* chore: add tests for sse-stream (copied from openai)

* refactor: rename test responses

* refactor: replace streamContent with a method

* feat: support multiple choices

* fix: Changes from lint

* fix: add AbortController and fix sample code

* fix: add controller signal to axios

* fix: Changes from lint

* chore: add unit test for stream()

* fix: Changes from lint

* fix: stream finish reason index 0

* lint

* fix: type test

* fix: make toContentStream return AzureOpenAiChatCompletionStream

* fix: lint

* feat: throw if sse payload invalid

* fix: Changes from lint

* refactor: interface

* refactor

* chore: add changeset

* chore: improve sample code for streaming

* fix: Changes from lint

* docs

* refactor: get by index

* fix: lint

* chore: small changes

* fix: Changes from lint

* fix: type of finish reason

* Update packages/foundation-models/src/azure-openai/stream/line-decoder.ts

Co-authored-by: KavithaSiva <[email protected]>

* Update .changeset/seven-chairs-change.md

Co-authored-by: KavithaSiva <[email protected]>

* Update packages/foundation-models/src/azure-openai/azure-openai-chat-client.ts

* fix: get token usage return type

* chore: comment finish reasons map

* refactor: code review

* fix: ignore _ in eslint

* Update packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.ts

Co-authored-by: Tom Frenken <[email protected]>

---------

Co-authored-by: cloud-sdk-js <[email protected]>
Co-authored-by: Junjie Tang <[email protected]>
Co-authored-by: KavithaSiva <[email protected]>
Co-authored-by: Tom Frenken <[email protected]>
5 people authored Nov 18, 2024
1 parent b900c46 commit 71b1a52
Showing 25 changed files with 1,501 additions and 6 deletions.
5 changes: 5 additions & 0 deletions .changeset/seven-chairs-change.md
@@ -0,0 +1,5 @@
---
'@sap-ai-sdk/foundation-models': minor
---

[New Functionality] Support streaming chat completion in the Azure OpenAI client in `foundation-models`.
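
A minimal consumption sketch of the new functionality, assuming a deployed Azure OpenAI chat model (the model name `gpt-4o` is only an example) and the package named in the changeset above:

import { AzureOpenAiChatClient } from '@sap-ai-sdk/foundation-models';

// Model name is illustrative; any supported Azure OpenAI chat model deployment works.
const client = new AzureOpenAiChatClient('gpt-4o');

const response = await client.stream({
  messages: [
    { role: 'user', content: 'Where is the deepest place on earth located' }
  ]
});

// stream() wraps each SSE event in an AzureOpenAiChatCompletionStreamChunkResponse.
for await (const chunk of response.stream) {
  process.stdout.write(chunk.getDeltaContent() ?? '');
}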
6 changes: 6 additions & 0 deletions eslint.config.js
@@ -23,6 +23,12 @@ export default [
        {
          noUselessIndex: false
        }
      ],
      '@typescript-eslint/no-unused-vars': [
        'error',
        {
          'varsIgnorePattern': '^_'
        }
      ]
    }
  },
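
The new `no-unused-vars` entry stops the linter from flagging bindings whose names start with an underscore; a small sketch of the pattern this allows:

// With varsIgnorePattern '^_', an intentionally unused binding passes lint
// when it is prefixed with an underscore, e.g. while destructuring:
const [_first, second] = ['skipped', 'used'];
console.log(second);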
packages/foundation-models/src/azure-openai/azure-openai-chat-client.test.ts
@@ -3,11 +3,12 @@ import {
  mockClientCredentialsGrantCall,
  mockDeploymentsList,
  mockInference,
  parseFileToString,
  parseMockResponse
} from '../../../../test-util/mock-http.js';
import { AzureOpenAiChatClient } from './azure-openai-chat-client.js';
import { apiVersion } from './model-types.js';
import type { AzureOpenAiCreateChatCompletionResponse } from './client/inference/schema/index.js';

describe('Azure OpenAI chat client', () => {
  const chatCompletionEndpoint = {
@@ -159,4 +160,46 @@ describe('Azure OpenAI chat client', () => {
    const response = await clientWithResourceGroup.run(prompt);
    expect(response.data).toEqual(mockResponse);
  });

  it('executes a streaming request with correct chunk response', async () => {
    const prompt = {
      messages: [
        {
          role: 'user' as const,
          content: 'Where is the deepest place on earth located'
        }
      ],
      stream: true,
      stream_options: {
        include_usage: true
      }
    };

    const mockResponse = await parseFileToString(
      'foundation-models',
      'azure-openai-chat-completion-stream-chunks.txt'
    );

    mockInference(
      {
        data: prompt
      },
      {
        data: mockResponse,
        status: 200
      },
      chatCompletionEndpoint
    );

    const initialResponse = await parseFileToString(
      'foundation-models',
      'azure-openai-chat-completion-stream-chunk-response-initial.json'
    );

    const response = await client.stream(prompt);
    for await (const chunk of response.stream) {
      expect(JSON.stringify(chunk.data)).toEqual(initialResponse);
      break;
    }
  });
});
packages/foundation-models/src/azure-openai/azure-openai-chat-client.ts
@@ -6,6 +6,10 @@ import {
} from '@sap-ai-sdk/ai-api/internal.js';
import { apiVersion, type AzureOpenAiChatModel } from './model-types.js';
import { AzureOpenAiChatCompletionResponse } from './azure-openai-chat-completion-response.js';
import { AzureOpenAiChatCompletionStreamResponse } from './azure-openai-chat-completion-stream-response.js';
import { AzureOpenAiChatCompletionStream } from './azure-openai-chat-completion-stream.js';
import type { AzureOpenAiChatCompletionStreamChunkResponse } from './azure-openai-chat-completion-stream-chunk-response.js';
import type { HttpResponse } from '@sap-cloud-sdk/http-client';
import type { AzureOpenAiCreateChatCompletionRequest } from './client/inference/schema/index.js';

/**
@@ -28,12 +32,43 @@ export class AzureOpenAiChatClient {
    data: AzureOpenAiCreateChatCompletionRequest,
    requestConfig?: CustomRequestConfig
  ): Promise<AzureOpenAiChatCompletionResponse> {
    const response = await this.executeRequest(data, requestConfig);
    return new AzureOpenAiChatCompletionResponse(response);
  }

  /**
   * Creates a completion stream for the chat messages.
   * @param data - The input parameters for the chat completion.
   * @param controller - The abort controller.
   * @param requestConfig - The request configuration.
   * @returns A response containing the chat completion stream.
   */
  async stream(
    data: AzureOpenAiCreateChatCompletionRequest,
    controller = new AbortController(),
    requestConfig?: CustomRequestConfig
  ): Promise<
    AzureOpenAiChatCompletionStreamResponse<AzureOpenAiChatCompletionStreamChunkResponse>
  > {
    const response =
      new AzureOpenAiChatCompletionStreamResponse<AzureOpenAiChatCompletionStreamChunkResponse>();
    response.stream = (await this.createStream(data, controller, requestConfig))
      ._pipe(AzureOpenAiChatCompletionStream._processChunk)
      ._pipe(AzureOpenAiChatCompletionStream._processFinishReason, response)
      ._pipe(AzureOpenAiChatCompletionStream._processTokenUsage, response);
    return response;
  }

  private async executeRequest(
    data: AzureOpenAiCreateChatCompletionRequest,
    requestConfig?: CustomRequestConfig
  ): Promise<HttpResponse> {
    const deploymentId = await getDeploymentId(
      this.modelDeployment,
      'azure-openai'
    );
    const resourceGroup = getResourceGroup(this.modelDeployment);
    return executeRequest(
      {
        url: `/inference/deployments/${deploymentId}/chat/completions`,
        apiVersion,
@@ -42,6 +77,27 @@
      data,
      requestConfig
    );
  }

  private async createStream(
    data: AzureOpenAiCreateChatCompletionRequest,
    controller: AbortController,
    requestConfig?: CustomRequestConfig
  ): Promise<AzureOpenAiChatCompletionStream<any>> {
    const response = await this.executeRequest(
      {
        ...data,
        stream: true,
        stream_options: {
          include_usage: true
        }
      },
      {
        ...requestConfig,
        responseType: 'stream',
        signal: controller.signal
      }
    );
    return AzureOpenAiChatCompletionStream._create(response, controller);
  }
}
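
Since `stream()` forwards `controller.signal` to the underlying request, a caller can cancel a stream mid-flight; a hedged sketch (the timeout value is illustrative):

const controller = new AbortController();
const response = await client.stream(
  { messages: [{ role: 'user', content: 'Write a very long story.' }] },
  controller
);

// Abort the underlying HTTP request after ~5 seconds.
setTimeout(() => controller.abort(), 5000);

try {
  for await (const chunk of response.stream) {
    process.stdout.write(chunk.getDeltaContent() ?? '');
  }
} catch {
  // Aborting while the stream is still being consumed surfaces as an error here.
}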
packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-chunk-response.test.ts
@@ -0,0 +1,77 @@
import { parseMockResponse } from '../../../../test-util/mock-http.js';
import { AzureOpenAiChatCompletionStreamChunkResponse } from './azure-openai-chat-completion-stream-chunk-response.js';

describe('OpenAI chat completion stream chunk response', () => {
  let mockResponses: {
    tokenUsageResponse: any;
    finishReasonResponse: any;
    deltaContentResponse: any;
  };
  let azureOpenAiChatCompletionStreamChunkResponses: {
    tokenUsageResponse: AzureOpenAiChatCompletionStreamChunkResponse;
    finishReasonResponse: AzureOpenAiChatCompletionStreamChunkResponse;
    deltaContentResponse: AzureOpenAiChatCompletionStreamChunkResponse;
  };

  beforeAll(async () => {
    mockResponses = {
      tokenUsageResponse: await parseMockResponse<any>(
        'foundation-models',
        'azure-openai-chat-completion-stream-chunk-response-token-usage.json'
      ),
      finishReasonResponse: await parseMockResponse<any>(
        'foundation-models',
        'azure-openai-chat-completion-stream-chunk-response-finish-reason.json'
      ),
      deltaContentResponse: await parseMockResponse<any>(
        'foundation-models',
        'azure-openai-chat-completion-stream-chunk-response-delta-content.json'
      )
    };
    azureOpenAiChatCompletionStreamChunkResponses = {
      tokenUsageResponse: new AzureOpenAiChatCompletionStreamChunkResponse(
        mockResponses.tokenUsageResponse
      ),
      finishReasonResponse: new AzureOpenAiChatCompletionStreamChunkResponse(
        mockResponses.finishReasonResponse
      ),
      deltaContentResponse: new AzureOpenAiChatCompletionStreamChunkResponse(
        mockResponses.deltaContentResponse
      )
    };
  });

  it('should return the chat completion stream chunk response', () => {
    expect(
      azureOpenAiChatCompletionStreamChunkResponses.tokenUsageResponse.data
    ).toStrictEqual(mockResponses.tokenUsageResponse);
    expect(
      azureOpenAiChatCompletionStreamChunkResponses.finishReasonResponse.data
    ).toStrictEqual(mockResponses.finishReasonResponse);
    expect(
      azureOpenAiChatCompletionStreamChunkResponses.deltaContentResponse.data
    ).toStrictEqual(mockResponses.deltaContentResponse);
  });

  it('should get token usage', () => {
    expect(
      azureOpenAiChatCompletionStreamChunkResponses.tokenUsageResponse.getTokenUsage()
    ).toMatchObject({
      completion_tokens: expect.any(Number),
      prompt_tokens: expect.any(Number),
      total_tokens: expect.any(Number)
    });
  });

  it('should return finish reason', () => {
    expect(
      azureOpenAiChatCompletionStreamChunkResponses.finishReasonResponse.getFinishReason()
    ).toBe('stop');
  });

  it('should return delta content with default index 0', () => {
    expect(
      azureOpenAiChatCompletionStreamChunkResponses.deltaContentResponse.getDeltaContent()
    ).toBe(' is');
  });
});
packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-chunk-response.ts
@@ -0,0 +1,39 @@
import type { AzureOpenAiCompletionUsage } from './client/inference/schema/index.js';

/**
 * Azure OpenAI chat completion stream chunk response.
 */
export class AzureOpenAiChatCompletionStreamChunkResponse {
  constructor(public readonly data: any) {
    // TODO: Change `any` to `CreateChatCompletionStreamResponse` once the preview spec becomes stable.
    this.data = data;
  }

  /**
   * Usage of tokens in the chunk response.
   * @returns Token usage.
   */
  getTokenUsage(): AzureOpenAiCompletionUsage | null {
    return this.data.usage;
  }

  /**
   * Reason for stopping the completion stream chunk.
   * @param choiceIndex - The index of the choice to parse.
   * @returns The finish reason.
   */
  getFinishReason(choiceIndex = 0): string | undefined {
    return this.data.choices.find((c: any) => c.index === choiceIndex)
      ?.finish_reason;
  }

  /**
   * Parses the chunk response and returns the delta content.
   * @param choiceIndex - The index of the choice to parse.
   * @returns The message delta content.
   */
  getDeltaContent(choiceIndex = 0): string | undefined | null {
    return this.data.choices.find((c: any) => c.index === choiceIndex)?.delta
      .content;
  }
}
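
As the tests above suggest, a chunk typically carries only one of these signals: delta chunks hold incremental content, the last content chunk a finish reason, and (with `include_usage`) a trailing chunk the token usage. A sketch of a consumer inspecting all three:

for await (const chunk of response.stream) {
  const delta = chunk.getDeltaContent(); // incremental message text, if any
  if (delta) {
    process.stdout.write(delta);
  }
  const finishReason = chunk.getFinishReason(); // e.g. 'stop' on the last content chunk
  if (finishReason) {
    console.log(`\nFinish reason: ${finishReason}`);
  }
  const usage = chunk.getTokenUsage(); // only set on the trailing usage chunk
  if (usage) {
    console.log(`Total tokens: ${usage.total_tokens}`);
  }
}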
packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-response.ts
@@ -0,0 +1,57 @@
import type { AzureOpenAiCompletionUsage } from './client/inference/schema/index.js';
import type { AzureOpenAiChatCompletionStream } from './azure-openai-chat-completion-stream.js';

/**
 * Azure OpenAI chat completion stream response.
 */
export class AzureOpenAiChatCompletionStreamResponse<T> {
  private _usage: AzureOpenAiCompletionUsage | undefined;
  /**
   * Finish reasons for all choices.
   */
  private _finishReasons: Map<number, string> = new Map();
  private _stream: AzureOpenAiChatCompletionStream<T> | undefined;

  public getTokenUsage(): AzureOpenAiCompletionUsage | undefined {
    return this._usage;
  }

  /**
   * @internal
   */
  _setTokenUsage(usage: AzureOpenAiCompletionUsage): void {
    this._usage = usage;
  }

  public getFinishReason(choiceIndex = 0): string | undefined | null {
    return this._finishReasons.get(choiceIndex);
  }

  /**
   * @internal
   */
  _getFinishReasons(): Map<number, string> {
    return this._finishReasons;
  }

  /**
   * @internal
   */
  _setFinishReasons(finishReasons: Map<number, string>): void {
    this._finishReasons = finishReasons;
  }

  get stream(): AzureOpenAiChatCompletionStream<T> {
    if (!this._stream) {
      throw new Error('Response stream is undefined.');
    }
    return this._stream;
  }

  /**
   * @internal
   */
  set stream(stream: AzureOpenAiChatCompletionStream<T>) {
    this._stream = stream;
  }
}
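
Because the `_processFinishReason` and `_processTokenUsage` pipeline stages populate this response object while the stream is being read, the aggregate getters only return values once the stream has been fully consumed; a sketch:

for await (const chunk of response.stream) {
  // ... consume chunks ...
}
// Populated during consumption by the pipeline stages above.
console.log(response.getFinishReason()); // finish reason of choice index 0
console.log(response.getTokenUsage()?.total_tokens);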
