Skip to content

Commit

Permalink
Support setting voice inside the TTS stream (#8)
Browse files Browse the repository at this point in the history
* Support setting voice inside the TTS stream

This change supports switching the voice on the fly in response to a
STREAMING_SET_VOICE event. The event value is stored in the TTS machine
context as `currentVoice`. If that value is empty (""), the TTS voice falls
back to the agenda voice and then to the default voice.

* Provide a type for SpeechState external events (input and output)

* version 2.4.0
  • Loading branch information
vladmaraev authored Aug 16, 2024
1 parent e268656 commit be7eed4
Show file tree
Hide file tree
Showing 5 changed files with 55 additions and 28 deletions.
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "speechstate",
"version": "2.3.0",
"version": "2.4.0",
"license": "GPL-3.0",
"homepage": "http://localhost/speechstate",
"main": "./dist/index.js",
Expand Down
2 changes: 2 additions & 0 deletions src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,5 @@ export {
Agenda,
RecogniseParameters,
} from "./types";

export type { SpeechStateExternalEvent } from "./types";
28 changes: 2 additions & 26 deletions src/speechstate.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,44 +8,20 @@ import {
import { ttsMachine } from "./tts";
import { asrMachine } from "./asr";

import { Settings, Agenda, Hypothesis, RecogniseParameters } from "./types";
import { Settings, SpeechStateEvent } from "./types";
/** Context type of the `speechstate` machine. */
interface SSContext {
/** Settings supplied as the machine's input. */
settings: Settings;
/** Optional Web Audio `AudioContext`. */
audioContext?: AudioContext;
// NOTE(review): presumably references to the spawned ASR / TTS actors
// (untyped, hence `any`) — confirm against the rest of the machine.
asrRef?: any;
ttsRef?: any;
}

/**
 * External input events: events sent to the spawned `speechstate` machine.
 * `SPEAK` carries an `Agenda`; `LISTEN` carries `RecogniseParameters`.
 */
type SSEventExtIn =
| { type: "PREPARE" }
| { type: "CONTROL" }
| { type: "STOP" }
| { type: "SPEAK"; value: Agenda }
| { type: "LISTEN"; value: RecogniseParameters };

/**
 * External output events, used with `sendParent` (which is not type-checked).
 * `RECOGNISED` carries a list of `Hypothesis` values and an optional `nluValue`.
 */
type SSEventExtOut =
| { type: "ASR_NOINPUT" }
| { type: "ASRTTS_READY" }
| { type: "ASR_STARTED" }
| { type: "TTS_STARTED" }
| { type: "SPEAK_COMPLETE" }
| { type: "RECOGNISED"; value: Hypothesis[]; nluValue?: any };

/**
 * Internal input events of the `speechstate` machine.
 * NOTE(review): presumably raised by the TTS / ASR child machines — confirm.
 */
type SSEventIntIn =
| { type: "TTS_READY" }
| { type: "ASR_READY" }
| { type: "TTS_ERROR" };

/** Union of all internal and external events; used as the machine's `events` type. */
type SSEvent = SSEventIntIn | SSEventExtIn | SSEventExtOut;

const speechstate = createMachine(
{
types: {} as {
input: Settings;
context: SSContext;
events: SSEvent;
events: SpeechStateEvent;
},
context: ({ input }) => ({
settings: input,
Expand Down
26 changes: 25 additions & 1 deletion src/tts.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ interface TTSContext extends TTSInit {
wsaUtt?: MySpeechSynthesisUtterance;
agenda?: Agenda;
buffer?: string;
currentVoice?: string;
utteranceFromStream?: string;
}

Expand All @@ -48,6 +49,7 @@ type TTSEvent =
| { type: "SPEAK"; value: Agenda }
| { type: "TTS_STARTED" }
| { type: "STREAMING_CHUNK"; value: string }
| { type: "STREAMING_SET_VOICE"; value: string }
| { type: "STREAMING_DONE" }
| { type: "SPEAK_COMPLETE" };

Expand All @@ -72,6 +74,17 @@ export const ttsMachine = setup({
context.buffer.substring(spaceIndex),
};
}),
assignCurrentVoice: assign(
({
event,
}: {
event: { type: "STREAMING_SET_VOICE"; value: string };
}) => {
return {
currentVoice: event.value,
};
},
),
},
actors: {
getToken: getToken,
Expand All @@ -90,6 +103,10 @@ export const ttsMachine = setup({
console.log("received streaming chunk:", event);
sendBack({ type: "STREAMING_CHUNK", value: event.data });
});
eventSource.addEventListener("STREAMING_SET_VOICE", (event) => {
console.log("received streaming voice set command:", event);
sendBack({ type: "STREAMING_SET_VOICE", value: event.data });
});
},
),
ponyfill: fromCallback<null, TTSPonyfillInput>(({ sendBack, input }) => {
Expand Down Expand Up @@ -228,6 +245,11 @@ export const ttsMachine = setup({
states: {
Buffer: {
initial: "BufferIdle",
on: {
STREAMING_SET_VOICE: {
actions: "assignCurrentVoice",
},
},
states: {
BufferIdle: {
id: "BufferIdle",
Expand Down Expand Up @@ -389,7 +411,9 @@ export const ttsMachine = setup({
wsaUtt: context.wsaUtt,
ttsLexicon: context.ttsLexicon,
voice:
context.agenda.voice || context.ttsDefaultVoice,
context.currentVoice ||
context.agenda.voice ||
context.ttsDefaultVoice,
utterance: context.utteranceFromStream,
}),
},
Expand Down
25 changes: 25 additions & 0 deletions src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -46,3 +46,28 @@ export interface RecogniseParameters {
hints?: string[];
nlu?: boolean | AzureLanguageCredentials;
}

/**
 * External input events: events sent to the spawned `speechstate` machine.
 * `SPEAK` carries an `Agenda`; `LISTEN` carries `RecogniseParameters`.
 */
type SSEventExtIn =
| { type: "PREPARE" }
| { type: "CONTROL" }
| { type: "STOP" }
| { type: "SPEAK"; value: Agenda }
| { type: "LISTEN"; value: RecogniseParameters };

/**
 * External output events, used with `sendParent` (which is not type-checked).
 * `RECOGNISED` carries a list of `Hypothesis` values and an optional `nluValue`.
 */
type SSEventExtOut =
| { type: "ASR_NOINPUT" }
| { type: "ASRTTS_READY" }
| { type: "ASR_STARTED" }
| { type: "TTS_STARTED" }
| { type: "SPEAK_COMPLETE" }
| { type: "RECOGNISED"; value: Hypothesis[]; nluValue?: any };

/**
 * Internal input events of the `speechstate` machine.
 * NOTE(review): presumably raised by the TTS / ASR child machines — confirm.
 */
type SSEventIntIn =
| { type: "TTS_READY" }
| { type: "ASR_READY" }
| { type: "TTS_ERROR" };

/** Events exchanged with the outside: external inputs plus external outputs. */
export type SpeechStateExternalEvent = SSEventExtIn | SSEventExtOut;
/** Every event type of the `speechstate` machine: internal inputs plus all external events. */
export type SpeechStateEvent = SSEventIntIn | SpeechStateExternalEvent;

0 comments on commit be7eed4

Please sign in to comment.