Skip to content

Commit

Permalink
Add TTS test, implement server-sent events server
Browse files (browse the repository at this point in the history)
  • Loading branch information
vladmaraev committed Sep 7, 2024
1 parent f5b5ed3 commit 677429f
Show file tree
Hide file tree
Showing 12 changed files with 792 additions and 169 deletions.
27 changes: 9 additions & 18 deletions package.json
Original file line number | Diff line number | Diff line change
Expand Up @@ -2,12 +2,11 @@
"name": "speechstate",
"version": "2.5.0",
"license": "GPL-3.0",
"type": "module",
"homepage": "http://localhost/speechstate",
"main": "./dist/index.js",
"types": "./dist/index.d.ts",
"files": [
"/dist"
],
"files": ["/dist"],
"dependencies": {
"@davi-ai/web-speech-cognitive-services-davi": "^2.0.8",
"microsoft-cognitiveservices-speech-sdk": "^1.38.0",
Expand All @@ -18,20 +17,14 @@
"dev": "vite",
"compile": "tsc",
"test": "vitest",
"test:browser": "vitest"
"test:browser": "vitest",
"sse": "node test/server"
},
"eslintConfig": {
"extends": [
"react-app",
"react-app/jest"
]
"extends": ["react-app", "react-app/jest"]
},
"browserslist": {
"production": [
">0.2%",
"not dead",
"not op_mini all"
],
"production": [">0.2%", "not dead", "not op_mini all"],
"development": [
"last 1 chrome version",
"last 1 firefox version",
Expand All @@ -45,6 +38,8 @@
"@types/webspeechapi": "^0.0.29",
"@vitest/browser": "^2.0.5",
"buffer": "^5.5.0||^6.0.0",
"cors": "^2.8.5",
"express": "^4.19.2",
"jest": "^29.5.0",
"jest-environment-jsdom": "^29.5.0",
"ts-jest": "^29.1.0",
Expand All @@ -54,11 +49,7 @@
"webdriverio": "^9.0.7",
"ws": "^8.16.0"
},
"trustedDependencies": [
"p-defer-es5",
"edgedriver",
"core-js-pure"
],
"trustedDependencies": ["p-defer-es5", "edgedriver", "core-js-pure"],
"packageManager": "[email protected]",
"description": "* SDK",
"directories": {
Expand Down
3 changes: 2 additions & 1 deletion src/asr.ts
Original file line number | Diff line number | Diff line change
Expand Up @@ -52,7 +52,8 @@ export const asrMachine = setup({
wsaGrammarList: SpeechGrammarList,
},
});
console.debug("[ASR] READY", input);
const { azureAuthorizationToken, ...rest } = input;
console.debug("[ASR] READY", rest);
}),
recStart: fromCallback(
({ sendBack, input }: { sendBack: any; input: any }) => {
Expand Down
3 changes: 2 additions & 1 deletion src/dev.ts
Original file line number | Diff line number | Diff line change
Expand Up @@ -7,13 +7,14 @@ import {
Settings,
} from "./types";
import { createBrowserInspector } from "@statelyai/inspect";
import { AZURE_KEY } from "./credentials";

const inspector = createBrowserInspector();

const azureSpeechCredentials: AzureSpeechCredentials = {
endpoint:
"https://northeurope.api.cognitive.microsoft.com/sts/v1.0/issuetoken",
key: "",
key: AZURE_KEY,
};

const azureLanguageCredentials: AzureLanguageCredentials = {
Expand Down
123 changes: 43 additions & 80 deletions src/speechstate.ts
Original file line number | Diff line number | Diff line change
Expand Up @@ -195,16 +195,7 @@ const speechstate = setup({
},
},
Speaking: {
initial: "Proceed",
entry: [
({ event }) =>
console.debug("[SpSt→TTS] SPEAK", (event as any).value),
({ context, event }) =>
context.ttsRef.send({
type: "SPEAK",
value: (event as any).value,
}),
],
initial: "Start",
on: {
STOP: {
target: "#speechstate.Stopped",
Expand All @@ -216,10 +207,14 @@ const speechstate = setup({
}),
],
},
TTS_STARTED: {
VISEME: {
actions: [
() => console.debug("[TTS→SpSt] TTS_STARTED"),
sendParent({ type: "TTS_STARTED" }),
({ event }) =>
console.debug("[TTS→SpSt] VISEME", event.value),
sendParent(({ event }) => ({
type: "VISEME",
value: event.value,
})),
],
},
SPEAK_COMPLETE: {
Expand All @@ -229,76 +224,44 @@ const speechstate = setup({
sendParent({ type: "SPEAK_COMPLETE" }),
],
},
STREAMING_SET_PERSONA: {
actions: [
() => console.debug("[TTS→SpSt] STREAMING_SET_PERSONA"),
sendParent(({ event }) => ({
type: "STREAMING_SET_PERSONA",
value: event.value,
})),
],
},
},
Speaking: {
initial: "Proceed",
entry: [
({ event }) =>
console.debug("[SpSt→TTS] SPEAK", (event as any).value),
({ context, event }) =>
context.ttsRef.send({
type: "SPEAK",
value: (event as any).value,
}),
],
on: {
STOP: {
target: "#speechstate.Stopped",
actions: [
({}) => console.debug("[SpSt→TTS] STOP"),
({ context }) =>
context.ttsRef.send({
type: "STOP",
}),
],
},
TTS_STARTED: {
actions: [
() => console.debug("[TTS→SpSt] TTS_STARTED"),
sendParent({ type: "TTS_STARTED" }),
],
},
VISEME: {
actions: [
({ event }) =>
console.debug("[TTS→SpSt] VISEME", event.value),
sendParent(({ event }) => ({
type: "VISEME",
value: event.value,
})),
],
},
SPEAK_COMPLETE: {
target: "Idle",
actions: [
() => console.debug("[TTS→SpSt] SPEAK_COMPLETE"),
sendParent({ type: "SPEAK_COMPLETE" }),
],
states: {
Start: {
meta: { view: "idle" },
entry: [
({ event }) =>
console.debug(
"[SpSt→TTS] SPEAK",
(event as any).value,
),
({ context, event }) =>
context.ttsRef.send({
type: "SPEAK",
value: (event as any).value,
}),
],
on: {
TTS_STARTED: {
target: "Proceed",
actions: [
() => console.debug("[TTS→SpSt] TTS_STARTED"),
sendParent({ type: "TTS_STARTED" }),
],
},
},
},
states: {
Proceed: {
meta: { view: "speaking" },
on: {
CONTROL: {
target: "Paused",
actions: [
() => console.debug("[SpSt→TTS] CONTROL"),
({ context }) =>
context.ttsRef.send({
type: "CONTROL",
}),
],
},
Proceed: {
meta: { view: "speaking" },
on: {
CONTROL: {
target: "Paused",
actions: [
() => console.debug("[SpSt→TTS] CONTROL"),
({ context }) =>
context.ttsRef.send({
type: "CONTROL",
}),
],
},
},
},
Expand Down
47 changes: 23 additions & 24 deletions src/tts.ts
Original file line number | Diff line number | Diff line change
Expand Up @@ -8,26 +8,18 @@ import {
} from "xstate";

import {
AzureSpeechCredentials,
Agenda,
TTSInit,
TTSEvent,
TTSContext,
TTSPonyfillInput,
ConstructableSpeechSynthesisUtterance,
} from "./types";

import { getToken } from "./getToken";

import createSpeechSynthesisPonyfill from "@davi-ai/web-speech-cognitive-services-davi";
import type {
SpeechSynthesisUtterance,
SpeechSynthesisEventProps,
} from "@davi-ai/web-speech-cognitive-services-davi";

interface ConstructableSpeechSynthesisUtterance
extends SpeechSynthesisUtterance {
new (s: string);
}
import type { SpeechSynthesisEventProps } from "@davi-ai/web-speech-cognitive-services-davi";

const UTTERANCE_CHUNK_REGEX = /(^.*([!?]+|([.,]+\s)))/;

Expand Down Expand Up @@ -113,7 +105,7 @@ export const ttsMachine = setup({
tts.onvoiceschanged = () => {
const voices = tts.getVoices();
if (voices.length > 0) {
console.debug("[TTS] READY", tts);
console.debug("[TTS] READY");
sendBack({
type: "READY",
value: { wsaTTS: tts, wsaUtt: ttsUtterance },
Expand All @@ -132,6 +124,7 @@ export const ttsMachine = setup({
ttsLexicon: string;
wsaUtt: ConstructableSpeechSynthesisUtterance;
wsaTTS: SpeechSynthesis;
visemes?: boolean;
}
>(({ sendBack, input }) => {
if (["", " "].includes(input.utterance)) {
Expand All @@ -154,15 +147,17 @@ export const ttsMachine = setup({
sendBack({ type: "SPEAK_COMPLETE" });
console.debug("[TTS] SPEAK_COMPLETE");
};
utterance.onviseme = (event: SpeechSynthesisEventProps) => {
const name = event.name;
const fromStart = event.elapsedTime / 1e6;
sendBack({
type: "VISEME",
value: { name: name, frames: [visemeStart, fromStart] },
});
visemeStart = event.elapsedTime / 1e6;
};
if (input.visemes) {
utterance.onviseme = (event: SpeechSynthesisEventProps) => {
const name = event.name;
const fromStart = event.elapsedTime / 1e6;
sendBack({
type: "VISEME",
value: { name: name, frames: [visemeStart, fromStart] },
});
visemeStart = event.elapsedTime / 1e6;
};
}
input.wsaTTS.speak(utterance);
}
}),
Expand Down Expand Up @@ -420,6 +415,7 @@ export const ttsMachine = setup({
wsaTTS: context.wsaTTS,
wsaUtt: context.wsaUtt,
ttsLexicon: context.ttsLexicon,
visemes: context.agenda.visemes,
voice:
context.currentVoice ||
context.agenda.voice ||
Expand Down Expand Up @@ -454,10 +450,12 @@ export const ttsMachine = setup({
actions: sendParent({ type: "TTS_STARTED" }),
},
VISEME: {
actions: sendParent(({ event }) => ({
type: "VISEME",
value: event.value,
})),
actions: sendParent(
({ event }: { event: { type: "VISEME"; value: any } }) => ({
type: "VISEME",
value: event.value,
}),
),
},
SPEAK_COMPLETE: {
target: "Idle",
Expand All @@ -473,6 +471,7 @@ export const ttsMachine = setup({
wsaUtt: context.wsaUtt,
ttsLexicon: context.ttsLexicon,
voice: context.agenda.voice || context.ttsDefaultVoice,
visemes: context.agenda.visemes,
// streamURL: context.agenda.streamURL,
utterance: context.agenda.utterance,
}),
Expand Down
Loading

0 comments on commit 677429f

Please sign in to comment.