diff --git a/nvdaHelper/espeak/sconscript b/nvdaHelper/espeak/sconscript index 18bcd308f6..dceb2a143c 100644 --- a/nvdaHelper/espeak/sconscript +++ b/nvdaHelper/espeak/sconscript @@ -1,3 +1,8 @@ +# A part of NonVisual Desktop Access (NVDA) +# Copyright (C) 2011-2025 NV Access Limited +# This file may be used under the terms of the GNU General Public License, version 2 or later. +# For more details see: https://www.gnu.org/licenses/gpl-2.0.html + import enum import typing import os @@ -1034,10 +1039,13 @@ def espeak_compileDict_buildAction( return ACTION_SUCCESS -sonicLib = env.StaticLibrary( +sonicLib = env.SharedLibrary( target="sonic", srcdir=sonicSrcDir.abspath, - source="sonic.c", + source=[ + "sonic.c", + Dir(".").File("sonic.def"), + ], ) espeakLib = env.SharedLibrary( @@ -1082,7 +1090,6 @@ espeakLib = env.SharedLibrary( "tr_languages.c", "voices.c", "wavegen.c", - sonicLib, # espeak OPT_SPEECHPLAYER block "sPlayer.c", "../speechPlayer/src/frame.cpp", @@ -1100,7 +1107,8 @@ espeakLib = env.SharedLibrary( # com\ttsengine.cpp # We do not use the ASYNC compile option in espeak. ], - LIBS=["advapi32"], + LIBS=["advapi32", "sonic"], + LIBPATH=".", ) @@ -1151,6 +1159,7 @@ for dictFileName, (langCode, inputFiles) in espeakDictionaryCompileList.items(): ) env.Install(synthDriversDir, espeakLib) +env.Install(synthDriversDir, sonicLib) # install espeak-ng-data targetEspeakDataDir = synthDriversDir.Dir("espeak-ng-data") diff --git a/nvdaHelper/espeak/sonic.def b/nvdaHelper/espeak/sonic.def new file mode 100644 index 0000000000..89045cb240 --- /dev/null +++ b/nvdaHelper/espeak/sonic.def @@ -0,0 +1,37 @@ +; A part of NonVisual Desktop Access (NVDA) +; Copyright (C) 2025-2025 NV Access Limited +; This file may be used under the terms of the GNU General Public License, version 2 or later. 
+; For more details see: https://www.gnu.org/licenses/gpl-2.0.html + +LIBRARY sonic +EXPORTS + sonicCreateStream + sonicDestroyStream + sonicSetUserData + sonicGetUserData + sonicWriteFloatToStream + sonicWriteShortToStream + sonicWriteUnsignedCharToStream + sonicReadFloatFromStream + sonicReadShortFromStream + sonicReadUnsignedCharFromStream + sonicFlushStream + sonicSamplesAvailable + sonicGetSpeed + sonicSetSpeed + sonicGetPitch + sonicSetPitch + sonicGetRate + sonicSetRate + sonicGetVolume + sonicSetVolume + sonicGetChordPitch + sonicSetChordPitch + sonicGetQuality + sonicSetQuality + sonicGetSampleRate + sonicSetSampleRate + sonicGetNumChannels + sonicSetNumChannels + sonicChangeFloatSpeed + sonicChangeShortSpeed diff --git a/source/synthDrivers/_sonic.py b/source/synthDrivers/_sonic.py new file mode 100644 index 0000000000..dbfb9790f1 --- /dev/null +++ b/source/synthDrivers/_sonic.py @@ -0,0 +1,225 @@ +# A part of NonVisual Desktop Access (NVDA) +# Copyright (C) 2025-2025 NV Access Limited +# This file may be used under the terms of the GNU General Public License, version 2 or later. +# For more details see: https://www.gnu.org/licenses/gpl-2.0.html + +from ctypes import CDLL, POINTER, Array, c_float, c_int, c_short, c_ubyte, c_void_p, cdll +import os +from typing import TYPE_CHECKING +import globalVars +from logHandler import log + +if TYPE_CHECKING: + from ctypes import _Pointer + + c_float_p = _Pointer[c_float] + c_short_p = _Pointer[c_short] + c_ubyte_p = _Pointer[c_ubyte] +else: + c_float_p = POINTER(c_float) + c_short_p = POINTER(c_short) + c_ubyte_p = POINTER(c_ubyte) + +sonicLib: CDLL | None = None + + +class SonicStreamP(c_void_p): + pass + + +def initialize(): + """Initialize the Sonic DLL. + The sonic.dll file should be in the synthDrivers directory. 
+ This can be called more than once.""" + global sonicLib + if sonicLib: + return + log.debug("Initializing Sonic library") + sonicLib = cdll.LoadLibrary(os.path.join(globalVars.appDir, "synthDrivers", "sonic.dll")) + sonicLib.sonicCreateStream.restype = SonicStreamP + sonicLib.sonicCreateStream.argtypes = [c_int, c_int] + sonicLib.sonicDestroyStream.restype = None + sonicLib.sonicDestroyStream.argtypes = [SonicStreamP] + sonicLib.sonicWriteFloatToStream.restype = c_int + sonicLib.sonicWriteFloatToStream.argtypes = [SonicStreamP, c_void_p, c_int] + sonicLib.sonicWriteShortToStream.restype = c_int + sonicLib.sonicWriteShortToStream.argtypes = [SonicStreamP, c_void_p, c_int] + sonicLib.sonicWriteUnsignedCharToStream.restype = c_int + sonicLib.sonicWriteUnsignedCharToStream.argtypes = [SonicStreamP, c_void_p, c_int] + sonicLib.sonicReadFloatFromStream.restype = c_int + sonicLib.sonicReadFloatFromStream.argtypes = [SonicStreamP, c_void_p, c_int] + sonicLib.sonicReadShortFromStream.restype = c_int + sonicLib.sonicReadShortFromStream.argtypes = [SonicStreamP, c_void_p, c_int] + sonicLib.sonicReadUnsignedCharFromStream.restype = c_int + sonicLib.sonicReadUnsignedCharFromStream.argtypes = [SonicStreamP, c_void_p, c_int] + sonicLib.sonicFlushStream.restype = c_int + sonicLib.sonicFlushStream.argtypes = [SonicStreamP] + sonicLib.sonicSamplesAvailable.restype = c_int + sonicLib.sonicSamplesAvailable.argtypes = [SonicStreamP] + sonicLib.sonicGetSpeed.restype = c_float + sonicLib.sonicGetSpeed.argtypes = [SonicStreamP] + sonicLib.sonicSetSpeed.restype = None + sonicLib.sonicSetSpeed.argtypes = [SonicStreamP, c_float] + sonicLib.sonicGetPitch.restype = c_float + sonicLib.sonicGetPitch.argtypes = [SonicStreamP] + sonicLib.sonicSetPitch.restype = None + sonicLib.sonicSetPitch.argtypes = [SonicStreamP, c_float] + sonicLib.sonicGetRate.restype = c_float + sonicLib.sonicGetRate.argtypes = [SonicStreamP] + sonicLib.sonicSetRate.restype = None + sonicLib.sonicSetRate.argtypes = 
[SonicStreamP, c_float] + sonicLib.sonicGetVolume.restype = c_float + sonicLib.sonicGetVolume.argtypes = [SonicStreamP] + sonicLib.sonicSetVolume.restype = None + sonicLib.sonicSetVolume.argtypes = [SonicStreamP, c_float] + sonicLib.sonicGetQuality.restype = c_int + sonicLib.sonicGetQuality.argtypes = [SonicStreamP] + sonicLib.sonicSetQuality.restype = None + sonicLib.sonicSetQuality.argtypes = [SonicStreamP, c_int] + sonicLib.sonicGetSampleRate.restype = c_int + sonicLib.sonicGetSampleRate.argtypes = [SonicStreamP] + sonicLib.sonicSetSampleRate.restype = None + sonicLib.sonicSetSampleRate.argtypes = [SonicStreamP, c_int] + sonicLib.sonicGetNumChannels.restype = c_int + sonicLib.sonicGetNumChannels.argtypes = [SonicStreamP] + sonicLib.sonicSetNumChannels.restype = None + sonicLib.sonicSetNumChannels.argtypes = [SonicStreamP, c_int] + + +class SonicStream: + """ + Audio stream that wraps the Sonic library to process audio, + which is optimised for speeding up speech by high factors. + Audio data are stored internally as 16-bit integers. + """ + + def __init__(self, sampleRate: int, channels: int): + self.stream: SonicStreamP = sonicLib.sonicCreateStream(sampleRate, channels) + if not self.stream: + raise MemoryError() + + def __del__(self): + sonicLib.sonicDestroyStream(self.stream) + + def writeFloat(self, data: c_float_p, numSamples: int) -> None: + """Write 32-bit floating point data to be processed into the stream, + where each sample must be between -1 and 1. + :param data: A pointer to 32-bit floating point wave data. + :param numSamples: The number of samples. + Multiply this by channel count to get the total number of values. + :raises MemoryError: If memory allocation failed.""" + if not sonicLib.sonicWriteFloatToStream(self.stream, data, numSamples): + raise MemoryError() + + def writeShort(self, data: c_short_p, numSamples: int) -> None: + """Write 16-bit integer data to be processed into the stream. + :param data: A pointer to 16-bit integer wave data. 
+ :param numSamples: The number of samples. + Multiply this by channel count to get the total number of values. + :raises MemoryError: If memory allocation failed.""" + if not sonicLib.sonicWriteShortToStream(self.stream, data, numSamples): + raise MemoryError() + + def writeUnsignedChar(self, data: c_ubyte_p, numSamples: int) -> None: + """Write 8-bit unsigned integer data to be processed into the stream. + :param data: A pointer to 8-bit integer wave data. + :param numSamples: The number of samples. + Multiply this by channel count to get the total number of values. + :raises MemoryError: If memory allocation failed.""" + if not sonicLib.sonicWriteUnsignedCharToStream(self.stream, data, numSamples): + raise MemoryError() + + def readFloat(self) -> Array[c_float]: + """Read processed data from the stream as 32-bit floating point data.""" + samples = self.samplesAvailable + arrayLength = samples * self.channels + buffer = (c_float * arrayLength)() + sonicLib.sonicReadFloatFromStream(self.stream, buffer, samples) + return buffer + + def readShort(self) -> Array[c_short]: + """Read processed data from the stream as 16-bit integer data.""" + samples = self.samplesAvailable + arrayLength = samples * self.channels + buffer = (c_short * arrayLength)() + sonicLib.sonicReadShortFromStream(self.stream, buffer, samples) + return buffer + + def readUnsignedChar(self) -> Array[c_ubyte]: + """Read processed data from the stream as 8-bit unsigned integer data.""" + samples = self.samplesAvailable + arrayLength = samples * self.channels + buffer = (c_ubyte * arrayLength)() + sonicLib.sonicReadUnsignedCharFromStream(self.stream, buffer, samples) + return buffer + + def flush(self) -> None: + """Force the sonic stream to generate output using whatever data it currently has. + No extra delay will be added to the output, but flushing in the middle of words could introduce distortion. + This is usually done when data writing is completed. 
+ :raises MemoryError: If memory allocation failed.""" + if not sonicLib.sonicFlushStream(self.stream): + raise MemoryError() + + @property + def samplesAvailable(self) -> int: + return sonicLib.sonicSamplesAvailable(self.stream) + + @property + def speed(self) -> float: + return sonicLib.sonicGetSpeed(self.stream) + + @speed.setter + def speed(self, value: float): + sonicLib.sonicSetSpeed(self.stream, value) + + @property + def pitch(self) -> float: + return sonicLib.sonicGetPitch(self.stream) + + @pitch.setter + def pitch(self, value: float): + sonicLib.sonicSetPitch(self.stream, value) + + @property + def rate(self) -> float: + """This scales pitch and speed at the same time.""" + return sonicLib.sonicGetRate(self.stream) + + @rate.setter + def rate(self, value: float): + sonicLib.sonicSetRate(self.stream, value) + + @property + def volume(self) -> float: + """The scaling factor of the stream.""" + return sonicLib.sonicGetVolume(self.stream) + + @volume.setter + def volume(self, value: float): + sonicLib.sonicSetVolume(self.stream, value) + + @property + def quality(self) -> int: + """Default 0 is virtually as good as 1, but very much faster.""" + return sonicLib.sonicGetQuality(self.stream) + + @quality.setter + def quality(self, value: int): + sonicLib.sonicSetQuality(self.stream, value) + + @property + def sampleRate(self) -> int: + return sonicLib.sonicGetSampleRate(self.stream) + + @sampleRate.setter + def sampleRate(self, value: int): + sonicLib.sonicSetSampleRate(self.stream, value) + + @property + def channels(self) -> int: + return sonicLib.sonicGetNumChannels(self.stream) + + @channels.setter + def channels(self, value: int): + sonicLib.sonicSetNumChannels(self.stream, value) diff --git a/source/synthDrivers/sapi5.py b/source/synthDrivers/sapi5.py index bc70db39cd..7f75210496 100644 --- a/source/synthDrivers/sapi5.py +++ b/source/synthDrivers/sapi5.py @@ -1,6 +1,6 @@ # -*- coding: UTF-8 -*- # A part of NonVisual Desktop Access (NVDA) -# Copyright (C) 
2006-2024 NV Access Limited, Peter Vágner, Aleksey Sadovoy +# Copyright (C) 2006-2025 NV Access Limited, Peter Vágner, Aleksey Sadovoy # This file is covered by the GNU General Public License. # See the file COPYING for more details. @@ -32,6 +32,7 @@ PhonemeCommand, SpeechCommand, ) +from ._sonic import SonicStream, initialize as sonicInitialize class SpeechVoiceSpeakFlags(IntEnum): @@ -94,7 +95,9 @@ def ISequentialStream_RemoteWrite( return hresult.E_UNEXPECTED if not synth.isSpeaking: return hresult.E_FAIL - synth.player.feed(pv, cb) + synth.sonicStream.writeShort(pv, cb // 2 // synth.sonicStream.channels) + audioData = synth.sonicStream.readShort() + synth.player.feed(audioData, len(audioData) * 2) if pcbWritten: pcbWritten[0] = cb self._writtenBytes += cb @@ -194,6 +197,10 @@ def Bookmark(self, streamNum: int, pos: int, bookmark: str, bookmarkId: int): def EndStream(self, streamNum: int, pos: int): synth = self.synthRef() + # Flush the stream and get the remaining data. + synth.sonicStream.flush() + audioData = synth.sonicStream.readShort() + synth.player.feed(audioData, len(audioData) * 2) synth.player.idle() # trigger all untriggered bookmarks if streamNum in synth._streamBookmarks: @@ -221,6 +228,7 @@ class SynthDriver(SynthDriver): supportedSettings = ( SynthDriver.VoiceSetting(), SynthDriver.RateSetting(), + SynthDriver.RateBoostSetting(), SynthDriver.PitchSetting(), SynthDriver.VolumeSetting(), ) @@ -257,8 +265,10 @@ def __init__(self, _defaultVoiceToken=None): @type _defaultVoiceToken: ISpeechObjectToken """ self._pitch = 50 + self._rate = 50 self.player = None self.isSpeaking = False + self._rateBoost = False self._initTts(_defaultVoiceToken) # key = stream num, value = deque of bookmarks self._streamBookmarks = dict() # bookmarks in currently speaking streams @@ -293,7 +303,10 @@ def _getVoiceTokens(self): return self.tts.getVoices() def _get_rate(self): - return (self.tts.rate * 5) + 50 + return self._rate + + def _get_rateBoost(self): + return 
self._rateBoost def _get_pitch(self): return self._pitch @@ -311,11 +324,32 @@ def _get_lastIndex(self): else: return None + @classmethod + def _percentToParam(self, percent, min, max) -> float: + """Overrides SynthDriver._percentToParam to return floating point parameter values.""" + return float(percent) / 100 * (max - min) + min + def _percentToRate(self, percent): return (percent - 50) // 5 def _set_rate(self, rate): - self.tts.Rate = self._percentToRate(rate) + self._rate = rate + if self._rateBoost: + # When rate boost is enabled, use sonicStream to change the speed. + # Supports 0.5x~6x speed. + self.tts.Rate = 0 + self.sonicStream.speed = self._percentToParam(rate, 0.5, 6.0) + else: + # When rate boost is disabled, let the voice itself change the speed. + self.tts.Rate = self._percentToRate(rate) + self.sonicStream.speed = 1 + + def _set_rateBoost(self, enable: bool): + if enable == self._rateBoost: + return + rate = self._rate + self._rateBoost = enable + self.rate = rate def _set_pitch(self, value): # pitch is really controled with xml around speak commands @@ -336,6 +370,11 @@ def _initTts(self, voice=None): self.tts.AudioOutput = self.tts.AudioOutput # Reset the audio and its format parameters fmt = self.tts.AudioOutputStream.Format wfx = fmt.GetWaveFormatEx() + # Force the wave format to be 16-bit integer (which Sonic uses internally). + # SAPI will convert the format for us if it isn't supported by the voice. 
+ wfx.FormatTag = nvwave.WAVE_FORMAT_PCM + wfx.BitsPerSample = 16 + fmt.SetWaveFormatEx(wfx) if self.player: self.player.close() self.player = nvwave.WavePlayer( @@ -350,6 +389,8 @@ def _initTts(self, voice=None): customStream.BaseStream = audioStream customStream.Format = fmt self.tts.AudioOutputStream = customStream + sonicInitialize() + self.sonicStream = SonicStream(wfx.SamplesPerSec, wfx.Channels) # Set event notify sink self.tts.EventInterests = ( diff --git a/user_docs/en/changes.md b/user_docs/en/changes.md index 1d701a8811..30d4755a33 100644 --- a/user_docs/en/changes.md +++ b/user_docs/en/changes.md @@ -28,6 +28,7 @@ To use this feature, "allow NVDA to control the volume of other applications" mu * NVDA can now be configured to speak the current line or paragraph when navigating with braille navigation keys. (#17053, @nvdaes) * In Word, the selection update is now reported when using Word commands to extend or reduce the selection (`f8` or `shift+f8`). (#3293, @CyrilleB79) * In Microsoft Word 16.0.18226 and higher or when using Word object model, NVDA will now report if a heading is collapsed in both speech and braille. (#17499) +* Rate boost is now supported when using Microsoft Speech API version 5 (SAPI5) and Microsoft Speech Platform voices, allowing speeds of up to 6X. (#17606, @gexgd0419) ### Changes