diff --git a/Content.Client/Entry/EntryPoint.cs b/Content.Client/Entry/EntryPoint.cs index cff8bd326d57..acb98563b913 100644 --- a/Content.Client/Entry/EntryPoint.cs +++ b/Content.Client/Entry/EntryPoint.cs @@ -41,6 +41,7 @@ using Robust.Shared.Replays; using Content.Client.SS220.Discord; using Robust.Shared.Timing; +using Content.Client.SS220.TTS; namespace Content.Client.Entry { @@ -81,6 +82,7 @@ public sealed class EntryPoint : GameClient [Dependency] private readonly DiscordPlayerInfoManager _discordPlayerInfoManager = default!; // SS220 discord info manager [Dependency] private readonly DebugMonitorManager _debugMonitorManager = default!; [Dependency] private readonly TitleWindowManager _titleWindowManager = default!; + [Dependency] private readonly TTSManager _ttsManager = default!; // SS220 TTS public override void Init() { @@ -182,6 +184,7 @@ public override void PostInit() _documentParsingManager.Initialize(); _discordPlayerInfoManager.Initialize(); // SS220 tier info _titleWindowManager.Initialize(); + _ttsManager.Initialize(); // SS220 TTS _baseClient.RunLevelChanged += (_, args) => { diff --git a/Content.Client/IoC/ClientContentIoC.cs b/Content.Client/IoC/ClientContentIoC.cs index b670f9225702..9f909ef17ff3 100644 --- a/Content.Client/IoC/ClientContentIoC.cs +++ b/Content.Client/IoC/ClientContentIoC.cs @@ -28,6 +28,7 @@ using Content.Shared.Chat; using Content.Shared.Players.PlayTimeTracking; using Content.Shared.Players.RateLimiting; +using Content.Client.SS220.TTS; namespace Content.Client.IoC { @@ -67,6 +68,7 @@ public static void Register() collection.Register(); collection.Register(); collection.Register(); + collection.Register(); // SS220 TTS } } } diff --git a/Content.Client/SS220/TTS/TTSManager.cs b/Content.Client/SS220/TTS/TTSManager.cs new file mode 100644 index 000000000000..36adf1f3b010 --- /dev/null +++ b/Content.Client/SS220/TTS/TTSManager.cs @@ -0,0 +1,20 @@ +// © SS220, An EULA/CLA with a hosting restriction, full text: 
https://raw.githubusercontent.com/SerbiaStrong-220/space-station-14/master/CLA.txt + +using Content.Shared.SS220.TTS; +using Robust.Shared.Network; + +namespace Content.Client.SS220.TTS; + +public sealed class TTSManager +{ + [Dependency] private readonly IClientNetManager _netManager = default!; + + public event Action? PlayTtsReceived; + public event Action? PlayAnnounceTtsReceived; + + public void Initialize() + { + _netManager.RegisterNetMessage(x => PlayTtsReceived?.Invoke(x)); + _netManager.RegisterNetMessage(x => PlayAnnounceTtsReceived?.Invoke(x)); + } +} diff --git a/Content.Client/SS220/TTS/TTSSystem.Announce.cs b/Content.Client/SS220/TTS/TTSSystem.Announce.cs index 309abae2a3d8..fd3505e42b89 100644 --- a/Content.Client/SS220/TTS/TTSSystem.Announce.cs +++ b/Content.Client/SS220/TTS/TTSSystem.Announce.cs @@ -1,7 +1,7 @@ // © SS220, An EULA/CLA with a hosting restriction, full text: https://raw.githubusercontent.com/SerbiaStrong-220/space-station-14/master/CLA.txt using Content.Shared.Corvax.CCCVars; -using Content.Shared.SS220.AnnounceTTS; +using Content.Shared.SS220.TTS; using Robust.Shared.Audio; using Robust.Shared.Utility; @@ -16,30 +16,31 @@ public sealed partial class TTSSystem : EntitySystem private void InitializeAnnounces() { _cfg.OnValueChanged(CCCVars.TTSAnnounceVolume, OnTtsAnnounceVolumeChanged, true); - SubscribeNetworkEvent(OnAnnounceTTSPlay); + _ttsManager.PlayAnnounceTtsReceived += OnAnnounceTtsPlay; } private void ShutdownAnnounces() { _cfg.UnsubValueChanged(CCCVars.TTSAnnounceVolume, OnTtsAnnounceVolumeChanged); + _ttsManager.PlayAnnounceTtsReceived -= OnAnnounceTtsPlay; } - private void OnAnnounceTTSPlay(AnnounceTTSEvent ev) + private void OnAnnounceTtsPlay(MsgPlayAnnounceTts msg) { // Early creation of entities can lead to crashes, so we postpone it as much as possible if (AnnouncementUid == EntityUid.Invalid) AnnouncementUid = Spawn(null); - var volume = AdjustVolume(isRadio: false, isAnounce: true, isWhisper: false); + var volume = 
AdjustVolume(TtsKind.Announce); var audioParams = AudioParams.Default.WithVolume(volume); // Play announcement sound - var announcementSoundPath = new ResPath(ev.AnnouncementSound); + var announcementSoundPath = new ResPath(msg.AnnouncementSound); PlaySoundQueued(AnnouncementUid, announcementSoundPath, audioParams, true); // Play announcement itself - PlayTTSBytes(ev.Data, AnnouncementUid, audioParams, true); + PlayTtsBytes(msg.Data, AnnouncementUid, audioParams, true); } private void OnTtsAnnounceVolumeChanged(float volume) diff --git a/Content.Client/SS220/TTS/TTSSystem.cs b/Content.Client/SS220/TTS/TTSSystem.cs index f6617378875f..7bc3ab854871 100644 --- a/Content.Client/SS220/TTS/TTSSystem.cs +++ b/Content.Client/SS220/TTS/TTSSystem.cs @@ -3,8 +3,6 @@ using Content.Shared.Corvax.CCCVars; using Content.Shared.SS220.TTS; using Content.Shared.SS220.TTS.Commands; -using Robust.Client.Audio; -using Robust.Client.GameObjects; using Robust.Client.ResourceManagement; using Robust.Shared.Audio; using Robust.Shared.Audio.Components; @@ -26,6 +24,7 @@ public sealed partial class TTSSystem : EntitySystem [Dependency] private readonly IResourceCache _resourceCache = default!; [Dependency] private readonly SharedAudioSystem _audio = default!; [Dependency] private readonly IDependencyCollection _dependencyCollection = default!; + [Dependency] private readonly TTSManager _ttsManager = default!; private ISawmill _sawmill = default!; @@ -62,9 +61,10 @@ public override void Initialize() _cfg.OnValueChanged(CCCVars.TTSVolume, OnTtsVolumeChanged, true); _cfg.OnValueChanged(CCCVars.TTSRadioVolume, OnTtsRadioVolumeChanged, true); - SubscribeNetworkEvent(OnPlayTTS); SubscribeNetworkEvent(OnQueueResetRequest); + _ttsManager.PlayTtsReceived += OnPlayTts; + InitializeAnnounces(); } @@ -77,6 +77,8 @@ public override void Shutdown() // clear virtual files ContentRoot.Clear(); + _ttsManager.PlayTtsReceived -= OnPlayTts; + ShutdownAnnounces(); ResetQueuesAndEndStreams(); } @@ -157,7 +159,7 
@@ public override void FrameUpdate(float frameTime) SoundPathSpecifier soundPath; if (request is PlayRequestById requestById) { - tempFilePath = new ResPath($"{requestById.FileIdx}.wav"); + tempFilePath = new ResPath($"{requestById.FileIdx}.ogg"); soundPath = new SoundPathSpecifier(Prefix / tempFilePath.Value, requestById.Params); } else if (request is PlayRequestByPath requestByPath) @@ -229,7 +231,7 @@ private void PlaySoundQueued(EntityUid entity, ResPath sound, AudioParams? audio TryQueueRequest(entity, request); } - private void PlayTTSBytes(byte[] data, EntityUid? sourceUid = null, AudioParams? audioParams = null, bool globally = false) + private void PlayTtsBytes(TtsAudioData data, EntityUid? sourceUid = null, AudioParams? audioParams = null, bool globally = false) { _sawmill.Debug($"Play TTS audio {data.Length} bytes from {sourceUid} entity"); if (data.Length == 0) @@ -237,8 +239,8 @@ private void PlayTTSBytes(byte[] data, EntityUid? sourceUid = null, AudioParams? var finalParams = audioParams ?? AudioParams.Default; - var filePath = new ResPath($"{_fileIdx}.wav"); - ContentRoot.AddOrUpdateFile(filePath, data); + var filePath = new ResPath($"{_fileIdx}.ogg"); + ContentRoot.AddOrUpdateFile(filePath, data.Buffer); // Cache does a funny. // If we have disconnected and reconnected, the Idx will be reset @@ -266,22 +268,26 @@ private void PlayTTSBytes(byte[] data, EntityUid? sourceUid = null, AudioParams? _fileIdx++; } - private void OnPlayTTS(PlayTTSEvent ev) + private void OnPlayTts(MsgPlayTts msg) { - var volume = AdjustVolume(ev.IsRadio, isAnounce: false, ev.IsWhisper); - + var volume = AdjustVolume(msg.Kind); var audioParams = AudioParams.Default.WithVolume(volume); - PlayTTSBytes(ev.Data, GetEntity(ev.SourceUid), audioParams); + PlayTtsBytes(msg.Data, GetEntity(msg.SourceUid), audioParams); } - private float AdjustVolume(bool isRadio, bool isAnounce, bool isWhisper) + private float AdjustVolume(TtsKind kind) { - var volume = isRadio ? 
_radioVolume : isAnounce ? VolumeAnnounce : _volume; + var volume = kind switch + { + TtsKind.Radio => _radioVolume, + TtsKind.Announce => VolumeAnnounce, + _ => _volume, + }; volume = SharedAudioSystem.GainToVolume(volume); - if (isWhisper) + if (kind == TtsKind.Whisper) { volume -= SharedAudioSystem.GainToVolume(WhisperFade); } diff --git a/Content.Server/SS220/TTS/TTSManager.cs b/Content.Server/SS220/TTS/TTSManager.cs index 5184814e2789..3691ef5a6a29 100644 --- a/Content.Server/SS220/TTS/TTSManager.cs +++ b/Content.Server/SS220/TTS/TTSManager.cs @@ -1,23 +1,29 @@ +// © SS220, An EULA/CLA with a hosting restriction, full text: https://raw.githubusercontent.com/SerbiaStrong-220/space-station-14/master/CLA.txt + +using System.Buffers; using System.Collections.Concurrent; +using System.Collections.Specialized; +using System.ComponentModel; +using System.Diagnostics.CodeAnalysis; using System.Globalization; using System.IO; using System.Linq; using System.Net; using System.Net.Http; -using System.Net.Http.Json; using System.Text; -using System.Text.Json.Serialization; using System.Threading; using System.Threading.Tasks; +using System.Web; using Content.Shared.Corvax.CCCVars; +using Content.Shared.SS220.TTS; using FFMpegCore; using FFMpegCore.Arguments; using FFMpegCore.Pipes; +using Microsoft.IO; using Prometheus; using Robust.Shared.Configuration; -using System.ComponentModel; -using System.Collections.Specialized; -using System.Web; +using Robust.Shared.Network; +using Robust.Shared.Utility; namespace Content.Server.SS220.TTS; @@ -49,20 +55,21 @@ public sealed class TTSManager "tts_reused_radio_count", "Amount of reused TTS audio from cache."); + private const string AudioFileExtension = "ogg"; + [Dependency] private readonly IConfigurationManager _cfg = default!; + [Dependency] private readonly IServerNetManager _netManager = default!; private readonly HttpClient _httpClient = new(); private ISawmill _sawmill = default!; - private readonly ConcurrentDictionary 
_cache = new(); - private readonly HashSet _cacheKeysSeq = new(); - private readonly ConcurrentDictionary _cacheRadio = new(); - private readonly HashSet _cacheRadioKeysSeq = new(); + private readonly TtsCache _cache = new(0); + private readonly TtsResponseManager _responseManager = new(); + private readonly RecyclableMemoryStreamManager _memoryStreamPool = new(); - private static readonly ConcurrentDictionary Locks = new(); + private static readonly ConcurrentDictionary ResponsesInProgress = new(); private float _timeout = 1; - private int _maxCachedCount = 200; private string _apiUrl = string.Empty; private string _apiToken = string.Empty; @@ -71,7 +78,7 @@ public void Initialize() _sawmill = Logger.GetSawmill("tts"); _cfg.OnValueChanged(CCCVars.TTSMaxCache, val => { - _maxCachedCount = val; + _cache.Limit = val; ResetCache(); }, true); _cfg.OnValueChanged(CCCVars.TTSRequestTimeout, val => _timeout = val, true); @@ -82,6 +89,9 @@ public void Initialize() _apiToken = v; }, true); + + _netManager.RegisterNetMessage(); + _netManager.RegisterNetMessage(); } /// @@ -89,25 +99,15 @@ public void Initialize() /// /// Identifier of speaker /// SSML formatted text - /// WAV audio bytes or null if failed - public async Task ConvertTextToSpeech(string speaker, string text, bool isRadio , bool isAnnounce) + /// File audio bytes or empty if failed + public async Task.Handle?> ConvertTextToSpeech(string speaker, string text, TtsKind kind) { WantedCount.Inc(); - var cacheKey = GenerateCacheKey(speaker, text, isRadio); - return await ExecuteWithNamedLockAsync(cacheKey, async () => + return await StartTtsRequest(new(speaker, text, kind), + async (request, response) => { - if (_cache.TryGetValue(cacheKey, out var data)) - { - ReusedCount.Inc(); - _sawmill.Debug($"Use cached sound for '{text}' speech by '{speaker}' speaker"); - return data; - } - - if (isRadio) - _sawmill.Debug($"Generate new radio sound for '{text}' speech by '{speaker}' speaker"); - else - 
_sawmill.Debug($"Generate new audio for '{text}' speech by '{speaker}' speaker"); + _sawmill.Debug($"Generate new sound for '{text}' speech by '{speaker}' speaker with kind '{kind}'"); var reqTime = DateTime.UtcNow; try @@ -117,101 +117,86 @@ public void Initialize() var requestUrl = $"{_apiUrl}" + ToQueryString(new NameValueCollection() { { "speaker", speaker }, { "text", text }, - { "ext", "wav" }}); + { "ext", AudioFileExtension }}); - if (isRadio) + if (kind == TtsKind.Radio) { requestUrl += "&effect=radio"; } - if (isAnnounce) + if (kind == TtsKind.Announce) { requestUrl += "&effect=announce"; } - var response = await _httpClient.GetAsync(requestUrl, cts.Token); - if (!response.IsSuccessStatusCode) + var httpRequest = new HttpRequestMessage(HttpMethod.Get, requestUrl); + var httpResponse = await _httpClient.SendAsync(httpRequest, cts.Token); + if (!httpResponse.IsSuccessStatusCode) { - if (response.StatusCode == HttpStatusCode.TooManyRequests) + if (httpResponse.StatusCode == HttpStatusCode.TooManyRequests) { _sawmill.Warning("TTS request was rate limited"); - return null; + return false; } - _sawmill.Error($"TTS request returned bad status code: {response.StatusCode}"); - return null; + _sawmill.Error($"TTS request returned bad status code: {httpResponse.StatusCode}"); + return false; } - var soundData = await response.Content.ReadAsByteArrayAsync(); + using var memoryStream = _memoryStreamPool.GetStream("TtsStream", 1024 * 64); - _cache.AddOrUpdate(cacheKey, soundData, (_, __) => soundData); - _cacheKeysSeq.Add(cacheKey); - if (_cache.Count > _maxCachedCount) - { - var firstKey = _cacheKeysSeq.First(); - _cache.TryRemove(firstKey, out _); - _cacheKeysSeq.Remove(firstKey); - } + memoryStream.Position = 0; + memoryStream.SetLength(0); - if (isRadio) - _sawmill.Debug($"Generated new radio sound for '{text}' speech by '{speaker}' speaker ({soundData.Length} bytes)"); - else - _sawmill.Debug($"Generated new sound for '{text}' speech by '{speaker}' speaker 
({soundData.Length} bytes)"); - RequestTimings.WithLabels("Success").Observe((DateTime.UtcNow - reqTime).TotalSeconds); + await httpResponse.Content.CopyToAsync(memoryStream, cts.Token); + _responseManager.AllocBuffer(response, (int)memoryStream.Length); - return soundData; + memoryStream.Position = 0; + memoryStream.ReadExactly(response.Value.Buffer, 0, response.Value.Length); + + _sawmill.Debug($"Generated new sound for '{text}' speech by '{speaker}' speaker with kind '{kind}' ({response.Value.Length} bytes)"); + RequestTimings.WithLabels("Success").Observe((DateTime.UtcNow - reqTime).TotalSeconds); + return true; } catch (TaskCanceledException) { RequestTimings.WithLabels("Timeout").Observe((DateTime.UtcNow - reqTime).TotalSeconds); _sawmill.Error($"Timeout of request generation new audio for '{text}' speech by '{speaker}' speaker"); - return null; + return false; } catch (Exception e) { RequestTimings.WithLabels("Error").Observe((DateTime.UtcNow - reqTime).TotalSeconds); _sawmill.Error( $"Failed of request generation new sound for '{text}' speech by '{speaker}' speaker\n{e}"); - return null; + return false; } }); } - private static string ToQueryString(NameValueCollection nvc) - { - var array = ( - from key in nvc.AllKeys - from value in nvc.GetValues(key) ?? Array.Empty() - select $"{key}={HttpUtility.UrlEncode(value)}" - ).ToArray(); - - return "?" 
+ string.Join("&", array); - } - - public async Task ConvertTextToSpeechRadio(string speaker, string text) + public async Task.Handle?> ConvertTextToSpeechRadio(string speaker, string text) { WantedRadioCount.Inc(); - var cacheKey = GenerateCacheKey($"radio-{speaker}", text, isRadio: true); - return await ExecuteWithNamedLockAsync(cacheKey, async () => + return await StartTtsRequest(new($"radio-{speaker}", text, TtsKind.Radio), + async (request, response) => { - if (_cacheRadio.TryGetValue(cacheKey, out var cachedSoundData)) - { - ReusedRadioCount.Inc(); - _sawmill.Debug($"Use cached radio sound for '{text}' speech by '{speaker}' speaker"); - return cachedSoundData; - } + using var innerResponse = await ConvertTextToSpeech(speaker, text, TtsKind.Radio); - var soundData = await ConvertTextToSpeech(speaker, text, isRadio: true, isAnnounce: false); - if (soundData == null) - return null; + if (innerResponse is null + || !innerResponse.Value.TryGetValue(out var innerBuffer)) + return false; + + using var memoryStream = _memoryStreamPool.GetStream("TtsStream", innerBuffer.Length); + memoryStream.Write(innerBuffer.AsMemory().Span); + memoryStream.Position = 0; var reqTime = DateTime.UtcNow; try { - var outputFilename = Path.GetTempPath() + Guid.NewGuid() + ".wav"; + var outputFilename = $"{Path.GetTempPath()}{Guid.NewGuid()}.{AudioFileExtension}"; await FFMpegArguments - .FromPipeInput(new StreamPipeSource(new MemoryStream(soundData))) + .FromPipeInput(new StreamPipeSource(memoryStream)) .OutputToFile(outputFilename, true, options => options.WithAudioFilters(filterOptions => { @@ -224,26 +209,34 @@ await FFMpegArguments } ) ).ProcessAsynchronously(); - soundData = await File.ReadAllBytesAsync(outputFilename); + + memoryStream.SetLength(0); + memoryStream.Position = 0; + try { - File.Delete(outputFilename); + using var file = new FileStream(outputFilename, FileMode.Open); + await file.CopyToAsync(memoryStream); + _responseManager.AllocBuffer(response, 
(int)memoryStream.Length); + // CopyToAsync leaves the stream positioned at its end; rewind before reading it back. + memoryStream.Position = 0; + memoryStream.ReadExact(response.Value.AsMemory().Span); + } - catch (Exception _) + catch (Exception) { // ignored } - _cacheRadio.AddOrUpdate(cacheKey, soundData, (_, __) => soundData); - _cacheRadioKeysSeq.Add(cacheKey); - if (_cacheRadio.Count > _maxCachedCount) + try + { + File.Delete(outputFilename); + } + catch (Exception) { - var firstKey = _cacheRadioKeysSeq.First(); - _cacheRadio.TryRemove(firstKey, out _); - _cacheRadioKeysSeq.Remove(firstKey); + // ignored } - return soundData; + return true; } catch (TaskCanceledException) { @@ -252,7 +245,7 @@ await FFMpegArguments $"Timeout of request generation new radio sound for '{text}' speech by '{speaker}' speaker"); throw new Exception("TTS request timeout"); } - catch (Win32Exception e) + catch (Win32Exception) { _sawmill.Error($"FFMpeg is not installed"); throw new Exception("ffmpeg is not installed!"); @@ -270,31 +263,62 @@ await FFMpegArguments public void ResetCache() { _cache.Clear(); - _cacheKeysSeq.Clear(); - _cacheRadio.Clear(); - _cacheRadioKeysSeq.Clear(); } - private static string GenerateCacheKey(string speaker, string text, bool isRadio) + private static string ToQueryString(NameValueCollection nvc) { - var key = $"{speaker}/{text}/{isRadio}"; + var array = ( + from key in nvc.AllKeys + from value in nvc.GetValues(key) ?? Array.Empty() + select $"{key}={HttpUtility.UrlEncode(value)}" + ).ToArray(); + + return "?" 
+ string.Join("&", array); + } + + private static string GenerateCacheKey(string speaker, string text, TtsKind kind) + { + var key = $"{speaker}/{text}/{(int)kind}"; var keyData = Encoding.UTF8.GetBytes(key); var bytes = System.Security.Cryptography.SHA256.HashData(keyData); return Convert.ToHexString(bytes); } - private async Task ExecuteWithNamedLockAsync(string key, Func> function) + private async Task.Handle?> StartTtsRequest(TtsRequest request, Func> core) { - var semaphore = Locks.GetOrAdd(key, new SemaphoreSlim(1, 1)); - await semaphore.WaitAsync(); + if (_cache.TryGet(request.Key, out var data)) + { + ReusedCount.Inc(); + _sawmill.Debug($"Use cached sound for '{request.Text}' speech by '{request.Speaker}' speaker"); + return data.GetHandle(); + } + try { - return await function(); + if (!ResponsesInProgress.TryGetValue(request.Key, out var response) || response.Task is null) + { + response = _responseManager.Rent(); + var task = core(request, response); + response.Task = task; + ResponsesInProgress[request.Key] = response; + } + + var isSuccess = await response.Task; + + if (isSuccess) + { + _cache.Cache(request.Key, response); + + return response.GetHandle(); + } + else + { + return null; + } } finally { - semaphore.Release(); - Locks.TryRemove(key, out _); + ResponsesInProgress.TryRemove(request.Key, out _); } } @@ -320,4 +344,201 @@ public CrusherFilterArgument( public string Value => string.Join(":", _arguments.Select, string>(pair => pair.Key + "=" + pair.Value)); } + + private readonly struct TtsRequest + { + public string Speaker { get; } + public string Text { get; } + public TtsKind Kind { get; } + public string Key { get; } + + public TtsRequest(string speaker, string text, TtsKind kind) : this() + { + Speaker = speaker; + Text = text; + Kind = kind; + Key = GenerateCacheKey(speaker, text, kind); + } + } + + private sealed class TtsCache + { + private readonly ConcurrentDictionary _lookup = new(); + private readonly ConcurrentQueue _keysQueue = 
new(); + + public int Limit { get; set; } + + public TtsCache(int limit) + { + Limit = limit; + } + + public void Cache(string key, TtsResponse value) + { + var currentCount = _lookup.Count; + while (currentCount > 0 && currentCount + 1 > Limit) + { + if (_keysQueue.TryDequeue(out var firstKey) + && _lookup.TryRemove(firstKey, out var reuseBuffer)) + { + reuseBuffer.GetHandle().Dispose(); + } + currentCount = _lookup.Count; + } + if (Limit != 0) + { + value.GetHandle(); + _lookup[key] = value; + _keysQueue.Enqueue(key); + } + } + + public bool TryGet(string key, [NotNullWhen(true)] out TtsResponse? buffer) + { + if (Limit == 0) + { + buffer = null; + return false; + } + return _lookup.TryGetValue(key, out buffer); + } + + public void Clear() + { + _lookup.Clear(); + _keysQueue.Clear(); + } + } +} + +public sealed class TtsResponseManager +{ + private readonly Stack _responsePool = new(); + private readonly ArrayPool _arrayPool; + + public TtsResponseManager() : this(ArrayPool.Shared) { } + + public TtsResponseManager(ArrayPool arrayPool) + { + _arrayPool = arrayPool; + } + + public TtsResponse Rent() + { + if (!_responsePool.TryPop(out var response)) + { + response = new(this); + } + + return response; + } + + public void Return(TtsResponse response) + { + FreeBuffer(response); + _responsePool.Push(response); + } + + public void AllocBuffer(TtsResponse response, int length) + { + response.Value = new(_arrayPool.Rent(length), length); + } + + public void FreeBuffer(TtsResponse response) + { + if (response.Value.Buffer.Length == 0) + return; + _arrayPool.Return(response.Value.Buffer); + response.Value = new(); + } +} + +public sealed class TtsResponse : ReferenceCounter +{ + public Task? 
Task { get; set; } + + private readonly TtsResponseManager _manager; + + public TtsResponse(TtsResponseManager manager) : base(new()) + { + _manager = manager; + } + + protected override void OnHandleDisposed() + { + base.OnHandleDisposed(); + if (ReferenceCount == 0) + { + _manager.Return(this); + } + } + + public void Dereference() + { + OnHandleDisposed(); + } +} + +[Virtual] +public class ReferenceCounter +{ + public T Value { get; set; } + public int ReferenceCount => _referenceCount; + + private int _referenceCount = 0; + + public ReferenceCounter(T value) + { + Value = value; + } + + public Handle GetHandle() + { + _referenceCount++; + return new(this); + } + + protected virtual void OnHandleDisposed() + { + _referenceCount--; + } + + public struct Handle : IDisposable + { + private readonly ReferenceCounter _counter; + private bool _isValid; + + public Handle(ReferenceCounter counter) + { + _counter = counter; + _isValid = true; + } + + public void Dispose() + { + if (!_isValid) return; + _isValid = false; + _counter.OnHandleDisposed(); + } + + public Handle GetHandle() + { + return _counter.GetHandle(); + } + + public bool TryGetValue([NotNullWhen(true)] out T value) + { + value = _counter.Value; + return _isValid; + } + } +} + +public static class ReferenceCounterExtensions +{ + public static bool TryGetValue(this ReferenceCounter.Handle? handle, [NotNullWhen(true)] out T? 
value) + { + value = default; + return handle.HasValue && handle.Value.TryGetValue(out value); + } } diff --git a/Content.Server/SS220/TTS/TTSSystem.cs b/Content.Server/SS220/TTS/TTSSystem.cs index cba7ae9da73f..61034fa4ac30 100644 --- a/Content.Server/SS220/TTS/TTSSystem.cs +++ b/Content.Server/SS220/TTS/TTSSystem.cs @@ -1,3 +1,5 @@ +// © SS220, An EULA/CLA with a hosting restriction, full text: https://raw.githubusercontent.com/SerbiaStrong-220/space-station-14/master/CLA.txt + using System.Diagnostics.CodeAnalysis; using System.Threading.Tasks; using Content.Server.Chat.Systems; @@ -6,12 +8,12 @@ using Content.Shared.Inventory; using Content.Shared.SS220.TTS; using Content.Shared.GameTicking; -using Content.Shared.SS220.AnnounceTTS; using Robust.Shared.Configuration; using Robust.Shared.Player; using Robust.Shared.Prototypes; using Robust.Shared.Random; -using Serilog; +using Robust.Shared.Network; +using Robust.Server.Player; namespace Content.Server.SS220.TTS; @@ -25,6 +27,8 @@ public sealed partial class TTSSystem : EntitySystem [Dependency] private readonly IRobustRandom _random = default!; [Dependency] private readonly InventorySystem _inventory = default!; [Dependency] private readonly ILogManager _log = default!; + [Dependency] private readonly IServerNetManager _netManager = default!; + [Dependency] private readonly IPlayerManager _playerManager = default!; private ISawmill _sawmill = default!; @@ -120,17 +124,32 @@ private async void OnAnnouncementSpoke(AnnouncementSpokeEvent args) } } - if (!_isEnabled || - args.Message.Length > MaxMessageChars * 2 || - string.IsNullOrWhiteSpace(voice)) + ReferenceCounter.Handle? 
ttsResponse = default; + + if (_isEnabled + && args.Message.Length <= MaxMessageChars * 2 + && !string.IsNullOrWhiteSpace(voice)) { - RaiseNetworkEvent(new AnnounceTTSEvent([], args.AnnouncementSound, args.AnnouncementSoundParams), args.Source); - return; + ttsResponse = await GenerateTts(args.Message, voice, TtsKind.Announce); + } + + var message = new MsgPlayAnnounceTts + { + AnnouncementSound = args.AnnouncementSound, + AnnouncementParams = args.AnnouncementSoundParams, + }; + + if (ttsResponse.TryGetValue(out var audioData)) + { + message.Data = audioData; } - var soundData = await GenerateTTS(args.Message, voice, isAnnounce: true) ?? []; + foreach (var session in args.Source.Recipients) + { + _netManager.ServerSendMessage(message, session.Channel); + } - RaiseNetworkEvent(new AnnounceTTSEvent(soundData, args.AnnouncementSound, args.AnnouncementSoundParams), args.Source); + ttsResponse?.Dispose(); } private void OnRoundRestartCleanup(RoundRestartCleanupEvent ev) @@ -162,11 +181,11 @@ private async void OnRequestGlobalTTS(RequestGlobalTTSEvent ev, EntitySessionEve !GetVoicePrototype(ev.VoiceId, out var protoVoice)) return; - var soundData = await GenerateTTS(ev.Text, protoVoice.Speaker); - if (soundData is null) + using var ttsResponse = await GenerateTts(ev.Text, protoVoice.Speaker, TtsKind.Default); + if (!ttsResponse.TryGetValue(out var audioData)) return; - RaiseNetworkEvent(new PlayTTSEvent(soundData), Filter.SinglePlayer(args.SenderSession)); + _netManager.ServerSendMessage(new MsgPlayTts { Data = audioData }, args.SenderSession.Channel); } private async void OnEntitySpoke(EntityUid uid, TTSComponent component, EntitySpokeEvent args) @@ -200,27 +219,50 @@ private async void OnEntitySpoke(EntityUid uid, TTSComponent component, EntitySp private async void HandleSay(EntityUid uid, string message, string speaker) { - var soundData = await GenerateTTS(message, speaker); - if (soundData is null) return; - RaiseNetworkEvent(new PlayTTSEvent(soundData, 
GetNetEntity(uid)), Filter.Pvs(uid)); + using var ttsResponse = await GenerateTts(message, speaker, TtsKind.Default); + if (!ttsResponse.TryGetValue(out var audioData)) return; + var playTtsMessage = new MsgPlayTts + { + Data = audioData, + SourceUid = GetNetEntity(uid), + }; + foreach (var receiver in Filter.Pvs(uid).Recipients) + { + _netManager.ServerSendMessage(playTtsMessage, receiver.Channel); + } } private async void HandleWhisper(EntityUid uid, string message, string obfMessage, string speaker, bool isRadio) { // If it's a whisper into a radio, generate speech without whisper // attributes to prevent an additional speech synthesis event - var soundData = await GenerateTTS(message, speaker, isWhisper: true); - if (soundData is null) + using var ttsResponse = await GenerateTts(message, speaker, TtsKind.Whisper); + if (!ttsResponse.TryGetValue(out var audioData)) return; - var obfSoundData = await GenerateTTS(obfMessage, speaker, isWhisper: true); - if (obfSoundData is null) + using var obfTtsResponse = await GenerateTts(obfMessage, speaker, TtsKind.Whisper); + if (!obfTtsResponse.TryGetValue(out var obfAudioData)) return; // TODO: Check obstacles var xformQuery = GetEntityQuery(); var sourcePos = _xforms.GetWorldPosition(xformQuery.GetComponent(uid), xformQuery); var receptions = Filter.Pvs(uid).Recipients; + + var fullTtsMessage = new MsgPlayTts + { + Data = audioData, + SourceUid = GetNetEntity(uid), + Kind = TtsKind.Whisper, + }; + + var obfuscatedTtsMessage = new MsgPlayTts + { + Data = obfAudioData, + SourceUid = GetNetEntity(uid), + Kind = TtsKind.Whisper, + }; + foreach (var session in receptions) { if (!session.AttachedEntity.HasValue) @@ -232,46 +274,48 @@ private async void HandleWhisper(EntityUid uid, string message, string obfMessag if (distance > ChatSystem.WhisperMuffledRange) continue; - var fullTtsEvent = new PlayTTSEvent( - soundData, - GetNetEntity(uid), - isWhisper: true); + var netMessageToSend = distance > ChatSystem.WhisperClearRange ? 
obfuscatedTtsMessage : fullTtsMessage; - var obfTtsEvent = new PlayTTSEvent(obfSoundData, GetNetEntity(uid), isWhisper: true); - - RaiseNetworkEvent(distance > ChatSystem.WhisperClearRange ? obfTtsEvent : fullTtsEvent, session); + _netManager.ServerSendMessage(netMessageToSend, session.Channel); } } private async void HandleRadio(RadioEventReceiver[] receivers, string message, string speaker) { - var soundData = await GenerateTTS(message, speaker, false, true); + using var soundData = await GenerateTts(message, speaker, TtsKind.Radio); if (soundData is null) return; foreach (var receiver in receivers) { - RaiseNetworkEvent(new PlayTTSEvent(soundData, GetNetEntity(receiver.PlayTarget.EntityId), true), receiver.Actor); + if (!_playerManager.TryGetSessionByEntity(receiver.Actor, out var session) + || !soundData.TryGetValue(out var audioData)) + continue; + _netManager.ServerSendMessage(new MsgPlayTts + { + Data = audioData, + SourceUid = GetNetEntity(receiver.PlayTarget.EntityId), + Kind = TtsKind.Radio + }, session.Channel); } } - // ReSharper disable once InconsistentNaming - private async Task GenerateTTS(string text, string speaker, bool isWhisper = false, bool isRadio = false, bool isAnnounce = false) + private async Task.Handle?> GenerateTts(string text, string speaker, TtsKind kind) { try { var textSanitized = Sanitize(text); - if (textSanitized == "") return null; + if (textSanitized == "") return default; if (char.IsLetter(textSanitized[^1])) textSanitized += "."; var ssmlTraits = SoundTraits.RateFast; - if (isWhisper) + if (kind == TtsKind.Whisper) ssmlTraits |= SoundTraits.PitchVerylow; var textSsml = ToSsmlText(textSanitized, ssmlTraits); - return await _ttsManager.ConvertTextToSpeech(speaker, textSanitized, isRadio, isAnnounce); + return await _ttsManager.ConvertTextToSpeech(speaker, textSanitized, kind); //return isRadio // ? 
await _ttsManager.ConvertTextToSpeechRadio(speaker, textSanitized) @@ -283,7 +327,7 @@ private async void HandleRadio(RadioEventReceiver[] receivers, string message, s _sawmill.Error($"TTS System error: {e.Message}"); } - return null; + return default; } } diff --git a/Content.Shared/SS220/AnnounceTTS/AnnounceTTSEvent.cs b/Content.Shared/SS220/AnnounceTTS/AnnounceTTSEvent.cs deleted file mode 100644 index f21431c3fe30..000000000000 --- a/Content.Shared/SS220/AnnounceTTS/AnnounceTTSEvent.cs +++ /dev/null @@ -1,21 +0,0 @@ -// © SS220, An EULA/CLA with a hosting restriction, full text: https://raw.githubusercontent.com/SerbiaStrong-220/space-station-14/master/CLA.txt - -using Robust.Shared.Audio; -using Robust.Shared.Serialization; - -namespace Content.Shared.SS220.AnnounceTTS; - -[Serializable, NetSerializable] -// ReSharper disable once InconsistentNaming -public sealed class AnnounceTTSEvent : EntityEventArgs -{ - public AnnounceTTSEvent(byte[] data, string announcementSound, AudioParams announcementParams) - { - Data = data; - AnnouncementSound = announcementSound; - AnnouncementParams = announcementParams; - } - public byte[] Data { get; } - public string AnnouncementSound { get; } - public AudioParams AnnouncementParams{ get; } -} diff --git a/Content.Shared/SS220/TTS/MsgPlayAnnounceTts.cs b/Content.Shared/SS220/TTS/MsgPlayAnnounceTts.cs new file mode 100644 index 000000000000..a23202aaebcc --- /dev/null +++ b/Content.Shared/SS220/TTS/MsgPlayAnnounceTts.cs @@ -0,0 +1,47 @@ +// © SS220, An EULA/CLA with a hosting restriction, full text: https://raw.githubusercontent.com/SerbiaStrong-220/space-station-14/master/CLA.txt + +using System.IO; +using Lidgren.Network; +using Robust.Shared.Audio; +using Robust.Shared.Network; +using Robust.Shared.Serialization; + +namespace Content.Shared.SS220.TTS; + +public sealed class MsgPlayAnnounceTts : NetMessage +{ + public TtsAudioData Data { get; set; } + public string AnnouncementSound { get; set; } = ""; + public AudioParams 
AnnouncementParams { get; set; } = AudioParams.Default; + + public override MsgGroups MsgGroup => MsgGroups.Command; + + public override void ReadFromBuffer(NetIncomingMessage buffer, IRobustSerializer serializer) + { + var data = new TtsAudioData(); + data.ReadFromNetBuffer(buffer); + Data = data; + AnnouncementSound = buffer.ReadString(); + + var streamLength = buffer.ReadVariableInt32(); + using var stream = new MemoryStream(streamLength); + buffer.ReadAlignedMemory(stream, streamLength); + { + AnnouncementParams = serializer.Deserialize<AudioParams>(stream); + } + } + + public override void WriteToBuffer(NetOutgoingMessage buffer, IRobustSerializer serializer) + { + Data.WriteToNetBuffer(buffer); + buffer.Write(AnnouncementSound); + + using var stream = new MemoryStream(); + { + serializer.Serialize(stream, AnnouncementParams); + } + var streamLength = (int)stream.Length; + buffer.WriteVariableInt32(streamLength); + buffer.Write(stream.GetBuffer().AsSpan(0, streamLength)); + } +} diff --git a/Content.Shared/SS220/TTS/MsgPlayTts.cs b/Content.Shared/SS220/TTS/MsgPlayTts.cs new file mode 100644 index 000000000000..8920c0cf29ee --- /dev/null +++ b/Content.Shared/SS220/TTS/MsgPlayTts.cs @@ -0,0 +1,43 @@ +// © SS220, An EULA/CLA with a hosting restriction, full text: https://raw.githubusercontent.com/SerbiaStrong-220/space-station-14/master/CLA.txt + +using Lidgren.Network; +using Robust.Shared.Network; +using Robust.Shared.Serialization; + +namespace Content.Shared.SS220.TTS; + +public sealed class MsgPlayTts : NetMessage +{ + public TtsAudioData Data { get; set; } + public NetEntity? 
SourceUid { get; set; } + public TtsKind Kind { get; set; } + public float VolumeModifier { get; set; } = 1f; + + public override MsgGroups MsgGroup => MsgGroups.Command; + public override NetDeliveryMethod DeliveryMethod => NetDeliveryMethod.ReliableUnordered; + + public void SetVolumeModifier(float modifier) + { + VolumeModifier = Math.Clamp(modifier, 0, 3); + } + + public override void ReadFromBuffer(NetIncomingMessage buffer, IRobustSerializer serializer) + { + var data = new TtsAudioData(); + data.ReadFromNetBuffer(buffer); + Data = data; + SourceUid = buffer.ReadNetEntity(); + if (SourceUid is { Valid: false }) + SourceUid = null; + Kind = (TtsKind)buffer.ReadInt32(); + VolumeModifier = buffer.ReadFloat(); + } + + public override void WriteToBuffer(NetOutgoingMessage buffer, IRobustSerializer serializer) + { + Data.WriteToNetBuffer(buffer); + buffer.Write(SourceUid ?? NetEntity.Invalid); + buffer.Write((int)Kind); + buffer.Write(VolumeModifier); + } +} diff --git a/Content.Shared/SS220/TTS/PlayTTSEvent.cs b/Content.Shared/SS220/TTS/PlayTTSEvent.cs deleted file mode 100644 index 1f4ca2934ee8..000000000000 --- a/Content.Shared/SS220/TTS/PlayTTSEvent.cs +++ /dev/null @@ -1,28 +0,0 @@ -using Robust.Shared.Serialization; - -namespace Content.Shared.SS220.TTS; - -[Serializable, NetSerializable] -// ReSharper disable once InconsistentNaming -public sealed class PlayTTSEvent : EntityEventArgs -{ - public byte[] Data { get; } - public NetEntity? SourceUid { get; } - public bool IsRadio { get; } - public float VolumeModifier { get; set; } - public bool IsWhisper { get; } - - public PlayTTSEvent(byte[] data, NetEntity? 
sourceUid = null, bool isRadio = false, bool isWhisper = false, float volumeModifier = 1f) - { - Data = data; - SourceUid = sourceUid; - IsRadio = isRadio; - VolumeModifier = volumeModifier; - IsWhisper = isWhisper; - } - - public void SetVolumeModifier(float modifier) - { - VolumeModifier = Math.Clamp(modifier, 0, 3); - } -} diff --git a/Content.Shared/SS220/TTS/TtsAudioData.cs b/Content.Shared/SS220/TTS/TtsAudioData.cs new file mode 100644 index 000000000000..ac90dc73d1da --- /dev/null +++ b/Content.Shared/SS220/TTS/TtsAudioData.cs @@ -0,0 +1,35 @@ +// © SS220, An EULA/CLA with a hosting restriction, full text: https://raw.githubusercontent.com/SerbiaStrong-220/space-station-14/master/CLA.txt + +using Lidgren.Network; +using Robust.Shared.Utility; + +namespace Content.Shared.SS220.TTS; + +public struct TtsAudioData +{ + public byte[] Buffer = Array.Empty<byte>(); + public int Length; + + public readonly bool IsEmpty => Length == 0; + + public TtsAudioData(byte[] bytes, int length) + { + Buffer = bytes; + Length = length; + DebugTools.Assert(Length <= Buffer.Length); + } + + public void ReadFromNetBuffer(NetIncomingMessage buffer) + { + Length = buffer.ReadInt32(); + Buffer = buffer.ReadBytes(Length); + } + + public void WriteToNetBuffer(NetOutgoingMessage buffer) + { + buffer.Write(Length); + buffer.Write(new ReadOnlySpan<byte>(Buffer, 0, Length)); + } + + public Memory<byte> AsMemory() => new(Buffer, 0, Length); +} diff --git a/Content.Shared/SS220/TTS/TtsKind.cs b/Content.Shared/SS220/TTS/TtsKind.cs new file mode 100644 index 000000000000..f2516e94d8ea --- /dev/null +++ b/Content.Shared/SS220/TTS/TtsKind.cs @@ -0,0 +1,11 @@ +// © SS220, An EULA/CLA with a hosting restriction, full text: https://raw.githubusercontent.com/SerbiaStrong-220/space-station-14/master/CLA.txt + +namespace Content.Shared.SS220.TTS; + +public enum TtsKind +{ + Default = 0, + Radio, + Whisper, + Announce, +} diff --git a/ToolsSS220/ttsServer/tts_processor.py b/ToolsSS220/ttsServer/tts_processor.py index 
20899a47816c..311b6a56d3d7 100644 --- a/ToolsSS220/ttsServer/tts_processor.py +++ b/ToolsSS220/ttsServer/tts_processor.py @@ -23,3 +23,11 @@ def make_wav(self, text, speaker, sample_rate): sample_rate=sample_rate) with open(audio_paths, 'rb') as f: return f.read() + + def make_ogg(self, text, speaker, sample_rate): + audio_paths = self.model.save_wav(text=text, + speaker=speaker, + sample_rate=sample_rate) + AudioSegment.from_wav(audio_paths).export('result.ogg', format='ogg') + with open("result.ogg", 'rb') as f: + return f.read() \ No newline at end of file diff --git a/ToolsSS220/ttsServer/tts_server.py b/ToolsSS220/ttsServer/tts_server.py index 6fd0ed00c7f1..a565e3dfd292 100644 --- a/ToolsSS220/ttsServer/tts_server.py +++ b/ToolsSS220/ttsServer/tts_server.py @@ -7,16 +7,20 @@ host = "127.0.0.1" port = 5000 +tts_module = tts_creator() #Get request, consume text, make tts, build response, return to sender. @api.route('/tts/', methods=['GET']) def process_tts(): text = request.args.get('text') original_speaker = request.args.get('speaker') + extension = request.args.get('ext') print(f'Got request with text "{text}" and speaker: "{original_speaker}"') #Strictly debugging thing, uncomment if uncomfortable. speaker = primary_speaker - tts_module = tts_creator() - payload = tts_module.make_wav(text=text, speaker=speaker, sample_rate=24000) + if extension == 'ogg': + payload = tts_module.make_ogg(text=text, speaker=speaker, sample_rate=24000) + else: + payload = tts_module.make_wav(text=text, speaker=speaker, sample_rate=24000) return payload if __name__ == '__main__':