From de7a6829dd384842ab629897763b45b7ddfce162 Mon Sep 17 00:00:00 2001
From: stalengd <p.zamorskiy@yandex.ru>
Date: Fri, 31 Jan 2025 02:50:56 +0300
Subject: [PATCH 1/2] TTS Optimization

---
 Content.Client/Entry/EntryPoint.cs            |   3 +
 Content.Client/IoC/ClientContentIoC.cs        |   2 +
 Content.Client/SS220/TTS/TTSManager.cs        |  20 +
 .../SS220/TTS/TTSSystem.Announce.cs           |  13 +-
 Content.Client/SS220/TTS/TTSSystem.cs         |  34 +-
 Content.Server/SS220/TTS/TTSManager.cs        | 419 +++++++++++++-----
 Content.Server/SS220/TTS/TTSSystem.cs         | 112 +++--
 .../SS220/AnnounceTTS/AnnounceTTSEvent.cs     |  21 -
 .../SS220/TTS/MsgPlayAnnounceTts.cs           |  45 ++
 Content.Shared/SS220/TTS/MsgPlayTts.cs        |  43 ++
 Content.Shared/SS220/TTS/PlayTTSEvent.cs      |  28 --
 Content.Shared/SS220/TTS/TtsAudioData.cs      |  35 ++
 Content.Shared/SS220/TTS/TtsKind.cs           |  11 +
 ToolsSS220/ttsServer/tts_processor.py         |   8 +
 ToolsSS220/ttsServer/tts_server.py            |   8 +-
 15 files changed, 597 insertions(+), 205 deletions(-)
 create mode 100644 Content.Client/SS220/TTS/TTSManager.cs
 delete mode 100644 Content.Shared/SS220/AnnounceTTS/AnnounceTTSEvent.cs
 create mode 100644 Content.Shared/SS220/TTS/MsgPlayAnnounceTts.cs
 create mode 100644 Content.Shared/SS220/TTS/MsgPlayTts.cs
 delete mode 100644 Content.Shared/SS220/TTS/PlayTTSEvent.cs
 create mode 100644 Content.Shared/SS220/TTS/TtsAudioData.cs
 create mode 100644 Content.Shared/SS220/TTS/TtsKind.cs
diff --git a/Content.Client/Entry/EntryPoint.cs b/Content.Client/Entry/EntryPoint.cs
index cff8bd326d57..acb98563b913 100644
--- a/Content.Client/Entry/EntryPoint.cs
+++ b/Content.Client/Entry/EntryPoint.cs
@@ -41,6 +41,7 @@
 using Robust.Shared.Replays;
 using Content.Client.SS220.Discord;
 using Robust.Shared.Timing;
+using Content.Client.SS220.TTS;
 
 namespace Content.Client.Entry
 {
@@ -81,6 +82,7 @@ public sealed class EntryPoint : GameClient
         [Dependency] private readonly DiscordPlayerInfoManager _discordPlayerInfoManager = default!; // SS220 discord info manager
         [Dependency] private readonly DebugMonitorManager _debugMonitorManager = default!;
         [Dependency] private readonly TitleWindowManager _titleWindowManager = default!;
+        [Dependency] private readonly TTSManager _ttsManager = default!; // SS220 TTS
 
         public override void Init()
         {
@@ -182,6 +184,7 @@ public override void PostInit()
             _documentParsingManager.Initialize();
             _discordPlayerInfoManager.Initialize(); // SS220 tier info
             _titleWindowManager.Initialize();
+            _ttsManager.Initialize(); // SS220 TTS
 
             _baseClient.RunLevelChanged += (_, args) =>
             {
diff --git a/Content.Client/IoC/ClientContentIoC.cs b/Content.Client/IoC/ClientContentIoC.cs
index b670f9225702..9f909ef17ff3 100644
--- a/Content.Client/IoC/ClientContentIoC.cs
+++ b/Content.Client/IoC/ClientContentIoC.cs
@@ -28,6 +28,7 @@
 using Content.Shared.Chat;
 using Content.Shared.Players.PlayTimeTracking;
 using Content.Shared.Players.RateLimiting;
+using Content.Client.SS220.TTS;
 
 namespace Content.Client.IoC
 {
@@ -67,6 +68,7 @@ public static void Register()
             collection.Register<PlayerRateLimitManager>();
             collection.Register<SharedPlayerRateLimitManager, PlayerRateLimitManager>();
             collection.Register<TitleWindowManager>();
+            collection.Register<TTSManager>(); // SS220 TTS
         }
     }
 }
diff --git a/Content.Client/SS220/TTS/TTSManager.cs b/Content.Client/SS220/TTS/TTSManager.cs
new file mode 100644
index 000000000000..36adf1f3b010
--- /dev/null
+++ b/Content.Client/SS220/TTS/TTSManager.cs
@@ -0,0 +1,20 @@
+// © SS220, An EULA/CLA with a hosting restriction, full text: https://raw.githubusercontent.com/SerbiaStrong-220/space-station-14/master/CLA.txt
+
+using Content.Shared.SS220.TTS;
+using Robust.Shared.Network;
+
+namespace Content.Client.SS220.TTS;
+
+public sealed class TTSManager // Client-side receiver for the TTS net messages; re-raises them as C# events
+{
+    [Dependency] private readonly IClientNetManager _netManager = default!;
+
+    public event Action<MsgPlayTts>? PlayTtsReceived; // raised for every received MsgPlayTts
+    public event Action<MsgPlayAnnounceTts>? PlayAnnounceTtsReceived; // raised for every received MsgPlayAnnounceTts
+
+    public void Initialize() // registers both message types with the net manager and forwards them to the events
+    {
+        _netManager.RegisterNetMessage<MsgPlayTts>(x => PlayTtsReceived?.Invoke(x));
+        _netManager.RegisterNetMessage<MsgPlayAnnounceTts>(x => PlayAnnounceTtsReceived?.Invoke(x));
+    }
+}
diff --git a/Content.Client/SS220/TTS/TTSSystem.Announce.cs b/Content.Client/SS220/TTS/TTSSystem.Announce.cs
index 309abae2a3d8..fd3505e42b89 100644
--- a/Content.Client/SS220/TTS/TTSSystem.Announce.cs
+++ b/Content.Client/SS220/TTS/TTSSystem.Announce.cs
@@ -1,7 +1,7 @@
 // © SS220, An EULA/CLA with a hosting restriction, full text: https://raw.githubusercontent.com/SerbiaStrong-220/space-station-14/master/CLA.txt
 
 using Content.Shared.Corvax.CCCVars;
-using Content.Shared.SS220.AnnounceTTS;
+using Content.Shared.SS220.TTS;
 using Robust.Shared.Audio;
 using Robust.Shared.Utility;
 
@@ -16,30 +16,31 @@ public sealed partial class TTSSystem : EntitySystem
     private void InitializeAnnounces()
     {
         _cfg.OnValueChanged(CCCVars.TTSAnnounceVolume, OnTtsAnnounceVolumeChanged, true);
-        SubscribeNetworkEvent<AnnounceTTSEvent>(OnAnnounceTTSPlay);
+        _ttsManager.PlayAnnounceTtsReceived += OnAnnounceTtsPlay;
     }
 
     private void ShutdownAnnounces()
     {
         _cfg.UnsubValueChanged(CCCVars.TTSAnnounceVolume, OnTtsAnnounceVolumeChanged);
+        _ttsManager.PlayAnnounceTtsReceived -= OnAnnounceTtsPlay;
     }
 
-    private void OnAnnounceTTSPlay(AnnounceTTSEvent ev)
+    private void OnAnnounceTtsPlay(MsgPlayAnnounceTts msg)
     {
         // Early creation of entities can lead to crashes, so we postpone it as much as possible
         if (AnnouncementUid == EntityUid.Invalid)
             AnnouncementUid = Spawn(null);
 
-        var volume = AdjustVolume(isRadio: false, isAnounce: true, isWhisper: false);
+        var volume = AdjustVolume(TtsKind.Announce);
 
         var audioParams = AudioParams.Default.WithVolume(volume);
 
         // Play announcement sound
-        var announcementSoundPath = new ResPath(ev.AnnouncementSound);
+        var announcementSoundPath = new ResPath(msg.AnnouncementSound);
         PlaySoundQueued(AnnouncementUid, announcementSoundPath, audioParams, true);
 
         // Play announcement itself
-        PlayTTSBytes(ev.Data, AnnouncementUid, audioParams, true);
+        PlayTtsBytes(msg.Data, AnnouncementUid, audioParams, true);
     }
 
     private void OnTtsAnnounceVolumeChanged(float volume)
diff --git a/Content.Client/SS220/TTS/TTSSystem.cs b/Content.Client/SS220/TTS/TTSSystem.cs
index f6617378875f..7bc3ab854871 100644
--- a/Content.Client/SS220/TTS/TTSSystem.cs
+++ b/Content.Client/SS220/TTS/TTSSystem.cs
@@ -3,8 +3,6 @@
 using Content.Shared.Corvax.CCCVars;
 using Content.Shared.SS220.TTS;
 using Content.Shared.SS220.TTS.Commands;
-using Robust.Client.Audio;
-using Robust.Client.GameObjects;
 using Robust.Client.ResourceManagement;
 using Robust.Shared.Audio;
 using Robust.Shared.Audio.Components;
@@ -26,6 +24,7 @@ public sealed partial class TTSSystem : EntitySystem
     [Dependency] private readonly IResourceCache _resourceCache = default!;
     [Dependency] private readonly SharedAudioSystem _audio = default!;
     [Dependency] private readonly IDependencyCollection _dependencyCollection = default!;
+    [Dependency] private readonly TTSManager _ttsManager = default!;
 
     private ISawmill _sawmill = default!;
 
@@ -62,9 +61,10 @@ public override void Initialize()
         _cfg.OnValueChanged(CCCVars.TTSVolume, OnTtsVolumeChanged, true);
         _cfg.OnValueChanged(CCCVars.TTSRadioVolume, OnTtsRadioVolumeChanged, true);
 
-        SubscribeNetworkEvent<PlayTTSEvent>(OnPlayTTS);
         SubscribeNetworkEvent<TtsQueueResetMessage>(OnQueueResetRequest);
 
+        _ttsManager.PlayTtsReceived += OnPlayTts;
+
         InitializeAnnounces();
     }
 
@@ -77,6 +77,8 @@ public override void Shutdown()
         // clear virtual files
         ContentRoot.Clear();
 
+        _ttsManager.PlayTtsReceived -= OnPlayTts;
+
         ShutdownAnnounces();
         ResetQueuesAndEndStreams();
     }
@@ -157,7 +159,7 @@ public override void FrameUpdate(float frameTime)
             SoundPathSpecifier soundPath;
             if (request is PlayRequestById requestById)
             {
-                tempFilePath = new ResPath($"{requestById.FileIdx}.wav");
+                tempFilePath = new ResPath($"{requestById.FileIdx}.ogg");
                 soundPath = new SoundPathSpecifier(Prefix / tempFilePath.Value, requestById.Params);
             }
             else if (request is PlayRequestByPath requestByPath)
@@ -229,7 +231,7 @@ private void PlaySoundQueued(EntityUid entity, ResPath sound, AudioParams? audio
         TryQueueRequest(entity, request);
     }
 
-    private void PlayTTSBytes(byte[] data, EntityUid? sourceUid = null, AudioParams? audioParams = null, bool globally = false)
+    private void PlayTtsBytes(TtsAudioData data, EntityUid? sourceUid = null, AudioParams? audioParams = null, bool globally = false)
     {
         _sawmill.Debug($"Play TTS audio {data.Length} bytes from {sourceUid} entity");
         if (data.Length == 0)
@@ -237,8 +239,8 @@ private void PlayTTSBytes(byte[] data, EntityUid? sourceUid = null, AudioParams?
 
         var finalParams = audioParams ?? AudioParams.Default;
 
-        var filePath = new ResPath($"{_fileIdx}.wav");
-        ContentRoot.AddOrUpdateFile(filePath, data);
+        var filePath = new ResPath($"{_fileIdx}.ogg");
+        ContentRoot.AddOrUpdateFile(filePath, data.Buffer);
 
         // Cache does a funny.
         // If we have disconnected and reconnected, the Idx will be reset
@@ -266,22 +268,26 @@ private void PlayTTSBytes(byte[] data, EntityUid? sourceUid = null, AudioParams?
         _fileIdx++;
     }
 
-    private void OnPlayTTS(PlayTTSEvent ev)
+    private void OnPlayTts(MsgPlayTts msg)
     {
-        var volume = AdjustVolume(ev.IsRadio, isAnounce: false, ev.IsWhisper);
-
+        var volume = AdjustVolume(msg.Kind);
         var audioParams = AudioParams.Default.WithVolume(volume);
 
-        PlayTTSBytes(ev.Data, GetEntity(ev.SourceUid), audioParams);
+        PlayTtsBytes(msg.Data, GetEntity(msg.SourceUid), audioParams);
     }
 
-    private float AdjustVolume(bool isRadio, bool isAnounce, bool isWhisper)
+    private float AdjustVolume(TtsKind kind)
     {
-        var volume = isRadio ? _radioVolume : isAnounce ? VolumeAnnounce : _volume;
+        var volume = kind switch
+        {
+            TtsKind.Radio => _radioVolume,
+            TtsKind.Announce => VolumeAnnounce,
+            _ => _volume,
+        };
 
         volume = SharedAudioSystem.GainToVolume(volume);
 
-        if (isWhisper)
+        if (kind == TtsKind.Whisper)
         {
             volume -= SharedAudioSystem.GainToVolume(WhisperFade);
         }
diff --git a/Content.Server/SS220/TTS/TTSManager.cs b/Content.Server/SS220/TTS/TTSManager.cs
index 5184814e2789..3691ef5a6a29 100644
--- a/Content.Server/SS220/TTS/TTSManager.cs
+++ b/Content.Server/SS220/TTS/TTSManager.cs
@@ -1,23 +1,29 @@
+// © SS220, An EULA/CLA with a hosting restriction, full text: https://raw.githubusercontent.com/SerbiaStrong-220/space-station-14/master/CLA.txt
+
+using System.Buffers;
 using System.Collections.Concurrent;
+using System.Collections.Specialized;
+using System.ComponentModel;
+using System.Diagnostics.CodeAnalysis;
 using System.Globalization;
 using System.IO;
 using System.Linq;
 using System.Net;
 using System.Net.Http;
-using System.Net.Http.Json;
 using System.Text;
-using System.Text.Json.Serialization;
 using System.Threading;
 using System.Threading.Tasks;
+using System.Web;
 using Content.Shared.Corvax.CCCVars;
+using Content.Shared.SS220.TTS;
 using FFMpegCore;
 using FFMpegCore.Arguments;
 using FFMpegCore.Pipes;
+using Microsoft.IO;
 using Prometheus;
 using Robust.Shared.Configuration;
-using System.ComponentModel;
-using System.Collections.Specialized;
-using System.Web;
+using Robust.Shared.Network;
+using Robust.Shared.Utility;
 
 namespace Content.Server.SS220.TTS;
 
@@ -49,20 +55,21 @@ public sealed class TTSManager
         "tts_reused_radio_count",
         "Amount of reused TTS audio from cache.");
 
+    private const string AudioFileExtension = "ogg";
+
     [Dependency] private readonly IConfigurationManager _cfg = default!;
+    [Dependency] private readonly IServerNetManager _netManager = default!;
 
     private readonly HttpClient _httpClient = new();
 
     private ISawmill _sawmill = default!;
-    private readonly ConcurrentDictionary<string, byte[]> _cache = new();
-    private readonly HashSet<string> _cacheKeysSeq = new();
-    private readonly ConcurrentDictionary<string, byte[]> _cacheRadio = new();
-    private readonly HashSet<string> _cacheRadioKeysSeq = new();
+    private readonly TtsCache _cache = new(0);
+    private readonly TtsResponseManager _responseManager = new();
+    private readonly RecyclableMemoryStreamManager _memoryStreamPool = new();
 
-    private static readonly ConcurrentDictionary<string, SemaphoreSlim> Locks = new();
+    private static readonly ConcurrentDictionary<string, TtsResponse> ResponsesInProgress = new();
     private float _timeout = 1;
 
-    private int _maxCachedCount = 200;
     private string _apiUrl = string.Empty;
     private string _apiToken = string.Empty;
 
@@ -71,7 +78,7 @@ public void Initialize()
         _sawmill = Logger.GetSawmill("tts");
         _cfg.OnValueChanged(CCCVars.TTSMaxCache, val =>
         {
-            _maxCachedCount = val;
+            _cache.Limit = val;
             ResetCache();
         }, true);
         _cfg.OnValueChanged(CCCVars.TTSRequestTimeout, val => _timeout = val, true);
@@ -82,6 +89,9 @@ public void Initialize()
             _apiToken = v;
         },
         true);
+
+        _netManager.RegisterNetMessage<MsgPlayTts>();
+        _netManager.RegisterNetMessage<MsgPlayAnnounceTts>();
     }
 
     /// <summary>
@@ -89,25 +99,15 @@ public void Initialize()
     /// </summary>
     /// <param name="speaker">Identifier of speaker</param>
     /// <param name="text">SSML formatted text</param>
-    /// <returns>WAV audio bytes or null if failed</returns>
-    public async Task<byte[]?> ConvertTextToSpeech(string speaker, string text, bool isRadio , bool isAnnounce)
+    /// <returns>Handle to the reference-counted audio data, or null if generation failed</returns>
+    public async Task<ReferenceCounter<TtsAudioData>.Handle?> ConvertTextToSpeech(string speaker, string text, TtsKind kind)
     {
         WantedCount.Inc();
-        var cacheKey = GenerateCacheKey(speaker, text, isRadio);
 
-        return await ExecuteWithNamedLockAsync(cacheKey, async () =>
+        return await StartTtsRequest(new(speaker, text, kind),
+            async (request, response) =>
         {
-            if (_cache.TryGetValue(cacheKey, out var data))
-            {
-                ReusedCount.Inc();
-                _sawmill.Debug($"Use cached sound for '{text}' speech by '{speaker}' speaker");
-                return data;
-            }
-
-            if (isRadio)
-                _sawmill.Debug($"Generate new radio sound for '{text}' speech by '{speaker}' speaker");
-            else
-                _sawmill.Debug($"Generate new audio for '{text}' speech by '{speaker}' speaker");
+            _sawmill.Debug($"Generate new sound for '{text}' speech by '{speaker}' speaker with kind '{kind}'");
 
             var reqTime = DateTime.UtcNow;
             try
@@ -117,101 +117,86 @@ public void Initialize()
                 var requestUrl = $"{_apiUrl}" + ToQueryString(new NameValueCollection() {
                     { "speaker", speaker },
                     { "text", text },
-                    { "ext", "wav" }});
+                    { "ext", AudioFileExtension }});
 
-                if (isRadio)
+                if (kind == TtsKind.Radio)
                 {
                     requestUrl += "&effect=radio";
                 }
 
-                if (isAnnounce)
+                if (kind == TtsKind.Announce)
                 {
                     requestUrl += "&effect=announce";
                 }
 
-                var response = await _httpClient.GetAsync(requestUrl, cts.Token);
-                if (!response.IsSuccessStatusCode)
+                using var httpRequest = new HttpRequestMessage(HttpMethod.Get, requestUrl); // request and response are released when the try block exits
+                using var httpResponse = await _httpClient.SendAsync(httpRequest, cts.Token);
+                if (!httpResponse.IsSuccessStatusCode)
                 {
-                    if (response.StatusCode == HttpStatusCode.TooManyRequests)
+                    if (httpResponse.StatusCode == HttpStatusCode.TooManyRequests)
                     {
                         _sawmill.Warning("TTS request was rate limited");
-                        return null;
+                        return false;
                     }
 
-                    _sawmill.Error($"TTS request returned bad status code: {response.StatusCode}");
-                    return null;
+                    _sawmill.Error($"TTS request returned bad status code: {httpResponse.StatusCode}");
+                    return false;
                 }
 
-                var soundData = await response.Content.ReadAsByteArrayAsync();
+                using var memoryStream = _memoryStreamPool.GetStream("TtsStream", 1024 * 64);
 
-                _cache.AddOrUpdate(cacheKey, soundData, (_, __) => soundData);
-                _cacheKeysSeq.Add(cacheKey);
-                if (_cache.Count > _maxCachedCount)
-                {
-                    var firstKey = _cacheKeysSeq.First();
-                    _cache.TryRemove(firstKey, out _);
-                    _cacheKeysSeq.Remove(firstKey);
-                }
+                memoryStream.Position = 0;
+                memoryStream.SetLength(0);
 
-                if (isRadio)
-                    _sawmill.Debug($"Generated new radio sound for '{text}' speech by '{speaker}' speaker ({soundData.Length} bytes)");
-                else
-                    _sawmill.Debug($"Generated new sound for '{text}' speech by '{speaker}' speaker ({soundData.Length} bytes)");
-                RequestTimings.WithLabels("Success").Observe((DateTime.UtcNow - reqTime).TotalSeconds);
+                await httpResponse.Content.CopyToAsync(memoryStream, cts.Token);
+                _responseManager.AllocBuffer(response, (int)memoryStream.Length);
 
-                return soundData;
+                memoryStream.Position = 0;
+                memoryStream.ReadExactly(response.Value.Buffer, 0, response.Value.Length);
+
+                _sawmill.Debug($"Generated new sound for '{text}' speech by '{speaker}' speaker with kind '{kind}' ({response.Value.Length} bytes)");
+                RequestTimings.WithLabels("Success").Observe((DateTime.UtcNow - reqTime).TotalSeconds);
+                return true;
             }
             catch (TaskCanceledException)
             {
                 RequestTimings.WithLabels("Timeout").Observe((DateTime.UtcNow - reqTime).TotalSeconds);
                 _sawmill.Error($"Timeout of request generation new audio for '{text}' speech by '{speaker}' speaker");
-                return null;
+                return false;
             }
             catch (Exception e)
             {
                 RequestTimings.WithLabels("Error").Observe((DateTime.UtcNow - reqTime).TotalSeconds);
                 _sawmill.Error(
                     $"Failed of request generation new sound for '{text}' speech by '{speaker}' speaker\n{e}");
-                return null;
+                return false;
             }
         });
     }
 
-    private static string ToQueryString(NameValueCollection nvc)
-    {
-        var array = (
-            from key in nvc.AllKeys
-            from value in nvc.GetValues(key) ?? Array.Empty<string>()
-            select $"{key}={HttpUtility.UrlEncode(value)}"
-            ).ToArray();
-
-        return "?" + string.Join("&", array);
-    }
-
-    public async Task<byte[]?> ConvertTextToSpeechRadio(string speaker, string text)
+    public async Task<ReferenceCounter<TtsAudioData>.Handle?> ConvertTextToSpeechRadio(string speaker, string text)
     {
         WantedRadioCount.Inc();
 
-        var cacheKey = GenerateCacheKey($"radio-{speaker}", text, isRadio: true);
-        return await ExecuteWithNamedLockAsync(cacheKey, async () =>
+        return await StartTtsRequest(new($"radio-{speaker}", text, TtsKind.Radio),
+            async (request, response) =>
         {
-            if (_cacheRadio.TryGetValue(cacheKey, out var cachedSoundData))
-            {
-                ReusedRadioCount.Inc();
-                _sawmill.Debug($"Use cached radio sound for '{text}' speech by '{speaker}' speaker");
-                return cachedSoundData;
-            }
+            using var innerResponse = await ConvertTextToSpeech(speaker, text, TtsKind.Radio);
 
-            var soundData = await ConvertTextToSpeech(speaker, text, isRadio: true, isAnnounce: false);
-            if (soundData == null)
-                return null;
+            if (innerResponse is null
+                || !innerResponse.Value.TryGetValue(out var innerBuffer)) // TryGetValue is true for a live handle, so only the negated case is a failure
+                return false;
+
+            using var memoryStream = _memoryStreamPool.GetStream("TtsStream", innerBuffer.Length);
+            memoryStream.Write(innerBuffer.AsMemory().Span);
+            memoryStream.Position = 0;
 
             var reqTime = DateTime.UtcNow;
             try
             {
-                var outputFilename = Path.GetTempPath() + Guid.NewGuid() + ".wav";
+                var outputFilename = $"{Path.GetTempPath()}{Guid.NewGuid()}.{AudioFileExtension}";
                 await FFMpegArguments
-                    .FromPipeInput(new StreamPipeSource(new MemoryStream(soundData)))
+                    .FromPipeInput(new StreamPipeSource(memoryStream))
                     .OutputToFile(outputFilename, true, options =>
                         options.WithAudioFilters(filterOptions =>
                             {
@@ -224,26 +209,32 @@ await FFMpegArguments
                             }
                         )
                     ).ProcessAsynchronously();
-                soundData = await File.ReadAllBytesAsync(outputFilename);
+
+                memoryStream.SetLength(0);
+                memoryStream.Position = 0;
+
                 try
                 {
-                    File.Delete(outputFilename);
+                    // Read the ffmpeg output straight into the pooled response buffer; the old copy-then-read path read from the end of the stream.
+                    using var file = new FileStream(outputFilename, FileMode.Open);
+                    _responseManager.AllocBuffer(response, (int)file.Length);
+                    await file.ReadExactlyAsync(response.Value.Buffer, 0, response.Value.Length);
                 }
-                catch (Exception _)
+                catch (Exception)
                 {
                     // ignored
                 }
 
-                _cacheRadio.AddOrUpdate(cacheKey, soundData, (_, __) => soundData);
-                _cacheRadioKeysSeq.Add(cacheKey);
-                if (_cacheRadio.Count > _maxCachedCount)
+                try
+                {
+                    File.Delete(outputFilename);
+                }
+                catch (Exception)
                 {
-                    var firstKey = _cacheRadioKeysSeq.First();
-                    _cacheRadio.TryRemove(firstKey, out _);
-                    _cacheRadioKeysSeq.Remove(firstKey);
+                    // ignored
                 }
 
-                return soundData;
+                return true;
             }
             catch (TaskCanceledException)
             {
@@ -252,7 +243,7 @@ await FFMpegArguments
                     $"Timeout of request generation new radio sound for '{text}' speech by '{speaker}' speaker");
                 throw new Exception("TTS request timeout");
             }
-            catch (Win32Exception e)
+            catch (Win32Exception)
             {
                 _sawmill.Error($"FFMpeg is not installed");
                 throw new Exception("ffmpeg is not installed!");
@@ -270,31 +261,62 @@ await FFMpegArguments
     public void ResetCache()
     {
         _cache.Clear();
-        _cacheKeysSeq.Clear();
-        _cacheRadio.Clear();
-        _cacheRadioKeysSeq.Clear();
     }
 
-    private static string GenerateCacheKey(string speaker, string text, bool isRadio)
+    private static string ToQueryString(NameValueCollection nvc)
     {
-        var key = $"{speaker}/{text}/{isRadio}";
+        var array = (
+            from key in nvc.AllKeys
+            from value in nvc.GetValues(key) ?? Array.Empty<string>()
+            select $"{key}={HttpUtility.UrlEncode(value)}"
+            ).ToArray();
+
+        return "?" + string.Join("&", array);
+    }
+
+    private static string GenerateCacheKey(string speaker, string text, TtsKind kind)
+    {
+        var key = $"{speaker}/{text}/{(int)kind}";
         var keyData = Encoding.UTF8.GetBytes(key);
         var bytes = System.Security.Cryptography.SHA256.HashData(keyData);
         return Convert.ToHexString(bytes);
     }
 
-    private async Task<TResult> ExecuteWithNamedLockAsync<TResult>(string key, Func<Task<TResult>> function)
+    private async Task<ReferenceCounter<TtsAudioData>.Handle?> StartTtsRequest(TtsRequest request, Func<TtsRequest, TtsResponse, Task<bool>> core)
     {
-        var semaphore = Locks.GetOrAdd(key, new SemaphoreSlim(1, 1));
-        await semaphore.WaitAsync();
+        if (_cache.TryGet(request.Key, out var data))
+        {
+            ReusedCount.Inc();
+            _sawmill.Debug($"Use cached sound for '{request.Text}' speech by '{request.Speaker}' speaker");
+            return data.GetHandle();
+        }
+
         try
         {
-            return await function();
+            if (!ResponsesInProgress.TryGetValue(request.Key, out var response) || response.Task is null)
+            {
+                response = _responseManager.Rent();
+                var task = core(request, response);
+                response.Task = task;
+                ResponsesInProgress[request.Key] = response;
+            }
+
+            var isSuccess = await response.Task;
+
+            if (isSuccess)
+            {
+                _cache.Cache(request.Key, response);
+
+                return response.GetHandle();
+            }
+            else
+            {
+                return null;
+            }
         }
         finally
         {
-            semaphore.Release();
-            Locks.TryRemove(key, out _);
+            ResponsesInProgress.TryRemove(request.Key, out _);
         }
     }
 
@@ -320,4 +342,201 @@ public CrusherFilterArgument(
 
         public string Value => string.Join(":", _arguments.Select<KeyValuePair<string, string>, string>(pair => pair.Key + "=" + pair.Value));
     }
+
+    private readonly struct TtsRequest // immutable description of one TTS generation request
+    {
+        public string Speaker { get; }
+        public string Text { get; }
+        public TtsKind Kind { get; }
+        public string Key { get; } // result of GenerateCacheKey(speaker, text, kind), computed once in the constructor
+
+        public TtsRequest(string speaker, string text, TtsKind kind) : this()
+        {
+            Speaker = speaker;
+            Text = text;
+            Kind = kind;
+            Key = GenerateCacheKey(speaker, text, kind);
+        }
+    }
+
+    private sealed class TtsCache // FIFO cache of finished responses; holds one reference per cached entry
+    {
+        private readonly ConcurrentDictionary<string, TtsResponse> _lookup = new();
+        private readonly ConcurrentQueue<string> _keysQueue = new(); // insertion order, oldest key first
+
+        public int Limit { get; set; } // maximum number of cached entries; 0 disables the cache
+
+        public TtsCache(int limit)
+        {
+            Limit = limit;
+        }
+
+        public void Cache(string key, TtsResponse value)
+        {
+            // Evict oldest entries until there is room for one more; stop if the key queue runs dry.
+            while (_lookup.Count > 0 && _lookup.Count + 1 > Limit && _keysQueue.TryDequeue(out var firstKey))
+            {
+                // Release the reference the cache took when it stored the entry. The previous
+                // GetHandle().Dispose() pair was a net no-op on the count, so evicted responses
+                // were never handed back to the response manager.
+                if (_lookup.TryRemove(firstKey, out var evicted))
+                    evicted.Dereference();
+            }
+            // TryAdd (instead of the indexer) so a key that is already cached does not get a
+            // second, never-released reference or a duplicate queue entry.
+            if (Limit != 0 && _lookup.TryAdd(key, value))
+            {
+                value.GetHandle(); // the cache's own reference, released on eviction or Clear
+                _keysQueue.Enqueue(key);
+            }
+        }
+
+        public bool TryGet(string key, [NotNullWhen(true)] out TtsResponse? buffer)
+        {
+            if (Limit == 0)
+            {
+                buffer = null;
+                return false;
+            }
+            return _lookup.TryGetValue(key, out buffer);
+        }
+
+        public void Clear()
+        {
+            while (_keysQueue.TryDequeue(out var key)) // drain instead of Clear() so each cached reference is released
+                if (_lookup.TryRemove(key, out var cached)) cached.Dereference();
+        }
+    }
+}
+
+public sealed class TtsResponseManager // pools TtsResponse objects and rents their byte buffers from an ArrayPool
+{
+    private readonly Stack<TtsResponse> _responsePool = new(); // returned responses waiting to be rented again
+    private readonly ArrayPool<byte> _arrayPool;
+
+    public TtsResponseManager() : this(ArrayPool<byte>.Shared) { }
+
+    public TtsResponseManager(ArrayPool<byte> arrayPool)
+    {
+        _arrayPool = arrayPool;
+    }
+
+    public TtsResponse Rent() // pops a pooled response, or creates a new one bound to this manager
+    {
+        if (!_responsePool.TryPop(out var response))
+        {
+            response = new(this);
+        }
+
+        return response;
+    }
+
+    public void Return(TtsResponse response) // frees the response's buffer, then puts the response back in the pool
+    {
+        FreeBuffer(response);
+        _responsePool.Push(response);
+    }
+
+    public void AllocBuffer(TtsResponse response, int length) // replaces response.Value with a rented array plus the requested length
+    {
+        response.Value = new(_arrayPool.Rent(length), length);
+    }
+
+    public void FreeBuffer(TtsResponse response)
+    {
+        if (response.Value.Buffer.Length == 0) // NOTE(review): assumes a default TtsAudioData has a non-null Buffer - confirm
+            return;
+        _arrayPool.Return(response.Value.Buffer);
+        response.Value = new();
+    }
+}
+
+public sealed class TtsResponse : ReferenceCounter<TtsAudioData> // reference-counted audio result that returns itself to its manager at zero references
+{
+    public Task<bool>? Task { get; set; } // generation task; null until a request assigns it
+
+    private readonly TtsResponseManager _manager;
+
+    public TtsResponse(TtsResponseManager manager) : base(new())
+    {
+        _manager = manager;
+    }
+
+    protected override void OnHandleDisposed()
+    {
+        base.OnHandleDisposed(); // decrement first, then check for the last reference
+        if (ReferenceCount == 0)
+        {
+            _manager.Return(this);
+        }
+    }
+
+    public void Dereference() // drops one reference without needing a Handle instance
+    {
+        OnHandleDisposed();
+    }
+}
+
+[Virtual]
+public class ReferenceCounter<T> // wraps a value with a manual reference count driven by disposable handles
+{
+    public T Value { get; set; }
+    public int ReferenceCount => _referenceCount;
+
+    private int _referenceCount = 0; // plain int, updated with ++/-- (no interlocked operations)
+
+    public ReferenceCounter(T value)
+    {
+        Value = value;
+    }
+
+    public Handle GetHandle() // increments the count and returns a handle that decrements it on Dispose
+    {
+        _referenceCount++;
+        return new(this);
+    }
+
+    protected virtual void OnHandleDisposed() // called once per disposed handle; subclasses may react to the new count
+    {
+        _referenceCount--;
+    }
+
+    public struct Handle : IDisposable // NOTE(review): a struct, so copies share the counter but each copy has its own _isValid flag
+    {
+        private readonly ReferenceCounter<T> _counter;
+        private bool _isValid;
+
+        public Handle(ReferenceCounter<T> counter)
+        {
+            _counter = counter;
+            _isValid = true;
+        }
+
+        public void Dispose()
+        {
+            if (!_isValid) return; // repeated Dispose on the same instance is a no-op
+            _isValid = false;
+            _counter.OnHandleDisposed();
+        }
+
+        public Handle GetHandle() // takes an additional reference on the same counter
+        {
+            return _counter.GetHandle();
+        }
+
+        public bool TryGetValue([NotNullWhen(true)] out T value) // value is always assigned; the result is false once this handle was disposed
+        {
+            value = _counter.Value;
+            return _isValid;
+        }
+    }
+}
+
+public static class ReferenceCounterExtensions // TryGetValue convenience for nullable handles
+{
+    public static bool TryGetValue<T>(this ReferenceCounter<T>.Handle? handle, [NotNullWhen(true)] out T? value)
+    {
+        value = default; // stays default when the handle is null
+        return handle.HasValue && handle.Value.TryGetValue(out value);
+    }
 }
diff --git a/Content.Server/SS220/TTS/TTSSystem.cs b/Content.Server/SS220/TTS/TTSSystem.cs
index cba7ae9da73f..61034fa4ac30 100644
--- a/Content.Server/SS220/TTS/TTSSystem.cs
+++ b/Content.Server/SS220/TTS/TTSSystem.cs
@@ -1,3 +1,5 @@
+// © SS220, An EULA/CLA with a hosting restriction, full text: https://raw.githubusercontent.com/SerbiaStrong-220/space-station-14/master/CLA.txt
+
 using System.Diagnostics.CodeAnalysis;
 using System.Threading.Tasks;
 using Content.Server.Chat.Systems;
@@ -6,12 +8,12 @@
 using Content.Shared.Inventory;
 using Content.Shared.SS220.TTS;
 using Content.Shared.GameTicking;
-using Content.Shared.SS220.AnnounceTTS;
 using Robust.Shared.Configuration;
 using Robust.Shared.Player;
 using Robust.Shared.Prototypes;
 using Robust.Shared.Random;
-using Serilog;
+using Robust.Shared.Network;
+using Robust.Server.Player;
 
 namespace Content.Server.SS220.TTS;
 
@@ -25,6 +27,8 @@ public sealed partial class TTSSystem : EntitySystem
     [Dependency] private readonly IRobustRandom _random = default!;
     [Dependency] private readonly InventorySystem _inventory = default!;
     [Dependency] private readonly ILogManager _log = default!;
+    [Dependency] private readonly IServerNetManager _netManager = default!;
+    [Dependency] private readonly IPlayerManager _playerManager = default!;
 
     private ISawmill _sawmill = default!;
 
@@ -120,17 +124,32 @@ private async void OnAnnouncementSpoke(AnnouncementSpokeEvent args)
             }
         }
 
-        if (!_isEnabled ||
-            args.Message.Length > MaxMessageChars * 2 ||
-            string.IsNullOrWhiteSpace(voice))
+        ReferenceCounter<TtsAudioData>.Handle? ttsResponse = default;
+
+        if (_isEnabled
+            && args.Message.Length <= MaxMessageChars * 2
+            && !string.IsNullOrWhiteSpace(voice))
         {
-            RaiseNetworkEvent(new AnnounceTTSEvent([], args.AnnouncementSound, args.AnnouncementSoundParams), args.Source);
-            return;
+            ttsResponse = await GenerateTts(args.Message, voice, TtsKind.Announce);
+        }
+
+        var message = new MsgPlayAnnounceTts
+        {
+            AnnouncementSound = args.AnnouncementSound,
+            AnnouncementParams = args.AnnouncementSoundParams,
+        };
+
+        if (ttsResponse.TryGetValue(out var audioData))
+        {
+            message.Data = audioData;
         }
 
-        var soundData = await GenerateTTS(args.Message, voice, isAnnounce: true) ?? [];
+        foreach (var session in args.Source.Recipients)
+        {
+            _netManager.ServerSendMessage(message, session.Channel);
+        }
 
-        RaiseNetworkEvent(new AnnounceTTSEvent(soundData, args.AnnouncementSound, args.AnnouncementSoundParams), args.Source);
+        ttsResponse?.Dispose();
     }
 
     private void OnRoundRestartCleanup(RoundRestartCleanupEvent ev)
@@ -162,11 +181,11 @@ private async void OnRequestGlobalTTS(RequestGlobalTTSEvent ev, EntitySessionEve
             !GetVoicePrototype(ev.VoiceId, out var protoVoice))
             return;
 
-        var soundData = await GenerateTTS(ev.Text, protoVoice.Speaker);
-        if (soundData is null)
+        using var ttsResponse = await GenerateTts(ev.Text, protoVoice.Speaker, TtsKind.Default);
+        if (!ttsResponse.TryGetValue(out var audioData))
             return;
 
-        RaiseNetworkEvent(new PlayTTSEvent(soundData), Filter.SinglePlayer(args.SenderSession));
+        _netManager.ServerSendMessage(new MsgPlayTts { Data = audioData }, args.SenderSession.Channel);
     }
 
     private async void OnEntitySpoke(EntityUid uid, TTSComponent component, EntitySpokeEvent args)
@@ -200,27 +219,50 @@ private async void OnEntitySpoke(EntityUid uid, TTSComponent component, EntitySp
 
     private async void HandleSay(EntityUid uid, string message, string speaker)
     {
-        var soundData = await GenerateTTS(message, speaker);
-        if (soundData is null) return;
-        RaiseNetworkEvent(new PlayTTSEvent(soundData, GetNetEntity(uid)), Filter.Pvs(uid));
+        using var ttsResponse = await GenerateTts(message, speaker, TtsKind.Default); // handle is released when the method exits, after the sends below
+        if (!ttsResponse.TryGetValue(out var audioData)) return; // no handle came back, or it holds no usable value
+        var playTtsMessage = new MsgPlayTts
+        {
+            Data = audioData,
+            SourceUid = GetNetEntity(uid),
+        };
+        foreach (var receiver in Filter.Pvs(uid).Recipients) // the same message instance goes to every recipient of the filter
+        {
+            _netManager.ServerSendMessage(playTtsMessage, receiver.Channel);
+        }
     }
 
     private async void HandleWhisper(EntityUid uid, string message, string obfMessage, string speaker, bool isRadio)
     {
         // If it's a whisper into a radio, generate speech without whisper
         // attributes to prevent an additional speech synthesis event
-        var soundData = await GenerateTTS(message, speaker, isWhisper: true);
-        if (soundData is null)
+        using var ttsResponse = await GenerateTts(message, speaker, TtsKind.Whisper);
+        if (!ttsResponse.TryGetValue(out var audioData))
             return;
 
-        var obfSoundData = await GenerateTTS(obfMessage, speaker, isWhisper: true);
-        if (obfSoundData is null)
+        using var obfTtsResponse = await GenerateTts(obfMessage, speaker, TtsKind.Whisper);
+        if (!obfTtsResponse.TryGetValue(out var obfAudioData))
             return;
 
         // TODO: Check obstacles
         var xformQuery = GetEntityQuery<TransformComponent>();
         var sourcePos = _xforms.GetWorldPosition(xformQuery.GetComponent(uid), xformQuery);
         var receptions = Filter.Pvs(uid).Recipients;
+
+        var fullTtsMessage = new MsgPlayTts
+        {
+            Data = audioData,
+            SourceUid = GetNetEntity(uid),
+            Kind = TtsKind.Whisper,
+        };
+
+        var obfuscatedTtsMessage = new MsgPlayTts
+        {
+            Data = obfAudioData,
+            SourceUid = GetNetEntity(uid),
+            Kind = TtsKind.Whisper,
+        };
+
         foreach (var session in receptions)
         {
             if (!session.AttachedEntity.HasValue)
@@ -232,46 +274,48 @@ private async void HandleWhisper(EntityUid uid, string message, string obfMessag
             if (distance > ChatSystem.WhisperMuffledRange)
                 continue;
 
-            var fullTtsEvent = new PlayTTSEvent(
-                soundData,
-                GetNetEntity(uid),
-                isWhisper: true);
+            var netMessageToSend = distance > ChatSystem.WhisperClearRange ? obfuscatedTtsMessage : fullTtsMessage;
 
-            var obfTtsEvent = new PlayTTSEvent(obfSoundData, GetNetEntity(uid), isWhisper: true);
-
-            RaiseNetworkEvent(distance > ChatSystem.WhisperClearRange ? obfTtsEvent : fullTtsEvent, session);
+            _netManager.ServerSendMessage(netMessageToSend, session.Channel);
         }
     }
 
     private async void HandleRadio(RadioEventReceiver[] receivers, string message, string speaker)
     {
-        var soundData = await GenerateTTS(message, speaker, false, true);
+        using var soundData = await GenerateTts(message, speaker, TtsKind.Radio); // handle is released when the method exits
         if (soundData is null)
             return;
 
         foreach (var receiver in receivers)
         {
-            RaiseNetworkEvent(new PlayTTSEvent(soundData, GetNetEntity(receiver.PlayTarget.EntityId), true), receiver.Actor);
+            if (!_playerManager.TryGetSessionByEntity(receiver.Actor, out var session) // skip receivers whose actor has no player session
+                || !soundData.TryGetValue(out var audioData)) // ...or when the handle yields no audio
+                continue;
+            _netManager.ServerSendMessage(new MsgPlayTts // a separate message per receiver, because SourceUid differs
+            {
+                Data = audioData,
+                SourceUid = GetNetEntity(receiver.PlayTarget.EntityId),
+                Kind = TtsKind.Radio
+            }, session.Channel);
         }
     }
 
-    // ReSharper disable once InconsistentNaming
-    private async Task<byte[]?> GenerateTTS(string text, string speaker, bool isWhisper = false, bool isRadio = false, bool isAnnounce = false)
+    private async Task<ReferenceCounter<TtsAudioData>.Handle?> GenerateTts(string text, string speaker, TtsKind kind)
     {
         try
         {
             var textSanitized = Sanitize(text);
-            if (textSanitized == "") return null;
+            if (textSanitized == "") return default;
             if (char.IsLetter(textSanitized[^1]))
                 textSanitized += ".";
 
             var ssmlTraits = SoundTraits.RateFast;
-            if (isWhisper)
+            if (kind == TtsKind.Whisper)
                 ssmlTraits |= SoundTraits.PitchVerylow;
 
             var textSsml = ToSsmlText(textSanitized, ssmlTraits);
 
-            return await _ttsManager.ConvertTextToSpeech(speaker, textSanitized, isRadio, isAnnounce);
+            return await _ttsManager.ConvertTextToSpeech(speaker, textSanitized, kind);
 
             //return isRadio
             //    ? await _ttsManager.ConvertTextToSpeechRadio(speaker, textSanitized)
@@ -283,7 +327,7 @@ private async void HandleRadio(RadioEventReceiver[] receivers, string message, s
             _sawmill.Error($"TTS System error: {e.Message}");
         }
 
-        return null;
+        return default;
     }
 }
 
diff --git a/Content.Shared/SS220/AnnounceTTS/AnnounceTTSEvent.cs b/Content.Shared/SS220/AnnounceTTS/AnnounceTTSEvent.cs
deleted file mode 100644
index f21431c3fe30..000000000000
--- a/Content.Shared/SS220/AnnounceTTS/AnnounceTTSEvent.cs
+++ /dev/null
@@ -1,21 +0,0 @@
-﻿// © SS220, An EULA/CLA with a hosting restriction, full text: https://raw.githubusercontent.com/SerbiaStrong-220/space-station-14/master/CLA.txt
-
-using Robust.Shared.Audio;
-using Robust.Shared.Serialization;
-
-namespace Content.Shared.SS220.AnnounceTTS;
-
-[Serializable, NetSerializable]
-// ReSharper disable once InconsistentNaming
-public sealed class AnnounceTTSEvent : EntityEventArgs
-{
-    public AnnounceTTSEvent(byte[] data, string announcementSound, AudioParams announcementParams)
-    {
-        Data = data;
-        AnnouncementSound = announcementSound;
-        AnnouncementParams = announcementParams;
-    }
-    public byte[] Data { get; }
-    public string AnnouncementSound { get; }
-    public AudioParams AnnouncementParams{ get; }
-}
diff --git a/Content.Shared/SS220/TTS/MsgPlayAnnounceTts.cs b/Content.Shared/SS220/TTS/MsgPlayAnnounceTts.cs
new file mode 100644
index 000000000000..496320f30f32
--- /dev/null
+++ b/Content.Shared/SS220/TTS/MsgPlayAnnounceTts.cs
@@ -0,0 +1,45 @@
+// © SS220, An EULA/CLA with a hosting restriction, full text: https://raw.githubusercontent.com/SerbiaStrong-220/space-station-14/master/CLA.txt
+
+using System.IO;
+using Lidgren.Network;
+using Robust.Shared.Audio;
+using Robust.Shared.Network;
+using Robust.Shared.Serialization;
+
+namespace Content.Shared.SS220.TTS;
+
+public sealed class MsgPlayAnnounceTts : NetMessage
+{
+    public TtsAudioData Data { get; set; }
+    public string AnnouncementSound { get; set; } = "";
+    public AudioParams AnnouncementParams { get; set; } = AudioParams.Default;
+
+    public override MsgGroups MsgGroup => MsgGroups.Command;
+
+    public override void ReadFromBuffer(NetIncomingMessage buffer, IRobustSerializer serializer)
+    {
+        Data.ReadFromNetBuffer(buffer);
+        AnnouncementSound = buffer.ReadString();
+
+        var streamLength = buffer.ReadVariableInt32();
+        // Mirror of WriteToBuffer: the serialized AudioParams follow their own byte count.
+        using var paramsStream = new MemoryStream(streamLength);
+        buffer.ReadAlignedMemory(paramsStream, streamLength);
+
+        AnnouncementParams = serializer.Deserialize<AudioParams>(paramsStream);
+    }
+
+    public override void WriteToBuffer(NetOutgoingMessage buffer, IRobustSerializer serializer)
+    {
+        Data.WriteToNetBuffer(buffer);
+        buffer.Write(AnnouncementSound);
+
+        // AudioParams goes through the serializer into a scratch stream, then onto the wire
+        // as a variable-length byte count followed by exactly that many bytes.
+        using var paramsStream = new MemoryStream();
+        serializer.Serialize(paramsStream, AnnouncementParams);
+        var byteCount = (int)paramsStream.Length;
+        buffer.WriteVariableInt32(byteCount);
+        buffer.Write(paramsStream.GetBuffer().AsSpan(0, byteCount));
+    }
+}
diff --git a/Content.Shared/SS220/TTS/MsgPlayTts.cs b/Content.Shared/SS220/TTS/MsgPlayTts.cs
new file mode 100644
index 000000000000..8920c0cf29ee
--- /dev/null
+++ b/Content.Shared/SS220/TTS/MsgPlayTts.cs
@@ -0,0 +1,43 @@
+// © SS220, An EULA/CLA with a hosting restriction, full text: https://raw.githubusercontent.com/SerbiaStrong-220/space-station-14/master/CLA.txt
+
+using Lidgren.Network;
+using Robust.Shared.Network;
+using Robust.Shared.Serialization;
+
+namespace Content.Shared.SS220.TTS;
+
+/// <summary>Carries one TTS clip plus playback metadata; fields are written and read in the same fixed order.</summary>
+public sealed class MsgPlayTts : NetMessage
+{
+    public TtsAudioData Data { get; set; }
+    public NetEntity? SourceUid { get; set; }
+    public TtsKind Kind { get; set; }
+    public float VolumeModifier { get; set; } = 1f;
+
+    public override MsgGroups MsgGroup => MsgGroups.Command;
+    public override NetDeliveryMethod DeliveryMethod => NetDeliveryMethod.ReliableUnordered;
+
+    public void SetVolumeModifier(float modifier) => VolumeModifier = Math.Clamp(modifier, 0f, 3f);
+
+    public override void ReadFromBuffer(NetIncomingMessage buffer, IRobustSerializer serializer)
+    {
+        // The audio struct is filled in a local first and then assigned to the property.
+        var audio = new TtsAudioData();
+        audio.ReadFromNetBuffer(buffer);
+        Data = audio;
+
+        // An invalid entity on the wire stands for "no source".
+        var source = buffer.ReadNetEntity();
+        SourceUid = source.Valid ? source : (NetEntity?)null;
+        Kind = (TtsKind)buffer.ReadInt32();
+        VolumeModifier = buffer.ReadFloat();
+    }
+
+    public override void WriteToBuffer(NetOutgoingMessage buffer, IRobustSerializer serializer)
+    {
+        Data.WriteToNetBuffer(buffer);
+        buffer.Write(SourceUid ?? NetEntity.Invalid);
+        buffer.Write((int)Kind);
+        buffer.Write(VolumeModifier);
+    }
+}
diff --git a/Content.Shared/SS220/TTS/PlayTTSEvent.cs b/Content.Shared/SS220/TTS/PlayTTSEvent.cs
deleted file mode 100644
index 1f4ca2934ee8..000000000000
--- a/Content.Shared/SS220/TTS/PlayTTSEvent.cs
+++ /dev/null
@@ -1,28 +0,0 @@
-using Robust.Shared.Serialization;
-
-namespace Content.Shared.SS220.TTS;
-
-[Serializable, NetSerializable]
-// ReSharper disable once InconsistentNaming
-public sealed class PlayTTSEvent : EntityEventArgs
-{
-    public byte[] Data { get; }
-    public NetEntity? SourceUid { get; }
-    public bool IsRadio { get; }
-    public float VolumeModifier { get; set; }
-    public bool IsWhisper { get; }
-
-    public PlayTTSEvent(byte[] data, NetEntity? sourceUid = null, bool isRadio = false, bool isWhisper = false, float volumeModifier = 1f)
-    {
-        Data = data;
-        SourceUid = sourceUid;
-        IsRadio = isRadio;
-        VolumeModifier = volumeModifier;
-        IsWhisper = isWhisper;
-    }
-
-    public void SetVolumeModifier(float modifier)
-    {
-        VolumeModifier = Math.Clamp(modifier, 0, 3);
-    }
-}
diff --git a/Content.Shared/SS220/TTS/TtsAudioData.cs b/Content.Shared/SS220/TTS/TtsAudioData.cs
new file mode 100644
index 000000000000..ac90dc73d1da
--- /dev/null
+++ b/Content.Shared/SS220/TTS/TtsAudioData.cs
@@ -0,0 +1,35 @@
+// © SS220, An EULA/CLA with a hosting restriction, full text: https://raw.githubusercontent.com/SerbiaStrong-220/space-station-14/master/CLA.txt
+
+using Lidgren.Network;
+using Robust.Shared.Utility;
+
+namespace Content.Shared.SS220.TTS;
+
+/// <summary>A byte array plus the count of leading bytes in it that are in use.</summary>
+public struct TtsAudioData
+{
+    public byte[] Buffer = Array.Empty<byte>();
+    public int Length;
+    public readonly bool IsEmpty => Length == 0;
+
+    public TtsAudioData(byte[] bytes, int length)
+    {
+        (Buffer, Length) = (bytes, length);
+        DebugTools.Assert(length <= bytes.Length);
+    }
+
+    public void ReadFromNetBuffer(NetIncomingMessage buffer)
+    {
+        // Wire layout: a 32-bit byte count, then exactly that many bytes.
+        Length = buffer.ReadInt32();
+        Buffer = buffer.ReadBytes(Length);
+    }
+
+    public void WriteToNetBuffer(NetOutgoingMessage buffer)
+    {
+        buffer.Write(Length);
+        buffer.Write(new ReadOnlySpan<byte>(Buffer, 0, Length));
+    }
+
+    public Memory<byte> AsMemory() => new Memory<byte>(Buffer, 0, Length);
+}
diff --git a/Content.Shared/SS220/TTS/TtsKind.cs b/Content.Shared/SS220/TTS/TtsKind.cs
new file mode 100644
index 000000000000..f2516e94d8ea
--- /dev/null
+++ b/Content.Shared/SS220/TTS/TtsKind.cs
@@ -0,0 +1,11 @@
+// © SS220, An EULA/CLA with a hosting restriction, full text: https://raw.githubusercontent.com/SerbiaStrong-220/space-station-14/master/CLA.txt
+
+namespace Content.Shared.SS220.TTS;
+
+public enum TtsKind // numeric values are spelled out because the kind is exchanged as an integer
+{
+    Default = 0,
+    Radio = 1,
+    Whisper = 2,
+    Announce = 3,
+}
diff --git a/ToolsSS220/ttsServer/tts_processor.py b/ToolsSS220/ttsServer/tts_processor.py
index 20899a47816c..311b6a56d3d7 100644
--- a/ToolsSS220/ttsServer/tts_processor.py
+++ b/ToolsSS220/ttsServer/tts_processor.py
@@ -23,3 +23,11 @@ def make_wav(self, text, speaker, sample_rate):
                              sample_rate=sample_rate)
         with open(audio_paths, 'rb') as f:
             return f.read()
+
+    def make_ogg(self, text, speaker, sample_rate):
+        audio_paths = self.model.save_wav(text=text,
+                        speaker=speaker,
+                        sample_rate=sample_rate)
+        AudioSegment.from_wav(audio_paths).export('result.ogg', format='ogg')
+        with open("result.ogg", 'rb') as f:
+            return f.read()
\ No newline at end of file
diff --git a/ToolsSS220/ttsServer/tts_server.py b/ToolsSS220/ttsServer/tts_server.py
index 6fd0ed00c7f1..a565e3dfd292 100644
--- a/ToolsSS220/ttsServer/tts_server.py
+++ b/ToolsSS220/ttsServer/tts_server.py
@@ -7,16 +7,20 @@
 
 host = "127.0.0.1"
 port = 5000
+tts_module = tts_creator()  # built once at import time and reused by every request handler call
 
 #Get request, consume text, make tts, build response, return to sender.
 @api.route('/tts/', methods=['GET'])
 def process_tts():
    text = request.args.get('text')
    original_speaker = request.args.get('speaker')
+   extension = request.args.get('ext')  # optional query parameter; only the exact value 'ogg' switches the output format
    print(f'Got request with text "{text}" and speaker: "{original_speaker}"') #Strictly debugging thing, uncomment if uncomfortable.
    speaker = primary_speaker
-   tts_module = tts_creator()
-   payload = tts_module.make_wav(text=text, speaker=speaker, sample_rate=24000)
+   if extension == 'ogg':
+      payload = tts_module.make_ogg(text=text, speaker=speaker, sample_rate=24000)
+   else:  # missing or any other 'ext' value falls back to WAV
+      payload = tts_module.make_wav(text=text, speaker=speaker, sample_rate=24000)
    return payload
 
 if __name__ == '__main__':

From 19fc896c1bda0474f51da1ae049fcf4a77751c23 Mon Sep 17 00:00:00 2001
From: stalengd <p.zamorskiy@yandex.ru>
Date: Sat, 1 Feb 2025 03:34:17 +0300
Subject: [PATCH 2/2] Fix announce TTS

---
 Content.Shared/SS220/TTS/MsgPlayAnnounceTts.cs | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/Content.Shared/SS220/TTS/MsgPlayAnnounceTts.cs b/Content.Shared/SS220/TTS/MsgPlayAnnounceTts.cs
index 496320f30f32..a23202aaebcc 100644
--- a/Content.Shared/SS220/TTS/MsgPlayAnnounceTts.cs
+++ b/Content.Shared/SS220/TTS/MsgPlayAnnounceTts.cs
@@ -18,7 +18,9 @@ public sealed class MsgPlayAnnounceTts : NetMessage
 
     public override void ReadFromBuffer(NetIncomingMessage buffer, IRobustSerializer serializer)
     {
-        Data.ReadFromNetBuffer(buffer);
+        var data = new TtsAudioData();
+        data.ReadFromNetBuffer(buffer);
+        Data = data;
         AnnouncementSound = buffer.ReadString();
 
         var streamLength = buffer.ReadVariableInt32();