Skip to content

Commit

Permalink
语音合成功能实现
Browse files Browse the repository at this point in the history
  • Loading branch information
haruhi committed Sep 30, 2024
1 parent 7e7cb07 commit 9971668
Show file tree
Hide file tree
Showing 14 changed files with 258 additions and 42 deletions.
8 changes: 8 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
{
"files.exclude": {
"**/*.rpyc": true,
"**/*.rpa": true,
"**/*.rpymc": true,
"**/cache/": true
}
}
26 changes: 25 additions & 1 deletion app.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,12 @@
import requests

from const import CURRENT_VERSION
from corelib.exception import ConvertException, SaveFileException
from corelib.exception import ConvertException, SaveFileException, VoiceException
from handler.converter import Converter
from handler.parser import Parser
from handler.writer import RpyFileWriter
from tools.image_data import *
from handler.tts import TTS


class Application_ui(Frame):
Expand Down Expand Up @@ -56,6 +57,10 @@ def createWidgets(self):
style='ConvertButton.TButton')
self.ConvertButton.place(relx=0.788, rely=0.7, relwidth=0.146, relheight=0.236)

self.style.configure('SynthesizeButton.TButton', font=('宋体', 9))
self.SynthesizeButton = Button(self.top, text='合成音频', command=self.synthesize_audio, style='SynthesizeButton.TButton')
self.SynthesizeButton.place(relx=0.5, rely=0.7, relwidth=0.146, relheight=0.073)

self.style.configure('OutputLabel.TLabel', anchor='w', font=('宋体', 9))
self.OutputLabel = Label(self.top, text='保存目录:', style='OutputLabel.TLabel')
self.OutputLabel.place(relx=0.066, rely=0.84, relwidth=0.107, relheight=0.05)
Expand Down Expand Up @@ -137,6 +142,7 @@ def ConvertButton_Cmd(self, event=None):
parser = Parser(path)
conveter = Converter(parser)
convert_results = conveter.generate_rpy_elements()

print(conveter.side_characters)
for res in convert_results:
self.convert(self.saveAddr.get(), res, conveter.role_name_mapping, conveter.side_characters)
Expand All @@ -148,6 +154,24 @@ def ConvertButton_Cmd(self, event=None):
self.saveAddr.delete('0', 'end')
self.Text.delete('0.0', 'end')

def synthesize_audio(self, event=None):
    """Synthesize audio for every selected spreadsheet and report the result.

    For each path returned by ``self.getTlist()`` the file is parsed, run
    through the converter, and the rows marked for TTS are sent to the
    synthesis service through ``TTS``. Failures are shown in an error
    dialog; when no path failed, a success dialog is shown and the input
    widgets are cleared.

    :param event: optional Tk event; unused, accepted so the method can be
        bound both as a button command and as an event handler.
    """
    success_flag = True
    for path in self.getTlist():
        try:
            parser = Parser(path)
            conveter = Converter(parser)
            # The conversion pass is kept for parity with the convert
            # handler; its return value is not needed here.
            # NOTE(review): TTS only reads ``conveter.parser`` -- confirm
            # whether this call is still required.
            conveter.generate_rpy_elements()
            tts = TTS(conveter)
            tts.synthesize_voice(tts.filter_parsed_sheets_tts())
        except VoiceException as err:
            success_flag = False
            showerror("合成错误", err.msg)
        except requests.RequestException as err:
            # TTS talks to an HTTP service; without this handler a refused
            # connection would escape the Tk callback and the user would
            # see no dialog at all.
            success_flag = False
            showerror("合成错误", str(err))
    if success_flag:
        showinfo("合成成功", "合成完成")
        self.saveAddr.delete('0', 'end')
        self.Text.delete('0.0', 'end')

def open_url(self, url):
webbrowser.open(url, new=0)

Expand Down
1 change: 1 addition & 0 deletions const/converter_setting.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
'voice': 27,
'menu': 28,
'side_character': 29,
'voice_cmd':30,
}

# 元素映射
Expand Down
2 changes: 1 addition & 1 deletion const/parser_setting.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@
EXCEL_PARSE_START_ROW = 7

# excel解析列数
EXCEL_PARSE_START_COL = 30
EXCEL_PARSE_START_COL = 31
30 changes: 30 additions & 0 deletions const/tts_setting.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Maps a role name (as written in the spreadsheet's role_name column) to the
# model weight files for that role. TTS.switch_models sends the "gpt" value to
# the set_gpt_weights endpoint and the "sovits" value to set_sovits_weights.
role_model_mapping = {
"长门": {
"gpt": "GPT_weights_v2/nagato_yuki-e15.ckpt",
"sovits": "SoVITS_weights_v2/nagato_yuki_e15_s2160.pth"
},
# NOTE(review): the values below look like placeholders -- replace before use.
"角色2": {
"gpt": "gpt_model_2_path",
"sovits": "sovits_model_2_path"
},
# Add more roles...
}

# Maps a value of the spreadsheet's voice_cmd column to the reference audio
# file and its transcript that are sent with the synthesis request.
voice_cmd_mapping = {
"voice_cmd_1": {
"ref_audio_path": "path_to_reference_1.wav",
"prompt_text": "Prompt text for voice_cmd_1"
},
"voice_cmd_2": {
"ref_audio_path": "path_to_reference_2.wav",
"prompt_text": "Prompt text for voice_cmd_2"
},
# Add more mappings...
}

# Reference audio and transcript used when a row's voice_cmd has no entry in
# voice_cmd_mapping.
# NOTE(review): default_prompt_audio is an absolute path on a D: drive, so it
# is machine-specific -- confirm it exists on the target machine.
default_prompt_audio = "D:/GPT-SoVITS-v2-240821/predef_ref/正常有希/01_有希_平静.wav"
default_prompt_text = "私が再び異常動作を起こさないという確証はない。"

# Base URL of the synthesis HTTP API. Endpoint names are appended directly to
# 'base', so the trailing slash is required.
API_BASE_URL = {
'base': 'http://127.0.0.1:9880/'
}
5 changes: 5 additions & 0 deletions corelib/exception.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,3 +36,8 @@ class SaveFileException(Exception):
def __init__(self, msg):
super(SaveFileException, self).__init__(msg)
self.msg = msg

class VoiceException(Exception):
    """Exception carrying a human-readable message for voice synthesis errors.

    The message is passed to ``Exception`` and is also kept on ``msg`` so
    that handlers can display it directly.
    """

    def __init__(self, msg):
        super().__init__(msg)
        self.msg = msg
Binary file modified dist/Excel2RpyScript.exe
Binary file not shown.
37 changes: 29 additions & 8 deletions handler/converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ def add_role(self, name):
self.role_name_mapping[name] = role
return role

#创建一个元组,存有工作表标签及对应工作表下的多行转换后数据
def generate_rpy_elements(self):
result = []
parsed_sheets = self.parser.get_parsed_sheets()
Expand All @@ -54,7 +55,7 @@ def generate_rpy_elements(self):
label = 'start'
else:
label = parsed_sheet.name
result.append(SheetConvertResult(label=label, data=self.parse_by_sheet(parsed_sheet.row_values)))
result.append(SheetConvertResult(label=label, data=self.parse_by_sheet(parsed_sheet.row_values, idx)))
return result

@classmethod
Expand All @@ -66,23 +67,35 @@ def generate_character(cls, img_str):
else:
return Image(img_str.replace(last_word, "").strip(), ImageCmdMapping.get(last_word, "hide"))

# NOTE(review): this hunk is a rendered diff; it shows both the previous
# one-argument version and the new version that also takes sheet_index.
def parse_by_sheet(self, values):
# Call parse_by_row_value for every row and return the list of converted rows.
# values: the rows of one sheet; sheet_index: index of that sheet, forwarded
# unchanged to parse_by_row_value together with each row's index.
def parse_by_sheet(self, values, sheet_index):
result = []
for row_value in values:
result.append(self.parse_by_row_value(row_value))
current_role_name = None # tracks the most recent non-empty role_name
for row_index, row_value in enumerate(values):
role_name = row_value[ElementColNumMapping.get('role_name')]
if role_name.strip():
current_role_name = role_name # remember the latest non-empty role_name
else:
role_name = current_role_name # cell is empty: fall back to the most recent non-empty value (None if there is none yet)
result.append(self.parse_by_row_value(row_value, role_name, sheet_index, row_index))
return result

# NOTE(review): rendered diff; the first two lines are the previous version of
# this method, the remaining lines are the new version.
def parse_by_row_value(self, row):
row_converter = RowConverter(row, self)
# Build a RowConverter for one row and return the result of its convert() call.
# role_name, sheet_index and row_index are passed straight to the RowConverter.
def parse_by_row_value(self, row, role_name, sheet_index, row_index):
row_converter = RowConverter(row, self, role_name, sheet_index, row_index)
return row_converter.convert()


class RowConverter(object):

# NOTE(review): rendered diff; the first def line is the previous signature,
# the second is the new one.
def __init__(self, row, converter):
# Store the raw row, the owning converter, and the context needed to name
# synthesized voice files (role name plus sheet and row indexes).
def __init__(self, row, converter, role_name, sheet_index, row_index):
self.row = row
self.converter = converter

# role_name is the value resolved by the caller (Converter.parse_by_sheet
# substitutes the most recent non-empty name for empty cells).
self.role_name = role_name
# Both indexes come from enumerate() in the caller, so they are 0-based.
self.row_index = row_index
self.sheet_index = sheet_index

#该方法返回存有单行转换后信息的元组
def convert(self):
return RowConvertResult(
mode=self._converter_mode(),
Expand Down Expand Up @@ -202,6 +215,11 @@ def _converter_voice(self):
voice_str = str(self.row[ElementColNumMapping.get('voice')]).strip()
if not voice_str:
return None

# 检查是否包含 "tts"
if voice_str.lower().strip() == "tts":
return Voice(f"{self.role_name}_sheet{self.sheet_index+1}_row{self.row_index+8}_synthesized.wav")

if voice_str.split(" ")[-1] == "sustain":
voice_name = voice_str.split(" ")[0]
return Voice(voice_name, sustain=True)
Expand Down Expand Up @@ -235,3 +253,6 @@ def _converter_side_character(self):
return None
self.converter.side_characters[self.converter.current_role.pronoun] = character_str
return None

def _converter_voice_cmd(self):
pass
116 changes: 116 additions & 0 deletions handler/tts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
from collections import namedtuple

from const.converter_setting import ElementColNumMapping, PositionMapping, ImageCmdMapping, TransitionMapping, \
ReplaceCharacterMapping

from const.tts_setting import role_model_mapping, API_BASE_URL, voice_cmd_mapping, default_prompt_audio, default_prompt_text

import requests, os

class TTS(object):
    """Synthesize audio files for spreadsheet rows whose voice cell is ``tts``.

    The class reads the parsed sheets from the converter's parser, keeps the
    rows marked for synthesis, and posts each of them to the HTTP API
    configured in ``API_BASE_URL``. Every successful response is written to
    ``audio/<role>_sheet<S>_row<R>_synthesized.wav``, using the same naming
    scheme as ``RowConverter._converter_voice`` so that the generated script
    and the files on disk agree.

    NOTE(review): HTTP error responses are only printed, not raised, while
    the GUI handler catches ``VoiceException`` -- confirm whether failures
    should be raised instead.
    """

    # Added to the 0-based row index when building a file name. Must stay
    # equal to the offset used by RowConverter._converter_voice.
    ROW_NUMBER_OFFSET = 8

    # Folder (relative to the working directory) that receives the audio.
    AUDIO_FOLDER = "audio"

    def __init__(self, conveter):
        """Keep the converter, its parser, and the last role switched to."""
        self.conveter = conveter
        self.parser = conveter.parser
        self.last_role_name = None

    def filter_parsed_sheets_tts(self):
        """Collect the rows marked ``tts`` from every parsed sheet.

        :return: a list with one dict per sheet that has at least one such
            row: ``{'name': <sheet name>, 'rows': [...]}``. Each row dict
            holds ``role_name``, ``text`` and ``voice_cmd``, plus the
            original 0-based ``sheet_index`` and ``row_index`` so that file
            names can match the ones written into the script. Rows are
            sorted by role name so that rows of one role are adjacent and
            models are switched as rarely as possible.
        """
        role_col = ElementColNumMapping.get('role_name')
        voice_col = ElementColNumMapping.get('voice')
        text_col = ElementColNumMapping.get('text')
        voice_cmd_col = ElementColNumMapping.get('voice_cmd')

        parsed_sheets_tts = []
        for sheet_index, parsed_sheet in enumerate(self.parser.get_parsed_sheets()):
            # Reset per sheet: the converter also starts every sheet without
            # a carried-over role name, and the file names must agree.
            current_role_name = None
            filtered_rows = []
            for row_index, row in enumerate(parsed_sheet.row_values):
                role_name = row[role_col]
                if role_name.strip():
                    current_role_name = role_name
                else:
                    # Empty cell: reuse the most recent non-empty role name.
                    role_name = current_role_name

                # str() mirrors the converter, which also stringifies the
                # voice cell before comparing it.
                if str(row[voice_col]).strip().lower() != 'tts':
                    continue

                filtered_rows.append({
                    'role_name': role_name,
                    'text': row[text_col],
                    'voice_cmd': row[voice_cmd_col],
                    'sheet_index': sheet_index,
                    'row_index': row_index,
                })

            # role_name may be None when a sheet starts with empty role
            # cells; None cannot be compared with str, so sort it as ''.
            filtered_rows.sort(key=lambda item: item['role_name'] or '')

            if filtered_rows:
                parsed_sheets_tts.append({
                    'name': parsed_sheet.name,
                    'rows': filtered_rows,
                })

        return parsed_sheets_tts

    def switch_models(self, role_name):
        """Ask the API to load the GPT and SoVITS weights of ``role_name``.

        Nothing is sent when the role equals the one switched to last, or
        when ``role_model_mapping`` has no entry for it (a message is
        printed in that case). The role is remembered only after both
        requests returned HTTP 200, so a failed switch is retried on the
        next row instead of being silently treated as done.
        """
        if role_name == self.last_role_name:
            return

        models = role_model_mapping.get(role_name)
        if not models:
            print(f"No model found for role: {role_name}")
            return

        base_url = API_BASE_URL['base']
        switches = (
            ('set_gpt_weights', models['gpt']),
            ('set_sovits_weights', models['sovits']),
        )
        for endpoint, weights_path in switches:
            # ``params`` URL-encodes the path; splicing it into the query
            # string breaks on spaces, '&', '#' or non-ASCII characters.
            response = requests.get(
                f"{base_url}{endpoint}",
                params={'weights_path': weights_path},
            )
            if response.status_code != 200:
                print(
                    f"Failed to call {endpoint} for role {role_name}: "
                    f"{self._error_detail(response)}"
                )
                return

        self.last_role_name = role_name

    def synthesize_voice(self, voice_tts_sheets):
        """Synthesize one audio file per row of ``voice_tts_sheets``.

        :param voice_tts_sheets: the structure returned by
            ``filter_parsed_sheets_tts``. Rows without ``sheet_index`` /
            ``row_index`` keys fall back to their position in the given
            lists.
        """
        if voice_tts_sheets:
            os.makedirs(self.AUDIO_FOLDER, exist_ok=True)

        for sheet_position, sheet in enumerate(voice_tts_sheets):
            for row_position, row in enumerate(sheet['rows']):
                role_name = row['role_name']
                # Use the original spreadsheet coordinates: positions in
                # the filtered, sorted list differ from what the converter
                # wrote into the script.
                sheet_index = row.get('sheet_index', sheet_position)
                row_index = row.get('row_index', row_position)

                # Reference audio and transcript for this voice command,
                # with the configured defaults for unknown commands.
                audio_params = voice_cmd_mapping.get(row['voice_cmd'], {})
                ref_audio_path = audio_params.get("ref_audio_path", default_prompt_audio)
                prompt_text = audio_params.get("prompt_text", default_prompt_text)

                self.switch_models(role_name)

                response = requests.post(
                    f"{API_BASE_URL['base']}tts",
                    json={
                        "text": row['text'],
                        "text_lang": "auto",
                        "ref_audio_path": ref_audio_path,
                        "prompt_text": prompt_text,
                        "prompt_lang": "auto",
                        "text_split_method": "cut0",
                        "batch_size": 1,
                    },
                )
                if response.status_code != 200:
                    print(f"Error for {role_name}: {self._error_detail(response)}")
                    continue

                file_name = (
                    f"{role_name}_sheet{sheet_index + 1}"
                    f"_row{row_index + self.ROW_NUMBER_OFFSET}_synthesized.wav"
                )
                with open(os.path.join(self.AUDIO_FOLDER, file_name), "wb") as f:
                    f.write(response.content)

    @staticmethod
    def _error_detail(response):
        """Return the decoded JSON body of ``response``, or its text if not JSON."""
        try:
            return response.json()
        except ValueError:
            return response.text


17 changes: 7 additions & 10 deletions test/Sheet2.rpy
Original file line number Diff line number Diff line change
@@ -1,12 +1,9 @@
define role1 = Character('aaa', color="#c8c8ff", image="role1")
define role2 = Character('aaabv', color="#c8c8ff", image="role2")
define role1 = Character('长门', color="#c8c8ff", image="role1")
define role2 = Character('春日', color="#c8c8ff", image="role2")
define role3 = Character('阿虚', color="#c8c8ff", image="role3")
define role4 = Character('春日', color="#c8c8ff", image="role4")
define role5 = Character('春日bbbb', color="#c8c8ff", image="role5")
define narrator_nvl = Character(None, kind=nvl)
define narrator_adv = Character(None, kind=adv)
define config.voice_filename_format = "audio/{filename}"
image side role1 = "images/haruhi 1.png"

label Sheet2:
play music "audio/19.mp3"
Expand All @@ -16,8 +13,8 @@ show haruhi 1 at left
show kyon 1 at right
scene bg34
with dissolve
voice "images/test1.jpg"
narrator_nvl "\"sheet2 喂,你们知道肾上腺素这种东西吗?\""
narrator_nvl "\"喂,你们知道肾上腺素这种东西吗?\""
voice "test.mp3"
narrator_nvl "\"没错,就是那种当人遭遇某种突如其来的刺激时才会分泌,可以使人呼吸加快、心跳加速、瞳孔放大,激发身体潜能好应对可能发生的事情的那种激素。\""
narrator_nvl "\"如果现在我的身上有那么一个用来测量这种激素浓度的仪器,那么上面的数字恐怕一定是在不断地飙升的吧——\""
nvl clear
Expand All @@ -35,6 +32,7 @@ with dissolve
narrator_adv "\"包裹在右手上的外套一直摩擦着粗糙的岩壁,回音在隧道里相互叠加不断传递 ,最终汇聚成了一股丝毫都不悦耳、反而让人愈加烦躁的声音。 \""
narrator_adv "\"如果这是一个恐怖游戏的关卡,那么我一定要提名它拿今年科隆游戏展的最佳恐怖游戏配音奖——如果有这个奖项的话。\""
play music "audio/20.mp3"
voice "test sustain.mp3"
narrator_adv "\"就在我试图用一些轻松一点的想法冲淡自己的紧张,尽量不让自己在逃出去之前就被自己的肾上腺素毒死之时,我那紧紧握着春日右手的左手,又一次感受到了来自春日那微微地握力。 \""
narrator_adv "\"不仅是我,就连平日里元气十足、坐镇北高文学部部室四处征伐的团长大人,此刻也通过手心里微微渗出的汗滴来委婉地告诉我她的不安。\""
narrator_adv "\"而这时不时传来的、看似微弱但却异常敏感的身体信号,则更是从频率上在暗示着她和我一样不断加重的负面情绪。\""
Expand All @@ -47,7 +45,6 @@ hide kyon 5
show haruhi 3 at left
show kyon 5 at center
stop music
voice "images/test1.jpg"
role2 "\"啊! \""
hide haruhi 3
hide kyon 5
Expand All @@ -70,7 +67,7 @@ show haruhi 2 at center
narrator_adv "\"借着手机屏幕的亮光,我勉强看清了她的表情——涨红了脸,流露出一股由惊慌、不满和似乎是害羞的情绪所组成的复杂表情。看样子她也快到极限了。 \""
hide haruhi 2
show haruhi 6 at center
role4 "\"没,没事。\""
role2 "\"没,没事。\""
hide haruhi 6
hide haruhi 1
narrator_adv "\"只是不小心被石头绊了一下,\想不\%到竟\"\"差点\'\'{{[[\""
narrator_adv "\"只是不小心被石头绊了一下,想不到竟然差点摔了下去\""
Loading

0 comments on commit 9971668

Please sign in to comment.