Skip to content

Commit

Permalink
small updates for libritts
Browse files Browse the repository at this point in the history
  • Loading branch information
yerfor committed May 16, 2022
1 parent d4f300e commit 7bbc7c0
Show file tree
Hide file tree
Showing 5 changed files with 16 additions and 5 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ CUDA_VISIBLE_DEVICES=0 python tasks/run.py --config egs/tts/biaobei/synta.yaml -

Audio samples in the paper can be found in our [demo page](https://syntaspeech.github.io/).

We also provide [HuggingFace Demo Page](https://huggingface.co/spaces/NATSpeech/PortaSpeech) for LJSpeech. Try your interesting sentences there!
We also provide a [HuggingFace Demo Page](https://huggingface.co/spaces/yerfor/SyntaSpeech) for LJSpeech. Try your own sentences there!

## Citation

Expand Down
2 changes: 1 addition & 1 deletion egs/datasets/audio/libritts/base_text2mel.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ binarization_args:
train_range: [ 871, -1 ]
test_range: [ 0, 523 ]
valid_range: [ 523, 871 ]
shuffle: false
shuffle: true
with_spk_id: true
with_spk_embed: false
test_ids: [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
Expand Down
3 changes: 2 additions & 1 deletion egs/datasets/audio/lj/base_text2mel.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,5 @@ test_ids: [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
316, 324, 402, 422, 485, 500, 505, 508, 509, 519 ]
f0_min: 80
f0_max: 600
vocoder_ckpt: checkpoints/hifi_lj
vocoder_ckpt: checkpoints/hifi_lj
num_valid_plots: 30
1 change: 1 addition & 0 deletions modules/tts/syntaspeech/syntaspeech.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,7 @@ def forward(self, txt_tokens, word_tokens, ph2word, word_len, mel2word=None, mel
style_embed = self.forward_style_embed(spk_embed, spk_id) # speaker embedding, [B, 1, C]
x, tgt_nonpadding = self.run_text_encoder(
txt_tokens, word_tokens, ph2word, word_len, mel2word, mel2ph, style_embed, ret, graph_lst=graph_lst, etypes_lst=etypes_lst)
x = x + style_embed # adding the speaker/style embedding here may be necessary for multi-speaker synthesis
x = x * tgt_nonpadding
ret['nonpadding'] = tgt_nonpadding
if self.hparams['use_pitch_embed']:
Expand Down
13 changes: 11 additions & 2 deletions tasks/tts/dataset_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,17 @@ def __init__(self, prefix, shuffle=False, items=None, data_dir=None):
self.avail_idxs = list(range(len(self.sizes)))
if prefix == 'train' and hparams['min_frames'] > 0:
self.avail_idxs = [x for x in self.avail_idxs if self.sizes[x] >= hparams['min_frames']]
self.sizes = [self.sizes[i] for i in self.avail_idxs]

try:
self.sizes = [self.sizes[i] for i in self.avail_idxs]
except:
tmp_sizes = []
for i in self.avail_idxs:
try:
tmp_sizes.append(self.sizes[i])
except:
continue
self.sizes = tmp_sizes

def _get_item(self, index):
if hasattr(self, 'avail_idxs') and self.avail_idxs is not None:
index = self.avail_idxs[index]
Expand Down

0 comments on commit 7bbc7c0

Please sign in to comment.