mirror of
https://github.com/babysor/MockingBird.git
synced 2024-03-22 13:11:31 +08:00
Merge pull request #13 from babysor/chineseinputsupport
Toolbox add Chinese character input support
This commit is contained in:
commit
e66d29872f
|
@ -45,6 +45,8 @@
|
|||
然后您可以尝试使用工具箱:
|
||||
`python demo_toolbox.py -d <datasets_root>`
|
||||
|
||||
> Good news🤩: 可直接使用中文
|
||||
|
||||
## TODO
|
||||
- [X] 允许直接使用中文
|
||||
- [X] 添加演示视频
|
||||
|
|
|
@ -51,6 +51,8 @@ You can then try the toolbox:
|
|||
or
|
||||
`python demo_toolbox.py`
|
||||
|
||||
> Good news🤩: Chinese characters are supported
|
||||
|
||||
## TODO
|
||||
- [x] Add demo video
|
||||
- [X] Add support for more datasets
|
||||
|
|
|
@ -9,7 +9,7 @@ from pathlib import Path
|
|||
from typing import Union, List
|
||||
import numpy as np
|
||||
import librosa
|
||||
|
||||
from pypinyin import lazy_pinyin, Style
|
||||
|
||||
class Synthesizer:
|
||||
sample_rate = hparams.sample_rate
|
||||
|
@ -91,6 +91,10 @@ class Synthesizer:
|
|||
simple_table([("Tacotron", str(tts_k) + "k"),
|
||||
("r", self._model.r)])
|
||||
|
||||
#convert chinese char to pinyin
|
||||
list_of_pinyin = lazy_pinyin(texts, style=Style.TONE3)
|
||||
texts = [" ".join([v for v in list_of_pinyin if v.strip()])]
|
||||
|
||||
# Preprocess text inputs
|
||||
inputs = [text_to_sequence(text.strip(), hparams.tts_cleaner_names) for text in texts]
|
||||
if not isinstance(embeddings, list):
|
||||
|
|
|
@ -36,17 +36,8 @@ colormap = np.array([
|
|||
], dtype=np.float) / 255
|
||||
|
||||
default_text = \
|
||||
"Welcome to the toolbox! To begin, load an utterance from your datasets or record one " \
|
||||
"yourself.\nOnce its embedding has been created, you can synthesize any text written here.\n" \
|
||||
"The synthesizer expects to generate " \
|
||||
"outputs that are somewhere between 5 and 12 seconds.\nTo mark breaks, write a new line. " \
|
||||
"Each line will be treated separately.\nThen, they are joined together to make the final " \
|
||||
"spectrogram. Use the vocoder to generate audio.\nThe vocoder generates almost in constant " \
|
||||
"time, so it will be more time efficient for longer inputs like this one.\nOn the left you " \
|
||||
"have the embedding projections. Load or record more utterances to see them.\nIf you have " \
|
||||
"at least 2 or 3 utterances from a same speaker, a cluster should form.\nSynthesized " \
|
||||
"utterances are of the same color as the speaker whose voice was used, but they're " \
|
||||
"represented with a cross."
|
||||
"欢迎使用工具箱, 现已支持中文输入!"
|
||||
|
||||
|
||||
|
||||
class UI(QDialog):
|
||||
|
|
Loading…
Reference in New Issue
Block a user