From 28e6bce570454ae130dd88d64ffe9b11431d8ce6 Mon Sep 17 00:00:00 2001
From: Vega
Date: Fri, 24 Sep 2021 09:47:51 +0800
Subject: [PATCH] Web server: Add latest changes (#96)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Init App

* init server.py (#93)

* init server.py

* Update requirements.txt

Add requirement

Co-authored-by: auau
Co-authored-by: babysor00

* Run web.py!

* Restructure readme and add instructions for using the web server

* Fix the training preprocessing of the vocoder

Co-authored-by: balala
Co-authored-by: auau
---
 README-CN.md           | 8 ++++++--
 README.md              | 3 ++-
 synthesizer/hparams.py | 2 +-
 vocoder_preprocess.py  | 4 ++--
 4 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/README-CN.md b/README-CN.md
index 004b4d6..edd105c 100644
--- a/README-CN.md
+++ b/README-CN.md
@@ -53,7 +53,9 @@
 #### 2.3 Train the vocoder (optional)
 The vocoder has little impact on output quality, and three pretrained ones are already bundled; if you want to train your own, refer to the commands below.
 * Preprocess the data:
-`python vocoder_preprocess.py <datasets_root>`
+`python vocoder_preprocess.py <datasets_root> -m <synthesizer_model_path>`
+> Replace `<datasets_root>` with your dataset directory and `<synthesizer_model_path>` with the directory of your best synthesizer model, e.g. *synthesizer\saved_models\xxx*
+
 
 * Train the wavernn vocoder:
 `python vocoder_train.py <trainid> <datasets_root>`
@@ -70,7 +72,6 @@
 ### 3.1 Launch the web app:
 `python web.py`
 Once it is running, open the address in your browser; the default is `http://localhost:8080`
-
 > Note: the interface is still somewhat buggy.
 > * The first time you click `Record`, wait a few seconds for the browser to start recording properly, otherwise the audio will contain overlapping sound
 > * When you finish recording, click `Stop` instead of `Record` again
@@ -80,6 +81,7 @@
 ### 3.2 Launch the toolbox:
 `python demo_toolbox.py -d <datasets_root>`
 > Specify the path to a usable dataset; if a supported dataset is present, it is loaded automatically for debugging, and the path also serves as the storage directory for manually recorded audio.
+
 
 ## Release Note
@@ -168,4 +170,6 @@ voc_pad =2
 
 #### 7. When is training considered finished?
 First, the attention alignment must appear; second, the loss must be low enough, which depends on your hardware and dataset. For reference, my attention appeared after 18k steps, and the loss dropped below 0.4 after 50k steps.
 ![attention_step_20500_sample_1](https://user-images.githubusercontent.com/7423248/128587252-f669f05a-f411-4811-8784-222156ea5e9d.png)
+
+![step-135500-mel-spectrogram_sample_1](https://user-images.githubusercontent.com/7423248/128587255-4945faa0-5517-46ea-b173-928eff999330.png)
diff --git a/README.md b/README.md
index 27a6cec..2699cb0 100644
--- a/README.md
+++ b/README.md
@@ -54,7 +54,8 @@ Allowing parameter `--dataset {dataset}` to support aidatatang_200zh, magicdata,
 #### 2.3 Train vocoder (Optional)
 > note: the vocoder makes little difference to output quality, so you may not need to train a new one.
 * Preprocess the data:
-`python vocoder_preprocess.py <datasets_root>`
+`python vocoder_preprocess.py <datasets_root> -m <synthesizer_model_path>`
+> Replace `<datasets_root>` with your dataset root and `<synthesizer_model_path>` with the directory of your best trained synthesizer model, e.g. *synthesizer\saved_models\xxx*
 * Train the wavernn vocoder:
 `python vocoder_train.py mandarin <datasets_root>`
diff --git a/synthesizer/hparams.py b/synthesizer/hparams.py
index 897b6d4..a779c69 100644
--- a/synthesizer/hparams.py
+++ b/synthesizer/hparams.py
@@ -41,7 +41,7 @@ hparams = HParams(
         tts_lstm_dims = 1024,
         tts_postnet_K = 5,
         tts_num_highways = 4,
-        tts_dropout = 0.5,
+        tts_dropout = 0.2,
         tts_cleaner_names = ["basic_cleaners"],
         tts_stop_threshold = -3.4,   # Value below which audio generation ends.
                                      # For example, for a range of [-4, 4], this
diff --git a/vocoder_preprocess.py b/vocoder_preprocess.py
index b8951a3..95f9e5a 100644
--- a/vocoder_preprocess.py
+++ b/vocoder_preprocess.py
@@ -16,8 +16,8 @@ if __name__ == "__main__":
     parser.add_argument("datasets_root", type=str, help=\
         "Path to the directory containing your SV2TTS directory. If you specify both --in_dir and "
         "--out_dir, this argument won't be used.")
-    parser.add_argument("--model_dir", type=str,
-        default="synthesizer/saved_models/train3/", help=\
+    parser.add_argument("-m", "--model_dir", type=str,
+        default="synthesizer/saved_models/mandarin/", help=\
         "Path to the pretrained model directory.")
     parser.add_argument("-i", "--in_dir", type=str, default=argparse.SUPPRESS, help= \
         "Path to the synthesizer directory that contains the mel spectrograms, the wavs and the "
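
For quick reference, a minimal usage sketch of the CLI change to vocoder_preprocess.py above: the `datasets_root` positional and the new `-m`/`--model_dir` flag come from the argparse definitions in this patch, while the concrete paths are hypothetical examples.

```sh
# Preprocess with the new default model directory (synthesizer/saved_models/mandarin/)
python vocoder_preprocess.py /path/to/datasets_root

# Or point -m/--model_dir at the directory of your best trained synthesizer model
python vocoder_preprocess.py /path/to/datasets_root -m synthesizer/saved_models/xxx/
```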