Support training your own vocoder

2024-03-22 13:11:31 +08:00 · 2021-08-29 15:43:54 +08:00 · 2021-08-29 15:43:54 +08:00 · 5950eea895
commit 5950eea895
parent 630023c7b2
5 changed files with 37 additions and 12 deletions
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -0,0 +1,28 @@
+{
+    // 使用 IntelliSense 了解相关属性。 
+    // 悬停以查看现有属性的描述。
+    // 欲了解更多信息，请访问: https://go.microsoft.com/fwlink/?linkid=830387
+    "version": "0.2.0",
+    "configurations": [
+        {
+            "name": "Python: Vocoder Preprocess",
+            "type": "python",
+            "request": "launch",
+            "program": "vocoder_preprocess.py",
+            "console": "integratedTerminal",
+            "args": [
+                "..\\..\\chs1"
+            ],
+        },
+        {
+            "name": "Python: Vocoder Train",
+            "type": "python",
+            "request": "launch",
+            "program": "vocoder_train.py",
+            "console": "integratedTerminal",
+            "args": [
+                "dev", "..\\..\\chs1"
+            ],
+        }
+    ]
+}
--- a/synthesizer/hparams.py
+++ b/synthesizer/hparams.py
@@ -87,6 +87,3 @@ hparams = HParams(
        silence_min_duration_split = 0.4,           # Duration in seconds of a silence for an utterance to be split
        utterance_min_duration = 1.6,               # Duration in seconds below which utterances are discarded
        )
-
-def hparams_debug_string():
-    return str(hparams)
--- a/synthesizer/synthesize.py
+++ b/synthesizer/synthesize.py
@@ -1,6 +1,5 @@
 import torch
 from torch.utils.data import DataLoader
-from synthesizer.hparams import hparams_debug_string
 from synthesizer.synthesizer_dataset import SynthesizerDataset, collate_synthesizer
 from synthesizer.models.tacotron import Tacotron
 from synthesizer.utils.text import text_to_sequence
@@ -8,13 +7,14 @@ from synthesizer.utils.symbols import symbols
 import numpy as np
 from pathlib import Path
 from tqdm import tqdm
+import sys


 def run_synthesis(in_dir, out_dir, model_dir, hparams):
    # This generates ground truth-aligned mels for vocoder training
    synth_dir = Path(out_dir).joinpath("mels_gta")
-    synth_dir.mkdir(exist_ok=True)
-    print(hparams_debug_string(hparams))
+    synth_dir.mkdir(parents=True, exist_ok=True)
+    print(str(hparams))

    # Check for GPU
    if torch.cuda.is_available():
@@ -59,12 +59,12 @@ def run_synthesis(in_dir, out_dir, model_dir, hparams):
    metadata_fpath = in_dir.joinpath("train.txt")
    mel_dir = in_dir.joinpath("mels")
    embed_dir = in_dir.joinpath("embeds")
-
+    num_workers = 0 if sys.platform.startswith("win") else 2;
    dataset = SynthesizerDataset(metadata_fpath, mel_dir, embed_dir, hparams)
    data_loader = DataLoader(dataset,
-                             collate_fn=lambda batch: collate_synthesizer(batch, r),
+                             collate_fn=lambda batch: collate_synthesizer(batch),
                             batch_size=hparams.synthesis_batch_size,
-                             num_workers=2,
+                             num_workers=num_workers,
                             shuffle=False,
                             pin_memory=True)

@@ -78,9 +78,9 @@ def run_synthesis(in_dir, out_dir, model_dir, hparams):

            # Parallelize model onto GPUS using workaround due to python bug
            if device.type == "cuda" and torch.cuda.device_count() > 1:
-                _, mels_out, _ = data_parallel_workaround(model, texts, mels, embeds)
+                _, mels_out, _ , _ = data_parallel_workaround(model, texts, mels, embeds)
            else:
-                _, mels_out, _ = model(texts, mels, embeds)
+                _, mels_out, _, _  = model(texts, mels, embeds)

            for j, k in enumerate(idx):
                # Note: outputs mel-spectrogram files and target ones have same names, just different folders
--- a/archived_untest_files/vocoder_preprocess.py
+++ b/archived_untest_files/vocoder_preprocess.py
@@ -17,7 +17,7 @@ if __name__ == "__main__":
        "Path to the directory containing your SV2TTS directory. If you specify both --in_dir and "
        "--out_dir, this argument won't be used.")
    parser.add_argument("--model_dir", type=str, 
-                        default="synthesizer/saved_models/pretrained/", help=\
+                        default="synthesizer/saved_models/train3/", help=\
        "Path to the pretrained model directory.")
    parser.add_argument("-i", "--in_dir", type=str, default=argparse.SUPPRESS, help= \
        "Path to the synthesizer directory that contains the mel spectrograms, the wavs and the "
--- a/archived_untest_files/vocoder_train.py
+++ b/archived_untest_files/vocoder_train.py