Support training your own vocoder

This commit is contained in:
babysor00 2021-08-29 15:43:54 +08:00
parent 630023c7b2
commit 5950eea895
5 changed files with 37 additions and 12 deletions

28
.vscode/launch.json vendored Normal file
View File

@@ -0,0 +1,28 @@
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"name": "Python: Vocoder Preprocess",
"type": "python",
"request": "launch",
"program": "vocoder_preprocess.py",
"console": "integratedTerminal",
"args": [
"..\\..\\chs1"
],
},
{
"name": "Python: Vocoder Train",
"type": "python",
"request": "launch",
"program": "vocoder_train.py",
"console": "integratedTerminal",
"args": [
"dev", "..\\..\\chs1"
],
}
]
}

View File

@@ -87,6 +87,3 @@ hparams = HParams(
     silence_min_duration_split = 0.4,   # Duration in seconds of a silence for an utterance to be split
     utterance_min_duration = 1.6,       # Duration in seconds below which utterances are discarded
     )
-
-def hparams_debug_string():
-    return str(hparams)

View File

@@ -1,6 +1,5 @@
 import torch
 from torch.utils.data import DataLoader
-from synthesizer.hparams import hparams_debug_string
 from synthesizer.synthesizer_dataset import SynthesizerDataset, collate_synthesizer
 from synthesizer.models.tacotron import Tacotron
 from synthesizer.utils.text import text_to_sequence
@@ -8,13 +7,14 @@ from synthesizer.utils.symbols import symbols
 import numpy as np
 from pathlib import Path
 from tqdm import tqdm
+import sys

 def run_synthesis(in_dir, out_dir, model_dir, hparams):
     # This generates ground truth-aligned mels for vocoder training
     synth_dir = Path(out_dir).joinpath("mels_gta")
-    synth_dir.mkdir(exist_ok=True)
+    synth_dir.mkdir(parents=True, exist_ok=True)
-    print(hparams_debug_string(hparams))
+    print(str(hparams))

     # Check for GPU
     if torch.cuda.is_available():
@@ -59,12 +59,12 @@ def run_synthesis(in_dir, out_dir, model_dir, hparams):
     metadata_fpath = in_dir.joinpath("train.txt")
     mel_dir = in_dir.joinpath("mels")
     embed_dir = in_dir.joinpath("embeds")
+    num_workers = 0 if sys.platform.startswith("win") else 2;
     dataset = SynthesizerDataset(metadata_fpath, mel_dir, embed_dir, hparams)
     data_loader = DataLoader(dataset,
-                             collate_fn=lambda batch: collate_synthesizer(batch, r),
+                             collate_fn=lambda batch: collate_synthesizer(batch),
                              batch_size=hparams.synthesis_batch_size,
-                             num_workers=2,
+                             num_workers=num_workers,
                              shuffle=False,
                              pin_memory=True)
@@ -78,9 +78,9 @@ def run_synthesis(in_dir, out_dir, model_dir, hparams):
             # Parallelize model onto GPUS using workaround due to python bug
             if device.type == "cuda" and torch.cuda.device_count() > 1:
-                _, mels_out, _ = data_parallel_workaround(model, texts, mels, embeds)
+                _, mels_out, _, _ = data_parallel_workaround(model, texts, mels, embeds)
             else:
-                _, mels_out, _ = model(texts, mels, embeds)
+                _, mels_out, _, _ = model(texts, mels, embeds)

             for j, k in enumerate(idx):
                 # Note: outputs mel-spectrogram files and target ones have same names, just different folders

View File

@@ -17,7 +17,7 @@ if __name__ == "__main__":
         "Path to the directory containing your SV2TTS directory. If you specify both --in_dir and "
         "--out_dir, this argument won't be used.")
     parser.add_argument("--model_dir", type=str,
-                        default="synthesizer/saved_models/pretrained/", help=\
+                        default="synthesizer/saved_models/train3/", help=\
                         "Path to the pretrained model directory.")
     parser.add_argument("-i", "--in_dir", type=str, default=argparse.SUPPRESS, help= \
         "Path to the synthesizer directory that contains the mel spectrograms, the wavs and the "