Support training your own vocoder

pull/75/head
babysor00 2021-08-29 15:43:54 +08:00
parent 630023c7b2
commit 5950eea895
5 changed files with 37 additions and 12 deletions

28
.vscode/launch.json vendored Normal file
View File

@ -0,0 +1,28 @@
{
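// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387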
"version": "0.2.0",
"configurations": [
{
"name": "Python: Vocoder Preprocess",
"type": "python",
"request": "launch",
"program": "vocoder_preprocess.py",
"console": "integratedTerminal",
"args": [
"..\\..\\chs1"
],
},
{
"name": "Python: Vocoder Train",
"type": "python",
"request": "launch",
"program": "vocoder_train.py",
"console": "integratedTerminal",
"args": [
"dev", "..\\..\\chs1"
],
}
]
}

View File

@ -87,6 +87,3 @@ hparams = HParams(
silence_min_duration_split = 0.4, # Duration in seconds of a silence for an utterance to be split
utterance_min_duration = 1.6, # Duration in seconds below which utterances are discarded
)
def hparams_debug_string():
return str(hparams)

View File

@ -1,6 +1,5 @@
import torch
from torch.utils.data import DataLoader
from synthesizer.hparams import hparams_debug_string
from synthesizer.synthesizer_dataset import SynthesizerDataset, collate_synthesizer
from synthesizer.models.tacotron import Tacotron
from synthesizer.utils.text import text_to_sequence
@ -8,13 +7,14 @@ from synthesizer.utils.symbols import symbols
import numpy as np
from pathlib import Path
from tqdm import tqdm
import sys
def run_synthesis(in_dir, out_dir, model_dir, hparams):
# This generates ground truth-aligned mels for vocoder training
synth_dir = Path(out_dir).joinpath("mels_gta")
synth_dir.mkdir(exist_ok=True)
print(hparams_debug_string(hparams))
synth_dir.mkdir(parents=True, exist_ok=True)
print(str(hparams))
# Check for GPU
if torch.cuda.is_available():
@ -59,12 +59,12 @@ def run_synthesis(in_dir, out_dir, model_dir, hparams):
metadata_fpath = in_dir.joinpath("train.txt")
mel_dir = in_dir.joinpath("mels")
embed_dir = in_dir.joinpath("embeds")
num_workers = 0 if sys.platform.startswith("win") else 2;
dataset = SynthesizerDataset(metadata_fpath, mel_dir, embed_dir, hparams)
data_loader = DataLoader(dataset,
collate_fn=lambda batch: collate_synthesizer(batch, r),
collate_fn=lambda batch: collate_synthesizer(batch),
batch_size=hparams.synthesis_batch_size,
num_workers=2,
num_workers=num_workers,
shuffle=False,
pin_memory=True)
@ -78,9 +78,9 @@ def run_synthesis(in_dir, out_dir, model_dir, hparams):
# Parallelize model onto GPUS using workaround due to python bug
if device.type == "cuda" and torch.cuda.device_count() > 1:
_, mels_out, _ = data_parallel_workaround(model, texts, mels, embeds)
_, mels_out, _ , _ = data_parallel_workaround(model, texts, mels, embeds)
else:
_, mels_out, _ = model(texts, mels, embeds)
_, mels_out, _, _ = model(texts, mels, embeds)
for j, k in enumerate(idx):
# Note: outputs mel-spectrogram files and target ones have same names, just different folders

View File

@ -17,7 +17,7 @@ if __name__ == "__main__":
"Path to the directory containing your SV2TTS directory. If you specify both --in_dir and "
"--out_dir, this argument won't be used.")
parser.add_argument("--model_dir", type=str,
default="synthesizer/saved_models/pretrained/", help=\
default="synthesizer/saved_models/train3/", help=\
"Path to the pretrained model directory.")
parser.add_argument("-i", "--in_dir", type=str, default=argparse.SUPPRESS, help= \
"Path to the synthesizer directory that contains the mel spectrograms, the wavs and the "