mirror of
https://github.com/babysor/MockingBird.git
synced 2024-03-22 13:11:31 +08:00
Support training your own vocoder
This commit is contained in:
parent
630023c7b2
commit
5950eea895
28
.vscode/launch.json
vendored
Normal file
28
.vscode/launch.json
vendored
Normal file
|
@ -0,0 +1,28 @@
|
||||||
|
{
|
||||||
|
// 使用 IntelliSense 了解相关属性。
|
||||||
|
// 悬停以查看现有属性的描述。
|
||||||
|
// 欲了解更多信息,请访问: https://go.microsoft.com/fwlink/?linkid=830387
|
||||||
|
"version": "0.2.0",
|
||||||
|
"configurations": [
|
||||||
|
{
|
||||||
|
"name": "Python: Vocoder Preprocess",
|
||||||
|
"type": "python",
|
||||||
|
"request": "launch",
|
||||||
|
"program": "vocoder_preprocess.py",
|
||||||
|
"console": "integratedTerminal",
|
||||||
|
"args": [
|
||||||
|
"..\\..\\chs1"
|
||||||
|
],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "Python: Vocoder Train",
|
||||||
|
"type": "python",
|
||||||
|
"request": "launch",
|
||||||
|
"program": "vocoder_train.py",
|
||||||
|
"console": "integratedTerminal",
|
||||||
|
"args": [
|
||||||
|
"dev", "..\\..\\chs1"
|
||||||
|
],
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
|
@ -87,6 +87,3 @@ hparams = HParams(
|
||||||
silence_min_duration_split = 0.4, # Duration in seconds of a silence for an utterance to be split
|
silence_min_duration_split = 0.4, # Duration in seconds of a silence for an utterance to be split
|
||||||
utterance_min_duration = 1.6, # Duration in seconds below which utterances are discarded
|
utterance_min_duration = 1.6, # Duration in seconds below which utterances are discarded
|
||||||
)
|
)
|
||||||
|
|
||||||
def hparams_debug_string():
|
|
||||||
return str(hparams)
|
|
|
@ -1,6 +1,5 @@
|
||||||
import torch
|
import torch
|
||||||
from torch.utils.data import DataLoader
|
from torch.utils.data import DataLoader
|
||||||
from synthesizer.hparams import hparams_debug_string
|
|
||||||
from synthesizer.synthesizer_dataset import SynthesizerDataset, collate_synthesizer
|
from synthesizer.synthesizer_dataset import SynthesizerDataset, collate_synthesizer
|
||||||
from synthesizer.models.tacotron import Tacotron
|
from synthesizer.models.tacotron import Tacotron
|
||||||
from synthesizer.utils.text import text_to_sequence
|
from synthesizer.utils.text import text_to_sequence
|
||||||
|
@ -8,13 +7,14 @@ from synthesizer.utils.symbols import symbols
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
|
import sys
|
||||||
|
|
||||||
|
|
||||||
def run_synthesis(in_dir, out_dir, model_dir, hparams):
|
def run_synthesis(in_dir, out_dir, model_dir, hparams):
|
||||||
# This generates ground truth-aligned mels for vocoder training
|
# This generates ground truth-aligned mels for vocoder training
|
||||||
synth_dir = Path(out_dir).joinpath("mels_gta")
|
synth_dir = Path(out_dir).joinpath("mels_gta")
|
||||||
synth_dir.mkdir(exist_ok=True)
|
synth_dir.mkdir(parents=True, exist_ok=True)
|
||||||
print(hparams_debug_string(hparams))
|
print(str(hparams))
|
||||||
|
|
||||||
# Check for GPU
|
# Check for GPU
|
||||||
if torch.cuda.is_available():
|
if torch.cuda.is_available():
|
||||||
|
@ -59,12 +59,12 @@ def run_synthesis(in_dir, out_dir, model_dir, hparams):
|
||||||
metadata_fpath = in_dir.joinpath("train.txt")
|
metadata_fpath = in_dir.joinpath("train.txt")
|
||||||
mel_dir = in_dir.joinpath("mels")
|
mel_dir = in_dir.joinpath("mels")
|
||||||
embed_dir = in_dir.joinpath("embeds")
|
embed_dir = in_dir.joinpath("embeds")
|
||||||
|
num_workers = 0 if sys.platform.startswith("win") else 2;
|
||||||
dataset = SynthesizerDataset(metadata_fpath, mel_dir, embed_dir, hparams)
|
dataset = SynthesizerDataset(metadata_fpath, mel_dir, embed_dir, hparams)
|
||||||
data_loader = DataLoader(dataset,
|
data_loader = DataLoader(dataset,
|
||||||
collate_fn=lambda batch: collate_synthesizer(batch, r),
|
collate_fn=lambda batch: collate_synthesizer(batch),
|
||||||
batch_size=hparams.synthesis_batch_size,
|
batch_size=hparams.synthesis_batch_size,
|
||||||
num_workers=2,
|
num_workers=num_workers,
|
||||||
shuffle=False,
|
shuffle=False,
|
||||||
pin_memory=True)
|
pin_memory=True)
|
||||||
|
|
||||||
|
@ -78,9 +78,9 @@ def run_synthesis(in_dir, out_dir, model_dir, hparams):
|
||||||
|
|
||||||
# Parallelize model onto GPUS using workaround due to python bug
|
# Parallelize model onto GPUS using workaround due to python bug
|
||||||
if device.type == "cuda" and torch.cuda.device_count() > 1:
|
if device.type == "cuda" and torch.cuda.device_count() > 1:
|
||||||
_, mels_out, _ = data_parallel_workaround(model, texts, mels, embeds)
|
_, mels_out, _ , _ = data_parallel_workaround(model, texts, mels, embeds)
|
||||||
else:
|
else:
|
||||||
_, mels_out, _ = model(texts, mels, embeds)
|
_, mels_out, _, _ = model(texts, mels, embeds)
|
||||||
|
|
||||||
for j, k in enumerate(idx):
|
for j, k in enumerate(idx):
|
||||||
# Note: outputs mel-spectrogram files and target ones have same names, just different folders
|
# Note: outputs mel-spectrogram files and target ones have same names, just different folders
|
||||||
|
|
|
@ -17,7 +17,7 @@ if __name__ == "__main__":
|
||||||
"Path to the directory containing your SV2TTS directory. If you specify both --in_dir and "
|
"Path to the directory containing your SV2TTS directory. If you specify both --in_dir and "
|
||||||
"--out_dir, this argument won't be used.")
|
"--out_dir, this argument won't be used.")
|
||||||
parser.add_argument("--model_dir", type=str,
|
parser.add_argument("--model_dir", type=str,
|
||||||
default="synthesizer/saved_models/pretrained/", help=\
|
default="synthesizer/saved_models/train3/", help=\
|
||||||
"Path to the pretrained model directory.")
|
"Path to the pretrained model directory.")
|
||||||
parser.add_argument("-i", "--in_dir", type=str, default=argparse.SUPPRESS, help= \
|
parser.add_argument("-i", "--in_dir", type=str, default=argparse.SUPPRESS, help= \
|
||||||
"Path to the synthesizer directory that contains the mel spectrograms, the wavs and the "
|
"Path to the synthesizer directory that contains the mel spectrograms, the wavs and the "
|
Loading…
Reference in New Issue
Block a user