mirror of
https://github.com/babysor/MockingBird.git
synced 2024-03-22 13:11:31 +08:00
Fixed existing models failing to load when resuming GAN model training (#549)
* Added support for the new Fre-GAN vocoder
* Improved some Fre-GAN details
* Fixed existing models failing to load when resuming GAN training
* Updated reference papers
This commit is contained in:
parent
0caed984e3
commit
350b190662
|
@ -141,6 +141,7 @@
|
||||||
| --- | ----------- | ----- | --------------------- |
|
| --- | ----------- | ----- | --------------------- |
|
||||||
| [1803.09017](https://arxiv.org/abs/1803.09017) | GlobalStyleToken (synthesizer)| Style Tokens: Unsupervised Style Modeling, Control and Transfer in End-to-End Speech Synthesis | 本代码库 |
|
| [1803.09017](https://arxiv.org/abs/1803.09017) | GlobalStyleToken (synthesizer)| Style Tokens: Unsupervised Style Modeling, Control and Transfer in End-to-End Speech Synthesis | 本代码库 |
|
||||||
| [2010.05646](https://arxiv.org/abs/2010.05646) | HiFi-GAN (vocoder)| Generative Adversarial Networks for Efficient and High Fidelity Speech Synthesis | 本代码库 |
|
| [2010.05646](https://arxiv.org/abs/2010.05646) | HiFi-GAN (vocoder)| Generative Adversarial Networks for Efficient and High Fidelity Speech Synthesis | 本代码库 |
|
||||||
|
| [2106.02297](https://arxiv.org/abs/2106.02297) | Fre-GAN (vocoder)| Fre-GAN: Adversarial Frequency-consistent Audio Synthesis | 本代码库 |
|
||||||
|[**1806.04558**](https://arxiv.org/pdf/1806.04558.pdf) | SV2TTS | Transfer Learning from Speaker Verification to Multispeaker Text-To-Speech Synthesis | 本代码库 |
|
|[**1806.04558**](https://arxiv.org/pdf/1806.04558.pdf) | SV2TTS | Transfer Learning from Speaker Verification to Multispeaker Text-To-Speech Synthesis | 本代码库 |
|
||||||
|[1802.08435](https://arxiv.org/pdf/1802.08435.pdf) | WaveRNN (vocoder) | Efficient Neural Audio Synthesis | [fatchord/WaveRNN](https://github.com/fatchord/WaveRNN) |
|
|[1802.08435](https://arxiv.org/pdf/1802.08435.pdf) | WaveRNN (vocoder) | Efficient Neural Audio Synthesis | [fatchord/WaveRNN](https://github.com/fatchord/WaveRNN) |
|
||||||
|[1703.10135](https://arxiv.org/pdf/1703.10135.pdf) | Tacotron (synthesizer) | Tacotron: Towards End-to-End Speech Synthesis | [fatchord/WaveRNN](https://github.com/fatchord/WaveRNN) |
|
|[1703.10135](https://arxiv.org/pdf/1703.10135.pdf) | Tacotron (synthesizer) | Tacotron: Towards End-to-End Speech Synthesis | [fatchord/WaveRNN](https://github.com/fatchord/WaveRNN) |
|
||||||
|
|
|
@ -97,6 +97,7 @@ You can then try the toolbox:
|
||||||
| --- | ----------- | ----- | --------------------- |
|
| --- | ----------- | ----- | --------------------- |
|
||||||
| [1803.09017](https://arxiv.org/abs/1803.09017) | GlobalStyleToken (synthesizer)| Style Tokens: Unsupervised Style Modeling, Control and Transfer in End-to-End Speech Synthesis | This repo |
|
| [1803.09017](https://arxiv.org/abs/1803.09017) | GlobalStyleToken (synthesizer)| Style Tokens: Unsupervised Style Modeling, Control and Transfer in End-to-End Speech Synthesis | This repo |
|
||||||
| [2010.05646](https://arxiv.org/abs/2010.05646) | HiFi-GAN (vocoder)| Generative Adversarial Networks for Efficient and High Fidelity Speech Synthesis | This repo |
|
| [2010.05646](https://arxiv.org/abs/2010.05646) | HiFi-GAN (vocoder)| Generative Adversarial Networks for Efficient and High Fidelity Speech Synthesis | This repo |
|
||||||
|
| [2106.02297](https://arxiv.org/abs/2106.02297) | Fre-GAN (vocoder)| Fre-GAN: Adversarial Frequency-consistent Audio Synthesis | This repo |
|
||||||
|[**1806.04558**](https://arxiv.org/pdf/1806.04558.pdf) | **SV2TTS** | **Transfer Learning from Speaker Verification to Multispeaker Text-To-Speech Synthesis** | This repo |
|
|[**1806.04558**](https://arxiv.org/pdf/1806.04558.pdf) | **SV2TTS** | **Transfer Learning from Speaker Verification to Multispeaker Text-To-Speech Synthesis** | This repo |
|
||||||
|[1802.08435](https://arxiv.org/pdf/1802.08435.pdf) | WaveRNN (vocoder) | Efficient Neural Audio Synthesis | [fatchord/WaveRNN](https://github.com/fatchord/WaveRNN) |
|
|[1802.08435](https://arxiv.org/pdf/1802.08435.pdf) | WaveRNN (vocoder) | Efficient Neural Audio Synthesis | [fatchord/WaveRNN](https://github.com/fatchord/WaveRNN) |
|
||||||
|[1703.10135](https://arxiv.org/pdf/1703.10135.pdf) | Tacotron (synthesizer) | Tacotron: Towards End-to-End Speech Synthesis | [fatchord/WaveRNN](https://github.com/fatchord/WaveRNN) |
|
|[1703.10135](https://arxiv.org/pdf/1703.10135.pdf) | Tacotron (synthesizer) | Tacotron: Towards End-to-End Speech Synthesis | [fatchord/WaveRNN](https://github.com/fatchord/WaveRNN) |
|
||||||
|
|
|
@ -51,8 +51,8 @@ def train(rank, a, h):
|
||||||
print("checkpoints directory : ", a.checkpoint_path)
|
print("checkpoints directory : ", a.checkpoint_path)
|
||||||
|
|
||||||
if os.path.isdir(a.checkpoint_path):
|
if os.path.isdir(a.checkpoint_path):
|
||||||
cp_g = scan_checkpoint(a.checkpoint_path, 'g_')
|
cp_g = scan_checkpoint(a.checkpoint_path, 'g_fregan_')
|
||||||
cp_do = scan_checkpoint(a.checkpoint_path, 'do_')
|
cp_do = scan_checkpoint(a.checkpoint_path, 'do_fregan_')
|
||||||
|
|
||||||
steps = 0
|
steps = 0
|
||||||
if cp_g is None or cp_do is None:
|
if cp_g is None or cp_do is None:
|
||||||
|
|
|
@ -58,7 +58,7 @@ def save_checkpoint(filepath, obj):
|
||||||
|
|
||||||
|
|
||||||
def scan_checkpoint(cp_dir, prefix):
|
def scan_checkpoint(cp_dir, prefix):
|
||||||
pattern = os.path.join(cp_dir, prefix + '????????')
|
pattern = os.path.join(cp_dir, prefix + '????????.pt')
|
||||||
cp_list = glob.glob(pattern)
|
cp_list = glob.glob(pattern)
|
||||||
if len(cp_list) == 0:
|
if len(cp_list) == 0:
|
||||||
return None
|
return None
|
||||||
|
|
|
@ -51,8 +51,8 @@ def train(rank, a, h):
|
||||||
print("checkpoints directory : ", a.checkpoint_path)
|
print("checkpoints directory : ", a.checkpoint_path)
|
||||||
|
|
||||||
if os.path.isdir(a.checkpoint_path):
|
if os.path.isdir(a.checkpoint_path):
|
||||||
cp_g = scan_checkpoint(a.checkpoint_path, 'g_')
|
cp_g = scan_checkpoint(a.checkpoint_path, 'g_hifigan_')
|
||||||
cp_do = scan_checkpoint(a.checkpoint_path, 'do_')
|
cp_do = scan_checkpoint(a.checkpoint_path, 'do_hifigan_')
|
||||||
|
|
||||||
steps = 0
|
steps = 0
|
||||||
if cp_g is None or cp_do is None:
|
if cp_g is None or cp_do is None:
|
||||||
|
@ -181,10 +181,10 @@ def train(rank, a, h):
|
||||||
|
|
||||||
# checkpointing
|
# checkpointing
|
||||||
if steps % a.checkpoint_interval == 0 and steps != 0:
|
if steps % a.checkpoint_interval == 0 and steps != 0:
|
||||||
checkpoint_path = "{}/g_{:08d}.pt".format(a.checkpoint_path, steps)
|
checkpoint_path = "{}/g_hifigan_{:08d}.pt".format(a.checkpoint_path, steps)
|
||||||
save_checkpoint(checkpoint_path,
|
save_checkpoint(checkpoint_path,
|
||||||
{'generator': (generator.module if h.num_gpus > 1 else generator).state_dict()})
|
{'generator': (generator.module if h.num_gpus > 1 else generator).state_dict()})
|
||||||
checkpoint_path = "{}/do_{:08d}.pt".format(a.checkpoint_path, steps)
|
checkpoint_path = "{}/do_hifigan_{:08d}.pt".format(a.checkpoint_path, steps)
|
||||||
save_checkpoint(checkpoint_path,
|
save_checkpoint(checkpoint_path,
|
||||||
{'mpd': (mpd.module if h.num_gpus > 1 else mpd).state_dict(),
|
{'mpd': (mpd.module if h.num_gpus > 1 else mpd).state_dict(),
|
||||||
'msd': (msd.module if h.num_gpus > 1 else msd).state_dict(),
|
'msd': (msd.module if h.num_gpus > 1 else msd).state_dict(),
|
||||||
|
|
|
@ -50,7 +50,7 @@ def save_checkpoint(filepath, obj):
|
||||||
|
|
||||||
|
|
||||||
def scan_checkpoint(cp_dir, prefix):
|
def scan_checkpoint(cp_dir, prefix):
|
||||||
pattern = os.path.join(cp_dir, prefix + 'hifigan.pt')
|
pattern = os.path.join(cp_dir, prefix + '????????.pt')
|
||||||
cp_list = glob.glob(pattern)
|
cp_list = glob.glob(pattern)
|
||||||
if len(cp_list) == 0:
|
if len(cp_list) == 0:
|
||||||
return None
|
return None
|
||||||
|
|
Loading…
Reference in New Issue
Block a user