Fixed an issue where an existing model could not be loaded when continuing GAN training (#549)

* Added support for the new Fre-GAN vocoder

* Improved some Fre-GAN implementation details

* Fixed the checkpoint naming so that an existing model can be loaded to continue GAN training (see the sketch after this list)

* Updated reference papers
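
For context, the root cause is a mismatch between the filenames the training scripts write and the glob pattern used when scanning for a checkpoint to resume from. A minimal sketch of the mismatch; the directory name and step number here are illustrative, not taken from this commit:

```python
import glob
import os

ckpt_dir = "checkpoints"  # illustrative directory

# After this commit the trainers write step-numbered files such as
#   g_hifigan_00050000.pt / do_hifigan_00050000.pt   (HiFi-GAN)
#   g_fregan_00050000.pt  / do_fregan_00050000.pt    (Fre-GAN)

# Old Fre-GAN scan: 'g_' plus eight wildcards and no '.pt' -> matches none of the files above.
old_fregan_pattern = os.path.join(ckpt_dir, 'g_' + '????????')
# Old HiFi-GAN scan: 'g_' plus the literal 'hifigan.pt' -> only a fixed 'g_hifigan.pt'.
old_hifigan_pattern = os.path.join(ckpt_dir, 'g_' + 'hifigan.pt')
# New scan for both: vocoder-specific prefix, eight wildcards, and the '.pt' suffix.
new_hifigan_pattern = os.path.join(ckpt_dir, 'g_hifigan_' + '????????.pt')

print(glob.glob(old_fregan_pattern))   # [] even when checkpoints exist -> training restarts at step 0
print(glob.glob(new_hifigan_pattern))  # e.g. ['checkpoints/g_hifigan_00050000.pt']
```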
Authored by flysmart on 2022-05-13 13:41:03 +08:00; committed by GitHub
parent 0caed984e3
commit 350b190662
6 changed files with 10 additions and 8 deletions


@@ -141,6 +141,7 @@
| --- | ----------- | ----- | --------------------- |
| [1803.09017](https://arxiv.org/abs/1803.09017) | GlobalStyleToken (synthesizer) | Style Tokens: Unsupervised Style Modeling, Control and Transfer in End-to-End Speech Synthesis | This repo |
| [2010.05646](https://arxiv.org/abs/2010.05646) | HiFi-GAN (vocoder) | Generative Adversarial Networks for Efficient and High Fidelity Speech Synthesis | This repo |
+| [2106.02297](https://arxiv.org/abs/2106.02297) | Fre-GAN (vocoder) | Fre-GAN: Adversarial Frequency-consistent Audio Synthesis | This repo |
| [**1806.04558**](https://arxiv.org/pdf/1806.04558.pdf) | SV2TTS | Transfer Learning from Speaker Verification to Multispeaker Text-To-Speech Synthesis | This repo |
| [1802.08435](https://arxiv.org/pdf/1802.08435.pdf) | WaveRNN (vocoder) | Efficient Neural Audio Synthesis | [fatchord/WaveRNN](https://github.com/fatchord/WaveRNN) |
| [1703.10135](https://arxiv.org/pdf/1703.10135.pdf) | Tacotron (synthesizer) | Tacotron: Towards End-to-End Speech Synthesis | [fatchord/WaveRNN](https://github.com/fatchord/WaveRNN) |


@@ -97,6 +97,7 @@ You can then try the toolbox:
| --- | ----------- | ----- | --------------------- |
| [1803.09017](https://arxiv.org/abs/1803.09017) | GlobalStyleToken (synthesizer) | Style Tokens: Unsupervised Style Modeling, Control and Transfer in End-to-End Speech Synthesis | This repo |
| [2010.05646](https://arxiv.org/abs/2010.05646) | HiFi-GAN (vocoder) | Generative Adversarial Networks for Efficient and High Fidelity Speech Synthesis | This repo |
+| [2106.02297](https://arxiv.org/abs/2106.02297) | Fre-GAN (vocoder) | Fre-GAN: Adversarial Frequency-consistent Audio Synthesis | This repo |
| [**1806.04558**](https://arxiv.org/pdf/1806.04558.pdf) | **SV2TTS** | **Transfer Learning from Speaker Verification to Multispeaker Text-To-Speech Synthesis** | This repo |
| [1802.08435](https://arxiv.org/pdf/1802.08435.pdf) | WaveRNN (vocoder) | Efficient Neural Audio Synthesis | [fatchord/WaveRNN](https://github.com/fatchord/WaveRNN) |
| [1703.10135](https://arxiv.org/pdf/1703.10135.pdf) | Tacotron (synthesizer) | Tacotron: Towards End-to-End Speech Synthesis | [fatchord/WaveRNN](https://github.com/fatchord/WaveRNN) |


@@ -51,8 +51,8 @@ def train(rank, a, h):
        print("checkpoints directory : ", a.checkpoint_path)
    if os.path.isdir(a.checkpoint_path):
-        cp_g = scan_checkpoint(a.checkpoint_path, 'g_')
-        cp_do = scan_checkpoint(a.checkpoint_path, 'do_')
+        cp_g = scan_checkpoint(a.checkpoint_path, 'g_fregan_')
+        cp_do = scan_checkpoint(a.checkpoint_path, 'do_fregan_')
    steps = 0
    if cp_g is None or cp_do is None:
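
The branch just below this hunk (not part of the diff) decides whether to resume or start fresh. A rough sketch assuming the upstream HiFi-GAN-style training script; `load_checkpoint`, `device`, and the dictionary keys are assumptions, not lines from this commit:

```python
if cp_g is None or cp_do is None:
    # No matching checkpoint found: start training from scratch.
    state_dict_do = None
    last_epoch = -1
else:
    # Resume from the newest checkpoints located by scan_checkpoint.
    state_dict_g = load_checkpoint(cp_g, device)
    state_dict_do = load_checkpoint(cp_do, device)
    generator.load_state_dict(state_dict_g['generator'])
    steps = state_dict_do['steps'] + 1
    last_epoch = state_dict_do['epoch']
```

Before this fix the scan pattern did not match the saved filenames, so this branch fell through to the from-scratch path even when checkpoints existed.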


@@ -58,7 +58,7 @@ def save_checkpoint(filepath, obj):
def scan_checkpoint(cp_dir, prefix):
-    pattern = os.path.join(cp_dir, prefix + '????????')
+    pattern = os.path.join(cp_dir, prefix + '????????.pt')
    cp_list = glob.glob(pattern)
    if len(cp_list) == 0:
        return None
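
For completeness, a self-contained sketch of the whole helper as it presumably reads after this change; the final `sorted(...)[-1]` line is not shown in the diff and is assumed from the upstream HiFi-GAN utilities:

```python
import glob
import os

def scan_checkpoint(cp_dir, prefix):
    # e.g. prefix 'g_fregan_' matches g_fregan_00025000.pt, g_fregan_00050000.pt, ...
    pattern = os.path.join(cp_dir, prefix + '????????.pt')
    cp_list = glob.glob(pattern)
    if len(cp_list) == 0:
        return None
    # Zero-padded step counters sort lexicographically, so the last entry is the newest.
    return sorted(cp_list)[-1]
```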


@@ -51,8 +51,8 @@ def train(rank, a, h):
        print("checkpoints directory : ", a.checkpoint_path)
    if os.path.isdir(a.checkpoint_path):
-        cp_g = scan_checkpoint(a.checkpoint_path, 'g_')
-        cp_do = scan_checkpoint(a.checkpoint_path, 'do_')
+        cp_g = scan_checkpoint(a.checkpoint_path, 'g_hifigan_')
+        cp_do = scan_checkpoint(a.checkpoint_path, 'do_hifigan_')
    steps = 0
    if cp_g is None or cp_do is None:
@@ -181,10 +181,10 @@ def train(rank, a, h):
            # checkpointing
            if steps % a.checkpoint_interval == 0 and steps != 0:
-                checkpoint_path = "{}/g_{:08d}.pt".format(a.checkpoint_path, steps)
+                checkpoint_path = "{}/g_hifigan_{:08d}.pt".format(a.checkpoint_path, steps)
                save_checkpoint(checkpoint_path,
                                {'generator': (generator.module if h.num_gpus > 1 else generator).state_dict()})
-                checkpoint_path = "{}/do_{:08d}.pt".format(a.checkpoint_path, steps)
+                checkpoint_path = "{}/do_hifigan_{:08d}.pt".format(a.checkpoint_path, steps)
                save_checkpoint(checkpoint_path,
                                {'mpd': (mpd.module if h.num_gpus > 1 else mpd).state_dict(),
                                 'msd': (msd.module if h.num_gpus > 1 else msd).state_dict(),
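
The `save_checkpoint` helper called here is not touched by this commit; in the upstream HiFi-GAN code it is essentially a thin wrapper around `torch.save`, roughly (a sketch, not lines from this repository):

```python
import torch

def save_checkpoint(filepath, obj):
    # Serialize the given state dicts to the step-numbered path built above.
    print("Saving checkpoint to {}".format(filepath))
    torch.save(obj, filepath)
    print("Complete.")
```

Because the generator and discriminator/optimizer states are now saved under vocoder-specific prefixes, Fre-GAN and HiFi-GAN runs can share a checkpoint directory without their files colliding.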


@@ -50,7 +50,7 @@ def save_checkpoint(filepath, obj):
def scan_checkpoint(cp_dir, prefix):
-    pattern = os.path.join(cp_dir, prefix + 'hifigan.pt')
+    pattern = os.path.join(cp_dir, prefix + '????????.pt')
    cp_list = glob.glob(pattern)
    if len(cp_list) == 0:
        return None
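
A quick way to verify that an existing run will now be picked up is to call the scanner directly; the module path and checkpoint directory below are illustrative and depend on your local layout:

```python
# Module path and directory are illustrative; adjust to your checkout.
from vocoder.hifigan.utils import scan_checkpoint

latest_g = scan_checkpoint("vocoder/saved_models/my_run", "g_hifigan_")
latest_do = scan_checkpoint("vocoder/saved_models/my_run", "do_hifigan_")
print(latest_g, latest_do)  # None means training would restart from step 0
```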