parent
b402f9dbdf
commit
74a3fc97d0
|
@ -14,8 +14,8 @@
|
||||||
*.bcf
|
*.bcf
|
||||||
*.toc
|
*.toc
|
||||||
*.sh
|
*.sh
|
||||||
*/saved_models
|
data/ckpt
|
||||||
!vocoder/saved_models/pretrained/**
|
!data/ckpt/vocoder/pretrained/**
|
||||||
!encoder/saved_models/pretrained.pt
|
!data/ckpt/encoder/pretrained.pt
|
||||||
wavs
|
wavs
|
||||||
log
|
log
|
|
@ -1,9 +1,9 @@
|
||||||
from encoder.params_model import model_embedding_size as speaker_embedding_size
|
from models.encoder.params_model import model_embedding_size as speaker_embedding_size
|
||||||
from utils.argutils import print_args
|
from utils.argutils import print_args
|
||||||
from utils.modelutils import check_model_paths
|
from utils.modelutils import check_model_paths
|
||||||
from synthesizer.inference import Synthesizer
|
from models.synthesizer.inference import Synthesizer
|
||||||
from encoder import inference as encoder
|
from models.encoder import inference as encoder
|
||||||
from vocoder import inference as vocoder
|
from models.vocoder import inference as vocoder
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import soundfile as sf
|
import soundfile as sf
|
||||||
|
|
|
@ -1,7 +1,10 @@
|
||||||
from encoder.preprocess import preprocess_librispeech, preprocess_voxceleb1, preprocess_voxceleb2, preprocess_aidatatang_200zh
|
|
||||||
from utils.argutils import print_args
|
|
||||||
from pathlib import Path
|
|
||||||
import argparse
|
import argparse
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from models.encoder.preprocess import (preprocess_aidatatang_200zh,
|
||||||
|
preprocess_librispeech, preprocess_voxceleb1,
|
||||||
|
preprocess_voxceleb2)
|
||||||
|
from utils.argutils import print_args
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
class MyFormatter(argparse.ArgumentDefaultsHelpFormatter, argparse.RawDescriptionHelpFormatter):
|
class MyFormatter(argparse.ArgumentDefaultsHelpFormatter, argparse.RawDescriptionHelpFormatter):
|
|
@ -1,5 +1,5 @@
|
||||||
from utils.argutils import print_args
|
from utils.argutils import print_args
|
||||||
from encoder.train import train
|
from models.encoder.train import train
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
import argparse
|
import argparse
|
||||||
|
|
|
@ -3,7 +3,7 @@ import torch
|
||||||
import argparse
|
import argparse
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from utils.load_yaml import HpsYaml
|
from utils.load_yaml import HpsYaml
|
||||||
from ppg2mel.train.train_linglf02mel_seq2seq_oneshotvc import Solver
|
from models.ppg2mel.train.train_linglf02mel_seq2seq_oneshotvc import Solver
|
||||||
|
|
||||||
# For reproducibility, comment these may speed up training
|
# For reproducibility, comment these may speed up training
|
||||||
torch.backends.cudnn.deterministic = True
|
torch.backends.cudnn.deterministic = True
|
|
@ -1,7 +1,7 @@
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
import argparse
|
import argparse
|
||||||
|
|
||||||
from ppg2mel.preprocess import preprocess_dataset
|
from models.ppg2mel.preprocess import preprocess_dataset
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
import argparse
|
import argparse
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
from synthesizer.preprocess import preprocess_dataset
|
from models.synthesizer.preprocess import preprocess_dataset
|
||||||
from synthesizer.hparams import hparams
|
from models.synthesizer.hparams import hparams
|
||||||
from utils.argutils import print_args
|
from utils.argutils import print_args
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
import argparse
|
import argparse
|
|
@ -1,4 +1,4 @@
|
||||||
from synthesizer.preprocess import create_embeddings
|
from models.synthesizer.preprocess import create_embeddings
|
||||||
from utils.argutils import print_args
|
from utils.argutils import print_args
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
import argparse
|
import argparse
|
|
@ -1,5 +1,5 @@
|
||||||
from synthesizer.hparams import hparams
|
from models.synthesizer.hparams import hparams
|
||||||
from synthesizer.train import train
|
from models.synthesizer.train import train
|
||||||
from utils.argutils import print_args
|
from utils.argutils import print_args
|
||||||
import argparse
|
import argparse
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
from synthesizer.synthesize import run_synthesis
|
from models.synthesizer.synthesize import run_synthesis
|
||||||
from synthesizer.hparams import hparams
|
from models.synthesizer.hparams import hparams
|
||||||
from utils.argutils import print_args
|
from utils.argutils import print_args
|
||||||
import argparse
|
import argparse
|
||||||
import os
|
import os
|
|
@ -1,7 +1,7 @@
|
||||||
from utils.argutils import print_args
|
from utils.argutils import print_args
|
||||||
from vocoder.wavernn.train import train
|
from models.vocoder.wavernn.train import train
|
||||||
from vocoder.hifigan.train import train as train_hifigan
|
from models.vocoder.hifigan.train import train as train_hifigan
|
||||||
from vocoder.fregan.train import train as train_fregan
|
from models.vocoder.fregan.train import train as train_fregan
|
||||||
from utils.util import AttrDict
|
from utils.util import AttrDict
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
import argparse
|
import argparse
|
|
@ -2,22 +2,22 @@ from pydantic import BaseModel, Field
|
||||||
import os
|
import os
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from enum import Enum
|
from enum import Enum
|
||||||
from encoder import inference as encoder
|
from models.encoder import inference as encoder
|
||||||
import librosa
|
import librosa
|
||||||
from scipy.io.wavfile import write
|
from scipy.io.wavfile import write
|
||||||
import re
|
import re
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from mkgui.base.components.types import FileContent
|
from control.mkgui.base.components.types import FileContent
|
||||||
from vocoder.hifigan import inference as gan_vocoder
|
from models.vocoder.hifigan import inference as gan_vocoder
|
||||||
from synthesizer.inference import Synthesizer
|
from models.synthesizer.inference import Synthesizer
|
||||||
from typing import Any, Tuple
|
from typing import Any, Tuple
|
||||||
import matplotlib.pyplot as plt
|
import matplotlib.pyplot as plt
|
||||||
|
|
||||||
# Constants
|
# Constants
|
||||||
AUDIO_SAMPLES_DIR = f"samples{os.sep}"
|
AUDIO_SAMPLES_DIR = f"data{os.sep}samples{os.sep}"
|
||||||
SYN_MODELS_DIRT = f"synthesizer{os.sep}saved_models"
|
SYN_MODELS_DIRT = f"data{os.sep}ckpt{os.sep}synthesizer"
|
||||||
ENC_MODELS_DIRT = f"encoder{os.sep}saved_models"
|
ENC_MODELS_DIRT = f"data{os.sep}ckpt{os.sep}encoder"
|
||||||
VOC_MODELS_DIRT = f"vocoder{os.sep}saved_models"
|
VOC_MODELS_DIRT = f"data{os.sep}ckpt{os.sep}vocoder"
|
||||||
TEMP_SOURCE_AUDIO = f"wavs{os.sep}temp_source.wav"
|
TEMP_SOURCE_AUDIO = f"wavs{os.sep}temp_source.wav"
|
||||||
TEMP_RESULT_AUDIO = f"wavs{os.sep}temp_result.wav"
|
TEMP_RESULT_AUDIO = f"wavs{os.sep}temp_result.wav"
|
||||||
if not os.path.isdir("wavs"):
|
if not os.path.isdir("wavs"):
|
||||||
|
@ -31,7 +31,7 @@ if os.path.isdir(SYN_MODELS_DIRT):
|
||||||
synthesizers = Enum('synthesizers', list((file.name, file) for file in Path(SYN_MODELS_DIRT).glob("**/*.pt")))
|
synthesizers = Enum('synthesizers', list((file.name, file) for file in Path(SYN_MODELS_DIRT).glob("**/*.pt")))
|
||||||
print("Loaded synthesizer models: " + str(len(synthesizers)))
|
print("Loaded synthesizer models: " + str(len(synthesizers)))
|
||||||
else:
|
else:
|
||||||
raise Exception(f"Model folder {SYN_MODELS_DIRT} doesn't exist.")
|
raise Exception(f"Model folder {SYN_MODELS_DIRT} doesn't exist. 请将模型文件位置移动到上述位置中进行重试!")
|
||||||
|
|
||||||
if os.path.isdir(ENC_MODELS_DIRT):
|
if os.path.isdir(ENC_MODELS_DIRT):
|
||||||
encoders = Enum('encoders', list((file.name, file) for file in Path(ENC_MODELS_DIRT).glob("**/*.pt")))
|
encoders = Enum('encoders', list((file.name, file) for file in Path(ENC_MODELS_DIRT).glob("**/*.pt")))
|
|
@ -1,27 +1,26 @@
|
||||||
from synthesizer.inference import Synthesizer
|
|
||||||
from pydantic import BaseModel, Field
|
|
||||||
from encoder import inference as speacker_encoder
|
|
||||||
import torch
|
|
||||||
import os
|
import os
|
||||||
from pathlib import Path
|
|
||||||
from enum import Enum
|
from enum import Enum
|
||||||
import ppg_extractor as Extractor
|
from pathlib import Path
|
||||||
import ppg2mel as Convertor
|
|
||||||
import librosa
|
|
||||||
from scipy.io.wavfile import write
|
|
||||||
import re
|
|
||||||
import numpy as np
|
|
||||||
from mkgui.base.components.types import FileContent
|
|
||||||
from vocoder.hifigan import inference as gan_vocoder
|
|
||||||
from typing import Any, Tuple
|
from typing import Any, Tuple
|
||||||
import matplotlib.pyplot as plt
|
|
||||||
|
|
||||||
|
import librosa
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
import torch
|
||||||
|
from pydantic import BaseModel, Field
|
||||||
|
from scipy.io.wavfile import write
|
||||||
|
|
||||||
|
import models.ppg2mel as Convertor
|
||||||
|
import models.ppg_extractor as Extractor
|
||||||
|
from control.mkgui.base.components.types import FileContent
|
||||||
|
from models.encoder import inference as speacker_encoder
|
||||||
|
from models.synthesizer.inference import Synthesizer
|
||||||
|
from models.vocoder.hifigan import inference as gan_vocoder
|
||||||
|
|
||||||
# Constants
|
# Constants
|
||||||
AUDIO_SAMPLES_DIR = f'samples{os.sep}'
|
AUDIO_SAMPLES_DIR = f'data{os.sep}samples{os.sep}'
|
||||||
EXT_MODELS_DIRT = f'ppg_extractor{os.sep}saved_models'
|
EXT_MODELS_DIRT = f'data{os.sep}ckpt{os.sep}ppg_extractor'
|
||||||
CONV_MODELS_DIRT = f'ppg2mel{os.sep}saved_models'
|
CONV_MODELS_DIRT = f'data{os.sep}ckpt{os.sep}ppg2mel'
|
||||||
VOC_MODELS_DIRT = f'vocoder{os.sep}saved_models'
|
VOC_MODELS_DIRT = f'data{os.sep}ckpt{os.sep}vocoder'
|
||||||
TEMP_SOURCE_AUDIO = f'wavs{os.sep}temp_source.wav'
|
TEMP_SOURCE_AUDIO = f'wavs{os.sep}temp_source.wav'
|
||||||
TEMP_TARGET_AUDIO = f'wavs{os.sep}temp_target.wav'
|
TEMP_TARGET_AUDIO = f'wavs{os.sep}temp_target.wav'
|
||||||
TEMP_RESULT_AUDIO = f'wavs{os.sep}temp_result.wav'
|
TEMP_RESULT_AUDIO = f'wavs{os.sep}temp_result.wav'
|
||||||
|
@ -132,9 +131,10 @@ def convert(input: Input) -> Output:
|
||||||
|
|
||||||
ppg = extractor.extract_from_wav(src_wav)
|
ppg = extractor.extract_from_wav(src_wav)
|
||||||
# Import necessary dependency of Voice Conversion
|
# Import necessary dependency of Voice Conversion
|
||||||
from utils.f0_utils import compute_f0, f02lf0, compute_mean_std, get_converted_lf0uv
|
from utils.f0_utils import (compute_f0, compute_mean_std, f02lf0,
|
||||||
|
get_converted_lf0uv)
|
||||||
ref_lf0_mean, ref_lf0_std = compute_mean_std(f02lf0(compute_f0(ref_wav)))
|
ref_lf0_mean, ref_lf0_std = compute_mean_std(f02lf0(compute_f0(ref_wav)))
|
||||||
speacker_encoder.load_model(Path(f"encoder{os.sep}saved_models{os.sep}pretrained_bak_5805000.pt"))
|
speacker_encoder.load_model(Path(f"data{os.sep}ckpt{os.sep}encoder{os.sep}pretrained_bak_5805000.pt"))
|
||||||
embed = speacker_encoder.embed_utterance(ref_wav)
|
embed = speacker_encoder.embed_utterance(ref_wav)
|
||||||
lf0_uv = get_converted_lf0uv(src_wav, ref_lf0_mean, ref_lf0_std, convert=True)
|
lf0_uv = get_converted_lf0uv(src_wav, ref_lf0_mean, ref_lf0_std, convert=True)
|
||||||
min_len = min(ppg.shape[1], len(lf0_uv))
|
min_len = min(ppg.shape[1], len(lf0_uv))
|
|
@ -14,14 +14,13 @@ from fastapi.encoders import jsonable_encoder
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
from pydantic import BaseModel, ValidationError, parse_obj_as
|
from pydantic import BaseModel, ValidationError, parse_obj_as
|
||||||
|
|
||||||
from mkgui.base import Opyrator
|
from control.mkgui.base import Opyrator
|
||||||
from mkgui.base.core import name_to_title
|
from control.mkgui.base.core import name_to_title
|
||||||
from mkgui.base.ui import schema_utils
|
from . import schema_utils
|
||||||
from mkgui.base.ui.streamlit_utils import CUSTOM_STREAMLIT_CSS
|
from .streamlit_utils import CUSTOM_STREAMLIT_CSS
|
||||||
|
|
||||||
STREAMLIT_RUNNER_SNIPPET = """
|
STREAMLIT_RUNNER_SNIPPET = """
|
||||||
from mkgui.base.ui import render_streamlit_ui
|
from control.mkgui.base.ui import render_streamlit_ui
|
||||||
from mkgui.base import Opyrator
|
|
||||||
|
|
||||||
import streamlit as st
|
import streamlit as st
|
||||||
|
|
||||||
|
@ -807,18 +806,18 @@ class OutputUI:
|
||||||
|
|
||||||
def getOpyrator(mode: str) -> Opyrator:
|
def getOpyrator(mode: str) -> Opyrator:
|
||||||
if mode == None or mode.startswith('VC'):
|
if mode == None or mode.startswith('VC'):
|
||||||
from mkgui.app_vc import convert
|
from control.mkgui.app_vc import convert
|
||||||
return Opyrator(convert)
|
return Opyrator(convert)
|
||||||
if mode == None or mode.startswith('预处理'):
|
if mode == None or mode.startswith('预处理'):
|
||||||
from mkgui.preprocess import preprocess
|
from control.mkgui.preprocess import preprocess
|
||||||
return Opyrator(preprocess)
|
return Opyrator(preprocess)
|
||||||
if mode == None or mode.startswith('模型训练'):
|
if mode == None or mode.startswith('模型训练'):
|
||||||
from mkgui.train import train
|
from control.mkgui.train import train
|
||||||
return Opyrator(train)
|
return Opyrator(train)
|
||||||
if mode == None or mode.startswith('模型训练(VC)'):
|
if mode == None or mode.startswith('模型训练(VC)'):
|
||||||
from mkgui.train_vc import train_vc
|
from control.mkgui.train_vc import train_vc
|
||||||
return Opyrator(train_vc)
|
return Opyrator(train_vc)
|
||||||
from mkgui.app import synthesize
|
from control.mkgui.app import synthesize
|
||||||
return Opyrator(synthesize)
|
return Opyrator(synthesize)
|
||||||
|
|
||||||
|
|
||||||
|
@ -845,7 +844,7 @@ def render_streamlit_ui() -> None:
|
||||||
col2.title(title)
|
col2.title(title)
|
||||||
col2.markdown("欢迎使用MockingBird Web 2")
|
col2.markdown("欢迎使用MockingBird Web 2")
|
||||||
|
|
||||||
image = Image.open(path.join('mkgui', 'static', 'mb.png'))
|
image = Image.open(path.join('control','mkgui', 'static', 'mb.png'))
|
||||||
col1.image(image)
|
col1.image(image)
|
||||||
|
|
||||||
st.markdown("---")
|
st.markdown("---")
|
|
@ -6,8 +6,8 @@ from typing import Any, Tuple
|
||||||
|
|
||||||
|
|
||||||
# Constants
|
# Constants
|
||||||
EXT_MODELS_DIRT = f"ppg_extractor{os.sep}saved_models"
|
EXT_MODELS_DIRT = f"data{os.sep}ckpt{os.sep}ppg_extractor"
|
||||||
ENC_MODELS_DIRT = f"encoder{os.sep}saved_models"
|
ENC_MODELS_DIRT = f"data{os.sep}ckpt{os.sep}encoder"
|
||||||
|
|
||||||
|
|
||||||
if os.path.isdir(EXT_MODELS_DIRT):
|
if os.path.isdir(EXT_MODELS_DIRT):
|
||||||
|
@ -83,7 +83,7 @@ def preprocess(input: Input) -> Output:
|
||||||
"""Preprocess(预处理)"""
|
"""Preprocess(预处理)"""
|
||||||
finished = 0
|
finished = 0
|
||||||
if input.model == Model.VC_PPG2MEL:
|
if input.model == Model.VC_PPG2MEL:
|
||||||
from ppg2mel.preprocess import preprocess_dataset
|
from models.ppg2mel.preprocess import preprocess_dataset
|
||||||
finished = preprocess_dataset(
|
finished = preprocess_dataset(
|
||||||
datasets_root=Path(input.datasets_root),
|
datasets_root=Path(input.datasets_root),
|
||||||
dataset=input.dataset,
|
dataset=input.dataset,
|
Before Width: | Height: | Size: 5.6 KiB After Width: | Height: | Size: 5.6 KiB |
|
@ -3,17 +3,17 @@ import os
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from enum import Enum
|
from enum import Enum
|
||||||
from typing import Any
|
from typing import Any
|
||||||
from synthesizer.hparams import hparams
|
from models.synthesizer.hparams import hparams
|
||||||
from synthesizer.train import train as synt_train
|
from models.synthesizer.train import train as synt_train
|
||||||
|
|
||||||
# Constants
|
# Constants
|
||||||
SYN_MODELS_DIRT = f"synthesizer{os.sep}saved_models"
|
SYN_MODELS_DIRT = f"data{os.sep}ckpt{os.sep}synthesizer"
|
||||||
ENC_MODELS_DIRT = f"encoder{os.sep}saved_models"
|
ENC_MODELS_DIRT = f"data{os.sep}ckpt{os.sep}encoder"
|
||||||
|
|
||||||
|
|
||||||
# EXT_MODELS_DIRT = f"ppg_extractor{os.sep}saved_models"
|
# EXT_MODELS_DIRT = f"data{os.sep}ckpt{os.sep}ppg_extractor"
|
||||||
# CONV_MODELS_DIRT = f"ppg2mel{os.sep}saved_models"
|
# CONV_MODELS_DIRT = f"data{os.sep}ckpt{os.sep}ppg2mel"
|
||||||
# ENC_MODELS_DIRT = f"encoder{os.sep}saved_models"
|
# ENC_MODELS_DIRT = f"data{os.sep}ckpt{os.sep}encoder"
|
||||||
|
|
||||||
# Pre-Load models
|
# Pre-Load models
|
||||||
if os.path.isdir(SYN_MODELS_DIRT):
|
if os.path.isdir(SYN_MODELS_DIRT):
|
||||||
|
@ -96,7 +96,7 @@ def train(input: Input) -> Output:
|
||||||
synt_train(
|
synt_train(
|
||||||
input.run_id,
|
input.run_id,
|
||||||
input.input_root,
|
input.input_root,
|
||||||
f"synthesizer{os.sep}saved_models",
|
f"data{os.sep}ckpt{os.sep}synthesizer",
|
||||||
input.save_every,
|
input.save_every,
|
||||||
input.backup_every,
|
input.backup_every,
|
||||||
input.log_every,
|
input.log_every,
|
|
@ -9,9 +9,9 @@ from utils.util import AttrDict
|
||||||
import torch
|
import torch
|
||||||
|
|
||||||
# Constants
|
# Constants
|
||||||
EXT_MODELS_DIRT = f"ppg_extractor{os.sep}saved_models"
|
EXT_MODELS_DIRT = f"data{os.sep}ckpt{os.sep}ppg_extractor"
|
||||||
CONV_MODELS_DIRT = f"ppg2mel{os.sep}saved_models"
|
CONV_MODELS_DIRT = f"data{os.sep}ckpt{os.sep}ppg2mel"
|
||||||
ENC_MODELS_DIRT = f"encoder{os.sep}saved_models"
|
ENC_MODELS_DIRT = f"data{os.sep}ckpt{os.sep}encoder"
|
||||||
|
|
||||||
|
|
||||||
if os.path.isdir(EXT_MODELS_DIRT):
|
if os.path.isdir(EXT_MODELS_DIRT):
|
||||||
|
@ -144,7 +144,7 @@ def train_vc(input: Input) -> Output:
|
||||||
if torch.cuda.is_available():
|
if torch.cuda.is_available():
|
||||||
torch.cuda.manual_seed_all(input.seed)
|
torch.cuda.manual_seed_all(input.seed)
|
||||||
mode = "train"
|
mode = "train"
|
||||||
from ppg2mel.train.train_linglf02mel_seq2seq_oneshotvc import Solver
|
from models.ppg2mel.train.train_linglf02mel_seq2seq_oneshotvc import Solver
|
||||||
solver = Solver(config, params, mode)
|
solver = Solver(config, params, mode)
|
||||||
solver.load_data()
|
solver.load_data()
|
||||||
solver.set_model()
|
solver.set_model()
|
|
@ -1,12 +1,12 @@
|
||||||
from toolbox.ui import UI
|
from control.toolbox.ui import UI
|
||||||
from encoder import inference as encoder
|
from models.encoder import inference as encoder
|
||||||
from synthesizer.inference import Synthesizer
|
from models.synthesizer.inference import Synthesizer
|
||||||
from vocoder.wavernn import inference as rnn_vocoder
|
from models.vocoder.wavernn import inference as rnn_vocoder
|
||||||
from vocoder.hifigan import inference as gan_vocoder
|
from models.vocoder.hifigan import inference as gan_vocoder
|
||||||
from vocoder.fregan import inference as fgan_vocoder
|
from models.vocoder.fregan import inference as fgan_vocoder
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from time import perf_counter as timer
|
from time import perf_counter as timer
|
||||||
from toolbox.utterance import Utterance
|
from control.toolbox.utterance import Utterance
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import traceback
|
import traceback
|
||||||
import sys
|
import sys
|
||||||
|
@ -396,7 +396,7 @@ class Toolbox:
|
||||||
self.ui.log("Loading the extractor %s... " % model_fpath)
|
self.ui.log("Loading the extractor %s... " % model_fpath)
|
||||||
self.ui.set_loading(1)
|
self.ui.set_loading(1)
|
||||||
start = timer()
|
start = timer()
|
||||||
import ppg_extractor as extractor
|
import models.ppg_extractor as extractor
|
||||||
self.extractor = extractor.load_model(model_fpath)
|
self.extractor = extractor.load_model(model_fpath)
|
||||||
self.ui.log("Done (%dms)." % int(1000 * (timer() - start)), "append")
|
self.ui.log("Done (%dms)." % int(1000 * (timer() - start)), "append")
|
||||||
self.ui.set_loading(0)
|
self.ui.set_loading(0)
|
||||||
|
@ -408,7 +408,7 @@ class Toolbox:
|
||||||
self.ui.log("Loading the convertor %s... " % model_fpath)
|
self.ui.log("Loading the convertor %s... " % model_fpath)
|
||||||
self.ui.set_loading(1)
|
self.ui.set_loading(1)
|
||||||
start = timer()
|
start = timer()
|
||||||
import ppg2mel as convertor
|
import models.ppg2mel as convertor
|
||||||
self.convertor = convertor.load_model( model_fpath)
|
self.convertor = convertor.load_model( model_fpath)
|
||||||
self.ui.log("Done (%dms)." % int(1000 * (timer() - start)), "append")
|
self.ui.log("Done (%dms)." % int(1000 * (timer() - start)), "append")
|
||||||
self.ui.set_loading(0)
|
self.ui.set_loading(0)
|
Before Width: | Height: | Size: 5.6 KiB After Width: | Height: | Size: 5.6 KiB |
|
@ -4,8 +4,8 @@ from PyQt5.QtWidgets import *
|
||||||
import matplotlib.pyplot as plt
|
import matplotlib.pyplot as plt
|
||||||
from matplotlib.backends.backend_qt5agg import FigureCanvasQTAgg as FigureCanvas
|
from matplotlib.backends.backend_qt5agg import FigureCanvasQTAgg as FigureCanvas
|
||||||
from matplotlib.figure import Figure
|
from matplotlib.figure import Figure
|
||||||
from encoder.inference import plot_embedding_as_heatmap
|
from models.encoder.inference import plot_embedding_as_heatmap
|
||||||
from toolbox.utterance import Utterance
|
from control.toolbox.utterance import Utterance
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import List, Set
|
from typing import List, Set
|
||||||
import sounddevice as sd
|
import sounddevice as sd
|
|
@ -1,5 +1,5 @@
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from toolbox import Toolbox
|
from control.toolbox import Toolbox
|
||||||
from utils.argutils import print_args
|
from utils.argutils import print_args
|
||||||
from utils.modelutils import check_model_paths
|
from utils.modelutils import check_model_paths
|
||||||
import argparse
|
import argparse
|
||||||
|
|
|
@ -1,2 +0,0 @@
|
||||||
from encoder.data_objects.speaker_verification_dataset import SpeakerVerificationDataset
|
|
||||||
from encoder.data_objects.speaker_verification_dataset import SpeakerVerificationDataLoader
|
|
Binary file not shown.
14
gen_voice.py
14
gen_voice.py
|
@ -1,23 +1,15 @@
|
||||||
from encoder.params_model import model_embedding_size as speaker_embedding_size
|
from models.synthesizer.inference import Synthesizer
|
||||||
from utils.argutils import print_args
|
from models.encoder import inference as encoder
|
||||||
from utils.modelutils import check_model_paths
|
from models.vocoder.hifigan import inference as gan_vocoder
|
||||||
from synthesizer.inference import Synthesizer
|
|
||||||
from encoder import inference as encoder
|
|
||||||
from vocoder.wavernn import inference as rnn_vocoder
|
|
||||||
from vocoder.hifigan import inference as gan_vocoder
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import soundfile as sf
|
import soundfile as sf
|
||||||
import librosa
|
|
||||||
import argparse
|
|
||||||
import torch
|
import torch
|
||||||
import sys
|
import sys
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
import cn2an
|
import cn2an
|
||||||
import glob
|
|
||||||
|
|
||||||
from audioread.exceptions import NoBackendError
|
|
||||||
vocoder = gan_vocoder
|
vocoder = gan_vocoder
|
||||||
|
|
||||||
def gen_one_wav(synthesizer, in_fpath, embed, texts, file_name, seq):
|
def gen_one_wav(synthesizer, in_fpath, embed, texts, file_name, seq):
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
from scipy.ndimage.morphology import binary_dilation
|
from scipy.ndimage.morphology import binary_dilation
|
||||||
from encoder.params_data import *
|
from models.encoder.params_data import *
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Optional, Union
|
from typing import Optional, Union
|
||||||
from warnings import warn
|
from warnings import warn
|
|
@ -0,0 +1,2 @@
|
||||||
|
from models.encoder.data_objects.speaker_verification_dataset import SpeakerVerificationDataset
|
||||||
|
from models.encoder.data_objects.speaker_verification_dataset import SpeakerVerificationDataLoader
|
|
@ -1,5 +1,5 @@
|
||||||
from encoder.data_objects.random_cycler import RandomCycler
|
from models.encoder.data_objects.random_cycler import RandomCycler
|
||||||
from encoder.data_objects.utterance import Utterance
|
from models.encoder.data_objects.utterance import Utterance
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
# Contains the set of utterances of a single speaker
|
# Contains the set of utterances of a single speaker
|
|
@ -1,6 +1,6 @@
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from typing import List
|
from typing import List
|
||||||
from encoder.data_objects.speaker import Speaker
|
from models.encoder.data_objects.speaker import Speaker
|
||||||
|
|
||||||
class SpeakerBatch:
|
class SpeakerBatch:
|
||||||
def __init__(self, speakers: List[Speaker], utterances_per_speaker: int, n_frames: int):
|
def __init__(self, speakers: List[Speaker], utterances_per_speaker: int, n_frames: int):
|
|
@ -1,7 +1,7 @@
|
||||||
from encoder.data_objects.random_cycler import RandomCycler
|
from models.encoder.data_objects.random_cycler import RandomCycler
|
||||||
from encoder.data_objects.speaker_batch import SpeakerBatch
|
from models.encoder.data_objects.speaker_batch import SpeakerBatch
|
||||||
from encoder.data_objects.speaker import Speaker
|
from models.encoder.data_objects.speaker import Speaker
|
||||||
from encoder.params_data import partials_n_frames
|
from models.encoder.params_data import partials_n_frames
|
||||||
from torch.utils.data import Dataset, DataLoader
|
from torch.utils.data import Dataset, DataLoader
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
|
@ -1,8 +1,8 @@
|
||||||
from encoder.params_data import *
|
from models.encoder.params_data import *
|
||||||
from encoder.model import SpeakerEncoder
|
from models.encoder.model import SpeakerEncoder
|
||||||
from encoder.audio import preprocess_wav # We want to expose this function from here
|
from models.encoder.audio import preprocess_wav # We want to expose this function from here
|
||||||
from matplotlib import cm
|
from matplotlib import cm
|
||||||
from encoder import audio
|
from models.encoder import audio
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
import matplotlib.pyplot as plt
|
import matplotlib.pyplot as plt
|
||||||
import numpy as np
|
import numpy as np
|
|
@ -1,5 +1,5 @@
|
||||||
from encoder.params_model import *
|
from models.encoder.params_model import *
|
||||||
from encoder.params_data import *
|
from models.encoder.params_data import *
|
||||||
from scipy.interpolate import interp1d
|
from scipy.interpolate import interp1d
|
||||||
from sklearn.metrics import roc_curve
|
from sklearn.metrics import roc_curve
|
||||||
from torch.nn.utils import clip_grad_norm_
|
from torch.nn.utils import clip_grad_norm_
|
|
@ -1,8 +1,8 @@
|
||||||
from multiprocess.pool import ThreadPool
|
from multiprocess.pool import ThreadPool
|
||||||
from encoder.params_data import *
|
from models.encoder.params_data import *
|
||||||
from encoder.config import librispeech_datasets, anglophone_nationalites
|
from models.encoder.config import librispeech_datasets, anglophone_nationalites
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from encoder import audio
|
from models.encoder import audio
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
@ -22,7 +22,7 @@ class DatasetLog:
|
||||||
self._log_params()
|
self._log_params()
|
||||||
|
|
||||||
def _log_params(self):
|
def _log_params(self):
|
||||||
from encoder import params_data
|
from models.encoder import params_data
|
||||||
self.write_line("Parameter values:")
|
self.write_line("Parameter values:")
|
||||||
for param_name in (p for p in dir(params_data) if not p.startswith("__")):
|
for param_name in (p for p in dir(params_data) if not p.startswith("__")):
|
||||||
value = getattr(params_data, param_name)
|
value = getattr(params_data, param_name)
|
|
@ -1,7 +1,7 @@
|
||||||
from encoder.visualizations import Visualizations
|
from models.encoder.visualizations import Visualizations
|
||||||
from encoder.data_objects import SpeakerVerificationDataLoader, SpeakerVerificationDataset
|
from models.encoder.data_objects import SpeakerVerificationDataLoader, SpeakerVerificationDataset
|
||||||
from encoder.params_model import *
|
from models.encoder.params_model import *
|
||||||
from encoder.model import SpeakerEncoder
|
from models.encoder.model import SpeakerEncoder
|
||||||
from utils.profiler import Profiler
|
from utils.profiler import Profiler
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
import torch
|
import torch
|
|
@ -1,4 +1,4 @@
|
||||||
from encoder.data_objects.speaker_verification_dataset import SpeakerVerificationDataset
|
from models.encoder.data_objects.speaker_verification_dataset import SpeakerVerificationDataset
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from time import perf_counter as timer
|
from time import perf_counter as timer
|
||||||
import matplotlib.pyplot as plt
|
import matplotlib.pyplot as plt
|
||||||
|
@ -65,8 +65,8 @@ class Visualizations:
|
||||||
def log_params(self):
|
def log_params(self):
|
||||||
if self.disabled:
|
if self.disabled:
|
||||||
return
|
return
|
||||||
from encoder import params_data
|
from models.encoder import params_data
|
||||||
from encoder import params_model
|
from models.encoder import params_model
|
||||||
param_string = "<b>Model parameters</b>:<br>"
|
param_string = "<b>Model parameters</b>:<br>"
|
||||||
for param_name in (p for p in dir(params_model) if not p.startswith("__")):
|
for param_name in (p for p in dir(params_model) if not p.startswith("__")):
|
||||||
value = getattr(params_model, param_name)
|
value = getattr(params_model, param_name)
|
|
@ -7,10 +7,10 @@ from pathlib import Path
|
||||||
import soundfile
|
import soundfile
|
||||||
import resampy
|
import resampy
|
||||||
|
|
||||||
from ppg_extractor import load_model
|
from models.ppg_extractor import load_model
|
||||||
import encoder.inference as Encoder
|
import encoder.inference as Encoder
|
||||||
from encoder.audio import preprocess_wav
|
from models.encoder.audio import preprocess_wav
|
||||||
from encoder import audio
|
from models.encoder import audio
|
||||||
from utils.f0_utils import compute_f0
|
from utils.f0_utils import compute_f0
|
||||||
|
|
||||||
from torch.multiprocessing import Pool, cpu_count
|
from torch.multiprocessing import Pool, cpu_count
|
|
@ -3,7 +3,7 @@ import torch
|
||||||
import argparse
|
import argparse
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from utils.load_yaml import HpsYaml
|
from utils.load_yaml import HpsYaml
|
||||||
from ppg2mel.train.train_linglf02mel_seq2seq_oneshotvc import Solver
|
from models.ppg2mel.train.train_linglf02mel_seq2seq_oneshotvc import Solver
|
||||||
|
|
||||||
# For reproducibility, comment these may speed up training
|
# For reproducibility, comment these may speed up training
|
||||||
torch.backends.cudnn.deterministic = True
|
torch.backends.cudnn.deterministic = True
|
|
@ -14,7 +14,7 @@ from utils.data_load import OneshotVcDataset, MultiSpkVcCollate
|
||||||
from .loss import MaskedMSELoss
|
from .loss import MaskedMSELoss
|
||||||
from .optim import Optimizer
|
from .optim import Optimizer
|
||||||
from utils.util import human_format
|
from utils.util import human_format
|
||||||
from ppg2mel import MelDecoderMOLv2
|
from models.ppg2mel import MelDecoderMOLv2
|
||||||
|
|
||||||
|
|
||||||
class Solver(BaseSolver):
|
class Solver(BaseSolver):
|
|
@ -1,10 +1,10 @@
|
||||||
import torch
|
import torch
|
||||||
from synthesizer import audio
|
from models.synthesizer import audio
|
||||||
from synthesizer.hparams import hparams
|
from models.synthesizer.hparams import hparams
|
||||||
from synthesizer.models.tacotron import Tacotron
|
from models.synthesizer.models.tacotron import Tacotron
|
||||||
from synthesizer.utils.symbols import symbols
|
from models.synthesizer.utils.symbols import symbols
|
||||||
from synthesizer.utils.text import text_to_sequence
|
from models.synthesizer.utils.text import text_to_sequence
|
||||||
from vocoder.display import simple_table
|
from models.vocoder.display import simple_table
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Union, List
|
from typing import Union, List
|
||||||
import numpy as np
|
import numpy as np
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue