Mirror of https://github.com/babysor/MockingBird.git, synced 2024-03-22 13:11:31 +08:00
Refactor Project to 3 parts: Models, Control, Data

Need readme

This commit is contained in:
parent b402f9dbdf
commit 74a3fc97d0
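
In short: model code moves under models/, the GUI/toolbox code moves under control/, and samples plus checkpoints move under data/. As orientation, a minimal sketch of the import and path mapping implied by the hunks below (every name is taken from this diff; the sketch itself is not part of the commit):

# Old layout:
#   from encoder import inference as encoder
#   from synthesizer.inference import Synthesizer
#   from vocoder.hifigan import inference as gan_vocoder
#   SYN_MODELS_DIRT = f"synthesizer{os.sep}saved_models"
# New layout:
import os

from control.toolbox import Toolbox
from models.encoder import inference as encoder
from models.synthesizer.inference import Synthesizer
from models.vocoder.hifigan import inference as gan_vocoder

SYN_MODELS_DIRT = f"data{os.sep}ckpt{os.sep}synthesizer"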
.gitignore (vendored) | 6
@@ -14,8 +14,8 @@
 *.bcf
 *.toc
 *.sh
-*/saved_models
-!vocoder/saved_models/pretrained/**
-!encoder/saved_models/pretrained.pt
+data/ckpt
+!data/ckpt/vocoder/pretrained/**
+!data/ckpt/encoder/pretrained.pt
 wavs
 log
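Read together with the constants changed in later hunks, the whitelist above implies this checkpoint layout under data/ (a sketch; only the whitelisted entries are guaranteed by this hunk):

data/
  ckpt/
    encoder/pretrained.pt      # whitelisted, stays in the repo
    vocoder/pretrained/**      # whitelisted, stays in the repo
    synthesizer/               # ignored; user-trained checkpoints land here
wavs/                          # ignored temporary audio
log/                           # ignored logs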
@@ -1,9 +1,9 @@
-from encoder.params_model import model_embedding_size as speaker_embedding_size
+from models.encoder.params_model import model_embedding_size as speaker_embedding_size
 from utils.argutils import print_args
 from utils.modelutils import check_model_paths
-from synthesizer.inference import Synthesizer
-from encoder import inference as encoder
-from vocoder import inference as vocoder
+from models.synthesizer.inference import Synthesizer
+from models.encoder import inference as encoder
+from models.vocoder import inference as vocoder
 from pathlib import Path
 import numpy as np
 import soundfile as sf
@@ -1,7 +1,10 @@
-from encoder.preprocess import preprocess_librispeech, preprocess_voxceleb1, preprocess_voxceleb2, preprocess_aidatatang_200zh
-from utils.argutils import print_args
-from pathlib import Path
 import argparse
+from pathlib import Path
+
+from models.encoder.preprocess import (preprocess_aidatatang_200zh,
+                                       preprocess_librispeech, preprocess_voxceleb1,
+                                       preprocess_voxceleb2)
+from utils.argutils import print_args
 
 if __name__ == "__main__":
     class MyFormatter(argparse.ArgumentDefaultsHelpFormatter, argparse.RawDescriptionHelpFormatter):
@@ -1,5 +1,5 @@
 from utils.argutils import print_args
-from encoder.train import train
+from models.encoder.train import train
 from pathlib import Path
 import argparse
@@ -3,7 +3,7 @@ import torch
 import argparse
 import numpy as np
 from utils.load_yaml import HpsYaml
-from ppg2mel.train.train_linglf02mel_seq2seq_oneshotvc import Solver
+from models.ppg2mel.train.train_linglf02mel_seq2seq_oneshotvc import Solver
 
 # For reproducibility, comment these may speed up training
 torch.backends.cudnn.deterministic = True
@@ -1,7 +1,7 @@
-from pathlib import Path
-import argparse
-
-from ppg2mel.preprocess import preprocess_dataset
+from models.ppg2mel.preprocess import preprocess_dataset
+from pathlib import Path
+import argparse
+
@@ -1,5 +1,5 @@
-from synthesizer.preprocess import preprocess_dataset
-from synthesizer.hparams import hparams
+from models.synthesizer.preprocess import preprocess_dataset
+from models.synthesizer.hparams import hparams
 from utils.argutils import print_args
 from pathlib import Path
 import argparse
@@ -1,4 +1,4 @@
-from synthesizer.preprocess import create_embeddings
+from models.synthesizer.preprocess import create_embeddings
 from utils.argutils import print_args
 from pathlib import Path
 import argparse
@@ -1,5 +1,5 @@
-from synthesizer.hparams import hparams
-from synthesizer.train import train
+from models.synthesizer.hparams import hparams
+from models.synthesizer.train import train
 from utils.argutils import print_args
 import argparse
@@ -1,5 +1,5 @@
-from synthesizer.synthesize import run_synthesis
-from synthesizer.hparams import hparams
+from models.synthesizer.synthesize import run_synthesis
+from models.synthesizer.hparams import hparams
 from utils.argutils import print_args
 import argparse
 import os
@@ -1,7 +1,7 @@
 from utils.argutils import print_args
-from vocoder.wavernn.train import train
-from vocoder.hifigan.train import train as train_hifigan
-from vocoder.fregan.train import train as train_fregan
+from models.vocoder.wavernn.train import train
+from models.vocoder.hifigan.train import train as train_hifigan
+from models.vocoder.fregan.train import train as train_fregan
 from utils.util import AttrDict
 from pathlib import Path
 import argparse
@@ -2,22 +2,22 @@ from pydantic import BaseModel, Field
 import os
 from pathlib import Path
 from enum import Enum
-from encoder import inference as encoder
+from models.encoder import inference as encoder
 import librosa
 from scipy.io.wavfile import write
 import re
 import numpy as np
-from mkgui.base.components.types import FileContent
-from vocoder.hifigan import inference as gan_vocoder
-from synthesizer.inference import Synthesizer
+from control.mkgui.base.components.types import FileContent
+from models.vocoder.hifigan import inference as gan_vocoder
+from models.synthesizer.inference import Synthesizer
 from typing import Any, Tuple
 import matplotlib.pyplot as plt
 
 # Constants
-AUDIO_SAMPLES_DIR = f"samples{os.sep}"
-SYN_MODELS_DIRT = f"synthesizer{os.sep}saved_models"
-ENC_MODELS_DIRT = f"encoder{os.sep}saved_models"
-VOC_MODELS_DIRT = f"vocoder{os.sep}saved_models"
+AUDIO_SAMPLES_DIR = f"data{os.sep}samples{os.sep}"
+SYN_MODELS_DIRT = f"data{os.sep}ckpt{os.sep}synthesizer"
+ENC_MODELS_DIRT = f"data{os.sep}ckpt{os.sep}encoder"
+VOC_MODELS_DIRT = f"data{os.sep}ckpt{os.sep}vocoder"
 TEMP_SOURCE_AUDIO = f"wavs{os.sep}temp_source.wav"
 TEMP_RESULT_AUDIO = f"wavs{os.sep}temp_result.wav"
 if not os.path.isdir("wavs"):
@@ -31,7 +31,7 @@ if os.path.isdir(SYN_MODELS_DIRT):
     synthesizers = Enum('synthesizers', list((file.name, file) for file in Path(SYN_MODELS_DIRT).glob("**/*.pt")))
     print("Loaded synthesizer models: " + str(len(synthesizers)))
 else:
-    raise Exception(f"Model folder {SYN_MODELS_DIRT} doesn't exist.")
+    raise Exception(f"Model folder {SYN_MODELS_DIRT} doesn't exist. 请将模型文件位置移动到上述位置中进行重试!")
 
 if os.path.isdir(ENC_MODELS_DIRT):
     encoders = Enum('encoders', list((file.name, file) for file in Path(ENC_MODELS_DIRT).glob("**/*.pt")))
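A note on the pattern in this hunk: Enum('synthesizers', ...) is Python's functional enum API, which creates one member per (name, value) pair, so len(synthesizers) counts the discovered checkpoints. A self-contained sketch (the directory constant is from this commit; the checkpoint files are assumed to exist):

import os
from enum import Enum
from pathlib import Path

SYN_MODELS_DIRT = f"data{os.sep}ckpt{os.sep}synthesizer"

if os.path.isdir(SYN_MODELS_DIRT):
    # One enum member per *.pt file: member name = file name, value = its Path.
    synthesizers = Enum('synthesizers', list((f.name, f) for f in Path(SYN_MODELS_DIRT).glob("**/*.pt")))
    print("Loaded synthesizer models: " + str(len(synthesizers)))

Since enum member names must be unique, two checkpoints with the same file name in different subfolders would collide; each model file needs a distinct name.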
@@ -1,27 +1,26 @@
-from synthesizer.inference import Synthesizer
-from pydantic import BaseModel, Field
-from encoder import inference as speacker_encoder
-import torch
 import os
 from pathlib import Path
 from enum import Enum
-import ppg_extractor as Extractor
-import ppg2mel as Convertor
-import librosa
-from scipy.io.wavfile import write
 import re
 import numpy as np
-from mkgui.base.components.types import FileContent
-from vocoder.hifigan import inference as gan_vocoder
-from pathlib import Path
 from typing import Any, Tuple
-import matplotlib.pyplot as plt
 
+import librosa
+import matplotlib.pyplot as plt
+import torch
+from pydantic import BaseModel, Field
+from scipy.io.wavfile import write
+
+import models.ppg2mel as Convertor
+import models.ppg_extractor as Extractor
+from control.mkgui.base.components.types import FileContent
+from models.encoder import inference as speacker_encoder
+from models.synthesizer.inference import Synthesizer
+from models.vocoder.hifigan import inference as gan_vocoder
+
 # Constants
-AUDIO_SAMPLES_DIR = f'samples{os.sep}'
-EXT_MODELS_DIRT = f'ppg_extractor{os.sep}saved_models'
-CONV_MODELS_DIRT = f'ppg2mel{os.sep}saved_models'
-VOC_MODELS_DIRT = f'vocoder{os.sep}saved_models'
+AUDIO_SAMPLES_DIR = f'data{os.sep}samples{os.sep}'
+EXT_MODELS_DIRT = f'data{os.sep}ckpt{os.sep}ppg_extractor'
+CONV_MODELS_DIRT = f'data{os.sep}ckpt{os.sep}ppg2mel'
+VOC_MODELS_DIRT = f'data{os.sep}ckpt{os.sep}vocoder'
 TEMP_SOURCE_AUDIO = f'wavs{os.sep}temp_source.wav'
 TEMP_TARGET_AUDIO = f'wavs{os.sep}temp_target.wav'
 TEMP_RESULT_AUDIO = f'wavs{os.sep}temp_result.wav'
@@ -132,9 +131,10 @@ def convert(input: Input) -> Output:
 
     ppg = extractor.extract_from_wav(src_wav)
     # Import necessary dependency of Voice Conversion
-    from utils.f0_utils import compute_f0, f02lf0, compute_mean_std, get_converted_lf0uv
+    from utils.f0_utils import (compute_f0, compute_mean_std, f02lf0,
+                                get_converted_lf0uv)
     ref_lf0_mean, ref_lf0_std = compute_mean_std(f02lf0(compute_f0(ref_wav)))
-    speacker_encoder.load_model(Path(f"encoder{os.sep}saved_models{os.sep}pretrained_bak_5805000.pt"))
+    speacker_encoder.load_model(Path(f"data{os.sep}ckpt{os.sep}encoder{os.sep}pretrained_bak_5805000.pt"))
     embed = speacker_encoder.embed_utterance(ref_wav)
     lf0_uv = get_converted_lf0uv(src_wav, ref_lf0_mean, ref_lf0_std, convert=True)
     min_len = min(ppg.shape[1], len(lf0_uv))
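For readers following the control flow: the context lines of this hunk are the whole one-shot voice-conversion path. The same lines restated with comments (a sketch; the extractor and the source/reference wavs are set up earlier in convert()):

ppg = extractor.extract_from_wav(src_wav)  # phonetic (content) features of the source
ref_lf0_mean, ref_lf0_std = compute_mean_std(f02lf0(compute_f0(ref_wav)))  # reference pitch statistics
speacker_encoder.load_model(Path(f"data{os.sep}ckpt{os.sep}encoder{os.sep}pretrained_bak_5805000.pt"))
embed = speacker_encoder.embed_utterance(ref_wav)  # target speaker embedding
lf0_uv = get_converted_lf0uv(src_wav, ref_lf0_mean, ref_lf0_std, convert=True)  # source pitch mapped to the reference
min_len = min(ppg.shape[1], len(lf0_uv))  # align content and pitch sequence lengths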
@@ -14,14 +14,13 @@ from fastapi.encoders import jsonable_encoder
 from loguru import logger
 from pydantic import BaseModel, ValidationError, parse_obj_as
 
-from mkgui.base import Opyrator
-from mkgui.base.core import name_to_title
-from mkgui.base.ui import schema_utils
-from mkgui.base.ui.streamlit_utils import CUSTOM_STREAMLIT_CSS
+from control.mkgui.base import Opyrator
+from control.mkgui.base.core import name_to_title
+from . import schema_utils
+from .streamlit_utils import CUSTOM_STREAMLIT_CSS
 
 STREAMLIT_RUNNER_SNIPPET = """
-from mkgui.base.ui import render_streamlit_ui
-from mkgui.base import Opyrator
+from control.mkgui.base.ui import render_streamlit_ui
 
 import streamlit as st
@@ -807,18 +806,18 @@ class OutputUI:
 
 def getOpyrator(mode: str) -> Opyrator:
     if mode == None or mode.startswith('VC'):
-        from mkgui.app_vc import convert
+        from control.mkgui.app_vc import convert
         return Opyrator(convert)
     if mode == None or mode.startswith('预处理'):
-        from mkgui.preprocess import preprocess
+        from control.mkgui.preprocess import preprocess
         return Opyrator(preprocess)
     if mode == None or mode.startswith('模型训练'):
-        from mkgui.train import train
+        from control.mkgui.train import train
         return Opyrator(train)
     if mode == None or mode.startswith('模型训练(VC)'):
-        from mkgui.train_vc import train_vc
+        from control.mkgui.train_vc import train_vc
         return Opyrator(train_vc)
-    from mkgui.app import synthesize
+    from control.mkgui.app import synthesize
     return Opyrator(synthesize)
 
@@ -845,7 +844,7 @@ def render_streamlit_ui() -> None:
     col2.title(title)
     col2.markdown("欢迎使用MockingBird Web 2")
 
-    image = Image.open(path.join('mkgui', 'static', 'mb.png'))
+    image = Image.open(path.join('control','mkgui', 'static', 'mb.png'))
     col1.image(image)
 
     st.markdown("---")
@@ -6,8 +6,8 @@ from typing import Any, Tuple
 
 
 # Constants
-EXT_MODELS_DIRT = f"ppg_extractor{os.sep}saved_models"
-ENC_MODELS_DIRT = f"encoder{os.sep}saved_models"
+EXT_MODELS_DIRT = f"data{os.sep}ckpt{os.sep}ppg_extractor"
+ENC_MODELS_DIRT = f"data{os.sep}ckpt{os.sep}encoder"
 
 
 if os.path.isdir(EXT_MODELS_DIRT):
@@ -83,7 +83,7 @@ def preprocess(input: Input) -> Output:
     """Preprocess(预处理)"""
     finished = 0
     if input.model == Model.VC_PPG2MEL:
-        from ppg2mel.preprocess import preprocess_dataset
+        from models.ppg2mel.preprocess import preprocess_dataset
         finished = preprocess_dataset(
             datasets_root=Path(input.datasets_root),
             dataset=input.dataset,
(binary image file moved, unchanged: 5.6 KiB before and after)
@@ -3,17 +3,17 @@ import os
 from pathlib import Path
 from enum import Enum
 from typing import Any
-from synthesizer.hparams import hparams
-from synthesizer.train import train as synt_train
+from models.synthesizer.hparams import hparams
+from models.synthesizer.train import train as synt_train
 
 # Constants
-SYN_MODELS_DIRT = f"synthesizer{os.sep}saved_models"
-ENC_MODELS_DIRT = f"encoder{os.sep}saved_models"
+SYN_MODELS_DIRT = f"data{os.sep}ckpt{os.sep}synthesizer"
+ENC_MODELS_DIRT = f"data{os.sep}ckpt{os.sep}encoder"
 
 
-# EXT_MODELS_DIRT = f"ppg_extractor{os.sep}saved_models"
-# CONV_MODELS_DIRT = f"ppg2mel{os.sep}saved_models"
-# ENC_MODELS_DIRT = f"encoder{os.sep}saved_models"
+# EXT_MODELS_DIRT = f"data{os.sep}ckpt{os.sep}ppg_extractor"
+# CONV_MODELS_DIRT = f"data{os.sep}ckpt{os.sep}ppg2mel"
+# ENC_MODELS_DIRT = f"data{os.sep}ckpt{os.sep}encoder"
 
 # Pre-Load models
 if os.path.isdir(SYN_MODELS_DIRT):
@@ -96,7 +96,7 @@ def train(input: Input) -> Output:
     synt_train(
         input.run_id,
         input.input_root,
-        f"synthesizer{os.sep}saved_models",
+        f"data{os.sep}ckpt{os.sep}synthesizer",
         input.save_every,
         input.backup_every,
         input.log_every,
@@ -9,9 +9,9 @@ from utils.util import AttrDict
 import torch
 
 # Constants
-EXT_MODELS_DIRT = f"ppg_extractor{os.sep}saved_models"
-CONV_MODELS_DIRT = f"ppg2mel{os.sep}saved_models"
-ENC_MODELS_DIRT = f"encoder{os.sep}saved_models"
+EXT_MODELS_DIRT = f"data{os.sep}ckpt{os.sep}ppg_extractor"
+CONV_MODELS_DIRT = f"data{os.sep}ckpt{os.sep}ppg2mel"
+ENC_MODELS_DIRT = f"data{os.sep}ckpt{os.sep}encoder"
 
 
 if os.path.isdir(EXT_MODELS_DIRT):
@@ -144,7 +144,7 @@ def train_vc(input: Input) -> Output:
     if torch.cuda.is_available():
         torch.cuda.manual_seed_all(input.seed)
     mode = "train"
-    from ppg2mel.train.train_linglf02mel_seq2seq_oneshotvc import Solver
+    from models.ppg2mel.train.train_linglf02mel_seq2seq_oneshotvc import Solver
     solver = Solver(config, params, mode)
     solver.load_data()
     solver.set_model()
@@ -1,12 +1,12 @@
-from toolbox.ui import UI
-from encoder import inference as encoder
-from synthesizer.inference import Synthesizer
-from vocoder.wavernn import inference as rnn_vocoder
-from vocoder.hifigan import inference as gan_vocoder
-from vocoder.fregan import inference as fgan_vocoder
+from control.toolbox.ui import UI
+from models.encoder import inference as encoder
+from models.synthesizer.inference import Synthesizer
+from models.vocoder.wavernn import inference as rnn_vocoder
+from models.vocoder.hifigan import inference as gan_vocoder
+from models.vocoder.fregan import inference as fgan_vocoder
 from pathlib import Path
 from time import perf_counter as timer
-from toolbox.utterance import Utterance
+from control.toolbox.utterance import Utterance
 import numpy as np
 import traceback
 import sys
@@ -396,7 +396,7 @@ class Toolbox:
         self.ui.log("Loading the extractor %s... " % model_fpath)
         self.ui.set_loading(1)
         start = timer()
-        import ppg_extractor as extractor
+        import models.ppg_extractor as extractor
         self.extractor = extractor.load_model(model_fpath)
         self.ui.log("Done (%dms)." % int(1000 * (timer() - start)), "append")
         self.ui.set_loading(0)
@@ -408,7 +408,7 @@ class Toolbox:
         self.ui.log("Loading the convertor %s... " % model_fpath)
         self.ui.set_loading(1)
         start = timer()
-        import ppg2mel as convertor
+        import models.ppg2mel as convertor
         self.convertor = convertor.load_model( model_fpath)
         self.ui.log("Done (%dms)." % int(1000 * (timer() - start)), "append")
         self.ui.set_loading(0)
(binary image file moved, unchanged: 5.6 KiB before and after)
@@ -4,8 +4,8 @@ from PyQt5.QtWidgets import *
 import matplotlib.pyplot as plt
 from matplotlib.backends.backend_qt5agg import FigureCanvasQTAgg as FigureCanvas
 from matplotlib.figure import Figure
-from encoder.inference import plot_embedding_as_heatmap
-from toolbox.utterance import Utterance
+from models.encoder.inference import plot_embedding_as_heatmap
+from control.toolbox.utterance import Utterance
 from pathlib import Path
 from typing import List, Set
 import sounddevice as sd
@@ -1,5 +1,5 @@
 from pathlib import Path
-from toolbox import Toolbox
+from control.toolbox import Toolbox
 from utils.argutils import print_args
 from utils.modelutils import check_model_paths
 import argparse
@@ -1,2 +0,0 @@
-from encoder.data_objects.speaker_verification_dataset import SpeakerVerificationDataset
-from encoder.data_objects.speaker_verification_dataset import SpeakerVerificationDataLoader
Binary file not shown.
gen_voice.py | 14
@@ -1,23 +1,15 @@
-from encoder.params_model import model_embedding_size as speaker_embedding_size
-from utils.argutils import print_args
-from utils.modelutils import check_model_paths
-from synthesizer.inference import Synthesizer
-from encoder import inference as encoder
-from vocoder.wavernn import inference as rnn_vocoder
-from vocoder.hifigan import inference as gan_vocoder
+from models.synthesizer.inference import Synthesizer
+from models.encoder import inference as encoder
+from models.vocoder.hifigan import inference as gan_vocoder
 from pathlib import Path
 import numpy as np
 import soundfile as sf
 import librosa
 import argparse
 import torch
 import sys
 import os
 import re
 import cn2an
 import glob
 
 from audioread.exceptions import NoBackendError
 vocoder = gan_vocoder
 
 def gen_one_wav(synthesizer, in_fpath, embed, texts, file_name, seq):
@@ -1,5 +1,5 @@
 from scipy.ndimage.morphology import binary_dilation
-from encoder.params_data import *
+from models.encoder.params_data import *
 from pathlib import Path
 from typing import Optional, Union
 from warnings import warn
models/encoder/data_objects/__init__.py (new file) | 2
@@ -0,0 +1,2 @@
+from models.encoder.data_objects.speaker_verification_dataset import SpeakerVerificationDataset
+from models.encoder.data_objects.speaker_verification_dataset import SpeakerVerificationDataLoader
@@ -1,5 +1,5 @@
-from encoder.data_objects.random_cycler import RandomCycler
-from encoder.data_objects.utterance import Utterance
+from models.encoder.data_objects.random_cycler import RandomCycler
+from models.encoder.data_objects.utterance import Utterance
 from pathlib import Path
 
 # Contains the set of utterances of a single speaker
@@ -1,6 +1,6 @@
 import numpy as np
 from typing import List
-from encoder.data_objects.speaker import Speaker
+from models.encoder.data_objects.speaker import Speaker
 
 class SpeakerBatch:
     def __init__(self, speakers: List[Speaker], utterances_per_speaker: int, n_frames: int):
@@ -1,7 +1,7 @@
-from encoder.data_objects.random_cycler import RandomCycler
-from encoder.data_objects.speaker_batch import SpeakerBatch
-from encoder.data_objects.speaker import Speaker
-from encoder.params_data import partials_n_frames
+from models.encoder.data_objects.random_cycler import RandomCycler
+from models.encoder.data_objects.speaker_batch import SpeakerBatch
+from models.encoder.data_objects.speaker import Speaker
+from models.encoder.params_data import partials_n_frames
 from torch.utils.data import Dataset, DataLoader
 from pathlib import Path
@@ -1,8 +1,8 @@
-from encoder.params_data import *
-from encoder.model import SpeakerEncoder
-from encoder.audio import preprocess_wav # We want to expose this function from here
+from models.encoder.params_data import *
+from models.encoder.model import SpeakerEncoder
+from models.encoder.audio import preprocess_wav # We want to expose this function from here
 from matplotlib import cm
-from encoder import audio
+from models.encoder import audio
 from pathlib import Path
 import matplotlib.pyplot as plt
 import numpy as np
@@ -1,5 +1,5 @@
-from encoder.params_model import *
-from encoder.params_data import *
+from models.encoder.params_model import *
+from models.encoder.params_data import *
 from scipy.interpolate import interp1d
 from sklearn.metrics import roc_curve
 from torch.nn.utils import clip_grad_norm_
@@ -1,8 +1,8 @@
 from multiprocess.pool import ThreadPool
-from encoder.params_data import *
-from encoder.config import librispeech_datasets, anglophone_nationalites
+from models.encoder.params_data import *
+from models.encoder.config import librispeech_datasets, anglophone_nationalites
 from datetime import datetime
-from encoder import audio
+from models.encoder import audio
 from pathlib import Path
 from tqdm import tqdm
 import numpy as np
@@ -22,7 +22,7 @@ class DatasetLog:
         self._log_params()
 
     def _log_params(self):
-        from encoder import params_data
+        from models.encoder import params_data
         self.write_line("Parameter values:")
         for param_name in (p for p in dir(params_data) if not p.startswith("__")):
             value = getattr(params_data, param_name)
@@ -1,7 +1,7 @@
-from encoder.visualizations import Visualizations
-from encoder.data_objects import SpeakerVerificationDataLoader, SpeakerVerificationDataset
-from encoder.params_model import *
-from encoder.model import SpeakerEncoder
+from models.encoder.visualizations import Visualizations
+from models.encoder.data_objects import SpeakerVerificationDataLoader, SpeakerVerificationDataset
+from models.encoder.params_model import *
+from models.encoder.model import SpeakerEncoder
 from utils.profiler import Profiler
 from pathlib import Path
 import torch
@@ -1,4 +1,4 @@
-from encoder.data_objects.speaker_verification_dataset import SpeakerVerificationDataset
+from models.encoder.data_objects.speaker_verification_dataset import SpeakerVerificationDataset
 from datetime import datetime
 from time import perf_counter as timer
 import matplotlib.pyplot as plt
@@ -65,8 +65,8 @@ class Visualizations:
     def log_params(self):
         if self.disabled:
             return
-        from encoder import params_data
-        from encoder import params_model
+        from models.encoder import params_data
+        from models.encoder import params_model
         param_string = "<b>Model parameters</b>:<br>"
         for param_name in (p for p in dir(params_model) if not p.startswith("__")):
             value = getattr(params_model, param_name)
@@ -7,10 +7,10 @@ from pathlib import Path
 import soundfile
 import resampy
 
-from ppg_extractor import load_model
+from models.ppg_extractor import load_model
 import encoder.inference as Encoder
-from encoder.audio import preprocess_wav
-from encoder import audio
+from models.encoder.audio import preprocess_wav
+from models.encoder import audio
 from utils.f0_utils import compute_f0
 
 from torch.multiprocessing import Pool, cpu_count
@@ -3,7 +3,7 @@ import torch
 import argparse
 import numpy as np
 from utils.load_yaml import HpsYaml
-from ppg2mel.train.train_linglf02mel_seq2seq_oneshotvc import Solver
+from models.ppg2mel.train.train_linglf02mel_seq2seq_oneshotvc import Solver
 
 # For reproducibility, comment these may speed up training
 torch.backends.cudnn.deterministic = True
@@ -14,7 +14,7 @@ from utils.data_load import OneshotVcDataset, MultiSpkVcCollate
 from .loss import MaskedMSELoss
 from .optim import Optimizer
 from utils.util import human_format
-from ppg2mel import MelDecoderMOLv2
+from models.ppg2mel import MelDecoderMOLv2
 
 
 class Solver(BaseSolver):
models/synthesizer/__init__.py (new file) | 0
@@ -1,10 +1,10 @@
 import torch
-from synthesizer import audio
-from synthesizer.hparams import hparams
-from synthesizer.models.tacotron import Tacotron
-from synthesizer.utils.symbols import symbols
-from synthesizer.utils.text import text_to_sequence
-from vocoder.display import simple_table
+from models.synthesizer import audio
+from models.synthesizer.hparams import hparams
+from models.synthesizer.models.tacotron import Tacotron
+from models.synthesizer.utils.symbols import symbols
+from models.synthesizer.utils.text import text_to_sequence
+from models.vocoder.display import simple_table
 from pathlib import Path
 from typing import Union, List
 import numpy as np
Some files were not shown because too many files have changed in this diff.