Add preprocessing mode

This commit is contained in:
babysor00 2022-05-01 16:42:11 +08:00
parent c5998bfe71
commit 70cc3988d3
8 changed files with 132 additions and 13 deletions

View File

@ -86,7 +86,7 @@ class Output(BaseModel):
streamlit_app.pyplot(fig)
def main(input: Input) -> Output:
def synthesize(input: Input) -> Output:
"""synthesize(合成)"""
# load models
encoder.load_model(Path(input.encoder.value))

View File

@ -101,7 +101,7 @@ class Output(BaseModel):
ax.set_title("mel spectrogram(Result Audio)")
streamlit_app.pyplot(fig)
def main(input: Input) -> Output:
def convert(input: Input) -> Output:
"""convert(转换)"""
# load models
extractor = Extractor.load_model(Path(input.extractor.value))

View File

@ -1,5 +1,5 @@
import base64
from typing import Any, Dict
from typing import Any, Dict, overload
class FileContent(str):
@ -27,3 +27,20 @@ class FileContent(str):
return FileContent(base64.b64encode(value).decode())
else:
raise Exception("Wrong type")
# # 暂时无法使用,因为浏览器中没有考虑选择文件夹
# class DirectoryContent(FileContent):
# @classmethod
# def __modify_schema__(cls, field_schema: Dict[str, Any]) -> None:
# field_schema.update(format="path")
# @classmethod
# def validate(cls, value: Any) -> "DirectoryContent":
# if isinstance(value, DirectoryContent):
# return value
# elif isinstance(value, str):
# return DirectoryContent(value)
# elif isinstance(value, (bytes, bytearray, memoryview)):
# return DirectoryContent(base64.b64encode(value).decode())
# else:
# raise Exception("Wrong type")

View File

@ -112,7 +112,6 @@ class Opyrator:
else:
self.function = func
self._name = "Opyrator"
self._action = "Execute"
self._input_type = None
self._output_type = None

View File

@ -38,6 +38,11 @@ def is_single_file_property(property: Dict) -> bool:
return property.get("format") == "byte"
def is_single_directory_property(property: Dict) -> bool:
if property.get("type") != "string":
return False
return property.get("format") == "path"
def is_multi_enum_property(property: Dict, references: Dict) -> bool:
if property.get("type") != "array":
return False

View File

@ -110,7 +110,7 @@ class InputUI:
# The rendering also returns the current state of input data
self._session_state.input_data = self._input_class.render_input_ui( # type: ignore
st, self._session_state.input_data
).dict()
)
return
# print(self._schema_properties)
@ -802,25 +802,31 @@ class OutputUI:
def getOpyrator(mode: str) -> Opyrator:
if mode == None or mode.startswith('VC'):
from mkgui.app_vc import main
return Opyrator(main)
from mkgui.app import main
return Opyrator(main)
from mkgui.app_vc import convert
return Opyrator(convert)
if mode == None or mode.startswith('预处理'):
from mkgui.preprocess import preprocess
return Opyrator(preprocess)
from mkgui.app import synthesize
return Opyrator(synthesize)
def render_streamlit_ui() -> None:
# init
session_state = st.session_state
session_state.input_data = {}
session_state.mode = None
with st.spinner("Loading MockingBird GUI. Please wait..."):
session_state.mode = st.sidebar.selectbox(
'模式选择',
("AI拟音", "VC拟音")
( "AI拟音", "VC拟音", "预处理")
)
opyrator = getOpyrator(session_state.mode)
title = opyrator.name
if "mode" in session_state:
mode = session_state.mode
else:
mode = ""
opyrator = getOpyrator(mode)
title = opyrator.name + mode
col1, col2, _ = st.columns(3)
col2.title(title)

91
mkgui/preprocess.py Normal file
View File

@ -0,0 +1,91 @@
from pydantic import BaseModel, Field
import os
from pathlib import Path
from enum import Enum
from typing import Any
# Constants
EXT_MODELS_DIRT = "ppg_extractor\\saved_models"
ENC_MODELS_DIRT = "encoder\\saved_models"
if os.path.isdir(EXT_MODELS_DIRT):
extractors = Enum('extractors', list((file.name, file) for file in Path(EXT_MODELS_DIRT).glob("**/*.pt")))
print("Loaded extractor models: " + str(len(extractors)))
if os.path.isdir(ENC_MODELS_DIRT):
encoders = Enum('encoders', list((file.name, file) for file in Path(ENC_MODELS_DIRT).glob("**/*.pt")))
print("Loaded encoders models: " + str(len(encoders)))
class Model(str, Enum):
VC_PPG2MEL = "ppg2mel"
class Dataset(str, Enum):
AIDATATANG_200ZH = "aidatatang_200zh"
AIDATATANG_200ZH_S = "aidatatang_200zh_s"
class Input(BaseModel):
# def render_input_ui(st, input) -> Dict:
# input["selected_dataset"] = st.selectbox(
# '选择数据集',
# ("aidatatang_200zh", "aidatatang_200zh_s")
# )
# return input
model: Model = Field(
Model.VC_PPG2MEL, title="目标模型",
)
dataset: Dataset = Field(
Dataset.AIDATATANG_200ZH, title="数据集选择",
)
datasets_root: str = Field(
..., alias="数据集根目录", description="输入数据集根目录(相对/绝对)",
format=True,
example="..\\trainning_data\\"
)
output_root: str = Field(
..., alias="输出根目录", description="输出结果根目录(相对/绝对)",
format=True,
example="..\\trainning_data\\"
)
n_processes: int = Field(
2, alias="处理线程数", description="根据CPU线程数来设置",
le=32, ge=1
)
extractor: extractors = Field(
..., alias="特征提取模型",
description="选择PPG特征提取模型文件."
)
encoder: encoders = Field(
..., alias="语音编码模型",
description="选择语音编码模型文件."
)
class AudioEntity(BaseModel):
content: bytes
mel: Any
class Output(BaseModel):
__root__: tuple[str, int]
def render_output_ui(self, streamlit_app, input) -> None: # type: ignore
"""Custom output UI.
If this method is implmeneted, it will be used instead of the default Output UI renderer.
"""
sr, count = self.__root__
streamlit_app.subheader(f"Dataset {sr} done processed total of {count}")
def preprocess(input: Input) -> Output:
"""Preprocess(预处理)"""
finished = 0
if input.model == Model.VC_PPG2MEL:
from ppg2mel.preprocess import preprocess_dataset
finished = preprocess_dataset(
datasets_root=Path(input.datasets_root),
dataset=input.dataset,
out_dir=Path(input.output_root),
n_processes=input.n_processes,
ppg_encoder_model_fpath=Path(input.extractor.value),
speaker_encoder_model=Path(input.encoder.value)
)
# TODO: pass useful return code
return Output(__root__=(input.dataset, finished))

View File

@ -110,3 +110,4 @@ def preprocess_dataset(datasets_root, dataset, out_dir, n_processes, ppg_encoder
t_fid_file.close()
d_fid_file.close()
e_fid_file.close()
return len(wav_file_list)