diff --git a/mkgui/app.py b/mkgui/app.py index 5e681ef..0f77006 100644 --- a/mkgui/app.py +++ b/mkgui/app.py @@ -86,7 +86,7 @@ class Output(BaseModel): streamlit_app.pyplot(fig) -def main(input: Input) -> Output: +def synthesize(input: Input) -> Output: """synthesize(合成)""" # load models encoder.load_model(Path(input.encoder.value)) diff --git a/mkgui/app_vc.py b/mkgui/app_vc.py index 8f55a9c..707f87a 100644 --- a/mkgui/app_vc.py +++ b/mkgui/app_vc.py @@ -101,7 +101,7 @@ class Output(BaseModel): ax.set_title("mel spectrogram(Result Audio)") streamlit_app.pyplot(fig) -def main(input: Input) -> Output: +def convert(input: Input) -> Output: """convert(转换)""" # load models extractor = Extractor.load_model(Path(input.extractor.value)) diff --git a/mkgui/base/components/types.py b/mkgui/base/components/types.py index e18e267..125809a 100644 --- a/mkgui/base/components/types.py +++ b/mkgui/base/components/types.py @@ -1,5 +1,5 @@ import base64 -from typing import Any, Dict +from typing import Any, Dict, overload class FileContent(str): @@ -27,3 +27,20 @@ class FileContent(str): return FileContent(base64.b64encode(value).decode()) else: raise Exception("Wrong type") + +# # 暂时无法使用,因为浏览器中没有考虑选择文件夹 +# class DirectoryContent(FileContent): +# @classmethod +# def __modify_schema__(cls, field_schema: Dict[str, Any]) -> None: +# field_schema.update(format="path") + +# @classmethod +# def validate(cls, value: Any) -> "DirectoryContent": +# if isinstance(value, DirectoryContent): +# return value +# elif isinstance(value, str): +# return DirectoryContent(value) +# elif isinstance(value, (bytes, bytearray, memoryview)): +# return DirectoryContent(base64.b64encode(value).decode()) +# else: +# raise Exception("Wrong type") diff --git a/mkgui/base/core.py b/mkgui/base/core.py index fe7ed50..8166a33 100644 --- a/mkgui/base/core.py +++ b/mkgui/base/core.py @@ -112,7 +112,6 @@ class Opyrator: else: self.function = func - self._name = "Opyrator" self._action = "Execute" self._input_type = None self._output_type = None diff --git a/mkgui/base/ui/schema_utils.py b/mkgui/base/ui/schema_utils.py index 4e2e0b8..a2be43c 100644 --- a/mkgui/base/ui/schema_utils.py +++ b/mkgui/base/ui/schema_utils.py @@ -38,6 +38,11 @@ def is_single_file_property(property: Dict) -> bool: return property.get("format") == "byte" +def is_single_directory_property(property: Dict) -> bool: + if property.get("type") != "string": + return False + return property.get("format") == "path" + def is_multi_enum_property(property: Dict, references: Dict) -> bool: if property.get("type") != "array": return False diff --git a/mkgui/base/ui/streamlit_ui.py b/mkgui/base/ui/streamlit_ui.py index 9708668..08232f7 100644 --- a/mkgui/base/ui/streamlit_ui.py +++ b/mkgui/base/ui/streamlit_ui.py @@ -110,7 +110,7 @@ class InputUI: # The rendering also returns the current state of input data self._session_state.input_data = self._input_class.render_input_ui( # type: ignore st, self._session_state.input_data - ).dict() + ) return # print(self._schema_properties) @@ -802,25 +802,31 @@ class OutputUI: def getOpyrator(mode: str) -> Opyrator: if mode == None or mode.startswith('VC'): - from mkgui.app_vc import main - return Opyrator(main) - from mkgui.app import main - return Opyrator(main) + from mkgui.app_vc import convert + return Opyrator(convert) + if mode == None or mode.startswith('预处理'): + from mkgui.preprocess import preprocess + return Opyrator(preprocess) + from mkgui.app import synthesize + return Opyrator(synthesize) def render_streamlit_ui() -> None: # init session_state = st.session_state session_state.input_data = {} - session_state.mode = None with st.spinner("Loading MockingBird GUI. Please wait..."): session_state.mode = st.sidebar.selectbox( '模式选择', - ("AI拟音", "VC拟音") + ( "AI拟音", "VC拟音", "预处理") ) - opyrator = getOpyrator(session_state.mode) - title = opyrator.name + if "mode" in session_state: + mode = session_state.mode + else: + mode = "" + opyrator = getOpyrator(mode) + title = opyrator.name + mode col1, col2, _ = st.columns(3) col2.title(title) diff --git a/mkgui/preprocess.py b/mkgui/preprocess.py new file mode 100644 index 0000000..0827add --- /dev/null +++ b/mkgui/preprocess.py @@ -0,0 +1,91 @@ +from pydantic import BaseModel, Field +import os +from pathlib import Path +from enum import Enum +from typing import Any + + +# Constants +EXT_MODELS_DIRT = "ppg_extractor\\saved_models" +ENC_MODELS_DIRT = "encoder\\saved_models" + + +if os.path.isdir(EXT_MODELS_DIRT): + extractors = Enum('extractors', list((file.name, file) for file in Path(EXT_MODELS_DIRT).glob("**/*.pt"))) + print("Loaded extractor models: " + str(len(extractors))) +if os.path.isdir(ENC_MODELS_DIRT): + encoders = Enum('encoders', list((file.name, file) for file in Path(ENC_MODELS_DIRT).glob("**/*.pt"))) + print("Loaded encoders models: " + str(len(encoders))) + +class Model(str, Enum): + VC_PPG2MEL = "ppg2mel" + +class Dataset(str, Enum): + AIDATATANG_200ZH = "aidatatang_200zh" + AIDATATANG_200ZH_S = "aidatatang_200zh_s" + +class Input(BaseModel): + # def render_input_ui(st, input) -> Dict: + # input["selected_dataset"] = st.selectbox( + # '选择数据集', + # ("aidatatang_200zh", "aidatatang_200zh_s") + # ) + # return input + model: Model = Field( + Model.VC_PPG2MEL, title="目标模型", + ) + dataset: Dataset = Field( + Dataset.AIDATATANG_200ZH, title="数据集选择", + ) + datasets_root: str = Field( + ..., alias="数据集根目录", description="输入数据集根目录(相对/绝对)", + format=True, + example="..\\trainning_data\\" + ) + output_root: str = Field( + ..., alias="输出根目录", description="输出结果根目录(相对/绝对)", + format=True, + example="..\\trainning_data\\" + ) + n_processes: int = Field( + 2, alias="处理线程数", description="根据CPU线程数来设置", + le=32, ge=1 + ) + extractor: extractors = Field( + ..., alias="特征提取模型", + description="选择PPG特征提取模型文件." + ) + encoder: encoders = Field( + ..., alias="语音编码模型", + description="选择语音编码模型文件." + ) + +class AudioEntity(BaseModel): + content: bytes + mel: Any + +class Output(BaseModel): + __root__: tuple[str, int] + + def render_output_ui(self, streamlit_app, input) -> None: # type: ignore + """Custom output UI. + If this method is implmeneted, it will be used instead of the default Output UI renderer. + """ + sr, count = self.__root__ + streamlit_app.subheader(f"Dataset {sr} done processed total of {count}") + +def preprocess(input: Input) -> Output: + """Preprocess(预处理)""" + finished = 0 + if input.model == Model.VC_PPG2MEL: + from ppg2mel.preprocess import preprocess_dataset + finished = preprocess_dataset( + datasets_root=Path(input.datasets_root), + dataset=input.dataset, + out_dir=Path(input.output_root), + n_processes=input.n_processes, + ppg_encoder_model_fpath=Path(input.extractor.value), + speaker_encoder_model=Path(input.encoder.value) + ) + # TODO: pass useful return code + return Output(__root__=(input.dataset, finished)) \ No newline at end of file diff --git a/ppg2mel/preprocess.py b/ppg2mel/preprocess.py index 6da9054..0feee6e 100644 --- a/ppg2mel/preprocess.py +++ b/ppg2mel/preprocess.py @@ -110,3 +110,4 @@ def preprocess_dataset(datasets_root, dataset, out_dir, n_processes, ppg_encoder t_fid_file.close() d_fid_file.close() e_fid_file.close() + return len(wav_file_list)