Add preprocessing mode

2024-03-22 13:11:31 +08:00 · 2022-05-01 16:42:11 +08:00 · 2022-05-01 16:42:11 +08:00 · 70cc3988d3
commit 70cc3988d3
parent c5998bfe71
8 changed files with 132 additions and 13 deletions
--- a/mkgui/app.py
+++ b/mkgui/app.py
@ -86,7 +86,7 @@ class Output(BaseModel):
        streamlit_app.pyplot(fig)


-def main(input: Input) -> Output:
+def synthesize(input: Input) -> Output:
    """synthesize(合成)"""
    # load models
    encoder.load_model(Path(input.encoder.value))
--- a/mkgui/app_vc.py
+++ b/mkgui/app_vc.py
@ -101,7 +101,7 @@ class Output(BaseModel):
        ax.set_title("mel spectrogram(Result Audio)")
        streamlit_app.pyplot(fig)

-def main(input: Input) -> Output:
+def convert(input: Input) -> Output:
    """convert(转换)"""
    # load models
    extractor = Extractor.load_model(Path(input.extractor.value))
--- a/mkgui/base/components/types.py
+++ b/mkgui/base/components/types.py
@ -1,5 +1,5 @@
 import base64
-from typing import Any, Dict
+from typing import Any, Dict, overload


 class FileContent(str):
@ -27,3 +27,20 @@ class FileContent(str):
            return FileContent(base64.b64encode(value).decode())
        else:
            raise Exception("Wrong type")
+
+# # Not usable for now: browsers offer no way to pick a directory.
+# class DirectoryContent(FileContent):
+#     @classmethod
+#     def __modify_schema__(cls, field_schema: Dict[str, Any]) -> None:
+#         field_schema.update(format="path")
+
+#     @classmethod
+#     def validate(cls, value: Any) -> "DirectoryContent":
+#         if isinstance(value, DirectoryContent):
+#             return value
+#         elif isinstance(value, str):
+#             return DirectoryContent(value)
+#         elif isinstance(value, (bytes, bytearray, memoryview)):
+#             return DirectoryContent(base64.b64encode(value).decode())
+#         else:
+#             raise Exception("Wrong type")
--- a/mkgui/base/core.py
+++ b/mkgui/base/core.py
@ -112,7 +112,6 @@ class Opyrator:
        else:
            self.function = func

-        self._name = "Opyrator"
        self._action = "Execute"
        self._input_type = None
        self._output_type = None
--- a/mkgui/base/ui/schema_utils.py
+++ b/mkgui/base/ui/schema_utils.py
@ -38,6 +38,11 @@ def is_single_file_property(property: Dict) -> bool:
    return property.get("format") == "byte"


+def is_single_directory_property(property: Dict) -> bool:
+    """Tell whether a schema property is a string whose format is "path"."""
+    is_string = property.get("type") == "string"
+    return is_string and property.get("format") == "path"
+
 def is_multi_enum_property(property: Dict, references: Dict) -> bool:
    if property.get("type") != "array":
        return False
--- a/mkgui/base/ui/streamlit_ui.py
+++ b/mkgui/base/ui/streamlit_ui.py
@ -110,7 +110,7 @@ class InputUI:
            # The rendering also returns the current state of input data
            self._session_state.input_data = self._input_class.render_input_ui(  # type: ignore
                st, self._session_state.input_data
-            ).dict()
+            )
            return

        # print(self._schema_properties)
@ -802,25 +802,31 @@ class OutputUI:

 def getOpyrator(mode: str) -> Opyrator:
     if mode == None or mode.startswith('VC'):
-        from mkgui.app_vc import main
-        return  Opyrator(main)
-    from mkgui.app import main
-    return Opyrator(main)
+        from mkgui.app_vc import convert  # None and every 'VC…' mode open the voice-conversion page
+        return  Opyrator(convert)
+    if mode.startswith('预处理'):  # mode is never None here: the first branch already returned for None
+        from mkgui.preprocess import preprocess  # imported lazily, only when this page is selected
+        return  Opyrator(preprocess)
+    from mkgui.app import synthesize  # default: every remaining mode opens the synthesis page
+    return Opyrator(synthesize)
    

 def render_streamlit_ui() -> None:
    # init
    session_state = st.session_state
    session_state.input_data = {}
-    session_state.mode = None

    with st.spinner("Loading MockingBird GUI. Please wait..."):
        session_state.mode = st.sidebar.selectbox(
            '模式选择', 
-            ("AI拟音", "VC拟音")
+            ( "AI拟音", "VC拟音", "预处理")
        )
-        opyrator = getOpyrator(session_state.mode)
-    title = opyrator.name
+        if "mode" in session_state:
+            mode = session_state.mode
+        else:
+            mode = ""
+        opyrator = getOpyrator(mode)
+    title = opyrator.name + mode

    col1, col2, _ = st.columns(3)
    col2.title(title)
--- a/mkgui/preprocess.py
+++ b/mkgui/preprocess.py
@ -0,0 +1,91 @@
+from pydantic import BaseModel, Field
+import os
+from pathlib import Path
+from enum import Enum
+from typing import Any
+
+
+# Constants
+# Joined with os.path.join so the relative paths also resolve on POSIX systems;
+# on Windows the resulting strings are identical to the previous literals.
+EXT_MODELS_DIRT = os.path.join("ppg_extractor", "saved_models")
+ENC_MODELS_DIRT = os.path.join("encoder", "saved_models")
+
+
+# Both enums are bound unconditionally (empty when the directory is missing) so
+# that the `extractors` / `encoders` annotations used by Input further down can
+# never raise NameError at import time.
+extractors = Enum('extractors', [])
+if os.path.isdir(EXT_MODELS_DIRT):
+    # One member per *.pt file found recursively: member name = file name, value = Path.
+    extractors = Enum('extractors', [(file.name, file) for file in Path(EXT_MODELS_DIRT).glob("**/*.pt")])
+    print("Loaded extractor models: " + str(len(extractors)))
+encoders = Enum('encoders', [])
+if os.path.isdir(ENC_MODELS_DIRT):
+    encoders = Enum('encoders', [(file.name, file) for file in Path(ENC_MODELS_DIRT).glob("**/*.pt")])
+    print("Loaded encoders models: " + str(len(encoders)))
+
+class Model(str, Enum):  # Target models selectable on the preprocessing form.
+    VC_PPG2MEL = "ppg2mel"  # routed to ppg2mel.preprocess.preprocess_dataset by preprocess()
+
+class Dataset(str, Enum):  # Dataset identifiers selectable on the form; forwarded as the `dataset` argument.
+    AIDATATANG_200ZH = "aidatatang_200zh"
+    AIDATATANG_200ZH_S = "aidatatang_200zh_s"
+
+class Input(BaseModel):  # Form schema of the preprocessing page; every field is read by preprocess(). The commented lines below are a disabled render_input_ui sketch.
+    # def render_input_ui(st, input) -> Dict: 
+    #     input["selected_dataset"] = st.selectbox(
+    #         '选择数据集', 
+    #         ("aidatatang_200zh", "aidatatang_200zh_s")
+    #     )
+    # return input
+    model: Model = Field(  # which model the data is prepared for
+        Model.VC_PPG2MEL, title="目标模型",
+    )
+    dataset: Dataset = Field(  # which dataset to process
+        Dataset.AIDATATANG_200ZH, title="数据集选择",
+    )
+    datasets_root: str = Field(  # required; converted to a Path by preprocess()
+        ..., alias="数据集根目录", description="输入数据集根目录（相对/绝对）",
+        format=True,  # NOTE(review): a boolean, not a format name such as "path" — confirm intent
+        example="..\\trainning_data\\"
+    )
+    output_root: str = Field(  # required; converted to a Path and passed as out_dir
+        ..., alias="输出根目录", description="输出结果根目录（相对/绝对）",
+        format=True,  # NOTE(review): same boolean `format` as datasets_root — confirm intent
+        example="..\\trainning_data\\"
+    )
+    n_processes: int = Field(   # defaults to 2
+        2, alias="处理线程数", description="根据CPU线程数来设置",
+        le=32, ge=1  # accepted range is 1..32 inclusive
+    )
+    extractor: extractors = Field(  # required; a member of the module-level `extractors` Enum
+        ..., alias="特征提取模型", 
+        description="选择PPG特征提取模型文件."
+    )
+    encoder: encoders = Field(  # required; a member of the module-level `encoders` Enum
+        ..., alias="语音编码模型", 
+        description="选择语音编码模型文件."
+    )
+
+class AudioEntity(BaseModel):  # Audio payload model; not referenced by the other definitions visible in this file.
+    content: bytes  # raw bytes — presumably encoded audio; TODO confirm
+    mel: Any  # untyped — presumably a mel spectrogram; TODO confirm
+
+class Output(BaseModel):  # Result wrapper: a (dataset, finished count) pair stored as the custom root value.
+    __root__: tuple[str, int]
+
+    def render_output_ui(self, streamlit_app, input) -> None:  # type: ignore
+        """Custom output UI.
+
+        When this method is implemented, it is used instead of the default
+        Output UI renderer.
+        """
+        # The root pair unpacks positionally: first the dataset, then the count.
+        sr, count = self.__root__
+        summary = f"Dataset {sr} done processed total of {count}"
+        streamlit_app.subheader(summary)
+
+def preprocess(input: Input) -> Output:
+    """Preprocess(预处理)"""
+    # NOTE(review): the docstring above is kept verbatim — it may be displayed by the UI wrapper; confirm.
+    # Guard clause: any model other than ppg2mel has no routine here, so zero items are reported.
+    if input.model != Model.VC_PPG2MEL:
+        return Output(__root__=(input.dataset, 0))
+
+    # Imported lazily so the ppg2mel pipeline is only loaded when it is requested.
+    from ppg2mel.preprocess import preprocess_dataset
+
+    call_args = dict(
+        datasets_root=Path(input.datasets_root),
+        dataset=input.dataset,
+        out_dir=Path(input.output_root),
+        n_processes=input.n_processes,
+        ppg_encoder_model_fpath=Path(input.extractor.value),
+        speaker_encoder_model=Path(input.encoder.value),
+    )
+    processed = preprocess_dataset(**call_args)
+    # TODO: pass useful return code
+    return Output(__root__=(input.dataset, processed))
--- a/ppg2mel/preprocess.py
+++ b/ppg2mel/preprocess.py
@ -110,3 +110,4 @@ def preprocess_dataset(datasets_root, dataset, out_dir, n_processes, ppg_encoder
    t_fid_file.close()
    d_fid_file.close()
    e_fid_file.close()
+    return len(wav_file_list)