Skip earlier

2024-03-22 13:11:31 +08:00 · 2023-08-29 19:42:48 +08:00 · 2023-08-29 19:42:48 +08:00 · a7b0431afe
commit a7b0431afe
parent af1426a610
1 changed files with 11 additions and 7 deletions
--- a/models/synthesizer/preprocess.py
+++ b/models/synthesizer/preprocess.py
@ -39,6 +39,9 @@ data_info = {
    }
 }
 def should_skip(fpath: Path, skip_existing: bool) -> bool:
    return skip_existing and fpath.exists()
 def preprocess_dataset(datasets_root: Path, out_dir: Path, n_processes: int,
                           skip_existing: bool, hparams, no_alignments: bool, 
                           dataset: str, emotion_extract = False, encoder_model_fpath=None):
@ -100,7 +103,7 @@ def preprocess_dataset(datasets_root: Path, out_dir: Path, n_processes: int,
    print("Max audio timesteps length: %d" % max(int(m[3]) for m in metadata))
 def embed_utterance(fpaths: str, encoder_model_fpath: str, skip_existing: bool):
-    if skip_existing and fpaths.exists():
+    if should_skip(fpaths, skip_existing):
        return
    if not encoder.is_loaded():
        encoder.load_model(encoder_model_fpath)
@ -127,14 +130,15 @@ def create_embeddings(synthesizer_root: Path, encoder_model_fpath: Path, n_proce
    embed_dir = synthesizer_root.joinpath("embeds")
    embed_dir.mkdir(exist_ok=True)
    # Gather the input wave filepath and the target output embed filepath
    with metadata_fpath.open("r", encoding="utf-8") as metadata_file:
        metadata = [line.split("|") for line in metadata_file]
-        fpaths = [(wav_dir.joinpath(m[0]), embed_dir.joinpath(m[2])) for m in metadata]
+        fpaths = [(wav_dir.joinpath(m[0]), embed_dir.joinpath(m[2])) for m in metadata if not should_skip(embed_dir.joinpath(m[2]), skip_existing)]
-        
+
    # TODO: improve on the multiprocessing, it's terrible. Disk I/O is the bottleneck here.
    # Embed the utterances in separate threads
-    func = partial(embed_utterance, encoder_model_fpath=encoder_model_fpath, skip_existing=skip_existing)
+    func = partial(embed_utterance, encoder_model_fpath=encoder_model_fpath)
    job = Pool(n_processes).imap(func, fpaths)
    tuple(tqdm(job, "Embedding", len(fpaths), unit="utterances"))
@ -144,14 +148,14 @@ def create_emo(synthesizer_root: Path, n_processes: int, skip_existing: bool, hp
    assert wav_dir.exists() and metadata_fpath.exists()
    emo_dir = synthesizer_root.joinpath("emo")
    emo_dir.mkdir(exist_ok=True)
-    
+
    # Gather the input wave filepath and the target output embed filepath
    with metadata_fpath.open("r", encoding="utf-8") as metadata_file:
        metadata = [line.split("|") for line in metadata_file]
-        fpaths = [(wav_dir.joinpath(m[0]), emo_dir.joinpath(m[0].replace("audio-", "emo-"))) for m in metadata]
+        fpaths = [(wav_dir.joinpath(m[0]), emo_dir.joinpath(m[0].replace("audio-", "emo-"))) for m in metadata if not should_skip(emo_dir.joinpath(m[0].replace("audio-", "emo-")), skip_existing)]
    # TODO: improve on the multiprocessing, it's terrible. Disk I/O is the bottleneck here.
    # Embed the utterances in separate threads
-    func = partial(_emo_extract_from_utterance, hparams=hparams, skip_existing=skip_existing)
+    func = partial(_emo_extract_from_utterance, hparams=hparams)
    job = Pool(n_processes).imap(func, fpaths)
    tuple(tqdm(job, "Emo", len(fpaths), unit="utterances"))