Skip earlier

pull/950/head
babysor00 2023-08-29 19:42:48 +08:00
parent af1426a610
commit a7b0431afe
1 changed file with 11 additions and 7 deletions

@@ -39,6 +39,9 @@ data_info = {
     }
 }
 
+def should_skip(fpath: Path, skip_existing: bool) -> bool:
+    return skip_existing and fpath.exists()
+
 def preprocess_dataset(datasets_root: Path, out_dir: Path, n_processes: int,
                        skip_existing: bool, hparams, no_alignments: bool,
                        dataset: str, emotion_extract = False, encoder_model_fpath=None):
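The new helper centralizes the skip-existing test that was previously inlined at each call site: work is skipped only when the caller opted in and the output file is already on disk. A minimal sketch of the semantics (the paths are hypothetical):

    from pathlib import Path

    def should_skip(fpath: Path, skip_existing: bool) -> bool:
        # Skip only when skipping was requested AND the output already exists
        return skip_existing and fpath.exists()

    done = Path("embeds/embed-001.npy")   # assume this file already exists
    todo = Path("embeds/embed-002.npy")   # assume this one does not

    print(should_skip(done, skip_existing=True))    # True:  reuse the existing output
    print(should_skip(done, skip_existing=False))   # False: force reprocessing
    print(should_skip(todo, skip_existing=True))    # False: output still missing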
@@ -100,7 +103,7 @@ def preprocess_dataset(datasets_root: Path, out_dir: Path, n_processes: int,
     print("Max audio timesteps length: %d" % max(int(m[3]) for m in metadata))
 
 def embed_utterance(fpaths: str, encoder_model_fpath: str, skip_existing: bool):
-    if skip_existing and fpaths.exists():
+    if should_skip(fpaths, skip_existing):
         return
     if not encoder.is_loaded():
         encoder.load_model(encoder_model_fpath)
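Inside the worker, the inline existence check is simply rerouted through the helper; since create_embeddings below now filters the work list up front, this in-worker test acts as a second line of defense for outputs written while the pool is running. A tiny sketch of the early-return pattern (the write is a hypothetical stand-in for the real encoder work):

    from pathlib import Path

    # should_skip as defined in the diff above
    def process_one(out_fpath: Path, skip_existing: bool) -> None:
        if should_skip(out_fpath, skip_existing):
            return                        # output already present, do nothing
        out_fpath.write_bytes(b"...")     # stand-in for computing the embedding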
@@ -127,14 +130,15 @@ def create_embeddings(synthesizer_root: Path, encoder_model_fpath: Path, n_processes
     embed_dir = synthesizer_root.joinpath("embeds")
     embed_dir.mkdir(exist_ok=True)
 
     # Gather the input wave filepath and the target output embed filepath
     with metadata_fpath.open("r", encoding="utf-8") as metadata_file:
         metadata = [line.split("|") for line in metadata_file]
-        fpaths = [(wav_dir.joinpath(m[0]), embed_dir.joinpath(m[2])) for m in metadata]
+        fpaths = [(wav_dir.joinpath(m[0]), embed_dir.joinpath(m[2])) for m in metadata if not should_skip(embed_dir.joinpath(m[2]), skip_existing)]
 
     # TODO: improve on the multiprocessing, it's terrible. Disk I/O is the bottleneck here.
     # Embed the utterances in separate threads
-    func = partial(embed_utterance, encoder_model_fpath=encoder_model_fpath, skip_existing=skip_existing)
+    func = partial(embed_utterance, encoder_model_fpath=encoder_model_fpath)
     job = Pool(n_processes).imap(func, fpaths)
     tuple(tqdm(job, "Embedding", len(fpaths), unit="utterances"))
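Filtering the (wav, embed) pairs while building the list means the pool never dispatches a task whose target already exists, instead of spawning a worker only to return early. A self-contained sketch of the same filter-then-Pool.imap pattern (dummy files and a stand-in worker, not the real encoder):

    from functools import partial
    from multiprocessing import Pool
    from pathlib import Path
    from tqdm import tqdm

    def should_skip(fpath: Path, skip_existing: bool) -> bool:
        return skip_existing and fpath.exists()

    def worker(pair, tag):
        src, dst = pair
        dst.write_text(f"{tag}: processed {src.name}")  # stand-in for embedding extraction

    if __name__ == "__main__":
        out_dir = Path("out")
        out_dir.mkdir(exist_ok=True)
        pairs = [(Path(f"audio-{i}.wav"), out_dir / f"embed-{i}.npy") for i in range(4)]
        # Drop pairs whose target already exists, as in the diff above
        pairs = [p for p in pairs if not should_skip(p[1], skip_existing=True)]
        func = partial(worker, tag="demo")
        with Pool(2) as pool:
            tuple(tqdm(pool.imap(func, pairs), "Embedding", len(pairs), unit="utterances"))

One consequence worth noting: len(fpaths) now counts only the remaining work, so the tqdm progress bar tracks files actually being embedded rather than the full dataset.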
@@ -144,14 +148,14 @@ def create_emo(synthesizer_root: Path, n_processes: int, skip_existing: bool, hparams
     assert wav_dir.exists() and metadata_fpath.exists()
     emo_dir = synthesizer_root.joinpath("emo")
     emo_dir.mkdir(exist_ok=True)
 
     # Gather the input wave filepath and the target output embed filepath
     with metadata_fpath.open("r", encoding="utf-8") as metadata_file:
         metadata = [line.split("|") for line in metadata_file]
-        fpaths = [(wav_dir.joinpath(m[0]), emo_dir.joinpath(m[0].replace("audio-", "emo-"))) for m in metadata]
+        fpaths = [(wav_dir.joinpath(m[0]), emo_dir.joinpath(m[0].replace("audio-", "emo-"))) for m in metadata if not should_skip(emo_dir.joinpath(m[0].replace("audio-", "emo-")), skip_existing)]
 
     # TODO: improve on the multiprocessing, it's terrible. Disk I/O is the bottleneck here.
     # Embed the utterances in separate threads
-    func = partial(_emo_extract_from_utterance, hparams=hparams, skip_existing=skip_existing)
+    func = partial(_emo_extract_from_utterance, hparams=hparams)
     job = Pool(n_processes).imap(func, fpaths)
     tuple(tqdm(job, "Emo", len(fpaths), unit="utterances"))
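The emotion pass mirrors the embedding pass, except the target name is derived by string substitution on the wav filename, so the same path is computed once for the filter and again for the pair itself. A quick check of that mapping (filename hypothetical):

    name = "audio-p225_001_00.npy"
    assert name.replace("audio-", "emo-") == "emo-p225_001_00.npy"

Binding the derived path once inside the comprehension (for example, iterating over precomputed (m, emo_path) pairs in an inner generator) would avoid the duplicated joinpath/replace, but the behavior is identical either way.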