diff --git a/.gitignore b/.gitignore index 486b8fd..70a9b93 100644 --- a/.gitignore +++ b/.gitignore @@ -17,5 +17,7 @@ *.sh synthesizer/saved_models/* vocoder/saved_models/* +encoder/saved_models/* cp_hifigan/* -!vocoder/saved_models/pretrained/* \ No newline at end of file +!vocoder/saved_models/pretrained/* +!encoder/saved_models/pretrained.pt \ No newline at end of file diff --git a/encoder/preprocess.py b/encoder/preprocess.py index 551a8b2..69986bb 100644 --- a/encoder/preprocess.py +++ b/encoder/preprocess.py @@ -117,6 +117,15 @@ def _preprocess_speaker_dirs(speaker_dirs, dataset_name, datasets_root, out_dir, logger.finalize() print("Done preprocessing %s.\n" % dataset_name) +def preprocess_aidatatang_200zh(datasets_root: Path, out_dir: Path, skip_existing=False): + dataset_name = "aidatatang_200zh" + dataset_root, logger = _init_preprocess_dataset(dataset_name, datasets_root, out_dir) + if not dataset_root: + return + # Preprocess all speakers + speaker_dirs = list(dataset_root.joinpath("corpus", "train").glob("*")) + _preprocess_speaker_dirs(speaker_dirs, dataset_name, datasets_root, out_dir, "wav", + skip_existing, logger) def preprocess_librispeech(datasets_root: Path, out_dir: Path, skip_existing=False): for dataset_name in librispeech_datasets["train"]["other"]: diff --git a/encoder/saved_models/pretrained.pt b/encoder/saved_models/pretrained.pt index 47e405a..e817ffe 100644 Binary files a/encoder/saved_models/pretrained.pt and b/encoder/saved_models/pretrained.pt differ diff --git a/archived_untest_files/encoder_preprocess.py b/encoder_preprocess.py similarity index 78% rename from archived_untest_files/encoder_preprocess.py rename to encoder_preprocess.py index 1150201..853c6cb 100644 --- a/archived_untest_files/encoder_preprocess.py +++ b/encoder_preprocess.py @@ -1,4 +1,4 @@ -from encoder.preprocess import preprocess_librispeech, preprocess_voxceleb1, preprocess_voxceleb2 +from encoder.preprocess import preprocess_librispeech, preprocess_voxceleb1, preprocess_voxceleb2, preprocess_aidatatang_200zh from utils.argutils import print_args from pathlib import Path import argparse @@ -10,17 +10,7 @@ if __name__ == "__main__": parser = argparse.ArgumentParser( description="Preprocesses audio files from datasets, encodes them as mel spectrograms and " "writes them to the disk. This will allow you to train the encoder. The " - "datasets required are at least one of VoxCeleb1, VoxCeleb2 and LibriSpeech. " - "Ideally, you should have all three. You should extract them as they are " - "after having downloaded them and put them in a same directory, e.g.:\n" - "-[datasets_root]\n" - " -LibriSpeech\n" - " -train-other-500\n" - " -VoxCeleb1\n" - " -wav\n" - " -vox1_meta.csv\n" - " -VoxCeleb2\n" - " -dev", + "datasets required are at least one of LibriSpeech, VoxCeleb1, VoxCeleb2, aidatatang_200zh. ", formatter_class=MyFormatter ) parser.add_argument("datasets_root", type=Path, help=\ @@ -29,7 +19,7 @@ if __name__ == "__main__": "Path to the output directory that will contain the mel spectrograms. If left out, " "defaults to /SV2TTS/encoder/") parser.add_argument("-d", "--datasets", type=str, - default="librispeech_other,voxceleb1,voxceleb2", help=\ + default="librispeech_other,voxceleb1,aidatatang_200zh", help=\ "Comma-separated list of the name of the datasets you want to preprocess. Only the train " "set of these datasets will be used. Possible names: librispeech_other, voxceleb1, " "voxceleb2.") @@ -63,6 +53,7 @@ if __name__ == "__main__": "librispeech_other": preprocess_librispeech, "voxceleb1": preprocess_voxceleb1, "voxceleb2": preprocess_voxceleb2, + "aidatatang_200zh": preprocess_aidatatang_200zh, } args = vars(args) for dataset in args.pop("datasets"): diff --git a/archived_untest_files/encoder_train.py b/encoder_train.py similarity index 100% rename from archived_untest_files/encoder_train.py rename to encoder_train.py