From beec0b93ed44d0e616da2810da0157cfd89971ae Mon Sep 17 00:00:00 2001
From: babysor00
Date: Sat, 4 Feb 2023 17:00:49 +0800
Subject: [PATCH] Fix issues

---
 utils/audio_utils.py | 16 ++++------------
 vits.ipynb           | 38 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 42 insertions(+), 12 deletions(-)

diff --git a/utils/audio_utils.py b/utils/audio_utils.py
index bed38b5..dee34d1 100644
--- a/utils/audio_utils.py
+++ b/utils/audio_utils.py
@@ -68,20 +68,12 @@ def mel_spectrogram(
     if torch.max(y) > 1.:
         print('max value is ', torch.max(y))
 
-    # global mel_basis, hann_window
-    # if fmax not in mel_basis:
-    #     mel = librosa_mel_fn(sampling_rate, n_fft, num_mels, fmin, fmax)
-    #     mel_basis[str(fmax)+'_'+str(y.device)] = torch.from_numpy(mel).float().to(y.device)
-    #     hann_window[str(y.device)] = torch.hann_window(win_size).to(y.device)
     global mel_basis, hann_window
-    dtype_device = str(y.dtype) + '_' + str(y.device)
-    fmax_dtype_device = str(fmax) + '_' + dtype_device
-    wnsize_dtype_device = str(win_size) + '_' + dtype_device
-    if fmax_dtype_device not in mel_basis:
+    if fmax not in mel_basis:
         mel = librosa_mel_fn(sampling_rate, n_fft, num_mels, fmin, fmax)
-        mel_basis[fmax_dtype_device] = torch.from_numpy(mel).to(dtype=y.dtype, device=y.device)
-    if wnsize_dtype_device not in hann_window:
-        hann_window[wnsize_dtype_device] = torch.hann_window(win_size).to(dtype=y.dtype, device=y.device)
+        mel_basis[str(fmax)+'_'+str(y.device)] = torch.from_numpy(mel).float().to(y.device)
+        hann_window[str(y.device)] = torch.hann_window(win_size).to(y.device)
+
     y = torch.nn.functional.pad(y.unsqueeze(1), (int((n_fft-hop_size)/2), int((n_fft-hop_size)/2)), mode='reflect')
     y = y.squeeze(1)
 
diff --git a/vits.ipynb b/vits.ipynb
index c0ff3e6..cd01684 100644
--- a/vits.ipynb
+++ b/vits.ipynb
@@ -377,6 +377,44 @@
     "    metadata_file.write(new_info)\n",
     "metadata_file.close()"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from pathlib import Path\n",
+    "import os\n",
+    "import shutil\n",
+    "emo_root = Path('../audiodata/SV2TTS/synthesizer').joinpath('emo')\n",
+    "# raw_root = Path('../audiodata/aidatatang_200zh/corpus/train')\n",
+    "# emo_file_list = emo_root.glob(\"**/*.npy\")\n",
+    "# for emo_file in emo_file_list:\n",
+    "#     if emo_file.name.endswith('wav__00.npy'):\n",
+    "#         folder = emo_file.parent\n",
+    "#         os.rename(emo_file, folder.joinpath(emo_file.name.replace(\"__00\", \"_00\")))\n",
+    "        # shutil.move(emo_file, emo_root.joinpath(emo_file.name))\n",
+    "\n",
+    "root = Path('../audiodata/SV2TTS/synthesizer')\n",
+    "dict_info = []\n",
+    "with open(root.joinpath(\"train.txt\"), \"r\", encoding=\"utf-8\") as dict_meta:\n",
+    "    for raw in dict_meta:\n",
+    "        if not raw:\n",
+    "            continue\n",
+    "        v = raw.split(\"|\")[0].replace(\"audio\",\"emo\")\n",
+    "        emo_fpath = root.joinpath(\"emo\").joinpath(v)\n",
+    "        if emo_fpath.exists():\n",
+    "            dict_info.append(raw)\n",
+    "        # else:\n",
+    "        #     print(emo_fpath)\n",
+    "# Iterate over each wav\n",
+    "meta2 = Path('../audiodata/SV2TTS/synthesizer/train2.txt')\n",
+    "metadata_file = meta2.open(\"w\", encoding=\"utf-8\")\n",
+    "for new_info in dict_info:\n",
+    "    metadata_file.write(new_info)\n",
+    "metadata_file.close()"
+   ]
   }
  ],
  "metadata": {