Fix issues

2024-03-22 13:11:31 +08:00 · 2023-02-04 17:00:49 +08:00 · 2023-02-04 17:00:49 +08:00 · beec0b93ed
commit beec0b93ed
parent 9d67b757f0
2 changed files with 42 additions and 12 deletions
--- a/utils/audio_utils.py
+++ b/utils/audio_utils.py
@@ -68,20 +68,12 @@ def mel_spectrogram(
    if torch.max(y) > 1.:
        print('max value is ', torch.max(y))
    # global mel_basis, hann_window
    # if fmax not in mel_basis:
    #     mel = librosa_mel_fn(sampling_rate, n_fft, num_mels, fmin, fmax)
    #     mel_basis[str(fmax)+'_'+str(y.device)] = torch.from_numpy(mel).float().to(y.device)
    #     hann_window[str(y.device)] = torch.hann_window(win_size).to(y.device)
    global mel_basis, hann_window
-    dtype_device = str(y.dtype) + '_' + str(y.device)
+    if fmax not in mel_basis:
    fmax_dtype_device = str(fmax) + '_' + dtype_device
    wnsize_dtype_device = str(win_size) + '_' + dtype_device
    if fmax_dtype_device not in mel_basis:
        mel = librosa_mel_fn(sampling_rate, n_fft, num_mels, fmin, fmax)
-        mel_basis[fmax_dtype_device] = torch.from_numpy(mel).to(dtype=y.dtype, device=y.device)
+        mel_basis[str(fmax)+'_'+str(y.device)] = torch.from_numpy(mel).float().to(y.device)
-    if wnsize_dtype_device not in hann_window:
+        hann_window[str(y.device)] = torch.hann_window(win_size).to(y.device)
-        hann_window[wnsize_dtype_device] = torch.hann_window(win_size).to(dtype=y.dtype, device=y.device)
+   
    y = torch.nn.functional.pad(y.unsqueeze(1), (int((n_fft-hop_size)/2), int((n_fft-hop_size)/2)), mode='reflect')
    y = y.squeeze(1)
--- a/vits.ipynb
+++ b/vits.ipynb
@@ -377,6 +377,44 @@
    "    metadata_file.write(new_info)\n",
    "metadata_file.close()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "from pathlib import Path\n",
    "import os\n",
    "import shutil\n",
    "emo_root = Path('../audiodata/SV2TTS/synthesizer').joinpath('emo')\n",
    "# raw_root = Path('../audiodata/aidatatang_200zh/corpus/train')\n",
    "# emo_file_list = emo_root.glob(\"**/*.npy\")\n",
    "# for emo_file in emo_file_list:\n",
    "#     if emo_file.name.endswith('wav__00.npy'):\n",
    "#         folder = emo_file.parent\n",
    "#         os.rename(emo_file, folder.joinpath(emo_file.name.replace(\"__00\", \"_00\")))\n",
    "    # shutil.move(emo_file, emo_root.joinpath(emo_file.name))\n",
    "\n",
    "root = Path('../audiodata/SV2TTS/synthesizer')\n",
    "dict_info = []\n",
    "with open(root.joinpath(\"train.txt\"), \"r\", encoding=\"utf-8\") as dict_meta:\n",
    "    for raw in dict_meta:\n",
    "        if not raw:\n",
    "            continue\n",
    "        v = raw.split(\"|\")[0].replace(\"audio\",\"emo\")\n",
    "        emo_fpath = root.joinpath(\"emo\").joinpath(v)\n",
    "        if emo_fpath.exists():\n",
    "            dict_info.append(raw)\n",
    "        # else:\n",
    "        #     print(emo_fpath)\n",
    "# Iterate over each wav\n",
    "meta2 = Path('../audiodata/SV2TTS/synthesizer/train2.txt')\n",
    "metadata_file = meta2.open(\"w\", encoding=\"utf-8\")\n",
    "for new_info in dict_info:\n",
    "    metadata_file.write(new_info)\n",
    "metadata_file.close()"
   ]
  }
 ],
 "metadata": {