From beec0b93ed44d0e616da2810da0157cfd89971ae Mon Sep 17 00:00:00 2001
From: babysor00
Date: Sat, 4 Feb 2023 17:00:49 +0800
Subject: [PATCH] Fix issues

---
 utils/audio_utils.py | 16 ++++------------
 vits.ipynb           | 38 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 42 insertions(+), 12 deletions(-)

diff --git a/utils/audio_utils.py b/utils/audio_utils.py
index bed38b5..dee34d1 100644
--- a/utils/audio_utils.py
+++ b/utils/audio_utils.py
@@ -68,20 +68,12 @@ def mel_spectrogram(
     if torch.max(y) > 1.:
         print('max value is ', torch.max(y))
 
-    # global mel_basis, hann_window
-    # if fmax not in mel_basis:
-    #     mel = librosa_mel_fn(sampling_rate, n_fft, num_mels, fmin, fmax)
-    #     mel_basis[str(fmax)+'_'+str(y.device)] = torch.from_numpy(mel).float().to(y.device)
-    #     hann_window[str(y.device)] = torch.hann_window(win_size).to(y.device)
     global mel_basis, hann_window
-    dtype_device = str(y.dtype) + '_' + str(y.device)
-    fmax_dtype_device = str(fmax) + '_' + dtype_device
-    wnsize_dtype_device = str(win_size) + '_' + dtype_device
-    if fmax_dtype_device not in mel_basis:
+    if fmax not in mel_basis:
         mel = librosa_mel_fn(sampling_rate, n_fft, num_mels, fmin, fmax)
-        mel_basis[fmax_dtype_device] = torch.from_numpy(mel).to(dtype=y.dtype, device=y.device)
-    if wnsize_dtype_device not in hann_window:
-        hann_window[wnsize_dtype_device] = torch.hann_window(win_size).to(dtype=y.dtype, device=y.device)
+        mel_basis[str(fmax)+'_'+str(y.device)] = torch.from_numpy(mel).float().to(y.device)
+        hann_window[str(y.device)] = torch.hann_window(win_size).to(y.device)
+
     y = torch.nn.functional.pad(y.unsqueeze(1), (int((n_fft-hop_size)/2), int((n_fft-hop_size)/2)), mode='reflect')
     y = y.squeeze(1)
 
diff --git a/vits.ipynb b/vits.ipynb
index c0ff3e6..cd01684 100644
--- a/vits.ipynb
+++ b/vits.ipynb
@@ -377,6 +377,44 @@
     "    metadata_file.write(new_info)\n",
     "metadata_file.close()"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from pathlib import Path\n",
+    "import os\n",
+    "import shutil\n",
+    "emo_root = Path('../audiodata/SV2TTS/synthesizer').joinpath('emo')\n",
+    "# raw_root = Path('../audiodata/aidatatang_200zh/corpus/train')\n",
+    "# emo_file_list = emo_root.glob(\"**/*.npy\")\n",
+    "# for emo_file in emo_file_list:\n",
+    "#     if emo_file.name.endswith('wav__00.npy'):\n",
+    "#         folder = emo_file.parent\n",
+    "#         os.rename(emo_file, folder.joinpath(emo_file.name.replace(\"__00\", \"_00\")))\n",
+    "        # shutil.move(emo_file, emo_root.joinpath(emo_file.name))\n",
+    "\n",
+    "root = Path('../audiodata/SV2TTS/synthesizer')\n",
+    "dict_info = []\n",
+    "with open(root.joinpath(\"train.txt\"), \"r\", encoding=\"utf-8\") as dict_meta:\n",
+    "    for raw in dict_meta:\n",
+    "        if not raw:\n",
+    "            continue\n",
+    "        v = raw.split(\"|\")[0].replace(\"audio\",\"emo\")\n",
+    "        emo_fpath = root.joinpath(\"emo\").joinpath(v)\n",
+    "        if emo_fpath.exists():\n",
+    "            dict_info.append(raw)\n",
+    "        # else:\n",
+    "        #     print(emo_fpath)\n",
+    "# Iterate over each wav\n",
+    "meta2 = Path('../audiodata/SV2TTS/synthesizer/train2.txt')\n",
+    "metadata_file = meta2.open(\"w\", encoding=\"utf-8\")\n",
+    "for new_info in dict_info:\n",
+    "    metadata_file.write(new_info)\n",
+    "metadata_file.close()"
+   ]
   }
  ],
  "metadata": {