From ad22997614a80d26ac26fc36d667c4e513a1180c Mon Sep 17 00:00:00 2001
From: AyahaShirane <83604606+AyahaShirane@users.noreply.github.com>
Date: Sun, 27 Feb 2022 11:02:01 +0800
Subject: [PATCH] fixed issue #372 (#379)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fixed some problems caused by argument passing, and changed the
deprecated torch.nn.functional.tanh() to torch.tanh().
---
 synthesizer/audio.py                       | 2 +-
 synthesizer/inference.py                   | 2 +-
 synthesizer/models/global_style_token.py   | 2 +-
 synthesizer/preprocess_speaker.py          | 2 +-
 vocoder/wavernn/models/deepmind_version.py | 6 +++---
 5 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/synthesizer/audio.py b/synthesizer/audio.py
index 83dc96c..2e03ae5 100644
--- a/synthesizer/audio.py
+++ b/synthesizer/audio.py
@@ -167,7 +167,7 @@ def _mel_to_linear(mel_spectrogram, hparams):
 
 def _build_mel_basis(hparams):
     assert hparams.fmax <= hparams.sample_rate // 2
-    return librosa.filters.mel(hparams.sample_rate, hparams.n_fft, n_mels=hparams.num_mels,
+    return librosa.filters.mel(sr=hparams.sample_rate, n_fft=hparams.n_fft, n_mels=hparams.num_mels,
                                fmin=hparams.fmin, fmax=hparams.fmax)
 
 def _amp_to_db(x, hparams):
diff --git a/synthesizer/inference.py b/synthesizer/inference.py
index 2b4d15b..3ff856b 100644
--- a/synthesizer/inference.py
+++ b/synthesizer/inference.py
@@ -149,7 +149,7 @@ class Synthesizer:
         Loads and preprocesses an audio file under the same conditions the audio files
         were used to train the synthesizer.
         """
-        wav = librosa.load(str(fpath), hparams.sample_rate)[0]
+        wav = librosa.load(path=str(fpath), sr=hparams.sample_rate)[0]
         if hparams.rescale:
             wav = wav / np.abs(wav).max() * hparams.rescaling_max
         # denoise
diff --git a/synthesizer/models/global_style_token.py b/synthesizer/models/global_style_token.py
index 229b9ef..21ce07e 100644
--- a/synthesizer/models/global_style_token.py
+++ b/synthesizer/models/global_style_token.py
@@ -97,7 +97,7 @@ class STL(nn.Module):
     def forward(self, inputs):
         N = inputs.size(0)
         query = inputs.unsqueeze(1)  # [N, 1, E//2]
-        keys = tFunctional.tanh(self.embed).unsqueeze(0).expand(N, -1, -1)  # [N, token_num, E // num_heads]
+        keys = torch.tanh(self.embed).unsqueeze(0).expand(N, -1, -1)  # [N, token_num, E // num_heads]
         style_embed = self.attention(query, keys)
 
         return style_embed
diff --git a/synthesizer/preprocess_speaker.py b/synthesizer/preprocess_speaker.py
index 88fad38..28ddad4 100644
--- a/synthesizer/preprocess_speaker.py
+++ b/synthesizer/preprocess_speaker.py
@@ -63,7 +63,7 @@ def _process_utterance(wav: np.ndarray, text: str, out_dir: Path, basename: str,
 
 def _split_on_silences(wav_fpath, words, hparams):
     # Load the audio waveform
-    wav, _ = librosa.load(wav_fpath, hparams.sample_rate)
+    wav, _ = librosa.load(wav_fpath, sr= hparams.sample_rate)
     wav = librosa.effects.trim(wav, top_db= 40, frame_length=2048, hop_length=512)[0]
     if hparams.rescale:
         wav = wav / np.abs(wav).max() * hparams.rescaling_max
diff --git a/vocoder/wavernn/models/deepmind_version.py b/vocoder/wavernn/models/deepmind_version.py
index 1d973d9..17b33b2 100644
--- a/vocoder/wavernn/models/deepmind_version.py
+++ b/vocoder/wavernn/models/deepmind_version.py
@@ -59,7 +59,7 @@ class WaveRNN(nn.Module) :
         # Compute all gates for coarse and fine
         u = F.sigmoid(R_u + I_u + self.bias_u)
         r = F.sigmoid(R_r + I_r + self.bias_r)
-        e = F.tanh(r * R_e + I_e + self.bias_e)
+        e = torch.tanh(r * R_e + I_e + self.bias_e)
         hidden = u * prev_hidden + (1. - u) * e
 
         # Split the hidden state
@@ -118,7 +118,7 @@ class WaveRNN(nn.Module) :
             # Compute the coarse gates
             u = F.sigmoid(R_coarse_u + I_coarse_u + b_coarse_u)
             r = F.sigmoid(R_coarse_r + I_coarse_r + b_coarse_r)
-            e = F.tanh(r * R_coarse_e + I_coarse_e + b_coarse_e)
+            e = torch.tanh(r * R_coarse_e + I_coarse_e + b_coarse_e)
             hidden_coarse = u * hidden_coarse + (1. - u) * e
 
             # Compute the coarse output
@@ -138,7 +138,7 @@ class WaveRNN(nn.Module) :
            # Compute the fine gates
             u = F.sigmoid(R_fine_u + I_fine_u + b_fine_u)
             r = F.sigmoid(R_fine_r + I_fine_r + b_fine_r)
-            e = F.tanh(r * R_fine_e + I_fine_e + b_fine_e)
+            e = torch.tanh(r * R_fine_e + I_fine_e + b_fine_e)
             hidden_fine = u * hidden_fine + (1. - u) * e
 
             # Compute the fine output
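
Note on the librosa changes: from librosa 0.10 onward, parameters such as
sr, n_fft, fmin, and fmax are keyword-only, so the old positional calls fail
with a TypeError rather than a mere deprecation warning. A minimal sketch of
the before/after, assuming librosa >= 0.10; the sample rate, FFT size, and
"sample.wav" below are hypothetical values, not taken from this repo:

import librosa

# Old positional style removed by this patch; raises TypeError on librosa >= 0.10:
#   mel_basis = librosa.filters.mel(16000, 800, n_mels=80)
#   wav, sr = librosa.load("sample.wav", 16000)

# Keyword style the patch switches to:
mel_basis = librosa.filters.mel(sr=16000, n_fft=800, n_mels=80)  # shape (80, 401)
wav, sr = librosa.load("sample.wav", sr=16000)                   # resampled to 16 kHz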
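
Note on the tanh changes: torch.nn.functional.tanh() is deprecated in favor
of torch.tanh(). The two compute the same values; the functional form just
emits a deprecation warning pointing to torch.tanh(). A minimal sketch, where
the random tensor is a hypothetical stand-in for the gate pre-activations:

import torch
import torch.nn.functional as F

x = torch.randn(4, 8)             # stand-in for r * R_e + I_e + self.bias_e

y_old = F.tanh(x)                 # deprecated functional form
y_new = torch.tanh(x)             # replacement used throughout this patch

assert torch.equal(y_old, y_new)  # identical results; only the warning goes away

The F.sigmoid() calls left in the same hunks carry the analogous deprecation,
with torch.sigmoid() as the replacement, but this patch only touches the tanh
sites.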