fixed the issues #372 (#379)

修复了一些参数传递造成的问题，把过时的torch.nn.functional.tanh()改成了torch.tanh()
2024-03-22 13:11:31 +08:00 · 2022-02-27 11:02:01 +08:00 · 2022-02-27 11:02:01 +08:00 · ad22997614
commit ad22997614
parent 9e072c2619
5 changed files with 7 additions and 7 deletions
--- a/synthesizer/audio.py
+++ b/synthesizer/audio.py
@@ -167,7 +167,7 @@ def _mel_to_linear(mel_spectrogram, hparams):

 def _build_mel_basis(hparams):
    assert hparams.fmax <= hparams.sample_rate // 2
-    return librosa.filters.mel(hparams.sample_rate, hparams.n_fft, n_mels=hparams.num_mels,
+    return librosa.filters.mel(sr=hparams.sample_rate, n_fft=hparams.n_fft, n_mels=hparams.num_mels,
                               fmin=hparams.fmin, fmax=hparams.fmax)

 def _amp_to_db(x, hparams):
--- a/synthesizer/inference.py
+++ b/synthesizer/inference.py
@@ -149,7 +149,7 @@ class Synthesizer:
        Loads and preprocesses an audio file under the same conditions the audio files were used to
        train the synthesizer. 
        """
-        wav = librosa.load(str(fpath), hparams.sample_rate)[0]
+        wav = librosa.load(path=str(fpath), sr=hparams.sample_rate)[0]
        if hparams.rescale:
            wav = wav / np.abs(wav).max() * hparams.rescaling_max
        # denoise
--- a/synthesizer/models/global_style_token.py
+++ b/synthesizer/models/global_style_token.py
@@ -97,7 +97,7 @@ class STL(nn.Module):
    def forward(self, inputs):
        N = inputs.size(0)
        query = inputs.unsqueeze(1)  # [N, 1, E//2]
-        keys = tFunctional.tanh(self.embed).unsqueeze(0).expand(N, -1, -1)  # [N, token_num, E // num_heads]
+        keys = torch.tanh(self.embed).unsqueeze(0).expand(N, -1, -1)  # [N, token_num, E // num_heads]
        style_embed = self.attention(query, keys)

        return style_embed
--- a/synthesizer/preprocess_speaker.py
+++ b/synthesizer/preprocess_speaker.py
@@ -63,7 +63,7 @@ def _process_utterance(wav: np.ndarray, text: str, out_dir: Path, basename: str,

 def _split_on_silences(wav_fpath, words, hparams):
    # Load the audio waveform
-    wav, _ = librosa.load(wav_fpath, hparams.sample_rate)
+    wav, _ = librosa.load(wav_fpath, sr= hparams.sample_rate)
    wav = librosa.effects.trim(wav, top_db= 40, frame_length=2048, hop_length=512)[0]
    if hparams.rescale:
        wav = wav / np.abs(wav).max() * hparams.rescaling_max
--- a/vocoder/wavernn/models/deepmind_version.py
+++ b/vocoder/wavernn/models/deepmind_version.py
@@ -59,7 +59,7 @@ class WaveRNN(nn.Module) :
        # Compute all gates for coarse and fine 
        u = F.sigmoid(R_u + I_u + self.bias_u)
        r = F.sigmoid(R_r + I_r + self.bias_r)
-        e = F.tanh(r * R_e + I_e + self.bias_e)
+        e = torch.tanh(r * R_e + I_e + self.bias_e)
        hidden = u * prev_hidden + (1. - u) * e
        
        # Split the hidden state
@@ -118,7 +118,7 @@ class WaveRNN(nn.Module) :
                # Compute the coarse gates
                u = F.sigmoid(R_coarse_u + I_coarse_u + b_coarse_u)
                r = F.sigmoid(R_coarse_r + I_coarse_r + b_coarse_r)
-                e = F.tanh(r * R_coarse_e + I_coarse_e + b_coarse_e)
+                e = torch.tanh(r * R_coarse_e + I_coarse_e + b_coarse_e)
                hidden_coarse = u * hidden_coarse + (1. - u) * e

                # Compute the coarse output
@@ -138,7 +138,7 @@ class WaveRNN(nn.Module) :
                # Compute the fine gates
                u = F.sigmoid(R_fine_u + I_fine_u + b_fine_u)
                r = F.sigmoid(R_fine_r + I_fine_r + b_fine_r)
-                e = F.tanh(r * R_fine_e + I_fine_e + b_fine_e)
+                e = torch.tanh(r * R_fine_e + I_fine_e + b_fine_e)
                hidden_fine = u * hidden_fine + (1. - u) * e

                # Compute the fine output