mirror of
https://github.com/babysor/MockingBird.git
synced 2024-03-22 13:11:31 +08:00
b617a87ee4
* Init ppg extractor and ppg2mel * add preprocess and training * FIx known issues * Update __init__.py Allow to gen audio * Fix length issue * Fix bug of preparing fid * Fix sample issues * Add UI usage of PPG-vc
125 lines
3.5 KiB
Python
125 lines
3.5 KiB
Python
import logging
|
|
import numpy as np
|
|
import pyworld
|
|
from scipy.interpolate import interp1d
|
|
from scipy.signal import firwin, get_window, lfilter
|
|
|
|
def compute_mean_std(lf0):
    """Return the mean and standard deviation of the non-zero entries of lf0.

    Entries equal to zero are excluded from both statistics.

    Args:
        lf0 (ndarray): sequence of values in which zero entries are skipped.

    Return:
        (tuple): ``(mean, std)`` computed over the non-zero entries only.
    """
    selected = lf0[lf0 != 0]
    return np.mean(selected), np.std(selected)
|
|
|
|
|
|
def compute_f0(wav, sr=16000, frame_period=10.0):
    """Estimate an f0 contour from a waveform with pyworld's harvest.

    Args:
        wav (ndarray): waveform samples; cast to float64 before analysis.
        sr (int): sampling rate forwarded to harvest.
        frame_period (float): frame period forwarded to harvest
            (presumably milliseconds -- confirm against pyworld).

    Return:
        (ndarray): f0 sequence cast to float32.
    """
    samples = wav.astype(np.float64)
    # harvest returns two values; only the first (f0) is used here.
    f0, _unused = pyworld.harvest(
        samples,
        sr,
        frame_period=frame_period,
        f0_floor=80.0,
        f0_ceil=600.0,
    )
    return f0.astype(np.float32)
|
|
|
|
def f02lf0(f0):
    """Convert f0 to log-f0, leaving zero entries at zero.

    Args:
        f0 (ndarray): f0 sequence; entries equal to zero are kept as-is.

    Return:
        (ndarray): copy of f0 with the natural log applied to every
            non-zero entry. The input array is not modified.
    """
    voiced = np.nonzero(f0)
    log_f0 = f0.copy()
    # Only non-zero entries are passed to log, so log(0) is never evaluated.
    log_f0[voiced] = np.log(f0[voiced])
    return log_f0
|
|
|
|
def get_converted_lf0uv(
    wav,
    lf0_mean_trg,
    lf0_std_trg,
    convert=True,
):
    """Build a two-column [continuous log-f0, uv] array from a waveform.

    Args:
        wav (ndarray): waveform passed to ``compute_f0``.
        lf0_mean_trg (float): target log-f0 mean; ignored when ``convert``
            is False.
        lf0_std_trg (float): target log-f0 standard deviation; ignored when
            ``convert`` is False.
        convert (bool): when True, the source log-f0 is normalised with its
            own mean/std and rescaled to the target statistics before the
            continuous contour is computed.

    Return:
        (ndarray): array whose first column is the continuous log-f0 and
            whose second column is the uv flag from ``get_cont_lf0``.
    """
    f0 = compute_f0(wav)

    if convert:
        lf0_src = f02lf0(f0)
        src_mean, src_std = compute_mean_std(lf0_src)

        # Frames with positive log-f0 are the ones that get transformed.
        voiced = lf0_src > 0.0

        # Mean/variance transformation in the log domain.
        lf0_vc = lf0_src.copy()
        lf0_vc[voiced] = (lf0_src[voiced] - src_mean) / src_std * lf0_std_trg + lf0_mean_trg

        # Back to the linear domain for the transformed frames only.
        f0 = lf0_vc.copy()
        f0[voiced] = np.exp(lf0_vc[voiced])

    uv, cont_lf0 = get_cont_lf0(f0)
    return np.concatenate([cont_lf0[:, np.newaxis], uv[:, np.newaxis]], axis=1)
|
|
|
|
def low_pass_filter(x, fs, cutoff=70, padding=True):
    """Apply a delay-compensated FIR low-pass filter to a 1-D sequence.

    The input is edge-padded on both sides, filtered with a 255-tap
    ``firwin`` design, and then trimmed so the output has the same length
    as the input.

    Args:
        x (ndarray): Waveform sequence (1-D).
        fs (int): Sampling frequency.
        cutoff (float): Cutoff frequency of the low pass filter, in the same
            unit as ``fs``. Must be below ``fs / 2``.
        padding (bool): Currently unused; kept so existing callers keep
            working. The input is always edge-padded.

    Return:
        (ndarray): Low pass filtered waveform sequence, same length as ``x``.
    """
    # True division: `fs // 2` floors the Nyquist frequency for odd `fs`,
    # which shifts the normalised cutoff and can push it past 1.0.
    nyquist = fs / 2.0
    norm_cutoff = cutoff / nyquist

    numtaps = 255
    fil = firwin(numtaps, norm_cutoff)

    # Edge padding keeps the start-up transient of lfilter out of the
    # region that is returned.
    x_pad = np.pad(x, (numtaps, numtaps), 'edge')
    lpf_x = lfilter(fil, 1, x_pad)

    # Drop the left pad plus the (numtaps // 2)-sample filter delay, then
    # keep exactly len(x) samples.
    start = numtaps + numtaps // 2
    return lpf_x[start:start + len(x)]
|
|
|
|
|
|
def convert_continuos_f0(f0):
    """Convert an f0 sequence with zero (unvoiced) frames to continuous f0.

    Frames before the first non-zero value take that first value, frames
    after the last non-zero value take that last value, and zero frames in
    between are filled by linear interpolation between their non-zero
    neighbours. The input array is not modified.

    Args:
        f0 (ndarray): original f0 sequence with the shape (T)

    Return:
        (tuple): ``(uv, cont_f0)`` where ``uv`` is a float32 array of shape
            (T) holding 1.0 for non-zero frames and 0.0 otherwise, and
            ``cont_f0`` is the continuous f0 with the shape (T). When every
            frame is zero a warning is logged and ``f0`` itself is returned
            as ``cont_f0``.
    """
    f0 = np.asarray(f0)

    # get uv information as binary
    uv = np.float32(f0 != 0)

    # indices of the non-zero frames
    nz_frames = np.flatnonzero(f0)
    if nz_frames.size == 0:
        # logging.warn is a deprecated alias that was removed in Python 3.13.
        logging.warning("all of the f0 values are 0.")
        return uv, f0

    # np.interp holds the first/last sample value outside the given range,
    # which extends the first and last non-zero f0 to the sequence edges
    # without writing into the caller's array.
    cont_f0 = np.interp(np.arange(f0.shape[0]), nz_frames, f0[nz_frames])

    return uv, cont_f0
|
|
|
|
|
|
def get_cont_lf0(f0, frame_period=10.0, lpf=False):
    """Compute the uv flags and continuous log-f0 for an f0 sequence.

    Args:
        f0 (ndarray): f0 sequence passed to ``convert_continuos_f0``.
        frame_period (float): frame period used to derive the frame rate
            given to the low pass filter as ``1 / (frame_period * 0.001)``;
            only read when ``lpf`` is True.
        lpf (bool): when True, the continuous f0 is low-pass filtered
            (cutoff=20) before the log is taken.

    Return:
        (tuple): ``(uv, cont_lf0)`` where ``uv`` comes from
            ``convert_continuos_f0`` and ``cont_lf0`` is the natural log of
            the (optionally filtered) continuous f0. Non-positive entries
            are left unchanged instead of being passed to log.
    """
    uv, cont_f0 = convert_continuos_f0(f0)

    if lpf:
        frame_rate = int(1.0 / (frame_period * 0.001))
        cont_f0 = low_pass_filter(cont_f0, frame_rate, cutoff=20)

    # Same mask in both modes: log is only taken where the value is
    # strictly positive, so the result never contains NaN/-inf from log.
    positive = cont_f0 > 0
    cont_lf0 = cont_f0.copy()
    cont_lf0[positive] = np.log(cont_f0[positive])
    return uv, cont_lf0
|