MockingBird/vits.ipynb

409 lines
238 KiB
Plaintext
Raw Normal View History

2023-02-04 14:13:38 +08:00
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'log_interval': 2000, 'eval_interval': 4000, 'seed': 1234, 'epochs': 10000, 'learning_rate': 0.0001, 'betas': [0.8, 0.99], 'eps': 1e-09, 'batch_size': 16, 'fp16_run': True, 'lr_decay': 0.5, 'segment_size': 8192, 'init_lr_ratio': 1, 'warmup_epochs': 0, 'c_mel': 45, 'c_kl': 1.0}\n",
"Trainable Parameters: 0.000M\n"
]
}
],
"source": [
"from utils.hparams import load_hparams_json\n",
"from utils.util import intersperse\n",
"import json\n",
"from models.synthesizer.models.vits import Vits\n",
"import torch\n",
"import numpy as np\n",
"import IPython.display as ipd\n",
"\n",
"# chinese_cleaners\n",
"_pad = '_'\n",
"_punctuation = ',。!?—…'\n",
"_letters = 'ㄅㄆㄇㄈㄉㄊㄋㄌㄍㄎㄏㄐㄑㄒㄓㄔㄕㄖㄗㄘㄙㄚㄛㄜㄝㄞㄟㄠㄡㄢㄣㄤㄥㄦㄧㄨㄩˉˊˇˋ˙ '\n",
"# Export all symbols:\n",
"symbols = [_pad] + list(_punctuation) + list(_letters)\n",
"\n",
"hps = load_hparams_json(\"data/ckpt/synthesizer/vits/config.json\")\n",
"print(hps.train)\n",
"model = Vits(\n",
" len(symbols),\n",
" hps[\"data\"][\"filter_length\"] // 2 + 1,\n",
" hps[\"train\"][\"segment_size\"] // hps[\"data\"][\"hop_length\"],\n",
" n_speakers=hps[\"data\"][\"n_speakers\"],\n",
" stop_threshold=0.5,\n",
" **hps[\"model\"])\n",
"_ = model.eval()\n",
"device = torch.device(\"cpu\")\n",
"model.load(\"data/ckpt/synthesizer/vits/G_208000.pth\", device)\n",
"\n",
"# 随机抽取情感参考音频的根目录\n",
"random_emotion_root = \"D:\\\\audiodata\\\\aidatatang_200zh\\\\corpus\\\\train\\\\G0017\"\n",
"import random, re\n",
"# import cn2an # remove dependency before production\n",
"from pypinyin import lazy_pinyin, BOPOMOFO\n",
"\n",
"_symbol_to_id = {s: i for i, s in enumerate(symbols)}\n",
"\n",
"# def number_to_chinese(text):\n",
"# numbers = re.findall(r'\\d+(?:\\.?\\d+)?', text)\n",
"# for number in numbers:\n",
"# text = text.replace(number, cn2an.an2cn(number), 1)\n",
"# return text\n",
"\n",
"def chinese_to_bopomofo(text, taiwanese=False):\n",
"    '''Convert the Chinese characters in `text` to bopomofo, one character at a time.\n",
"    Args:\n",
"        text: string to convert; characters outside U+4E00-U+9FFF are copied through unchanged\n",
"        taiwanese: if True, every bopomofo syllable is prefixed with a hash sign (#)\n",
"    Returns:\n",
"        A new string with each Chinese character replaced by its bopomofo reading;\n",
"        a space is inserted before a reading unless it starts the result\n",
"    '''\n",
"    # NOTE(review): two `.replace('', '')` calls used to follow this one. Replacing the\n",
"    # empty pattern with the empty string is a no-op, so they were dropped; they look\n",
"    # like punctuation mappings whose characters were lost - confirm.\n",
"    text = text.replace('、', '')\n",
"    # Accumulate into a separate buffer. Appending to `text` itself made the function\n",
"    # return the untouched input followed by the conversion.\n",
"    result = ''\n",
"    for char in text:\n",
"        if not re.search('[\\u4e00-\\u9fff]', char):\n",
"            result += char\n",
"            continue\n",
"        # Readings that end in a bopomofo letter (no trailing tone mark) get 'ˉ' appended.\n",
"        readings = [re.sub(r'([\\u3105-\\u3129])$', r'\\1ˉ', b) for b in lazy_pinyin(char, BOPOMOFO)]\n",
"        if result != '':\n",
"            result += ' '\n",
"        if taiwanese:\n",
"            result += '#' + '#'.join(readings)\n",
"        else:\n",
"            result += ''.join(readings)\n",
"    return result\n",
"\n",
"_latin_to_bopomofo = [(re.compile('%s' % x[0], re.IGNORECASE), x[1]) for x in [\n",
" ('a', 'ㄟˉ'),\n",
" ('b', 'ㄅㄧˋ'),\n",
" ('c', 'ㄙㄧˉ'),\n",
" ('d', 'ㄉㄧˋ'),\n",
" ('e', 'ㄧˋ'),\n",
" ('f', 'ㄝˊㄈㄨˋ'),\n",
" ('g', 'ㄐㄧˋ'),\n",
" ('h', 'ㄝˇㄑㄩˋ'),\n",
" ('i', 'ㄞˋ'),\n",
" ('j', 'ㄐㄟˋ'),\n",
" ('k', 'ㄎㄟˋ'),\n",
" ('l', 'ㄝˊㄛˋ'),\n",
" ('m', 'ㄝˊㄇㄨˋ'),\n",
" ('n', 'ㄣˉ'),\n",
" ('o', 'ㄡˉ'),\n",
" ('p', 'ㄆㄧˉ'),\n",
" ('q', 'ㄎㄧㄡˉ'),\n",
" ('r', 'ㄚˋ'),\n",
" ('s', 'ㄝˊㄙˋ'),\n",
" ('t', 'ㄊㄧˋ'),\n",
" ('u', 'ㄧㄡˉ'),\n",
" ('v', 'ㄨㄧˉ'),\n",
" ('w', 'ㄉㄚˋㄅㄨˋㄌㄧㄡˋ'),\n",
" ('x', 'ㄝˉㄎㄨˋㄙˋ'),\n",
" ('y', 'ㄨㄞˋ'),\n",
" ('z', 'ㄗㄟˋ')\n",
"]]\n",
"\n",
"def latin_to_bopomofo(text):\n",
" for regex, replacement in _latin_to_bopomofo:\n",
" text = re.sub(regex, replacement, text)\n",
" return text\n",
"\n",
"#TODO: add cleaner to support multilang\n",
"def chinese_cleaners(text, cleaner_names):\n",
"    '''Pipeline for Chinese text.\n",
"    Args:\n",
"        text: raw input string\n",
"        cleaner_names: currently unused by this function\n",
"    Returns:\n",
"        The text after the bopomofo and Latin-letter conversions, with a full stop\n",
"        appended when the result ends in a tone mark\n",
"    '''\n",
"    # text = number_to_chinese(text)\n",
"    text = chinese_to_bopomofo(text)\n",
"    text = latin_to_bopomofo(text)\n",
"    # Check for emptiness first: text[-1] raises IndexError on an empty string.\n",
"    if text and re.match('[ˉˊˇˋ˙]', text[-1]):\n",
"        text += '。'\n",
"    return text\n",
"\n",
"\n",
"def text_to_sequence(text, cleaner_names):\n",
"    '''Turn a string into the list of symbol IDs for its cleaned form.\n",
"    Args:\n",
"        text: string to convert\n",
"        cleaner_names: forwarded to chinese_cleaners\n",
"    Returns:\n",
"        List of integer IDs, one per cleaned character present in _symbol_to_id;\n",
"        characters without an ID are skipped\n",
"    '''\n",
"    cleaned = chinese_cleaners(text, cleaner_names)\n",
"    return [_symbol_to_id[ch] for ch in cleaned if ch in _symbol_to_id]\n",
"\n",
"import os\n",
"\n",
"def tts(txt, emotion, sid=0):\n",
"    '''Synthesize `txt` with the loaded model and play the result inline.\n",
"    Args:\n",
"        txt: text to synthesize\n",
"        emotion: path of a reference wav file from which the emotion embedding is extracted\n",
"        sid: speaker id handed to the model\n",
"    Raises:\n",
"        ValueError: if `emotion` does not end in wav (case-insensitive)\n",
"    '''\n",
"    # Validate first: the old code only printed a message for a bad value and\n",
"    # then failed at model.infer because `emo` had never been assigned.\n",
"    if not str(emotion).lower().endswith(\"wav\"):\n",
"        raise ValueError(\"emotion参数不正确\")\n",
"    from models.synthesizer.preprocess_audio import extract_emo\n",
"    import librosa\n",
"\n",
"    text_norm = text_to_sequence(txt, hps[\"data\"][\"text_cleaners\"])\n",
"    if hps[\"data\"][\"add_blank\"]:\n",
"        text_norm = intersperse(text_norm, 0)\n",
"    stn_tst = torch.LongTensor(text_norm)\n",
"\n",
"    with torch.no_grad():  # inference mode\n",
"        x_tst = stn_tst.unsqueeze(0)\n",
"        x_tst_lengths = torch.LongTensor([stn_tst.size(0)])\n",
"        sid_tensor = torch.LongTensor([sid])\n",
"        # `sr` is passed by keyword: recent librosa releases reject it as a positional argument.\n",
"        wav, sr = librosa.load(emotion, sr=16000)\n",
"        emo = torch.FloatTensor(extract_emo(np.expand_dims(wav, 0), sr, embeddings=True))\n",
"        audio = model.infer(x_tst, x_tst_lengths, sid=sid_tensor, noise_scale=0.667, noise_scale_w=0.8, length_scale=1, emo=emo)[0][0,0].data.float().numpy()\n",
"    ipd.display(ipd.Audio(audio, rate=hps[\"data\"][\"sampling_rate\"], normalize=False))\n",
"\n",
"\n"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"推理:"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n"
]
},
{
"data": {
"text/html": [
"\n",
" <audio controls=\"controls\" >\n",
" <source src=\"data:audio/wav;base64,UklGRiSUAgBXQVZFZm10IBAAAAABAAEAgD4AAAB9AAACABAAZGF0YQCUAgCtAHsAjAB+AEcAawCAAI0AhgBhAEkASAAqAP3////V/6L/l/+0/8f/5P/z/wMAJwAaAAQACQAWACIAGgAeABUA6P/W/6v/l/9s/3//pP+n/77/x//K/9L/w/+Y/5b/nP+E/1b/SP9Q/1r/iP+b/6b/j/9f/3//pf+j/5r/rP/E/9D/xP/k/+//3f/6/9z/5v/4/+//3//e//b/+P/w////KgA6AEUAOQBDAE8AUQByAHIAWQBrAIkATwBgAGMAWABoAIEAcABuAI0AhQCVAIAARQBmAF4APwAsAAAA8P/J/7j/sf++/9z/7P/p/wkAEgAPAPb/8/8AAAUABgANACwAKgAeACIA/f/s//j/AAAEAPX/8/8CAPj/4v8RAAYANgBZAEMAXwBkADwAIwAXAA4A+v/n/8b/qv/H/8b/uf/I/+n/7f/i/8//xv/I/8r/zP/l/8P/ZP+N/6H/tf/6/w4ANQAvACQAMAAYADEAFgDt/wgAAQDw/wAA/v/R/9j/vP+4/+D/9/8AABQAFgAAAAMA3P/p/9b/sv+e/5f/n/+8/7P/j/+D/4b/cf+I/5j/if+l/53/m/+v/6n/vf/O/+7/4v/1/+D/xP+7/7//BQARAEsAeQBTAEQAOgAPABIAJQA8AFEAXgB/AIAANAArADoAGQA/AEAAPQA3ADYAIwD0/woAHQAPACIAKQBTAFkAQQBIACwAIAA6ABgAEQA9ACwARwAkAAwAHgDj/+H/7f+Z/4z/u//D/8X/vP/a/+z/7//a/7n/z/+x/7X/0f/H/77/qf+2/8n/3/+6/9j/GwAiADcAWwBVAGIAQgAnAF0AcwB1AIkAggA8AEYAPwD7/9n/xv++/7r/wv/C/6//lv+d/6r/tv/M/8z/z//d/+P/8f8GABcA9v/1/yAAFgAGAA0AIQAdAB0A/P/f/9D/l/+N/5L/gf+b/53/g/+U/6T/kf+V/5X/uP+7/9D////6/xgAIAAxAEAALAA4ACYA8//3/wMADAAuAEkARgA4ACEADwDY/8z/7f8JAAIANQA7AB0ABAD3/wQA8P8DACEAGgAwACAA7//y/+7/yP/T/8z/qf+z/8X/2v/i/9X/vv/S/9j/0P/d/+7/8P/X/8L/pv+O/33/eP93/37/ov+j/5//s/+l/73/3f/f/xAAKgAnADoANAAuAE8ARQA2AE0ATQBOAHAAbgBYAE4ARwA+AFgAXQBfAGkATwBTAC8AEgAAAAEAGABRAGUAUQBgAF4AKwAvACEAKgAPAPz///8KADYAAgDH/63/1P/b/87/5v/0//3/BwD0/7P/wv/P/7n/z/+//9//3P/P/7//rP+w/4n/pf+O/6H/pv+4/73/oP+r/5P/rP/C/8r/3P/X/9n/5f/x/9f/3v/j//X///8gAFgAVgBbAD8AHgAPACYAKABGAFcASwBMAPr/0//J/7X/qf+d/7j/2v/t/wAA/P/i/9f/1P+y/7P/n/+c/6r/tP/R/+3//v8WAEIAUwBjAKMArQCeAJkAiQBqAEMALwAyACQACwAuAB4AFwBJADMAHQAYAPr/4v/7/+//tP+5/8b/rf+n/7//t//y/0QAXwB+AFwAPwAmAD8AGgANACQA+v8RAPP/+f8qAOr/MQAmAAQAEwDn/+//8//Y/9z/y/+1/9r/1//K//T/+f8gAAYA3P/J/6b/rP+d/7L/tv/D/67/vv+u/7D/wP/T/87/yv/Q/6D/uf/h/9z/4v/d/8f/3v/Y/7j/6v/q/87/AAD+/wUABAD4/xMA5P+4/8H/sP+x/5z/nf+t/7n/z//K/67/vv+y/6H/nf/I/+T/5v/r/9f/BwDt/8v/wf+V/6f/yv8EADAAVwBMAA0ABwAYABsAQABJAG4ATwA3ABgA/f8MAAcALAAyACAAKQ
A/ACIARwBkAHcAoQCpAJAAhACNAIYAZABLAEsAPAAFABkAFwD8//X/5//y/+L/6f/U/77/x/+x/7//1v/l//v/AAAAABwABgD0/xUABwAAAP//DQAmAFAAUQBKADoAJQAwAB4AHAAjACoATQBHADYAIwAoAB4ACADd/9P/8f/z////7/8KABIADQAqAC8AHAD8/9n/vP/H/7r/vf+3/7H/y//H//f/CADc/zcARAApAAQAAAD1/93/1//H/6v/s//X/8r/2v/k/9L/wf+u/47/fv+D/4P/kf+U/7L/vv+l/4n/c/9q/13/ev+Z/4b/tv/u/8//2v/2//X/IgBEAEsAcABjAFoAVQBGAEUAIgAsAEoAQgAvADwAGwAMAAsA3//Q/+b/DwD+//D/+v8AAAAA7f/x//D/7P/r/9P/7v/0/97/BQDz/wAAKAA9AFAAUwBNADwAJwAdAAwA9P/R/+r/7v/h/8b/xv/D/7z/r/+N/23/h/+z/93/8v/h/+P/1f/c//T/6P/s/yIASABkAIYAgwCRAIoAdQCBAJMAdQBMAGkASAACAC4ANgAqAAAA/v8CANf/7//d/9P/8f/V/7n/uf+C/3P/kP+G/8z//v/4/+r/1P/X/8T/3f8RADcAUQBRAGMATQA2APr/x//D/8b/1v+l/4b/sv8CADIAOABYAF8ATABXACgA7//Z/7b/rv+w/8L/yP+7/7n/sv+4/5j/cf+Q/37/X/9e/4b/pf+3/7r/2/8SAAwADgA6AEAAZABlAIYAnQCQAGkAaQBWAEoAXwBOAEUAYAByAGMAZwBKAE4AWABIAEoAPwAWABwACADn/wgA/f/s/+f/4P/8/wcAHAADAOb/AADv//j/yP++/7L/nf++/6L/m/+x/53/mf+0/6//vP+u/6z/mv+0/87/yP/j/9f/+P8LAAAAEgALAOz/3P/k/83/yP8OAB8AMgAyAC4AMgAYAAYAGwAyAAcA/v/I/77/sf+j/9//s/+z/6f/p/+2/6P/t//a/+z/6//o//r/8P/0/yQAEwAsADgABAAVADUALABDAF8AXQBWAFYAXABMADIAJgA3AEMAJQBGAD0ANQAkAPT/1P/p/wIA6P8RAD4ASwA8AFkARwBPAGIANgAwAD0ANwAyACkAFQDa/8T/0v/s//r/5//b/7b/aP9G/0P/SP9u/5X/mv+7/4//mP/E/8P/DgASAC4AYwBJAD0ALAD5/xMAQgAqABkA+f+6/5r/Zf99/3//cv+o/6H/hv9y/3L/Xf9j/3b/f/+Z/8n/4v8IAD0ASAA6ADgAUQBdACoAJAAsADYAGQAOAAgA1//R/9X/6//8/xQAIQAnADYAOQBAACgAQwBFADkAMQAoACgARgByAHgAmwDQAL4AmwC2AHgAZABkAFcAcAA1AAQACADm/9j/wv+t/5X/e/9B/yr/X/9g/5b/lf9o/7L/t/+j/8T/x//f/wAADQAgAA0A7P8JAAcA6//h/wUAAwAFACgADQD6/+z/0v+8/7b/rf+X/7//yf+k/7P/m/+q/83/u//I/+r/8v/s/wsAGAAcABkAFAAlADwAhQC5AKYArQBuAE8ARwAWABIA3P+f/5P/Uf9U/13/bP+c/2v/aP91/2f/T/9N/2v/dv+I/6P/1f/e/+f//v8WAEAAWwCIAKkArACxAJIAZABtAEcALABSACYAGAAMANj/4f/f/9D/wf+h/53/yf/r/wgALQBbAJkAbwBsAGAAUgBrAGIAXgBUAEsAOwA7ADoADgAjAAgAGgAFANf/1/+3/5j/j/+Z/4T/f/9y/4v/kP+0/8j/9v83AC4AJABHAEMARwA6AF0AYgAgAC8AJwDm/9v/4//h/9T/2//F/8n/CQAnACQANAAzACgATwAoAAwAJAA1AFAAUABPAEQAOgA8AEsAMgAhADsANgAYAPr/6v+8/6L/mf93/3H/hv+b/6//vf/q//z/CQDp/+H/0P+b/7P/vv/C/8f/v//A/57/hP
+f/8P////x/8b/vf+Z/5n/nv+2/8P/0v/g/8f/uv/k/8r/zf+9/6f/vv/N/9H/AQAvABc
" Your browser does not support the audio element.\n",
" </audio>\n",
" "
],
"text/plain": [
"<IPython.lib.display.Audio object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"txt = \"随机抽取的音频文件路径可以用于使用该情感合成其他句子\"\n",
"tts(txt, emotion='C:\\\\Users\\\\babys\\\\Desktop\\\\voicecollection\\\\secondround\\\\美玉.wav', sid=0)"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"预处理:"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Using data from:\n",
" ..\\audiodata\\magicdata\\train\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"magicdata: 0%| | 0/1018 [00:00<?, ?speakers/s]"
]
},
{
"ename": "",
"evalue": "",
"output_type": "error",
"traceback": [
"\u001b[1;31mThe Kernel crashed while executing code in the the current cell or a previous cell. Please review the code in the cell(s) to identify a possible cause of the failure. Click <a href='https://aka.ms/vscodeJupyterKernelCrash'>here</a> for more info. View Jupyter <a href='command:jupyter.viewOutput'>log</a> for further details."
]
}
],
"source": [
"from models.synthesizer.preprocess import preprocess_dataset\n",
"from pathlib import Path\n",
"from utils.hparams import HParams\n",
"datasets_root = Path(\"../audiodata/\")\n",
"hparams=HParams(\n",
" sample_rate = 16000,\n",
" rescale = True,\n",
" max_mel_frames = 900,\n",
" rescaling_max = 0.9,\n",
"\n",
" utterance_min_duration = 1.6, # Duration in seconds below which utterances are discarded\n",
" ### Audio processing options\n",
" fmax = 7600, # Should not exceed (sample_rate // 2)\n",
" allow_clipping_in_normalization = True, # Used when signal_normalization = True\n",
" clip_mels_length = True, # If true, discards samples exceeding max_mel_frames\n",
" use_lws = False, # \"Fast spectrogram phase recovery using local weighted sums\"\n",
" symmetric_mels = True, # Sets mel range to [-max_abs_value, max_abs_value] if True,\n",
" # and [0, max_abs_value] if False\n",
" trim_silence = True, # Use with sample_rate of 16000 for best results\n",
"\n",
")\n",
"preprocess_dataset(datasets_root=datasets_root, \n",
" out_dir=datasets_root.joinpath(\"SV2TTS\", \"synthesizer\"),\n",
" n_processes=8,\n",
" skip_existing=True, \n",
" hparams=hparams, \n",
" no_alignments=False, \n",
" dataset=\"magicdata\", \n",
" emotion_extract=True)"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"训练:"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\\Loading the json with %s\n",
" data\\ckpt\\synthesizer\\vits\\config.json\n"
]
},
{
"ename": "ProcessRaisedException",
"evalue": "\n\n-- Process 0 terminated with the following error:\nTraceback (most recent call last):\n File \"d:\\Users\\babys\\Anaconda3\\envs\\mo\\lib\\site-packages\\torch\\multiprocessing\\spawn.py\", line 59, in _wrap\n fn(i, *args)\n File \"d:\\Real-Time-Voice-Cloning-Chinese\\models\\synthesizer\\train_vits.py\", line 123, in run\n net_g = Vits(\nTypeError: __init__() missing 1 required positional argument: 'stop_threshold'\n",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mProcessRaisedException\u001b[0m Traceback (most recent call last)",
"\u001b[1;32md:\\Real-Time-Voice-Cloning-Chinese\\vits.ipynb Cell 7\u001b[0m in \u001b[0;36m<cell line: 20>\u001b[1;34m()\u001b[0m\n\u001b[0;32m <a href='vscode-notebook-cell:/d%3A/Real-Time-Voice-Cloning-Chinese/vits.ipynb#W6sZmlsZQ%3D%3D?line=17'>18</a>\u001b[0m os\u001b[39m.\u001b[39menviron[\u001b[39m'\u001b[39m\u001b[39mMASTER_ADDR\u001b[39m\u001b[39m'\u001b[39m] \u001b[39m=\u001b[39m \u001b[39m'\u001b[39m\u001b[39mlocalhost\u001b[39m\u001b[39m'\u001b[39m\n\u001b[0;32m <a href='vscode-notebook-cell:/d%3A/Real-Time-Voice-Cloning-Chinese/vits.ipynb#W6sZmlsZQ%3D%3D?line=18'>19</a>\u001b[0m os\u001b[39m.\u001b[39menviron[\u001b[39m'\u001b[39m\u001b[39mMASTER_PORT\u001b[39m\u001b[39m'\u001b[39m] \u001b[39m=\u001b[39m \u001b[39m'\u001b[39m\u001b[39m8899\u001b[39m\u001b[39m'\u001b[39m\n\u001b[1;32m---> <a href='vscode-notebook-cell:/d%3A/Real-Time-Voice-Cloning-Chinese/vits.ipynb#W6sZmlsZQ%3D%3D?line=19'>20</a>\u001b[0m mp\u001b[39m.\u001b[39;49mspawn(run, nprocs\u001b[39m=\u001b[39;49mn_gpus, args\u001b[39m=\u001b[39;49m(n_gpus, hparams))\n",
"File \u001b[1;32md:\\Users\\babys\\Anaconda3\\envs\\mo\\lib\\site-packages\\torch\\multiprocessing\\spawn.py:230\u001b[0m, in \u001b[0;36mspawn\u001b[1;34m(fn, args, nprocs, join, daemon, start_method)\u001b[0m\n\u001b[0;32m 226\u001b[0m msg \u001b[39m=\u001b[39m (\u001b[39m'\u001b[39m\u001b[39mThis method only supports start_method=spawn (got: \u001b[39m\u001b[39m%s\u001b[39;00m\u001b[39m).\u001b[39m\u001b[39m\\n\u001b[39;00m\u001b[39m'\u001b[39m\n\u001b[0;32m 227\u001b[0m \u001b[39m'\u001b[39m\u001b[39mTo use a different start_method use:\u001b[39m\u001b[39m\\n\u001b[39;00m\u001b[39m\\t\u001b[39;00m\u001b[39m\\t\u001b[39;00m\u001b[39m'\u001b[39m\n\u001b[0;32m 228\u001b[0m \u001b[39m'\u001b[39m\u001b[39m torch.multiprocessing.start_processes(...)\u001b[39m\u001b[39m'\u001b[39m \u001b[39m%\u001b[39m start_method)\n\u001b[0;32m 229\u001b[0m warnings\u001b[39m.\u001b[39mwarn(msg)\n\u001b[1;32m--> 230\u001b[0m \u001b[39mreturn\u001b[39;00m start_processes(fn, args, nprocs, join, daemon, start_method\u001b[39m=\u001b[39;49m\u001b[39m'\u001b[39;49m\u001b[39mspawn\u001b[39;49m\u001b[39m'\u001b[39;49m)\n",
"File \u001b[1;32md:\\Users\\babys\\Anaconda3\\envs\\mo\\lib\\site-packages\\torch\\multiprocessing\\spawn.py:188\u001b[0m, in \u001b[0;36mstart_processes\u001b[1;34m(fn, args, nprocs, join, daemon, start_method)\u001b[0m\n\u001b[0;32m 185\u001b[0m \u001b[39mreturn\u001b[39;00m context\n\u001b[0;32m 187\u001b[0m \u001b[39m# Loop on join until it returns True or raises an exception.\u001b[39;00m\n\u001b[1;32m--> 188\u001b[0m \u001b[39mwhile\u001b[39;00m \u001b[39mnot\u001b[39;00m context\u001b[39m.\u001b[39;49mjoin():\n\u001b[0;32m 189\u001b[0m \u001b[39mpass\u001b[39;00m\n",
"File \u001b[1;32md:\\Users\\babys\\Anaconda3\\envs\\mo\\lib\\site-packages\\torch\\multiprocessing\\spawn.py:150\u001b[0m, in \u001b[0;36mProcessContext.join\u001b[1;34m(self, timeout)\u001b[0m\n\u001b[0;32m 148\u001b[0m msg \u001b[39m=\u001b[39m \u001b[39m\"\u001b[39m\u001b[39m\\n\u001b[39;00m\u001b[39m\\n\u001b[39;00m\u001b[39m-- Process \u001b[39m\u001b[39m%d\u001b[39;00m\u001b[39m terminated with the following error:\u001b[39m\u001b[39m\\n\u001b[39;00m\u001b[39m\"\u001b[39m \u001b[39m%\u001b[39m error_index\n\u001b[0;32m 149\u001b[0m msg \u001b[39m+\u001b[39m\u001b[39m=\u001b[39m original_trace\n\u001b[1;32m--> 150\u001b[0m \u001b[39mraise\u001b[39;00m ProcessRaisedException(msg, error_index, failed_process\u001b[39m.\u001b[39mpid)\n",
"\u001b[1;31mProcessRaisedException\u001b[0m: \n\n-- Process 0 terminated with the following error:\nTraceback (most recent call last):\n File \"d:\\Users\\babys\\Anaconda3\\envs\\mo\\lib\\site-packages\\torch\\multiprocessing\\spawn.py\", line 59, in _wrap\n fn(i, *args)\n File \"d:\\Real-Time-Voice-Cloning-Chinese\\models\\synthesizer\\train_vits.py\", line 123, in run\n net_g = Vits(\nTypeError: __init__() missing 1 required positional argument: 'stop_threshold'\n"
]
},
{
"ename": "",
"evalue": "",
"output_type": "error",
"traceback": [
"\u001b[1;31mThe Kernel crashed while executing code in the the current cell or a previous cell. Please review the code in the cell(s) to identify a possible cause of the failure. Click <a href='https://aka.ms/vscodeJupyterKernelCrash'>here</a> for more info. View Jupyter <a href='command:jupyter.viewOutput'>log</a> for further details."
]
}
],
"source": [
"from models.synthesizer.train_vits import run\n",
"from pathlib import Path\n",
"from utils.hparams import HParams\n",
"import torch, os\n",
"import torch.multiprocessing as mp\n",
"\n",
"# Folder holding the metadata file (train.txt) used below.\n",
"datasets_root = Path(\"../audiodata/SV2TTS/synthesizer\")\n",
"hparams = HParams(\n",
"    model_dir = \"data/ckpt/synthesizer/vits\",\n",
")\n",
"# Load config.json from the model directory, then point the data section at the local files.\n",
"hparams.loadJson(Path(hparams.model_dir).joinpath(\"config.json\"))\n",
"hparams.data[\"training_files\"] = str(datasets_root.joinpath(\"train.txt\"))\n",
"# NOTE(review): validation uses the same list as training - confirm this is intended.\n",
"hparams.data[\"validation_files\"] = str(datasets_root.joinpath(\"train.txt\"))\n",
"hparams.data[\"datasets_root\"] = str(datasets_root)\n",
"\n",
"n_gpus = torch.cuda.device_count()\n",
"# One process is spawned per visible GPU. With zero GPUs mp.spawn would start nothing\n",
"# and return without any message, so stop with an explicit error instead.\n",
"if n_gpus < 1:\n",
"    raise RuntimeError(\"No CUDA device found: this cell spawns one training process per GPU.\")\n",
"# for spawn: address and port environment variables for the worker processes\n",
"os.environ['MASTER_ADDR'] = 'localhost'\n",
"os.environ['MASTER_PORT'] = '8899'\n",
"mp.spawn(run, nprocs=n_gpus, args=(n_gpus, hparams))"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"挑选只有对应emo文件的meta数据"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"from pathlib import Path\n",
"import os\n",
"root = Path('../audiodata/SV2TTS/synthesizer')\n",
"# Collect the metadata lines whose emo file exists under <root>/emo.\n",
"dict_info = []\n",
"with open(root.joinpath(\"train.txt\"), \"r\", encoding=\"utf-8\") as dict_meta:\n",
"    for raw in dict_meta:\n",
"        # Lines read from a file keep their newline, so test the stripped text for emptiness.\n",
"        if not raw.strip():\n",
"            continue\n",
"        # The first '|' field, with \"audio\" replaced by \"emo\", is the emo file name.\n",
"        emo_name = raw.split(\"|\")[0].replace(\"audio\",\"emo\")\n",
"        emo_fpath = root.joinpath(\"emo\").joinpath(emo_name)\n",
"        if emo_fpath.exists():\n",
"            dict_info.append(raw)\n",
"        # else:\n",
"        #     print(emo_fpath)\n",
"# Write the kept lines to train2.txt; the with-block closes the file even if a write fails.\n",
"meta2 = Path('../audiodata/SV2TTS/synthesizer/train2.txt')\n",
"with meta2.open(\"w\", encoding=\"utf-8\") as metadata_file:\n",
"    metadata_file.writelines(dict_info)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "mo",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.7"
},
"vscode": {
"interpreter": {
"hash": "788ab866da3baa6c99886d56abb59fe71b6a552bf52c65473ecf96c784704db8"
}
}
},
"nbformat": 4,
"nbformat_minor": 4
}