MockingBird/vits.ipynb

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'log_interval': 2000, 'eval_interval': 4000, 'seed': 1234, 'epochs': 10000, 'learning_rate': 0.0001, 'betas': [0.8, 0.99], 'eps': 1e-09, 'batch_size': 16, 'fp16_run': True, 'lr_decay': 0.5, 'segment_size': 8192, 'init_lr_ratio': 1, 'warmup_epochs': 0, 'c_mel': 45, 'c_kl': 1.0}\n",
      "Trainable Parameters: 0.000M\n"
     ]
    }
   ],
   "source": [
    "from utils.hparams import load_hparams_json\n",
    "from utils.util import intersperse\n",
    "import json\n",
    "from models.synthesizer.models.vits import Vits\n",
    "import torch\n",
    "import numpy as np\n",
    "import IPython.display as ipd\n",
    "\n",
    "# chinese_cleaners\n",
    "_pad        = '_'\n",
    "_punctuation = '，。！？—…'\n",
    "_letters = 'ㄅㄆㄇㄈㄉㄊㄋㄌㄍㄎㄏㄐㄑㄒㄓㄔㄕㄖㄗㄘㄙㄚㄛㄜㄝㄞㄟㄠㄡㄢㄣㄤㄥㄦㄧㄨㄩˉˊˇˋ˙ '\n",
    "# Export all symbols:\n",
    "symbols = [_pad] + list(_punctuation) + list(_letters)\n",
    "\n",
    "hps = load_hparams_json(\"data/ckpt/synthesizer/vits/config.json\")\n",
    "print(hps.train)\n",
    "model = Vits(\n",
    "    len(symbols),\n",
    "    hps[\"data\"][\"filter_length\"] // 2 + 1,\n",
    "    hps[\"train\"][\"segment_size\"] // hps[\"data\"][\"hop_length\"],\n",
    "    n_speakers=hps[\"data\"][\"n_speakers\"],\n",
    "    stop_threshold=0.5,\n",
    "    **hps[\"model\"])\n",
    "_ = model.eval()\n",
    "device = torch.device(\"cpu\")\n",
    "model.load(\"data/ckpt/synthesizer/vits/G_208000.pth\", device)\n",
    "\n",
    "# 随机抽取情感参考音频的根目录\n",
    "random_emotion_root = \"D:\\\\audiodata\\\\aidatatang_200zh\\\\corpus\\\\train\\\\G0017\"\n",
    "import random, re\n",
    "# import cn2an # remove dependency before production\n",
    "from pypinyin import lazy_pinyin, BOPOMOFO\n",
    "\n",
    "_symbol_to_id = {s: i for i, s in enumerate(symbols)}\n",
    "\n",
    "# def number_to_chinese(text):\n",
    "#     numbers = re.findall(r'\\d+(?:\\.?\\d+)?', text)\n",
    "#     for number in numbers:\n",
    "#         text = text.replace(number, cn2an.an2cn(number), 1)\n",
    "#     return text\n",
    "\n",
    "def chinese_to_bopomofo(text, taiwanese=False):\n",
    "    text = text.replace('、', '，').replace('；', '，').replace('：', '，')\n",
    "    for word in list(text):\n",
    "        bopomofos = lazy_pinyin(word, BOPOMOFO)\n",
    "        if not re.search('[\\u4e00-\\u9fff]', word):\n",
    "            text += word\n",
    "            continue\n",
    "        for i in range(len(bopomofos)):\n",
    "            bopomofos[i] = re.sub(r'([\\u3105-\\u3129])$', r'\\1ˉ', bopomofos[i])\n",
    "        if text != '':\n",
    "            text += ' '\n",
    "        if taiwanese:\n",
    "            text += '#'+'#'.join(bopomofos)\n",
    "        else:\n",
    "            text += ''.join(bopomofos)\n",
    "    return text\n",
    "\n",
    "_latin_to_bopomofo = [(re.compile('%s' % x[0], re.IGNORECASE), x[1]) for x in [\n",
    "    ('a', 'ㄟˉ'),\n",
    "    ('b', 'ㄅㄧˋ'),\n",
    "    ('c', 'ㄙㄧˉ'),\n",
    "    ('d', 'ㄉㄧˋ'),\n",
    "    ('e', 'ㄧˋ'),\n",
    "    ('f', 'ㄝˊㄈㄨˋ'),\n",
    "    ('g', 'ㄐㄧˋ'),\n",
    "    ('h', 'ㄝˇㄑㄩˋ'),\n",
    "    ('i', 'ㄞˋ'),\n",
    "    ('j', 'ㄐㄟˋ'),\n",
    "    ('k', 'ㄎㄟˋ'),\n",
    "    ('l', 'ㄝˊㄛˋ'),\n",
    "    ('m', 'ㄝˊㄇㄨˋ'),\n",
    "    ('n', 'ㄣˉ'),\n",
    "    ('o', 'ㄡˉ'),\n",
    "    ('p', 'ㄆㄧˉ'),\n",
    "    ('q', 'ㄎㄧㄡˉ'),\n",
    "    ('r', 'ㄚˋ'),\n",
    "    ('s', 'ㄝˊㄙˋ'),\n",
    "    ('t', 'ㄊㄧˋ'),\n",
    "    ('u', 'ㄧㄡˉ'),\n",
    "    ('v', 'ㄨㄧˉ'),\n",
    "    ('w', 'ㄉㄚˋㄅㄨˋㄌㄧㄡˋ'),\n",
    "    ('x', 'ㄝˉㄎㄨˋㄙˋ'),\n",
    "    ('y', 'ㄨㄞˋ'),\n",
    "    ('z', 'ㄗㄟˋ')\n",
    "]]\n",
    "\n",
    "def latin_to_bopomofo(text):\n",
    "    for regex, replacement in _latin_to_bopomofo:\n",
    "        text = re.sub(regex, replacement, text)\n",
    "    return text\n",
    "\n",
    "#TODO: add cleaner to support multilang\n",
    "def chinese_cleaners(text, cleaner_names):\n",
    "    '''Pipeline for Chinese text'''\n",
    "    # text = number_to_chinese(text)\n",
    "    text = chinese_to_bopomofo(text)\n",
    "    text = latin_to_bopomofo(text)\n",
    "    if re.match('[ˉˊˇˋ˙]', text[-1]):\n",
    "        text += '。'\n",
    "    return text\n",
    "\n",
    "\n",
    "def text_to_sequence(text, cleaner_names):\n",
    "  '''Converts a string of text to a sequence of IDs corresponding to the symbols in the text.\n",
    "    Args:\n",
    "      text: string to convert to a sequence\n",
    "      cleaner_names: names of the cleaner functions to run the text through\n",
    "    Returns:\n",
    "      List of integers corresponding to the symbols in the text\n",
    "  '''\n",
    "  sequence = []\n",
    "\n",
    "  clean_text = chinese_cleaners(text, cleaner_names)\n",
    "  for symbol in clean_text:\n",
    "    if symbol not in _symbol_to_id.keys():\n",
    "      continue\n",
    "    symbol_id = _symbol_to_id[symbol]\n",
    "    sequence += [symbol_id]\n",
    "  return sequence\n",
    "\n",
    "import os\n",
    "\n",
    "def tts(txt, emotion, sid=0):\n",
    "    text_norm = text_to_sequence(txt, hps[\"data\"][\"text_cleaners\"])\n",
    "    if hps[\"data\"][\"add_blank\"]:\n",
    "        text_norm = intersperse(text_norm, 0)\n",
    "    stn_tst = torch.LongTensor(text_norm)\n",
    "\n",
    "    with torch.no_grad(): #inference mode\n",
    "        x_tst = stn_tst.unsqueeze(0)\n",
    "        x_tst_lengths = torch.LongTensor([stn_tst.size(0)])\n",
    "        sid = torch.LongTensor([sid])\n",
    "        if emotion.endswith(\"wav\"):\n",
    "            from models.synthesizer.preprocess_audio import extract_emo\n",
    "            import librosa\n",
    "            wav, sr = librosa.load(emotion, 16000)\n",
    "            emo = torch.FloatTensor(extract_emo(np.expand_dims(wav, 0), sr, embeddings=True))\n",
    "        else:\n",
    "            print(\"emotion参数不正确\")\n",
    "\n",
    "        audio = model.infer(x_tst, x_tst_lengths, sid=sid, noise_scale=0.667, noise_scale_w=0.8, length_scale=1, emo=emo)[0][0,0].data.float().numpy()\n",
    "    ipd.display(ipd.Audio(audio, rate=hps[\"data\"][\"sampling_rate\"], normalize=False))\n",
    "\n",
    "\n"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "推理："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "\n",
       "                <audio  controls=\"controls\" >\n",
       "                    <source src=\"data:audio/wav;base64,UklGRiSUAgBXQVZFZm10IBAAAAABAAEAgD4AAAB9AAACABAAZGF0YQCUAgCtAHsAjAB+AEcAawCAAI0AhgBhAEkASAAqAP3////V/6L/l/+0/8f/5P/z/wMAJwAaAAQACQAWACIAGgAeABUA6P/W/6v/l/9s/3//pP+n/77/x//K/9L/w/+Y/5b/nP+E/1b/SP9Q/1r/iP+b/6b/j/9f/3//pf+j/5r/rP/E/9D/xP/k/+//3f/6/9z/5v/4/+//3//e//b/+P/w////KgA6AEUAOQBDAE8AUQByAHIAWQBrAIkATwBgAGMAWABoAIEAcABuAI0AhQCVAIAARQBmAF4APwAsAAAA8P/J/7j/sf++/9z/7P/p/wkAEgAPAPb/8/8AAAUABgANACwAKgAeACIA/f/s//j/AAAEAPX/8/8CAPj/4v8RAAYANgBZAEMAXwBkADwAIwAXAA4A+v/n/8b/qv/H/8b/uf/I/+n/7f/i/8//xv/I/8r/zP/l/8P/ZP+N/6H/tf/6/w4ANQAvACQAMAAYADEAFgDt/wgAAQDw/wAA/v/R/9j/vP+4/+D/9/8AABQAFgAAAAMA3P/p/9b/sv+e/5f/n/+8/7P/j/+D/4b/cf+I/5j/if+l/53/m/+v/6n/vf/O/+7/4v/1/+D/xP+7/7//BQARAEsAeQBTAEQAOgAPABIAJQA8AFEAXgB/AIAANAArADoAGQA/AEAAPQA3ADYAIwD0/woAHQAPACIAKQBTAFkAQQBIACwAIAA6ABgAEQA9ACwARwAkAAwAHgDj/+H/7f+Z/4z/u//D/8X/vP/a/+z/7//a/7n/z/+x/7X/0f/H/77/qf+2/8n/3/+6/9j/GwAiADcAWwBVAGIAQgAnAF0AcwB1AIkAggA8AEYAPwD7/9n/xv++/7r/wv/C/6//lv+d/6r/tv/M/8z/z//d/+P/8f8GABcA9v/1/yAAFgAGAA0AIQAdAB0A/P/f/9D/l/+N/5L/gf+b/53/g/+U/6T/kf+V/5X/uP+7/9D////6/xgAIAAxAEAALAA4ACYA8//3/wMADAAuAEkARgA4ACEADwDY/8z/7f8JAAIANQA7AB0ABAD3/wQA8P8DACEAGgAwACAA7//y/+7/yP/T/8z/qf+z/8X/2v/i/9X/vv/S/9j/0P/d/+7/8P/X/8L/pv+O/33/eP93/37/ov+j/5//s/+l/73/3f/f/xAAKgAnADoANAAuAE8ARQA2AE0ATQBOAHAAbgBYAE4ARwA+AFgAXQBfAGkATwBTAC8AEgAAAAEAGABRAGUAUQBgAF4AKwAvACEAKgAPAPz///8KADYAAgDH/63/1P/b/87/5v/0//3/BwD0/7P/wv/P/7n/z/+//9//3P/P/7//rP+w/4n/pf+O/6H/pv+4/73/oP+r/5P/rP/C/8r/3P/X/9n/5f/x/9f/3v/j//X///8gAFgAVgBbAD8AHgAPACYAKABGAFcASwBMAPr/0//J/7X/qf+d/7j/2v/t/wAA/P/i/9f/1P+y/7P/n/+c/6r/tP/R/+3//v8WAEIAUwBjAKMArQCeAJkAiQBqAEMALwAyACQACwAuAB4AFwBJADMAHQAYAPr/4v/7/+//tP+5/8b/rf+n/7//t//y/0QAXwB+AFwAPwAmAD8AGgANACQA+v8RAPP/+f8qAOr/MQAmAAQAEwDn/+//8//Y/9z/y/+1/9r/1//K//T/+f8gAAYA3P/J/6b/rP+d/7L/tv/D/67/vv+u/7D/wP/T/87/yv/Q/6D/uf/h/9z/4v/d/8f/3v/Y/7j/6v/q/87/AAD+/wUABAD4/xMA5P+4/8H/sP+x/5z/nf+t/7n/z//K/67/vv+y/6H/nf/I/+T/5v/r/9f/BwDt/8v/wf+V/6f/yv8EADAAVwBMAA0ABwAYABsAQABJAG4ATwA3ABgA/f8MAAcALAAyACAAKQA/ACIARwBkAHcAoQCpAJAAhACNAIYAZABLAEsAPAAFABkAFwD8//X/5//y/+L/6f/U/77/x/+x/7//1v/l//v/AAAAABwABgD0/xUABwAAAP//DQAmAFAAUQBKADoAJQAwAB4AHAAjACoATQBHADYAIwAoAB4ACADd/9P/8f/z////7/8KABIADQAqAC8AHAD8/9n/vP/H/7r/vf+3/7H/y//H//f/CADc/zcARAApAAQAAAD1/93/1//H/6v/s//X/8r/2v/k/9L/wf+u/47/fv+D/4P/kf+U/7L/vv+l/4n/c/9q/13/ev+Z/4b/tv/u/8//2v/2//X/IgBEAEsAcABjAFoAVQBGAEUAIgAsAEoAQgAvADwAGwAMAAsA3//Q/+b/DwD+//D/+v8AAAAA7f/x//D/7P/r/9P/7v/0/97/BQDz/wAAKAA9AFAAUwBNADwAJwAdAAwA9P/R/+r/7v/h/8b/xv/D/7z/r/+N/23/h/+z/93/8v/h/+P/1f/c//T/6P/s/yIASABkAIYAgwCRAIoAdQCBAJMAdQBMAGkASAACAC4ANgAqAAAA/v8CANf/7//d/9P/8f/V/7n/uf+C/3P/kP+G/8z//v/4/+r/1P/X/8T/3f8RADcAUQBRAGMATQA2APr/x//D/8b/1v+l/4b/sv8CADIAOABYAF8ATABXACgA7//Z/7b/rv+w/8L/yP+7/7n/sv+4/5j/cf+Q/37/X/9e/4b/pf+3/7r/2/8SAAwADgA6AEAAZABlAIYAnQCQAGkAaQBWAEoAXwBOAEUAYAByAGMAZwBKAE4AWABIAEoAPwAWABwACADn/wgA/f/s/+f/4P/8/wcAHAADAOb/AADv//j/yP++/7L/nf++/6L/m/+x/53/mf+0/6//vP+u/6z/mv+0/87/yP/j/9f/+P8LAAAAEgALAOz/3P/k/83/yP8OAB8AMgAyAC4AMgAYAAYAGwAyAAcA/v/I/77/sf+j/9//s/+z/6f/p/+2/6P/t//a/+z/6//o//r/8P/0/yQAEwAsADgABAAVADUALABDAF8AXQBWAFYAXABMADIAJgA3AEMAJQBGAD0ANQAkAPT/1P/p/wIA6P8RAD4ASwA8AFkARwBPAGIANgAwAD0ANwAyACkAFQDa/8T/0v/s//r/5//b/7b/aP9G/0P/SP9u/5X/mv+7/4//mP/E/8P/DgASAC4AYwBJAD0ALAD5/xMAQgAqABkA+f+6/5r/Zf99/3//cv+o/6H/hv9y/3L/Xf9j/3b/f/+Z/8n/4v8IAD0ASAA6ADgAUQBdACoAJAAsADYAGQAOAAgA1//R/9X/6//8/xQAIQAnADYAOQBAACgAQwBFADkAMQAoACgARgByAHgAmwDQAL4AmwC2AHgAZABkAFcAcAA1AAQACADm/9j/wv+t/5X/e/9B/yr/X/9g/5b/lf9o/7L/t/+j/8T/x//f/wAADQAgAA0A7P8JAAcA6//h/wUAAwAFACgADQD6/+z/0v+8/7b/rf+X/7//yf+k/7P/m/+q/83/u//I/+r/8v/s/wsAGAAcABkAFAAlADwAhQC5AKYArQBuAE8ARwAWABIA3P+f/5P/Uf9U/13/bP+c/2v/aP91/2f/T/9N/2v/dv+I/6P/1f/e/+f//v8WAEAAWwCIAKkArACxAJIAZABtAEcALABSACYAGAAMANj/4f/f/9D/wf+h/53/yf/r/wgALQBbAJkAbwBsAGAAUgBrAGIAXgBUAEsAOwA7ADoADgAjAAgAGgAFANf/1/+3/5j/j/+Z/4T/f/9y/4v/kP+0/8j/9v83AC4AJABHAEMARwA6AF0AYgAgAC8AJwDm/9v/4//h/9T/2//F/8n/CQAnACQANAAzACgATwAoAAwAJAA1AFAAUABPAEQAOgA8AEsAMgAhADsANgAYAPr/6v+8/6L/mf93/3H/hv+b/6//vf/q//z/CQDp/+H/0P+b/7P/vv/C/8f/v//A/57/hP+f/8P////x/8b/vf+Z/5n/nv+2/8P/0v/g/8f/uv/k/8r/zf+9/6f/vv/N/9H/AQAvABc
       "                    Your browser does not support the audio element.\n",
       "                </audio>\n",
       "              "
      ],
      "text/plain": [
       "<IPython.lib.display.Audio object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "txt = \"随机抽取的音频文件路径可以用于使用该情感合成其他句子\"\n",
    "tts(txt, emotion='C:\\\\Users\\\\babys\\\\Desktop\\\\voicecollection\\\\secondround\\\\美玉.wav', sid=0)"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "预处理："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Using data from:\n",
      "    ..\\audiodata\\magicdata\\train\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "magicdata:   0%|          | 0/1018 [00:00<?, ?speakers/s]"
     ]
    },
    {
     "ename": "",
     "evalue": "",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31mThe Kernel crashed while executing code in the the current cell or a previous cell. Please review the code in the cell(s) to identify a possible cause of the failure. Click <a href='https://aka.ms/vscodeJupyterKernelCrash'>here</a> for more info. View Jupyter <a href='command:jupyter.viewOutput'>log</a> for further details."
     ]
    }
   ],
   "source": [
    "from models.synthesizer.preprocess import preprocess_dataset\n",
    "from pathlib import Path\n",
    "from utils.hparams import HParams\n",
    "datasets_root = Path(\"../audiodata/\")\n",
    "hparams=HParams(\n",
    "        sample_rate = 16000,\n",
    "        rescale = True,\n",
    "        max_mel_frames = 900,\n",
    "        rescaling_max = 0.9,\n",
    "\n",
    "        utterance_min_duration = 1.6,               # Duration in seconds below which utterances are discarded\n",
    "        ### Audio processing options\n",
    "        fmax = 7600,                                # Should not exceed (sample_rate // 2)\n",
    "        allow_clipping_in_normalization = True,     # Used when signal_normalization = True\n",
    "        clip_mels_length = True,                    # If true, discards samples exceeding max_mel_frames\n",
    "        use_lws = False,                            # \"Fast spectrogram phase recovery using local weighted sums\"\n",
    "        symmetric_mels = True,                      # Sets mel range to [-max_abs_value, max_abs_value] if True,\n",
    "                                                    #               and [0, max_abs_value] if False\n",
    "        trim_silence = True,                        # Use with sample_rate of 16000 for best results\n",
    "\n",
    ")\n",
    "preprocess_dataset(datasets_root=datasets_root, \n",
    "        out_dir=datasets_root.joinpath(\"SV2TTS\", \"synthesizer\"),\n",
    "        n_processes=8,\n",
    "        skip_existing=True, \n",
    "        hparams=hparams, \n",
    "        no_alignments=False, \n",
    "        dataset=\"magicdata\", \n",
    "        emotion_extract=True)"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "训练："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\\Loading the json with %s\n",
      " data\\ckpt\\synthesizer\\vits\\config.json\n"
     ]
    },
    {
     "ename": "ProcessRaisedException",
     "evalue": "\n\n-- Process 0 terminated with the following error:\nTraceback (most recent call last):\n  File \"d:\\Users\\babys\\Anaconda3\\envs\\mo\\lib\\site-packages\\torch\\multiprocessing\\spawn.py\", line 59, in _wrap\n    fn(i, *args)\n  File \"d:\\Real-Time-Voice-Cloning-Chinese\\models\\synthesizer\\train_vits.py\", line 123, in run\n    net_g = Vits(\nTypeError: __init__() missing 1 required positional argument: 'stop_threshold'\n",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mProcessRaisedException\u001b[0m                    Traceback (most recent call last)",
      "\u001b[1;32md:\\Real-Time-Voice-Cloning-Chinese\\vits.ipynb Cell 7\u001b[0m in \u001b[0;36m<cell line: 20>\u001b[1;34m()\u001b[0m\n\u001b[0;32m     <a href='vscode-notebook-cell:/d%3A/Real-Time-Voice-Cloning-Chinese/vits.ipynb#W6sZmlsZQ%3D%3D?line=17'>18</a>\u001b[0m os\u001b[39m.\u001b[39menviron[\u001b[39m'\u001b[39m\u001b[39mMASTER_ADDR\u001b[39m\u001b[39m'\u001b[39m] \u001b[39m=\u001b[39m \u001b[39m'\u001b[39m\u001b[39mlocalhost\u001b[39m\u001b[39m'\u001b[39m\n\u001b[0;32m     <a href='vscode-notebook-cell:/d%3A/Real-Time-Voice-Cloning-Chinese/vits.ipynb#W6sZmlsZQ%3D%3D?line=18'>19</a>\u001b[0m os\u001b[39m.\u001b[39menviron[\u001b[39m'\u001b[39m\u001b[39mMASTER_PORT\u001b[39m\u001b[39m'\u001b[39m] \u001b[39m=\u001b[39m \u001b[39m'\u001b[39m\u001b[39m8899\u001b[39m\u001b[39m'\u001b[39m\n\u001b[1;32m---> <a href='vscode-notebook-cell:/d%3A/Real-Time-Voice-Cloning-Chinese/vits.ipynb#W6sZmlsZQ%3D%3D?line=19'>20</a>\u001b[0m mp\u001b[39m.\u001b[39;49mspawn(run, nprocs\u001b[39m=\u001b[39;49mn_gpus, args\u001b[39m=\u001b[39;49m(n_gpus, hparams))\n",
      "File \u001b[1;32md:\\Users\\babys\\Anaconda3\\envs\\mo\\lib\\site-packages\\torch\\multiprocessing\\spawn.py:230\u001b[0m, in \u001b[0;36mspawn\u001b[1;34m(fn, args, nprocs, join, daemon, start_method)\u001b[0m\n\u001b[0;32m    226\u001b[0m     msg \u001b[39m=\u001b[39m (\u001b[39m'\u001b[39m\u001b[39mThis method only supports start_method=spawn (got: \u001b[39m\u001b[39m%s\u001b[39;00m\u001b[39m).\u001b[39m\u001b[39m\\n\u001b[39;00m\u001b[39m'\u001b[39m\n\u001b[0;32m    227\u001b[0m            \u001b[39m'\u001b[39m\u001b[39mTo use a different start_method use:\u001b[39m\u001b[39m\\n\u001b[39;00m\u001b[39m\\t\u001b[39;00m\u001b[39m\\t\u001b[39;00m\u001b[39m'\u001b[39m\n\u001b[0;32m    228\u001b[0m            \u001b[39m'\u001b[39m\u001b[39m torch.multiprocessing.start_processes(...)\u001b[39m\u001b[39m'\u001b[39m \u001b[39m%\u001b[39m start_method)\n\u001b[0;32m    229\u001b[0m     warnings\u001b[39m.\u001b[39mwarn(msg)\n\u001b[1;32m--> 230\u001b[0m \u001b[39mreturn\u001b[39;00m start_processes(fn, args, nprocs, join, daemon, start_method\u001b[39m=\u001b[39;49m\u001b[39m'\u001b[39;49m\u001b[39mspawn\u001b[39;49m\u001b[39m'\u001b[39;49m)\n",
      "File \u001b[1;32md:\\Users\\babys\\Anaconda3\\envs\\mo\\lib\\site-packages\\torch\\multiprocessing\\spawn.py:188\u001b[0m, in \u001b[0;36mstart_processes\u001b[1;34m(fn, args, nprocs, join, daemon, start_method)\u001b[0m\n\u001b[0;32m    185\u001b[0m     \u001b[39mreturn\u001b[39;00m context\n\u001b[0;32m    187\u001b[0m \u001b[39m# Loop on join until it returns True or raises an exception.\u001b[39;00m\n\u001b[1;32m--> 188\u001b[0m \u001b[39mwhile\u001b[39;00m \u001b[39mnot\u001b[39;00m context\u001b[39m.\u001b[39;49mjoin():\n\u001b[0;32m    189\u001b[0m     \u001b[39mpass\u001b[39;00m\n",
      "File \u001b[1;32md:\\Users\\babys\\Anaconda3\\envs\\mo\\lib\\site-packages\\torch\\multiprocessing\\spawn.py:150\u001b[0m, in \u001b[0;36mProcessContext.join\u001b[1;34m(self, timeout)\u001b[0m\n\u001b[0;32m    148\u001b[0m msg \u001b[39m=\u001b[39m \u001b[39m\"\u001b[39m\u001b[39m\\n\u001b[39;00m\u001b[39m\\n\u001b[39;00m\u001b[39m-- Process \u001b[39m\u001b[39m%d\u001b[39;00m\u001b[39m terminated with the following error:\u001b[39m\u001b[39m\\n\u001b[39;00m\u001b[39m\"\u001b[39m \u001b[39m%\u001b[39m error_index\n\u001b[0;32m    149\u001b[0m msg \u001b[39m+\u001b[39m\u001b[39m=\u001b[39m original_trace\n\u001b[1;32m--> 150\u001b[0m \u001b[39mraise\u001b[39;00m ProcessRaisedException(msg, error_index, failed_process\u001b[39m.\u001b[39mpid)\n",
      "\u001b[1;31mProcessRaisedException\u001b[0m: \n\n-- Process 0 terminated with the following error:\nTraceback (most recent call last):\n  File \"d:\\Users\\babys\\Anaconda3\\envs\\mo\\lib\\site-packages\\torch\\multiprocessing\\spawn.py\", line 59, in _wrap\n    fn(i, *args)\n  File \"d:\\Real-Time-Voice-Cloning-Chinese\\models\\synthesizer\\train_vits.py\", line 123, in run\n    net_g = Vits(\nTypeError: __init__() missing 1 required positional argument: 'stop_threshold'\n"
     ]
    },
    {
     "ename": "",
     "evalue": "",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31mThe Kernel crashed while executing code in the the current cell or a previous cell. Please review the code in the cell(s) to identify a possible cause of the failure. Click <a href='https://aka.ms/vscodeJupyterKernelCrash'>here</a> for more info. View Jupyter <a href='command:jupyter.viewOutput'>log</a> for further details."
     ]
    }
   ],
   "source": [
    "from models.synthesizer.train_vits import run\n",
    "from pathlib import Path\n",
    "from utils.hparams import HParams\n",
    "import torch, os\n",
    "import torch.multiprocessing as mp\n",
    "\n",
    "datasets_root = Path(\"../audiodata/SV2TTS/synthesizer\")\n",
    "hparams= HParams(\n",
    "  model_dir = \"data/ckpt/synthesizer/vits\",\n",
    ")\n",
    "hparams.loadJson(Path(hparams.model_dir).joinpath(\"config.json\"))\n",
    "hparams.data[\"training_files\"] = str(datasets_root.joinpath(\"train.txt\"))\n",
    "hparams.data[\"validation_files\"] = str(datasets_root.joinpath(\"train.txt\"))\n",
    "hparams.data[\"datasets_root\"] = str(datasets_root)\n",
    "\n",
    "n_gpus = torch.cuda.device_count()\n",
    "# for spawn\n",
    "os.environ['MASTER_ADDR'] = 'localhost'\n",
    "os.environ['MASTER_PORT'] = '8899'\n",
    "mp.spawn(run, nprocs=n_gpus, args=(n_gpus, hparams))"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "挑选只有对应emo文件的meta数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "from pathlib import Path\n",
    "import os\n",
    "root = Path('../audiodata/SV2TTS/synthesizer')\n",
    "dict_info = []\n",
    "with open(root.joinpath(\"train.txt\"), \"r\", encoding=\"utf-8\") as dict_meta:\n",
    "    for raw in dict_meta:\n",
    "        if not raw:\n",
    "            continue\n",
    "        v = raw.split(\"|\")[0].replace(\"audio\",\"emo\")\n",
    "        emo_fpath = root.joinpath(\"emo\").joinpath(v)\n",
    "        if emo_fpath.exists():\n",
    "            dict_info.append(raw)\n",
    "        # else:\n",
    "        #     print(emo_fpath)\n",
    "# Iterate over each wav\n",
    "meta2 = Path('../audiodata/SV2TTS/synthesizer/train2.txt')\n",
    "metadata_file = meta2.open(\"w\", encoding=\"utf-8\")\n",
    "for new_info in dict_info:\n",
    "    metadata_file.write(new_info)\n",
    "metadata_file.close()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "mo",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.7"
  },
  "vscode": {
   "interpreter": {
    "hash": "788ab866da3baa6c99886d56abb59fe71b6a552bf52c65473ecf96c784704db8"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}