MockingBird/web/__init__.py

from web.api import api_blueprint
from pathlib import Path
from gevent import pywsgi as wsgi
from flask import Flask, Response, request, render_template
from synthesizer.inference import Synthesizer
from encoder import inference as encoder
from vocoder.hifigan import inference as gan_vocoder
from vocoder.wavernn import inference as rnn_vocoder
import numpy as np
import re
from scipy.io.wavfile import write
import librosa
import io
import base64
from flask_cors import CORS
from flask_wtf import CSRFProtect
import webbrowser

def webApp():
    """Build the Flask application, load the models and serve it with gevent.

    Configuration is read from ``web.config.default`` and the API blueprint
    is registered. The encoder and both vocoders are loaded once up front;
    synthesizer models are constructed on first use and cached. After the
    ``/api/synthesize`` and ``/`` routes are defined, the site is opened in
    the default web browser and the call blocks in
    ``WSGIServer.serve_forever()``.

    Returns
    -------
    flask.Flask
        The configured application. Only reached once ``serve_forever()``
        returns.
    """
    # Init and load config
    app = Flask(__name__, instance_relative_config=True)
    app.config.from_object("web.config.default")
    app.config['RESTPLUS_MASK_SWAGGER'] = False
    app.register_blueprint(api_blueprint)

    # CORS(app)  # Allows cross-origin requests; keep this line commented out to forbid them.
    # Passing the app to the constructor already initialises the extension,
    # so a separate csrf.init_app(app) call would only register it twice.
    csrf = CSRFProtect(app)

    syn_models_dirt = "synthesizer/saved_models"
    synthesizers = list(Path(syn_models_dirt).glob("**/*.pt"))
    # Synthesizer instances that were already constructed, keyed by str(path).
    synthesizers_cache = {}
    encoder.load_model(Path("encoder/saved_models/pretrained.pt"))
    rnn_vocoder.load_model(Path("vocoder/saved_models/pretrained/pretrained.pt"))
    gan_vocoder.load_model(Path("vocoder/saved_models/pretrained/g_hifigan.pt"))

    # Punctuation that separates text fragments; compiled once here instead
    # of being rebuilt for every line of every request.
    punctuation = '！，。、,'
    fragment_separator = re.compile(r'[{}]+'.format(punctuation))

    def pcm2float(sig, dtype='float32'):
        """Convert an integer PCM signal to floating point.

        Parameters
        ----------
        sig : array_like
            Input array, must have a signed or unsigned integer dtype.
        dtype : data type, optional
            Desired floating point data type (``'float32'`` by default).

        Returns
        -------
        numpy.ndarray
            The samples shifted by the integer type's midpoint offset and
            divided by ``2 ** (bits - 1)``.

        Raises
        ------
        TypeError
            If ``sig`` is not an integer array or ``dtype`` is not a
            floating point type.
        """
        sig = np.asarray(sig)
        if sig.dtype.kind not in 'iu':
            raise TypeError("'sig' must be an array of integers")
        dtype = np.dtype(dtype)
        if dtype.kind != 'f':
            raise TypeError("'dtype' must be a floating point type")

        i = np.iinfo(sig.dtype)
        abs_max = 2 ** (i.bits - 1)
        # Zero for signed types; recentres unsigned types around zero.
        offset = i.min + abs_max
        return (sig.astype(dtype) - offset) / abs_max

    @csrf.exempt
    @app.route("/api/synthesize", methods=["POST"])
    def synthesize():
        """Synthesize speech for the posted text using the posted voice sample.

        Form fields read by this handler:

        * ``synt_path`` (optional): path of the synthesizer model; the first
          model found under ``synthesizer/saved_models`` is used when absent.
        * ``upfile_b64``: base64 encoded 16-bit PCM samples, or alternatively
          an uploaded ``file`` that is loaded through ``librosa``.
        * ``text``: the text to speak; it is split on newlines and on the
          punctuation characters configured above.
        * ``vocoder`` (optional): ``"WaveRNN"`` selects the WaveRNN vocoder,
          anything else uses HiFi-GAN.

        Returns
        -------
        flask.Response
            The generated audio as ``audio/wav``; a plain-text 500 response
            when no synthesizer model is available, or a plain-text 400
            response when the text contains nothing to synthesize.
        """
        # TODO Implementation with json to support more platform
        # Load synthesizer
        if "synt_path" in request.form:
            # NOTE(review): this value comes straight from the client and is
            # used as a filesystem path for model loading; consider
            # restricting it to the entries of `synthesizers`.
            synt_path = request.form["synt_path"]
        elif synthesizers:
            synt_path = synthesizers[0]
            print("NO synthsizer is specified, try default first one.")
        else:
            # Previously an IndexError; report the missing models explicitly.
            return Response(
                "No synthesizer model found under " + syn_models_dirt,
                status=500,
                mimetype="text/plain",
            )

        # Cache for synthesizer: key by string so that the default Path entry
        # and the same path sent by a client share one loaded model.
        cache_key = str(synt_path)
        current_synt = synthesizers_cache.get(cache_key)
        if current_synt is None:
            current_synt = Synthesizer(Path(synt_path))
            synthesizers_cache[cache_key] = current_synt
        print("using synthesizer model: " + str(synt_path))

        # Load input wav
        if "upfile_b64" in request.form:
            wav_base64 = request.form["upfile_b64"]
            wav = base64.b64decode(bytes(wav_base64, 'utf-8'))
            wav = pcm2float(np.frombuffer(wav, dtype=np.int16), dtype=np.float32)
            # Raw PCM carries no header, so the synthesizer's rate is used.
            sample_rate = Synthesizer.sample_rate
        else:
            wav, sample_rate = librosa.load(request.files['file'])
        # Debug copy of the received audio; every request overwrites it.
        write("temp.wav", sample_rate, wav)  # Make sure we get the correct wav

        encoder_wav = encoder.preprocess_wav(wav, sample_rate)
        embed, _, _ = encoder.embed_utterance(encoder_wav, return_partials=True)

        # Load input text: split into fragments and drop the ones that are
        # empty once surrounding whitespace is removed.
        texts = []
        for line in request.form["text"].split("\n"):
            for fragment in fragment_separator.split(line):
                fragment = fragment.strip()
                if fragment:
                    texts.append(fragment)
        if not texts:
            # np.concatenate would fail on an empty list further down.
            return Response(
                "No text to synthesize.", status=400, mimetype="text/plain"
            )

        # synthesize and vocode
        embeds = [embed] * len(texts)
        specs = current_synt.synthesize_spectrograms(texts, embeds)
        spec = np.concatenate(specs, axis=1)
        if "vocoder" in request.form and request.form["vocoder"] == "WaveRNN":
            wav = rnn_vocoder.infer_waveform(spec)
        else:
            wav = gan_vocoder.infer_waveform(spec)

        # Return cooked wav
        out = io.BytesIO()
        write(out, Synthesizer.sample_rate, wav.astype(np.float32))
        # Send the bytes as one body; iterating the BytesIO object would
        # split the binary data into "lines" at every 0x0A byte.
        return Response(out.getvalue(), mimetype="audio/wav")

    @app.route('/', methods=['GET'])
    def index():
        """Render the ``index.html`` template."""
        return render_template("index.html")

    host = app.config.get("HOST")
    port = app.config.get("PORT")
    web_address = 'http://{}:{}'.format(host, port)
    print("Web server:" + web_address)
    webbrowser.open(web_address)
    server = wsgi.WSGIServer((host, port), app)
    # Blocks until the server is stopped.
    server.serve_forever()
    return app

# Start the web server when this file is executed directly as a script.
if __name__ == "__main__":
    webApp()
Web server release v2 (#99) * Init App * init server.py (#93) * init server.py * Update requirements.txt Add requirement Co-authored-by: auau <auau@test.com> Co-authored-by: babysor00 <babysor00@gmail.com> * Run web.py！ Run web.py！ * Restruct readme and add instruction to use web server * fix training preprocess of vocoder * Init App * init server.py (#93) * init server.py * Update requirements.txt Add requirement Co-authored-by: auau <auau@test.com> Co-authored-by: babysor00 <babysor00@gmail.com> * Run web.py！ Run web.py！ * fix training preprocess of vocoder * Refactor to restful style Co-authored-by: balala <Ozgay@users.noreply.github.com> Co-authored-by: auau <auau@test.com> 2021-09-25 17:07:46 +08:00			`from web.api import api_blueprint`
Web server (#94) * Init App * init server.py (#93) * init server.py * Update requirements.txt Add requirement Co-authored-by: auau <auau@test.com> Co-authored-by: babysor00 <babysor00@gmail.com> * Run web.py！ Run web.py！ Co-authored-by: balala <Ozgay@users.noreply.github.com> Co-authored-by: auau <auau@test.com> 2021-09-21 16:56:12 +08:00			`from pathlib import Path`
			`from gevent import pywsgi as wsgi`
Change synthesizer read to restful style 2021-09-26 10:01:50 +08:00			`from flask import Flask, Response, request, render_template`
Web server (#94) * Init App * init server.py (#93) * init server.py * Update requirements.txt Add requirement Co-authored-by: auau <auau@test.com> Co-authored-by: babysor00 <babysor00@gmail.com> * Run web.py！ Run web.py！ Co-authored-by: balala <Ozgay@users.noreply.github.com> Co-authored-by: auau <auau@test.com> 2021-09-21 16:56:12 +08:00			`from synthesizer.inference import Synthesizer`
			`from encoder import inference as encoder`
			`from vocoder.hifigan import inference as gan_vocoder`
			`from vocoder.wavernn import inference as rnn_vocoder`
			`import numpy as np`
			`import re`
Change synthesizer read to restful style 2021-09-26 10:01:50 +08:00			`from scipy.io.wavfile import write`
New web with selecting wav files 2021-10-01 22:13:39 +08:00			`import librosa`
Web server (#94) * Init App * init server.py (#93) * init server.py * Update requirements.txt Add requirement Co-authored-by: auau <auau@test.com> Co-authored-by: babysor00 <babysor00@gmail.com> * Run web.py！ Run web.py！ Co-authored-by: balala <Ozgay@users.noreply.github.com> Co-authored-by: auau <auau@test.com> 2021-09-21 16:56:12 +08:00			`import io`
			`import base64`
			`from flask_cors import CORS`
			`from flask_wtf import CSRFProtect`
web tool box update UI (#111) * web tool box update UI * update img 2021-10-01 00:32:29 +08:00			`import webbrowser`
Web server (#94) * Init App * init server.py (#93) * init server.py * Update requirements.txt Add requirement Co-authored-by: auau <auau@test.com> Co-authored-by: babysor00 <babysor00@gmail.com> * Run web.py！ Run web.py！ Co-authored-by: balala <Ozgay@users.noreply.github.com> Co-authored-by: auau <auau@test.com> 2021-09-21 16:56:12 +08:00
			`def webApp():`
			`# Init and load config`
			`app = Flask(__name__, instance_relative_config=True)`
			`app.config.from_object("web.config.default")`
Web server release v2 (#99) * Init App * init server.py (#93) * init server.py * Update requirements.txt Add requirement Co-authored-by: auau <auau@test.com> Co-authored-by: babysor00 <babysor00@gmail.com> * Run web.py！ Run web.py！ * Restruct readme and add instruction to use web server * fix training preprocess of vocoder * Init App * init server.py (#93) * init server.py * Update requirements.txt Add requirement Co-authored-by: auau <auau@test.com> Co-authored-by: babysor00 <babysor00@gmail.com> * Run web.py！ Run web.py！ * fix training preprocess of vocoder * Refactor to restful style Co-authored-by: balala <Ozgay@users.noreply.github.com> Co-authored-by: auau <auau@test.com> 2021-09-25 17:07:46 +08:00			`app.config['RESTPLUS_MASK_SWAGGER'] = False`
			`app.register_blueprint(api_blueprint)`
Web server (#94) * Init App * init server.py (#93) * init server.py * Update requirements.txt Add requirement Co-authored-by: auau <auau@test.com> Co-authored-by: babysor00 <babysor00@gmail.com> * Run web.py！ Run web.py！ Co-authored-by: balala <Ozgay@users.noreply.github.com> Co-authored-by: auau <auau@test.com> 2021-09-21 16:56:12 +08:00
Change synthesizer read to restful style 2021-09-26 10:01:50 +08:00			`# CORS(app) #允许跨域，注释掉此行则禁止跨域请求`
Web server (#94) * Init App * init server.py (#93) * init server.py * Update requirements.txt Add requirement Co-authored-by: auau <auau@test.com> Co-authored-by: babysor00 <babysor00@gmail.com> * Run web.py！ Run web.py！ Co-authored-by: balala <Ozgay@users.noreply.github.com> Co-authored-by: auau <auau@test.com> 2021-09-21 16:56:12 +08:00			`csrf = CSRFProtect(app)`
			`csrf.init_app(app)`
Change synthesizer read to restful style 2021-09-26 10:01:50 +08:00
Web server (#94) * Init App * init server.py (#93) * init server.py * Update requirements.txt Add requirement Co-authored-by: auau <auau@test.com> Co-authored-by: babysor00 <babysor00@gmail.com> * Run web.py！ Run web.py！ Co-authored-by: balala <Ozgay@users.noreply.github.com> Co-authored-by: auau <auau@test.com> 2021-09-21 16:56:12 +08:00			`syn_models_dirt = "synthesizer/saved_models"`
			`synthesizers = list(Path(syn_models_dirt).glob("**/*.pt"))`
			`synthesizers_cache = {}`
			`encoder.load_model(Path("encoder/saved_models/pretrained.pt"))`
Allow to select vocoder in web 2021-11-08 23:55:16 +08:00			`rnn_vocoder.load_model(Path("vocoder/saved_models/pretrained/pretrained.pt"))`
Web server (#94) * Init App * init server.py (#93) * init server.py * Update requirements.txt Add requirement Co-authored-by: auau <auau@test.com> Co-authored-by: babysor00 <babysor00@gmail.com> * Run web.py！ Run web.py！ Co-authored-by: balala <Ozgay@users.noreply.github.com> Co-authored-by: auau <auau@test.com> 2021-09-21 16:56:12 +08:00			`gan_vocoder.load_model(Path("vocoder/saved_models/pretrained/g_hifigan.pt"))`

			`def pcm2float(sig, dtype='float32'):`
			`"""Convert PCM signal to floating point with a range from -1 to 1.`
			`Use dtype='float32' for single precision.`
			`Parameters`
			`----------`
			`sig : array_like`
			`Input array, must have integral type.`
			`dtype : data type, optional`
			`Desired (floating point) data type.`
			`Returns`
			`-------`
			`numpy.ndarray`
			`Normalized floating point data.`
			`See Also`
			`--------`
			`float2pcm, dtype`
			`"""`
			`sig = np.asarray(sig)`
			`if sig.dtype.kind not in 'iu':`
			`raise TypeError("'sig' must be an array of integers")`
			`dtype = np.dtype(dtype)`
			`if dtype.kind != 'f':`
			`raise TypeError("'dtype' must be a floating point type")`

			`i = np.iinfo(sig.dtype)`
			`abs_max = 2 ** (i.bits - 1)`
			`offset = i.min + abs_max`
			`return (sig.astype(dtype) - offset) / abs_max`

			`# Cache for synthesizer`
			`@csrf.exempt`
			`@app.route("/api/synthesize", methods=["POST"])`
			`def synthesize():`
			`# TODO Implementation with json to support more platform`
			`# Load synthesizer`
			`if "synt_path" in request.form:`
			`synt_path = request.form["synt_path"]`
			`else:`
			`synt_path = synthesizers[0]`
			`print("NO synthsizer is specified, try default first one.")`
			`if synthesizers_cache.get(synt_path) is None:`
			`current_synt = Synthesizer(Path(synt_path))`
			`synthesizers_cache[synt_path] = current_synt`
			`else:`
			`current_synt = synthesizers_cache[synt_path]`
			`print("using synthesizer model: " + str(synt_path))`
			`# Load input wav`
New web with selecting wav files 2021-10-01 22:13:39 +08:00			`if "upfile_b64" in request.form:`
			`wav_base64 = request.form["upfile_b64"]`
			`wav = base64.b64decode(bytes(wav_base64, 'utf-8'))`
			`wav = pcm2float(np.frombuffer(wav, dtype=np.int16), dtype=np.float32)`
			`sample_rate = Synthesizer.sample_rate`
			`else:`
			`wav, sample_rate, = librosa.load(request.files['file'])`
			`write("temp.wav", sample_rate, wav) #Make sure we get the correct wav`

			`encoder_wav = encoder.preprocess_wav(wav, sample_rate)`
Web server (#94) * Init App * init server.py (#93) * init server.py * Update requirements.txt Add requirement Co-authored-by: auau <auau@test.com> Co-authored-by: babysor00 <babysor00@gmail.com> * Run web.py！ Run web.py！ Co-authored-by: balala <Ozgay@users.noreply.github.com> Co-authored-by: auau <auau@test.com> 2021-09-21 16:56:12 +08:00			`embed, _, _ = encoder.embed_utterance(encoder_wav, return_partials=True)`

			`# Load input text`
			`texts = request.form["text"].split("\n")`
			`punctuation = '！，。、,' # punctuate and split/clean text`
			`processed_texts = []`
			`for text in texts:`
			`for processed_text in re.sub(r'[{}]+'.format(punctuation), '\n', text).split('\n'):`
			`if processed_text:`
			`processed_texts.append(processed_text.strip())`
			`texts = processed_texts`

			`# synthesize and vocode`
			`embeds = [embed] * len(texts)`
			`specs = current_synt.synthesize_spectrograms(texts, embeds)`
			`spec = np.concatenate(specs, axis=1)`
Allow to select vocoder in web 2021-11-08 23:55:16 +08:00			`if "vocoder" in request.form and request.form["vocoder"] == "WaveRNN":`
			`wav = rnn_vocoder.infer_waveform(spec)`
			`else:`
			`wav = gan_vocoder.infer_waveform(spec)`
Web server (#94) * Init App * init server.py (#93) * init server.py * Update requirements.txt Add requirement Co-authored-by: auau <auau@test.com> Co-authored-by: babysor00 <babysor00@gmail.com> * Run web.py！ Run web.py！ Co-authored-by: balala <Ozgay@users.noreply.github.com> Co-authored-by: auau <auau@test.com> 2021-09-21 16:56:12 +08:00
			`# Return cooked wav`
			`out = io.BytesIO()`
Allow to select vocoder in web 2021-11-08 23:55:16 +08:00			`write(out, Synthesizer.sample_rate, wav.astype(np.float32))`
Web server (#94) * Init App * init server.py (#93) * init server.py * Update requirements.txt Add requirement Co-authored-by: auau <auau@test.com> Co-authored-by: babysor00 <babysor00@gmail.com> * Run web.py！ Run web.py！ Co-authored-by: balala <Ozgay@users.noreply.github.com> Co-authored-by: auau <auau@test.com> 2021-09-21 16:56:12 +08:00			`return Response(out, mimetype="audio/wav")`

			`@app.route('/', methods=['GET'])`
			`def index():`
			`return render_template("index.html")`

			`host = app.config.get("HOST")`
			`port = app.config.get("PORT")`
web tool box update UI (#111) * web tool box update UI * update img 2021-10-01 00:32:29 +08:00			`web_address = 'http://{}:{}'.format(host, port)`
			`print(f"Web server:" + web_address)`
			`webbrowser.open(web_address)`
Web server (#94) * Init App * init server.py (#93) * init server.py * Update requirements.txt Add requirement Co-authored-by: auau <auau@test.com> Co-authored-by: babysor00 <babysor00@gmail.com> * Run web.py！ Run web.py！ Co-authored-by: balala <Ozgay@users.noreply.github.com> Co-authored-by: auau <auau@test.com> 2021-09-21 16:56:12 +08:00			`server = wsgi.WSGIServer((host, port), app)`
			`server.serve_forever()`
			`return app`

			`if __name__ == "__main__":`
			`webApp()`