diff --git a/toolbox/__init__.py b/toolbox/__init__.py
index 7d67b52..d162a78 100644
--- a/toolbox/__init__.py
+++ b/toolbox/__init__.py
@@ -234,7 +234,7 @@ class Toolbox:
texts = processed_texts
embed = self.ui.selected_utterance.embed
embeds = [embed] * len(texts)
- specs = self.synthesizer.synthesize_spectrograms(texts, embeds, style_idx=int(self.ui.style_idx_textbox.text()))
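+ # style_idx is taken from the UI style slider (range -1 to 9) rather than a free-text box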
+ specs = self.synthesizer.synthesize_spectrograms(texts, embeds, style_idx=int(self.ui.slider.value()))
breaks = [spec.shape[1] for spec in specs]
spec = np.concatenate(specs, axis=1)
diff --git a/toolbox/assets/mb.png b/toolbox/assets/mb.png
new file mode 100644
index 0000000..abd804c
Binary files /dev/null and b/toolbox/assets/mb.png differ
diff --git a/toolbox/ui.py b/toolbox/ui.py
index f448f0c..8f0013c 100644
--- a/toolbox/ui.py
+++ b/toolbox/ui.py
@@ -2,6 +2,7 @@ import matplotlib.pyplot as plt
from matplotlib.backends.backend_qt5agg import FigureCanvasQTAgg as FigureCanvas
from matplotlib.figure import Figure
from PyQt5.QtCore import Qt, QStringListModel
+from PyQt5 import QtGui
from PyQt5.QtWidgets import *
from encoder.inference import plot_embedding_as_heatmap
from toolbox.utterance import Utterance
@@ -420,7 +421,8 @@ class UI(QDialog):
## Initialize the application
self.app = QApplication(sys.argv)
super().__init__(None)
- self.setWindowTitle("SV2TTS toolbox")
+ self.setWindowTitle("MockingBird GUI")
+ self.setWindowIcon(QtGui.QIcon('toolbox/assets/mb.png'))
self.setWindowFlag(Qt.WindowMinimizeButtonHint, True)
self.setWindowFlag(Qt.WindowMaximizeButtonHint, True)
@@ -432,21 +434,24 @@ class UI(QDialog):
# Browser
browser_layout = QGridLayout()
- root_layout.addLayout(browser_layout, 0, 0, 1, 2)
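+ # Root grid: browser (row 0, cols 0-7), generation (row 0, col 8), visualizations (rows 1-2, cols 0-7), projections (rows 1-2, cols 8-9)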
+ root_layout.addLayout(browser_layout, 0, 0, 1, 8)
# Generation
gen_layout = QVBoxLayout()
- root_layout.addLayout(gen_layout, 0, 2, 1, 2)
-
- # Projections
- self.projections_layout = QVBoxLayout()
- root_layout.addLayout(self.projections_layout, 1, 0, 1, 1)
-
+ root_layout.addLayout(gen_layout, 0, 8)
+
# Visualizations
vis_layout = QVBoxLayout()
- root_layout.addLayout(vis_layout, 1, 1, 1, 3)
+ root_layout.addLayout(vis_layout, 1, 0, 2, 8)
+ # Output
+ output_layout = QGridLayout()
+ vis_layout.addLayout(output_layout, 0)
+ # Projections
+ self.projections_layout = QVBoxLayout()
+ root_layout.addLayout(self.projections_layout, 1, 8, 2, 2)
+
## Projections
# UMap
fig, self.umap_ax = plt.subplots(figsize=(3, 3), facecolor="#F0F0F0")
@@ -460,80 +465,88 @@ class UI(QDialog):
## Browser
# Dataset, speaker and utterance selection
i = 0
- self.dataset_box = QComboBox()
- browser_layout.addWidget(QLabel("Dataset"), i, 0)
- browser_layout.addWidget(self.dataset_box, i + 1, 0)
- self.speaker_box = QComboBox()
- browser_layout.addWidget(QLabel("Speaker"), i, 1)
- browser_layout.addWidget(self.speaker_box, i + 1, 1)
- self.utterance_box = QComboBox()
- browser_layout.addWidget(QLabel("Utterance"), i, 2)
- browser_layout.addWidget(self.utterance_box, i + 1, 2)
- self.browser_load_button = QPushButton("Load")
- browser_layout.addWidget(self.browser_load_button, i + 1, 3)
- i += 2
- # Random buttons
+ source_groupbox = QGroupBox('Source(源音频)')
+ source_layout = QGridLayout()
+ source_groupbox.setLayout(source_layout)
+ browser_layout.addWidget(source_groupbox, i, 0, 1, 4)
+
+ self.dataset_box = QComboBox()
+ source_layout.addWidget(QLabel("Dataset(数据集):"), i, 0)
+ source_layout.addWidget(self.dataset_box, i, 1)
self.random_dataset_button = QPushButton("Random")
- browser_layout.addWidget(self.random_dataset_button, i, 0)
+ source_layout.addWidget(self.random_dataset_button, i, 2)
+ i += 1
+ self.speaker_box = QComboBox()
+ source_layout.addWidget(QLabel("Speaker(说话者)"), i, 0)
+ source_layout.addWidget(self.speaker_box, i, 1)
self.random_speaker_button = QPushButton("Random")
- browser_layout.addWidget(self.random_speaker_button, i, 1)
+ source_layout.addWidget(self.random_speaker_button, i, 2)
+ i += 1
+ self.utterance_box = QComboBox()
+ source_layout.addWidget(QLabel("Utterance(音频):"), i, 0)
+ source_layout.addWidget(self.utterance_box, i, 1)
self.random_utterance_button = QPushButton("Random")
- browser_layout.addWidget(self.random_utterance_button, i, 2)
+ source_layout.addWidget(self.random_utterance_button, i, 2)
+
+ i += 1
+ source_layout.addWidget(QLabel("Use(使用):"), i, 0)
+ self.browser_load_button = QPushButton("Load Above(加载上面)")
+ source_layout.addWidget(self.browser_load_button, i, 1, 1, 2)
self.auto_next_checkbox = QCheckBox("Auto select next")
self.auto_next_checkbox.setChecked(True)
- browser_layout.addWidget(self.auto_next_checkbox, i, 3)
- i += 1
+ source_layout.addWidget(self.auto_next_checkbox, i+1, 1)
+ self.browser_browse_button = QPushButton("Browse(打开本地)")
+ source_layout.addWidget(self.browser_browse_button, i, 3)
+ self.record_button = QPushButton("Record(录音)")
+ source_layout.addWidget(self.record_button, i+1, 3)
+ i += 2
# Utterance box
- browser_layout.addWidget(QLabel("Use embedding from:"), i, 0)
+ browser_layout.addWidget(QLabel("Current(当前):"), i, 0)
self.utterance_history = QComboBox()
- browser_layout.addWidget(self.utterance_history, i, 1, 1, 3)
- i += 1
-
- # Random & next utterance buttons
- self.browser_browse_button = QPushButton("Browse")
- browser_layout.addWidget(self.browser_browse_button, i, 0)
- self.record_button = QPushButton("Record")
- browser_layout.addWidget(self.record_button, i, 1)
- self.play_button = QPushButton("Play")
+ browser_layout.addWidget(self.utterance_history, i, 1)
+ self.play_button = QPushButton("Play(播放)")
browser_layout.addWidget(self.play_button, i, 2)
- self.stop_button = QPushButton("Stop")
+ self.stop_button = QPushButton("Stop(暂停)")
browser_layout.addWidget(self.stop_button, i, 3)
- i += 1
+ i += 1
+ model_groupbox = QGroupBox('Models(模型选择)')
+ model_layout = QHBoxLayout()
+ model_groupbox.setLayout(model_layout)
+ browser_layout.addWidget(model_groupbox, i, 0, 1, 4)
# Model and audio output selection
self.encoder_box = QComboBox()
- browser_layout.addWidget(QLabel("Encoder"), i, 0)
- browser_layout.addWidget(self.encoder_box, i + 1, 0)
+ model_layout.addWidget(QLabel("Encoder:"))
+ model_layout.addWidget(self.encoder_box)
self.synthesizer_box = QComboBox()
- browser_layout.addWidget(QLabel("Synthesizer"), i, 1)
- browser_layout.addWidget(self.synthesizer_box, i + 1, 1)
+ model_layout.addWidget(QLabel("Synthesizer:"))
+ model_layout.addWidget(self.synthesizer_box)
self.vocoder_box = QComboBox()
- browser_layout.addWidget(QLabel("Vocoder"), i, 2)
- browser_layout.addWidget(self.vocoder_box, i + 1, 2)
+ model_layout.addWidget(QLabel("Vocoder:"))
+ model_layout.addWidget(self.vocoder_box)
- self.audio_out_devices_cb=QComboBox()
- browser_layout.addWidget(QLabel("Audio Output"), i, 3)
- browser_layout.addWidget(self.audio_out_devices_cb, i + 1, 3)
- i += 2
#Replay & Save Audio
- browser_layout.addWidget(QLabel("Toolbox Output:"), i, 0)
+ i = 0  # restart the row counter for the output grid
+ output_layout.addWidget(QLabel("Toolbox Output:"), i, 0)
self.waves_cb = QComboBox()
self.waves_cb_model = QStringListModel()
self.waves_cb.setModel(self.waves_cb_model)
self.waves_cb.setToolTip("Select one of the last generated waves in this section for replaying or exporting")
- browser_layout.addWidget(self.waves_cb, i, 1)
+ output_layout.addWidget(self.waves_cb, i, 1)
self.replay_wav_button = QPushButton("Replay")
self.replay_wav_button.setToolTip("Replay last generated vocoder")
- browser_layout.addWidget(self.replay_wav_button, i, 2)
+ output_layout.addWidget(self.replay_wav_button, i, 2)
self.export_wav_button = QPushButton("Export")
self.export_wav_button.setToolTip("Save last generated vocoder audio in filesystem as a wav file")
- browser_layout.addWidget(self.export_wav_button, i, 3)
+ output_layout.addWidget(self.export_wav_button, i, 3)
+ self.audio_out_devices_cb = QComboBox()
i += 1
-
+ output_layout.addWidget(QLabel("Audio Output"), i, 0)
+ output_layout.addWidget(self.audio_out_devices_cb, i, 1)
## Embed & spectrograms
vis_layout.addStretch()
@@ -554,7 +567,6 @@ class UI(QDialog):
for side in ["top", "right", "bottom", "left"]:
ax.spines[side].set_visible(False)
-
## Generation
self.text_prompt = QPlainTextEdit(default_text)
gen_layout.addWidget(self.text_prompt, stretch=1)
@@ -576,10 +588,14 @@ class UI(QDialog):
self.seed_textbox = QLineEdit()
self.seed_textbox.setMaximumWidth(80)
layout_seed.addWidget(self.seed_textbox, 0, 1)
- layout_seed.addWidget(QLabel("Style#:(0~9)"), 0, 2)
- self.style_idx_textbox = QLineEdit("-1")
- self.style_idx_textbox.setMaximumWidth(80)
- layout_seed.addWidget(self.style_idx_textbox, 0, 3)
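+ # Style index is chosen with a slider covering the same -1 to 9 range as the removed text box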
+ self.slider = QSlider(Qt.Horizontal)
+ self.slider.setTickInterval(1)
+ self.slider.setFocusPolicy(Qt.NoFocus)
+ self.slider.setSingleStep(1)
+ self.slider.setRange(-1, 9)
+ self.slider.setValue(-1)
+ layout_seed.addWidget(QLabel("Style:"), 0, 2)
+ layout_seed.addWidget(self.slider, 0, 3)
self.trim_silences_checkbox = QCheckBox("Enhance vocoder output")
self.trim_silences_checkbox.setToolTip("When checked, trims excess silence in vocoder output."
" This feature requires `webrtcvad` to be installed.")
@@ -597,7 +613,7 @@ class UI(QDialog):
## Set the size of the window and of the elements
- max_size = QDesktopWidget().availableGeometry(self).size() * 0.8
+ max_size = QDesktopWidget().availableGeometry(self).size() * 0.5
self.resize(max_size)
## Finalize the display