diff --git a/toolbox/__init__.py b/toolbox/__init__.py
index 7d67b52..d162a78 100644
--- a/toolbox/__init__.py
+++ b/toolbox/__init__.py
@@ -234,7 +234,7 @@ class Toolbox:
             texts = processed_texts
         embed = self.ui.selected_utterance.embed
         embeds = [embed] * len(texts)
-        specs = self.synthesizer.synthesize_spectrograms(texts, embeds, style_idx=int(self.ui.style_idx_textbox.text()))
+        specs = self.synthesizer.synthesize_spectrograms(texts, embeds, style_idx=int(self.ui.slider.value()))
         breaks = [spec.shape[1] for spec in specs]
         spec = np.concatenate(specs, axis=1)
diff --git a/toolbox/assets/mb.png b/toolbox/assets/mb.png
new file mode 100644
index 0000000..abd804c
Binary files /dev/null and b/toolbox/assets/mb.png differ
diff --git a/toolbox/ui.py b/toolbox/ui.py
index f448f0c..8f0013c 100644
--- a/toolbox/ui.py
+++ b/toolbox/ui.py
@@ -2,6 +2,7 @@ import matplotlib.pyplot as plt
 from matplotlib.backends.backend_qt5agg import FigureCanvasQTAgg as FigureCanvas
 from matplotlib.figure import Figure
 from PyQt5.QtCore import Qt, QStringListModel
+from PyQt5 import QtGui
 from PyQt5.QtWidgets import *
 from encoder.inference import plot_embedding_as_heatmap
 from toolbox.utterance import Utterance
@@ -420,7 +421,8 @@ class UI(QDialog):
         ## Initialize the application
         self.app = QApplication(sys.argv)
         super().__init__(None)
-        self.setWindowTitle("SV2TTS toolbox")
+        self.setWindowTitle("MockingBird GUI")
+        self.setWindowIcon(QtGui.QIcon('toolbox/assets/mb.png'))
         self.setWindowFlag(Qt.WindowMinimizeButtonHint, True)
         self.setWindowFlag(Qt.WindowMaximizeButtonHint, True)
@@ -432,21 +434,24 @@ class UI(QDialog):
         # Browser
         browser_layout = QGridLayout()
-        root_layout.addLayout(browser_layout, 0, 0, 1, 2)
+        root_layout.addLayout(browser_layout, 0, 0, 1, 8)

         # Generation
         gen_layout = QVBoxLayout()
-        root_layout.addLayout(gen_layout, 0, 2, 1, 2)
-
-        # Projections
-        self.projections_layout = QVBoxLayout()
-        root_layout.addLayout(self.projections_layout, 1, 0, 1, 1)
-
+        root_layout.addLayout(gen_layout, 0, 8)
+
         # Visualizations
         vis_layout = QVBoxLayout()
-        root_layout.addLayout(vis_layout, 1, 1, 1, 3)
+        root_layout.addLayout(vis_layout, 1, 0, 2, 8)
+        # Output
+        output_layout = QGridLayout()
+        vis_layout.addLayout(output_layout, 0)
+        # Projections
+        self.projections_layout = QVBoxLayout()
+        root_layout.addLayout(self.projections_layout, 1, 8, 2, 2)
+
         ## Projections
         # UMap
         fig, self.umap_ax = plt.subplots(figsize=(3, 3), facecolor="#F0F0F0")
@@ -460,80 +465,88 @@ class UI(QDialog):
         ## Browser
         # Dataset, speaker and utterance selection
         i = 0
-        self.dataset_box = QComboBox()
-        browser_layout.addWidget(QLabel("Dataset"), i, 0)
-        browser_layout.addWidget(self.dataset_box, i + 1, 0)
-        self.speaker_box = QComboBox()
-        browser_layout.addWidget(QLabel("Speaker"), i, 1)
-        browser_layout.addWidget(self.speaker_box, i + 1, 1)
-        self.utterance_box = QComboBox()
-        browser_layout.addWidget(QLabel("Utterance"), i, 2)
-        browser_layout.addWidget(self.utterance_box, i + 1, 2)
-        self.browser_load_button = QPushButton("Load")
-        browser_layout.addWidget(self.browser_load_button, i + 1, 3)
-        i += 2
-        # Random buttons
+        source_groupbox = QGroupBox('Source(源音频)')
+        source_layout = QGridLayout()
+        source_groupbox.setLayout(source_layout)
+        browser_layout.addWidget(source_groupbox, i, 0, 1, 4)
+
+        self.dataset_box = QComboBox()
+        source_layout.addWidget(QLabel("Dataset(数据集):"), i, 0)
+        source_layout.addWidget(self.dataset_box, i, 1)
         self.random_dataset_button = QPushButton("Random")
-        browser_layout.addWidget(self.random_dataset_button, i, 0)
+        source_layout.addWidget(self.random_dataset_button, i, 2)
+        i += 1
+        self.speaker_box = QComboBox()
+        source_layout.addWidget(QLabel("Speaker(说话者)"), i, 0)
+        source_layout.addWidget(self.speaker_box, i, 1)
         self.random_speaker_button = QPushButton("Random")
-        browser_layout.addWidget(self.random_speaker_button, i, 1)
+        source_layout.addWidget(self.random_speaker_button, i, 2)
+        i += 1
+        self.utterance_box = QComboBox()
+        source_layout.addWidget(QLabel("Utterance(音频):"), i, 0)
+        source_layout.addWidget(self.utterance_box, i, 1)
         self.random_utterance_button = QPushButton("Random")
-        browser_layout.addWidget(self.random_utterance_button, i, 2)
+        source_layout.addWidget(self.random_utterance_button, i, 2)
+
+        i += 1
+        source_layout.addWidget(QLabel("Use(使用):"), i, 0)
+        self.browser_load_button = QPushButton("Load Above(加载上面)")
+        source_layout.addWidget(self.browser_load_button, i, 1, 1, 2)
         self.auto_next_checkbox = QCheckBox("Auto select next")
         self.auto_next_checkbox.setChecked(True)
-        browser_layout.addWidget(self.auto_next_checkbox, i, 3)
-        i += 1
+        source_layout.addWidget(self.auto_next_checkbox, i+1, 1)
+        self.browser_browse_button = QPushButton("Browse(打开本地)")
+        source_layout.addWidget(self.browser_browse_button, i, 3)
+        self.record_button = QPushButton("Record(录音)")
+        source_layout.addWidget(self.record_button, i+1, 3)
+        i += 2

         # Utterance box
-        browser_layout.addWidget(QLabel("Use embedding from:"), i, 0)
+        browser_layout.addWidget(QLabel("Current(当前):"), i, 0)
         self.utterance_history = QComboBox()
-        browser_layout.addWidget(self.utterance_history, i, 1, 1, 3)
-        i += 1
-
-        # Random & next utterance buttons
-        self.browser_browse_button = QPushButton("Browse")
-        browser_layout.addWidget(self.browser_browse_button, i, 0)
-        self.record_button = QPushButton("Record")
-        browser_layout.addWidget(self.record_button, i, 1)
-        self.play_button = QPushButton("Play")
+        browser_layout.addWidget(self.utterance_history, i, 1)
+        self.play_button = QPushButton("Play(播放)")
         browser_layout.addWidget(self.play_button, i, 2)
-        self.stop_button = QPushButton("Stop")
+        self.stop_button = QPushButton("Stop(暂停)")
         browser_layout.addWidget(self.stop_button, i, 3)
-        i += 1
+        i += 1
+        model_groupbox = QGroupBox('Models(模型选择)')
+        model_layout = QHBoxLayout()
+        model_groupbox.setLayout(model_layout)
+        browser_layout.addWidget(model_groupbox, i, 0, 1, 4)

         # Model and audio output selection
         self.encoder_box = QComboBox()
-        browser_layout.addWidget(QLabel("Encoder"), i, 0)
-        browser_layout.addWidget(self.encoder_box, i + 1, 0)
+        model_layout.addWidget(QLabel("Encoder:"))
+        model_layout.addWidget(self.encoder_box)
         self.synthesizer_box = QComboBox()
-        browser_layout.addWidget(QLabel("Synthesizer"), i, 1)
-        browser_layout.addWidget(self.synthesizer_box, i + 1, 1)
+        model_layout.addWidget(QLabel("Synthesizer:"))
+        model_layout.addWidget(self.synthesizer_box)
         self.vocoder_box = QComboBox()
-        browser_layout.addWidget(QLabel("Vocoder"), i, 2)
-        browser_layout.addWidget(self.vocoder_box, i + 1, 2)
+        model_layout.addWidget(QLabel("Vocoder:"))
+        model_layout.addWidget(self.vocoder_box)

-        self.audio_out_devices_cb=QComboBox()
-        browser_layout.addWidget(QLabel("Audio Output"), i, 3)
-        browser_layout.addWidget(self.audio_out_devices_cb, i + 1, 3)
-        i += 2

         #Replay & Save Audio
-        browser_layout.addWidget(QLabel("Toolbox Output:"), i, 0)
+        i = 0
+        output_layout.addWidget(QLabel("Toolbox Output:"), i, 0)
         self.waves_cb = QComboBox()
         self.waves_cb_model = QStringListModel()
         self.waves_cb.setModel(self.waves_cb_model)
         self.waves_cb.setToolTip("Select one of the last generated waves in this section for replaying or exporting")
-        browser_layout.addWidget(self.waves_cb, i, 1)
+        output_layout.addWidget(self.waves_cb, i, 1)
         self.replay_wav_button = QPushButton("Replay")
         self.replay_wav_button.setToolTip("Replay last generated vocoder")
-        browser_layout.addWidget(self.replay_wav_button, i, 2)
+        output_layout.addWidget(self.replay_wav_button, i, 2)
         self.export_wav_button = QPushButton("Export")
         self.export_wav_button.setToolTip("Save last generated vocoder audio in filesystem as a wav file")
-        browser_layout.addWidget(self.export_wav_button, i, 3)
+        output_layout.addWidget(self.export_wav_button, i, 3)
+        self.audio_out_devices_cb = QComboBox()
         i += 1
-
+        output_layout.addWidget(QLabel("Audio Output"), i, 0)
+        output_layout.addWidget(self.audio_out_devices_cb, i, 1)

         ## Embed & spectrograms
         vis_layout.addStretch()
@@ -554,7 +567,6 @@ class UI(QDialog):
         for side in ["top", "right", "bottom", "left"]:
             ax.spines[side].set_visible(False)
-
         ## Generation
         self.text_prompt = QPlainTextEdit(default_text)
         gen_layout.addWidget(self.text_prompt, stretch=1)
@@ -576,10 +588,14 @@ class UI(QDialog):
         self.seed_textbox = QLineEdit()
         self.seed_textbox.setMaximumWidth(80)
         layout_seed.addWidget(self.seed_textbox, 0, 1)
-        layout_seed.addWidget(QLabel("Style#:(0~9)"), 0, 2)
-        self.style_idx_textbox = QLineEdit("-1")
-        self.style_idx_textbox.setMaximumWidth(80)
-        layout_seed.addWidget(self.style_idx_textbox, 0, 3)
+        self.slider = QSlider(Qt.Horizontal)
+        self.slider.setTickInterval(1)
+        self.slider.setFocusPolicy(Qt.NoFocus)
+        self.slider.setSingleStep(1)
+        self.slider.setRange(-1, 9)
+        self.slider.setValue(-1)
+        layout_seed.addWidget(QLabel("Style:"), 0, 2)
+        layout_seed.addWidget(self.slider, 0, 3)
         self.trim_silences_checkbox = QCheckBox("Enhance vocoder output")
         self.trim_silences_checkbox.setToolTip("When checked, trims excess silence in vocoder output."
                                                " This feature requires `webrtcvad` to be installed.")
@@ -597,7 +613,7 @@ class UI(QDialog):

         ## Set the size of the window and of the elements
-        max_size = QDesktopWidget().availableGeometry(self).size() * 0.8
+        max_size = QDesktopWidget().availableGeometry(self).size() * 0.5
         self.resize(max_size)

         ## Finalize the display
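
For reference, a minimal standalone sketch of the slider-driven style selection this diff introduces. The QSlider configuration mirrors the lines added to toolbox/ui.py; the StyleDemo widget, its current_style_idx() helper, and the label wiring are hypothetical scaffolding illustrating how the toolbox now reads int(self.ui.slider.value()) and forwards it as style_idx to synthesize_spectrograms.

# Hypothetical demo widget; not part of the diff above. Assumes PyQt5 is installed.
import sys

from PyQt5.QtCore import Qt
from PyQt5.QtWidgets import QApplication, QLabel, QSlider, QVBoxLayout, QWidget


class StyleDemo(QWidget):
    """Reproduces the style selector added to the toolbox UI."""

    def __init__(self):
        super().__init__()
        layout = QVBoxLayout(self)

        # Same configuration as the diff: an integer style index in [-1, 9],
        # replacing the former free-text "Style#:(0~9)" QLineEdit.
        self.slider = QSlider(Qt.Horizontal)
        self.slider.setTickInterval(1)
        self.slider.setFocusPolicy(Qt.NoFocus)
        self.slider.setSingleStep(1)
        self.slider.setRange(-1, 9)
        self.slider.setValue(-1)

        self.value_label = QLabel("style_idx = -1")
        self.slider.valueChanged.connect(
            lambda v: self.value_label.setText(f"style_idx = {v}"))

        layout.addWidget(QLabel("Style:"))
        layout.addWidget(self.slider)
        layout.addWidget(self.value_label)

    def current_style_idx(self) -> int:
        # The toolbox forwards this value as
        # synthesize_spectrograms(texts, embeds, style_idx=int(self.ui.slider.value())).
        return int(self.slider.value())


if __name__ == "__main__":
    app = QApplication(sys.argv)
    demo = StyleDemo()
    demo.show()
    sys.exit(app.exec_())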