1
0
mirror of https://github.com/qTox/qTox.git synced 2024-03-22 14:00:36 +08:00

feat(audio): microphone voice activation

adding volume based voice detection
This commit is contained in:
Gavin Stark 2017-10-10 19:58:04 -05:00
parent 5d6ae9ae37
commit d24d4fb8ea
10 changed files with 235 additions and 4 deletions

View File

@ -161,7 +161,15 @@
* @fn void Audio::setInputGain(qreal dB)
* @brief set the input gain
*
* @param[in] dB the new input gain in dB
*
* @fn qreal Audio::getInputThreshold()
* @brief get the current input threshold
*
* @return current input threshold percentage
*
* @fn void Audio::setInputThreshold(qreal percent)
* @brief set the input threshold
*
* @param[in] percent the new input threshold percentage
*/
/**

View File

@ -72,6 +72,15 @@ public:
virtual qreal inputGain() const = 0;
virtual void setInputGain(qreal dB) = 0;
virtual qreal minInputThreshold() const = 0;
virtual void setMinInputThreshold(qreal dB) = 0;
virtual qreal maxInputThreshold() const = 0;
virtual void setMaxInputThreshold(qreal dB) = 0;
virtual qreal getInputThreshold() const = 0;
virtual void setInputThreshold(qreal percent) = 0;
virtual void reinitInput(const QString& inDevDesc) = 0;
virtual bool reinitOutput(const QString& outDevDesc) = 0;
@ -91,6 +100,8 @@ public:
virtual void playMono16Sound(const QByteArray& data) = 0;
virtual void playMono16Sound(const QString& path) = 0;
virtual void stopActive() = 0;
virtual void playAudioBuffer(uint sourceId, const int16_t* data, int samples, unsigned channels,
int sampleRate) = 0;
@ -104,6 +115,8 @@ protected:
signals:
void frameAvailable(const int16_t* pcm, size_t sample_count, uint8_t channels,
uint32_t sampling_rate);
void volumeAvailable(float value);
void startActive(qreal msec);
};
#endif // AUDIO_H

View File

@ -57,6 +57,9 @@ OpenAL::OpenAL()
, outputInitialized{false}
, minInGain{-30}
, maxInGain{30}
, minInThreshold{0.0f}
, maxInThreshold{0.4f}
, isActive{false}
{
// initialize OpenAL error stack
alGetError();
@ -67,6 +70,10 @@ OpenAL::OpenAL()
moveToThread(audioThread);
voiceTimer.setSingleShot(true);
connect(this, &Audio::startActive, &voiceTimer, static_cast<void (QTimer::*)(int)>(&QTimer::start));
connect(&voiceTimer, &QTimer::timeout, this, &Audio::stopActive);
connect(&captureTimer, &QTimer::timeout, this, &OpenAL::doCapture);
captureTimer.setInterval(AUDIO_FRAME_DURATION / 2);
captureTimer.setSingleShot(false);
@ -175,6 +182,50 @@ void OpenAL::setMaxInputGain(qreal dB)
maxInGain = dB;
}
/**
 * @brief Reads the lower bound allowed for the input activation threshold.
 *
 * The member is read while audioLock is held.
 *
 * @return minimum threshold (the stored value 0.0 corresponds to 0%)
 */
qreal OpenAL::minInputThreshold() const
{
    const QMutexLocker guard{&audioLock};
    return minInThreshold;
}
/**
 * @brief Replaces the lower bound allowed for the input activation threshold.
 *
 * The member is written while audioLock is held. The value is stored as
 * given; it is not checked against the maximum threshold.
 *
 * @param[in] percent the new minimum threshold
 *
 * @note Default is 0%; usually you don't need to alter this value.
 */
void OpenAL::setMinInputThreshold(qreal percent)
{
    const QMutexLocker guard{&audioLock};
    this->minInThreshold = percent;
}
/**
 * @brief Reads the upper bound allowed for the input activation threshold.
 *
 * The member is read while audioLock is held.
 *
 * @return maximum threshold (the stored value 0.4 corresponds to 40%)
 */
qreal OpenAL::maxInputThreshold() const
{
    const QMutexLocker guard{&audioLock};
    return maxInThreshold;
}
/**
 * @brief Replaces the upper bound allowed for the input activation threshold.
 *
 * The member is written while audioLock is held. The value is stored as
 * given; it is not checked against the minimum threshold.
 *
 * @param[in] percent the new maximum threshold
 *
 * @note Default is 40%; usually you don't need to alter this value.
 */
void OpenAL::setMaxInputThreshold(qreal percent)
{
    const QMutexLocker guard{&audioLock};
    this->maxInThreshold = percent;
}
void OpenAL::reinitInput(const QString& inDevDesc)
{
QMutexLocker locker(&audioLock);
@ -278,6 +329,7 @@ bool OpenAL::initInput(const QString& deviceName, uint32_t channels)
}
setInputGain(Settings::getInstance().getAudioInGainDecibel());
setInputThreshold(Settings::getInstance().getAudioThreshold());
qDebug() << "Opened audio input" << deviceName;
alcCaptureStart(alInDev);
@ -482,6 +534,36 @@ void OpenAL::playMono16SoundCleanup()
}
}
/**
* @brief Called by doCapture to calculate volume of the audio buffer
*
* @param[in] buf the current audio buffer
*
* @return volume in percent of max volume
*/
float OpenAL::getVolume(int16_t *buf)
{
quint32 samples = AUDIO_FRAME_SAMPLE_COUNT * AUDIO_CHANNELS;
float sum = 0.0;
for (quint32 i = 0; i < samples; i++) {
float sample = (float)buf[i] / (float)std::numeric_limits<int16_t>::max();
if (sample > 0) {
sum += sample;
} else {
sum -= sample;
}
}
return sum/samples;
}
/**
* @brief Called by voiceTimer's timeout to disable audio broadcasting
*/
void OpenAL::stopActive()
{
isActive = false;
}
/**
* @brief Called on the captureTimer events to capture audio
*/
@ -500,6 +582,19 @@ void OpenAL::doCapture()
int16_t buf[AUDIO_FRAME_SAMPLE_COUNT * AUDIO_CHANNELS];
alcCaptureSamples(alInDev, buf, AUDIO_FRAME_SAMPLE_COUNT);
float volume = getVolume(buf);
if (volume >= inputThreshold)
{
isActive = true;
emit startActive(voiceHold);
}
emit Audio::volumeAvailable(volume);
if (!isActive)
{
return;
}
for (quint32 i = 0; i < AUDIO_FRAME_SAMPLE_COUNT * AUDIO_CHANNELS; ++i) {
// gain amplification with clipping to 16-bit boundaries
int ampPCM =
@ -628,6 +723,11 @@ qreal OpenAL::inputGain() const
return gain;
}
/**
 * @brief Reads the volume level at which captured audio counts as voice.
 *
 * NOTE(review): unlike minInputThreshold()/maxInputThreshold(), this reads
 * the member without taking audioLock - confirm that is intended.
 *
 * @return the currently stored input threshold
 */
qreal OpenAL::getInputThreshold() const
{
    const qreal current = inputThreshold;
    return current;
}
qreal OpenAL::inputGainFactor() const
{
return gainFactor;
@ -638,3 +738,9 @@ void OpenAL::setInputGain(qreal dB)
gain = qBound(minInGain, dB, maxInGain);
gainFactor = qPow(10.0, (gain / 20.0));
}
/**
 * @brief Stores the volume level at which captured audio counts as voice.
 *
 * doCapture compares each buffer's volume against this value. The value is
 * stored as given; it is not bounded by the minimum/maximum threshold.
 *
 * NOTE(review): the member is written without taking audioLock - confirm
 * that is intended.
 *
 * @param[in] percent the new input threshold
 */
void OpenAL::setInputThreshold(qreal percent)
{
    this->inputThreshold = percent;
}

View File

@ -60,6 +60,15 @@ public:
qreal inputGain() const;
void setInputGain(qreal dB);
qreal minInputThreshold() const;
void setMinInputThreshold(qreal percent);
qreal maxInputThreshold() const;
void setMaxInputThreshold(qreal percent);
qreal getInputThreshold() const;
void setInputThreshold(qreal percent);
void reinitInput(const QString& inDevDesc);
bool reinitOutput(const QString& outDevDesc);
@ -78,6 +87,7 @@ public:
void stopLoop();
void playMono16Sound(const QByteArray& data);
void playMono16Sound(const QString& path);
void stopActive();
void playAudioBuffer(uint sourceId, const int16_t* data, int samples, unsigned channels,
int sampleRate);
@ -99,6 +109,7 @@ private:
virtual bool initInput(const QString& deviceName);
virtual bool initOutput(const QString& outDevDescr);
void playMono16SoundCleanup();
float getVolume(int16_t *buf);
void doCapture();
protected:
@ -120,6 +131,12 @@ protected:
qreal gainFactor;
qreal minInGain = -30;
qreal maxInGain = 30;
qreal inputThreshold;
qreal voiceHold = 250;
bool isActive = false;
QTimer voiceTimer;
qreal minInThreshold = 0.0;
qreal maxInThreshold = 0.4;
};
#endif // OPENAL_H

View File

@ -22,6 +22,9 @@ public:
virtual qreal getAudioInGainDecibel() const = 0;
virtual void setAudioInGainDecibel(qreal dB) = 0;
virtual qreal getAudioThreshold() const = 0;
virtual void setAudioThreshold(qreal percent) = 0;
virtual int getOutVolume() const = 0;
virtual void setOutVolume(int volume) = 0;
@ -41,6 +44,7 @@ public:
DECLARE_SIGNAL(audioOutDevEnabledChanged, bool enabled);
DECLARE_SIGNAL(audioInGainDecibelChanged, qreal dB);
DECLARE_SIGNAL(audioThresholdChanged, qreal dB);
DECLARE_SIGNAL(outVolumeChanged, int volume);
DECLARE_SIGNAL(audioBitrateChanged, int bitrate);
DECLARE_SIGNAL(enableTestSoundChanged, bool newValue);

View File

@ -264,6 +264,7 @@ void Settings::loadGlobal()
outDev = s.value("outDev", "").toString();
audioOutDevEnabled = s.value("audioOutDevEnabled", true).toBool();
audioInGainDecibel = s.value("inGain", 0).toReal();
audioThreshold = s.value("audioThreshold", 0).toReal();
outVolume = s.value("outVolume", 100).toInt();
audioBitrate = s.value("audioBitrate", 64).toInt();
enableBackend2 = false;
@ -566,6 +567,7 @@ void Settings::saveGlobal()
s.setValue("outDev", outDev);
s.setValue("audioOutDevEnabled", audioOutDevEnabled);
s.setValue("inGain", audioInGainDecibel);
s.setValue("audioThreshold", audioThreshold);
s.setValue("outVolume", outVolume);
s.setValue("audioBitrate", audioBitrate);
s.setValue("enableBackend2", enableBackend2);
@ -1820,6 +1822,22 @@ void Settings::setAudioInGainDecibel(qreal dB)
}
}
/**
 * @brief Reads the stored microphone activation threshold.
 *
 * The member is read while bigLock is held.
 *
 * @return the current audioThreshold setting
 */
qreal Settings::getAudioThreshold() const
{
    const QMutexLocker guard{&bigLock};
    return audioThreshold;
}
/**
 * @brief Updates the stored microphone activation threshold.
 *
 * Emits audioThresholdChanged only when the new value differs from the
 * stored one. The update and the emit both happen while bigLock is held.
 *
 * @param[in] percent the new threshold value
 */
void Settings::setAudioThreshold(qreal percent)
{
    const QMutexLocker guard{&bigLock};

    // "differs" is spelled with two ordered comparisons instead of !=
    const bool differs = (percent < audioThreshold) || (percent > audioThreshold);
    if (!differs) {
        return;
    }

    audioThreshold = percent;
    emit audioThresholdChanged(audioThreshold);
}
QString Settings::getVideoDev() const
{
QMutexLocker locker{&bigLock};

View File

@ -100,6 +100,8 @@ class Settings : public QObject, public ICoreSettings, public IAudioSettings, pu
audioInDevEnabledChanged FINAL)
Q_PROPERTY(qreal audioInGainDecibel READ getAudioInGainDecibel WRITE setAudioInGainDecibel
NOTIFY audioInGainDecibelChanged FINAL)
Q_PROPERTY(qreal audioThreshold READ getAudioThreshold WRITE setAudioThreshold
NOTIFY audioThresholdChanged FINAL)
Q_PROPERTY(QString outDev READ getOutDev WRITE setOutDev NOTIFY outDevChanged FINAL)
Q_PROPERTY(bool audioOutDevEnabled READ getAudioOutDevEnabled WRITE setAudioOutDevEnabled NOTIFY
audioOutDevEnabledChanged FINAL)
@ -349,6 +351,9 @@ public:
qreal getAudioInGainDecibel() const override;
void setAudioInGainDecibel(qreal dB) override;
qreal getAudioThreshold() const override;
void setAudioThreshold(qreal percent) override;
int getOutVolume() const override;
void setOutVolume(int volume) override;
@ -368,6 +373,7 @@ public:
SIGNAL_IMPL(Settings, audioOutDevEnabledChanged, bool enabled)
SIGNAL_IMPL(Settings, audioInGainDecibelChanged, qreal dB)
SIGNAL_IMPL(Settings, audioThresholdChanged, qreal percent)
SIGNAL_IMPL(Settings, outVolumeChanged, int volume)
SIGNAL_IMPL(Settings, audioBitrateChanged, int bitrate)
SIGNAL_IMPL(Settings, enableTestSoundChanged, bool newValue)
@ -632,6 +638,7 @@ private:
QString inDev;
bool audioInDevEnabled;
qreal audioInGainDecibel;
qreal audioThreshold;
QString outDev;
bool audioOutDevEnabled;
int outVolume;

View File

@ -84,6 +84,18 @@ AVForm::AVForm(Audio* audio, CoreAV* coreAV, CameraSource& camera,
microphoneSlider->setTracking(false);
microphoneSlider->installEventFilter(this);
audioThresholdSlider->setToolTip(tr("Use slider to set the activation volume for your"
" input device."));
audioThresholdSlider->setMinimum(audio->minInputThreshold() * 1000);
audioThresholdSlider->setMaximum(audio->maxInputThreshold() * 1000);
audioThresholdSlider->setValue(audioSettings->getAudioThreshold() * 1000);
audioThresholdSlider->setTracking(false);
audioThresholdSlider->installEventFilter(this);
connect(audio, &Audio::volumeAvailable, this, &AVForm::setVolume);
volumeDisplay->setMinimum(audio->minInputThreshold() * 1000);
volumeDisplay->setMaximum(audio->maxInputThreshold() * 1000);
fillAudioQualityComboBox();
eventsInit();
@ -149,6 +161,11 @@ void AVForm::rescanDevices()
getVideoDevices();
}
void AVForm::setVolume(float value)
{
volumeDisplay->setValue(value * 1000);
}
void AVForm::on_cbEnableBackend2_stateChanged()
{
audioSettings->setEnableBackend2(cbEnableBackend2->isChecked());
@ -554,6 +571,14 @@ void AVForm::on_microphoneSlider_valueChanged(int value)
audio->setInputGain(dB);
}
void AVForm::on_audioThresholdSlider_valueChanged(int value)
{
const qreal percent = value / 1000.0;
audioSettings->setAudioThreshold(percent);
Audio::getInstance().setInputThreshold(percent);
}
void AVForm::createVideoSurface()
{
if (camVideoSurface)

View File

@ -71,6 +71,7 @@ private slots:
void on_playbackSlider_valueChanged(int value);
void on_cbEnableTestSound_stateChanged();
void on_microphoneSlider_valueChanged(int value);
void on_audioThresholdSlider_valueChanged(int value);
void on_audioQualityComboBox_currentIndexChanged(int index);
// camera
@ -78,6 +79,7 @@ private slots:
void on_videoModescomboBox_currentIndexChanged(int index);
void rescanDevices();
void setVolume(float value);
void on_cbEnableBackend2_stateChanged();

View File

@ -101,6 +101,37 @@
</property>
</widget>
</item>
<item row="4" column="0">
<widget class="QLabel" name="audioThresholdLabel">
<property name="text">
<string>Threshold</string>
</property>
</widget>
</item>
<item row="4" column="1" colspan="2">
<widget class="QSlider" name="audioThresholdSlider">
<property name="orientation">
<enum>Qt::Horizontal</enum>
</property>
</widget>
</item>
<item row="5" column="0">
<widget class="QLabel" name="volumeDisplayLabel">
<property name="text">
<string>Volume</string>
</property>
</widget>
</item>
<item row="5" column="1" colspan="2">
<widget class="QProgressBar" name="volumeDisplay">
<property name="orientation">
<enum>Qt::Horizontal</enum>
</property>
<property name="textVisible">
<bool>false</bool>
</property>
</widget>
</item>
<item row="0" column="0">
<widget class="QLabel" name="outDevLabel">
<property name="text">
@ -108,14 +139,14 @@
</property>
</widget>
</item>
<item row="4" column="0">
<item row="6" column="0">
<widget class="QLabel" name="audioQualityLabel">
<property name="text">
<string>Audio quality</string>
</property>
</widget>
</item>
<item row="4" column="1" colspan="2">
<item row="6" column="1" colspan="2">
<widget class="QComboBox" name="audioQualityComboBox">
<property name="sizePolicy">
<sizepolicy hsizetype="Expanding" vsizetype="Fixed">
@ -128,7 +159,7 @@
</property>
</widget>
</item>
<item row="5" column="0" colspan="3">
<item row="7" column="0" colspan="3">
<widget class="QCheckBox" name="cbEnableBackend2">
<property name="enabled">
<bool>true</bool>