Added tolerance to VAD and lowered Jitter buffer default value

This commit is contained in:
mannol 2014-06-21 19:04:00 +02:00
parent d413fef66f
commit 5c0b6c8117
5 changed files with 37 additions and 36 deletions

View File

@ -36,6 +36,8 @@
#include "rtp.h"
#include "codec.h"
/* Smallest jitter buffer capacity; create_queue() raises smaller requests to this value. */
const uint16_t min_jbuf_size = 10;
/* Number of packets already queued at which queue() marks the buffer ready to be dequeued. */
const uint16_t min_readiness_idx = 6;
int empty_queue(JitterBuffer *q)
{
@ -66,7 +68,7 @@ JitterBuffer *create_queue(int capacity)
}
q->size = 0;
q->capacity = capacity;
q->capacity = capacity >= min_jbuf_size ? capacity : min_jbuf_size;
q->front = 0;
q->rear = -1;
q->queue_ready = 0;
@ -142,14 +144,12 @@ void queue(JitterBuffer *q, RTPMessage *pk)
empty_queue(q);
}
if (q->size > 8)
q->queue_ready = 1;
if (q->size >= min_readiness_idx) q->queue_ready = 1;
++q->size;
++q->rear;
if (q->rear == q->capacity)
q->rear = 0;
if (q->rear == q->capacity) q->rear = 0;
q->queue[q->rear] = pk;
@ -177,8 +177,7 @@ void queue(JitterBuffer *q, RTPMessage *pk)
a -= 1;
if (a < 0)
a += q->capacity;
if (a < 0) a += q->capacity;
}
}
@ -266,6 +265,7 @@ CodecState *codec_init_session ( uint32_t audio_bitrate,
uint16_t audio_frame_duration,
uint32_t audio_sample_rate,
uint32_t audio_channels,
uint32_t audio_VAD_tolerance_ms,
uint16_t video_width,
uint16_t video_height,
uint32_t video_bitrate )
@ -294,8 +294,9 @@ CodecState *codec_init_session ( uint32_t audio_bitrate,
return NULL;
}
/* audio_frame_duration is in milliseconds. Divide in floating point: the
 * integer division used before truncated every duration below 1000 ms to
 * 0 s, which made samples_per_frame 0. */
float frame_duration_sec = audio_frame_duration / 1000.0f;
retu->samples_per_frame = audio_sample_rate * frame_duration_sec;
/* Convert the VAD tolerance from milliseconds to whole frames. The extra
 * audio_frame_duration test keeps a zero frame duration from dividing by zero.
 * NOTE(review): the fallback stores audio_frame_duration (a value in ms) into
 * a field counted in frames - presumably a small frame count such as 1 was
 * intended; left unchanged, confirm with the author. */
retu->EVAD_tolerance = ( audio_frame_duration && audio_VAD_tolerance_ms > audio_frame_duration ) ?
audio_VAD_tolerance_ms / audio_frame_duration : audio_frame_duration;
return retu;
}
@ -308,10 +309,6 @@ void codec_terminate_session ( CodecState *cs )
if ( cs->audio_decoder )
opus_decoder_destroy(cs->audio_decoder);
/* TODO: Terminate video
* Do what?
*/
if ( cs->capabilities & v_decoding )
vpx_codec_destroy(&cs->v_decoder);
@ -319,27 +316,24 @@ void codec_terminate_session ( CodecState *cs )
vpx_codec_destroy(&cs->v_encoder);
}
/**
 * Sum of the squared samples of a PCM frame.
 *
 * @param n  Samples to accumulate; only read.
 * @param k  Number of samples available in n.
 * @return   n[0]*n[0] + ... + n[k-1]*n[k-1] as a float; 0 when k is 0 or n is NULL.
 *
 * Declared without `inline` on purpose: a file-scope `inline` definition that
 * is neither `static` nor `extern` provides no external definition in C99/C11,
 * so any call the compiler chooses not to inline fails at link time.
 */
float calculate_sum_sq (int16_t* n, uint16_t k)
{
    float result = 0;
    uint16_t i = 0;

    if ( !n ) return result;

    /* The int16_t operands are promoted to int before multiplying, so the
     * largest product (32768 * 32768) is computed without wrapping. */
    for ( ; i < k; i ++) result += (float) (n[i] * n[i]);

    return result;
}
int calculate_VAD_from_PCM( int16_t* PCM, size_t frame_size, float energy)
int energy_VAD(CodecState* cs, int16_t* PCM, uint16_t frame_size, float energy)
{
// int i = 0;
// for (; i < frame_size; i ++) {
LOGGER_DEBUG("Frame size: %d ref: %f", frame_size, energy);
float frame_energy = sqrt(calculate_sum_sq(PCM, frame_size)) / frame_size;
LOGGER_DEBUG("Frame energy calculated: %f", frame_energy);
if ( frame_energy > energy) return 1;
// }
if ( frame_energy > energy) {
cs->EVAD_tolerance_cr = cs->EVAD_tolerance; /* Reset counter */
return 1;
}
if ( cs->EVAD_tolerance_cr ) { cs->EVAD_tolerance_cr --; return 1; }
return 0;
}

View File

@ -46,7 +46,7 @@ typedef enum _Capabilities {
v_decoding = 1 << 3
} Capabilities;
extern const uint16_t min_jbuf_size;
typedef struct _CodecState {
@ -68,7 +68,8 @@ typedef struct _CodecState {
uint64_t capabilities; /* supports*/
/* Voice activity detection */
float samples_per_frame;
uint32_t EVAD_tolerance; /* In frames */
uint32_t EVAD_tolerance_cr;
} CodecState;
@ -94,6 +95,7 @@ CodecState *codec_init_session ( uint32_t audio_bitrate,
uint16_t audio_frame_duration,
uint32_t audio_sample_rate,
uint32_t audio_channels,
uint32_t audio_VAD_tolerance_ms,
uint16_t video_width,
uint16_t video_height,
uint32_t video_bitrate);
@ -101,7 +103,7 @@ CodecState *codec_init_session ( uint32_t audio_bitrate,
void codec_terminate_session(CodecState *cs);
/* return 1 if has voice, 0 if not */
int calculate_VAD_from_PCM(int16_t* PCM, size_t frame_size, float energy);
/*
 * Calculate the energy of a PCM frame and return 1 if it has voice, 0 if not.
 * A frame above `energy` re-arms the tolerance counter in cs with
 * cs->EVAD_tolerance; while that counter is non-zero, frames at or below
 * `energy` decrement it and are still reported as voice.
 */
int energy_VAD(CodecState *cs, int16_t* PCM, uint16_t frame_size, float energy);
#endif /* _CODEC_H_ */

View File

@ -257,7 +257,7 @@ iterator = iterator + 2 + _value_size; /* set iterator at new header or end_byte
_it += 3; /* place it at the field value beginning */
size_max -= 3;
switch ( _size ) { /* Compare the size of the hardcoded values ( vary fast and convenient ) */
switch ( _size ) { /* Compare the size of the hardcoded values ( very convenient ) */
case 4: { /* INFO header */
if ON_HEADER ( _it, size_max, msg->info, INFO_FIELD, 4 )

View File

@ -77,7 +77,9 @@ const ToxAvCodecSettings av_DefaultSettings = {
20,
48000,
1,
20
600,
10
};
@ -330,6 +332,7 @@ int toxav_prepare_transmission ( ToxAv *av, int32_t call_index, ToxAvCodecSettin
codec_settings->audio_frame_duration,
codec_settings->audio_sample_rate,
codec_settings->audio_channels,
codec_settings->audio_VAD_tolerance,
codec_settings->video_width,
codec_settings->video_height,
codec_settings->video_bitrate);
@ -733,7 +736,8 @@ inline__ Tox *toxav_get_tox(ToxAv *av)
return (Tox *)av->messenger;
}
/**
 * Check a PCM frame of the given call for voice activity.
 *
 * Looks up the codec state of call `call_index` and forwards the frame to
 * energy_VAD().
 *
 * @param av          A/V session owning the call.
 * @param call_index  Index into av->calls.
 * @param PCM         Samples of the frame.
 * @param frame_size  Number of samples in PCM.
 * @param ref_energy  Reference energy passed on to energy_VAD().
 * @return            The result of energy_VAD() for the call, or
 *                    ErrorInvalidCodecState when av is NULL, call_index is
 *                    negative, or the call has no codec state.
 *
 * NOTE(review): ErrorInvalidCodecState is presumably non-zero, so callers
 * that only test the result for truth would read an error as activity -
 * confirm at the call sites.
 */
int toxav_has_activity(ToxAv* av, int32_t call_index, int16_t* PCM, uint16_t frame_size, float ref_energy)
{
    /* NOTE(review): the upper bound of call_index cannot be checked with the
     * names visible here; confirm callers only pass indices of existing calls. */
    if ( !av || call_index < 0 ) return ErrorInvalidCodecState;

    if ( !av->calls[call_index].cs ) return ErrorInvalidCodecState;

    return energy_VAD(av->calls[call_index].cs, PCM, frame_size, ref_energy);
}

View File

@ -118,6 +118,7 @@ typedef struct _ToxAvCodecSettings {
uint16_t audio_frame_duration; /* In ms */
uint32_t audio_sample_rate; /* In Hz */
uint32_t audio_channels;
uint32_t audio_VAD_tolerance; /* In ms */
uint32_t jbuf_capacity; /* Size of jitter buffer */
} ToxAvCodecSettings;
@ -374,7 +375,7 @@ int toxav_set_video_queue_limit ( ToxAv *av, int32_t call_index, uint64_t limit
Tox *toxav_get_tox(ToxAv *av);
int toxav_has_activity ( int16_t* PCM, uint16_t frame_size, float ref_energy );
/*
 * Run voice activity detection on a PCM frame using the codec state of call
 * call_index. Returns the result of energy_VAD() for that call, or
 * ErrorInvalidCodecState when the call has no codec state.
 */
int toxav_has_activity ( ToxAv *av, int32_t call_index, int16_t* PCM, uint16_t frame_size, float ref_energy );
#ifdef __cplusplus
}