Added tolerance to VAD and lowered Jitter buffer default value

This commit is contained in:
mannol 2014-06-21 19:04:00 +02:00
parent d413fef66f
commit 5c0b6c8117
5 changed files with 37 additions and 36 deletions

View File

@ -36,6 +36,8 @@
#include "rtp.h" #include "rtp.h"
#include "codec.h" #include "codec.h"
const uint16_t min_jbuf_size = 10;
const uint16_t min_readiness_idx = 6; /* when is buffer ready to dqq */
int empty_queue(JitterBuffer *q) int empty_queue(JitterBuffer *q)
{ {
@ -66,7 +68,7 @@ JitterBuffer *create_queue(int capacity)
} }
q->size = 0; q->size = 0;
q->capacity = capacity; q->capacity = capacity >= min_jbuf_size ? capacity : min_jbuf_size;
q->front = 0; q->front = 0;
q->rear = -1; q->rear = -1;
q->queue_ready = 0; q->queue_ready = 0;
@ -142,14 +144,12 @@ void queue(JitterBuffer *q, RTPMessage *pk)
empty_queue(q); empty_queue(q);
} }
if (q->size > 8) if (q->size >= min_readiness_idx) q->queue_ready = 1;
q->queue_ready = 1;
++q->size; ++q->size;
++q->rear; ++q->rear;
if (q->rear == q->capacity) if (q->rear == q->capacity) q->rear = 0;
q->rear = 0;
q->queue[q->rear] = pk; q->queue[q->rear] = pk;
@ -177,8 +177,7 @@ void queue(JitterBuffer *q, RTPMessage *pk)
a -= 1; a -= 1;
if (a < 0) if (a < 0) a += q->capacity;
a += q->capacity;
} }
} }
@ -266,6 +265,7 @@ CodecState *codec_init_session ( uint32_t audio_bitrate,
uint16_t audio_frame_duration, uint16_t audio_frame_duration,
uint32_t audio_sample_rate, uint32_t audio_sample_rate,
uint32_t audio_channels, uint32_t audio_channels,
uint32_t audio_VAD_tolerance_ms,
uint16_t video_width, uint16_t video_width,
uint16_t video_height, uint16_t video_height,
uint32_t video_bitrate ) uint32_t video_bitrate )
@ -294,8 +294,9 @@ CodecState *codec_init_session ( uint32_t audio_bitrate,
return NULL; return NULL;
} }
float frame_duration_sec = audio_frame_duration / 1000;
retu->samples_per_frame = audio_sample_rate * frame_duration_sec; retu->EVAD_tolerance = audio_VAD_tolerance_ms > audio_frame_duration ?
audio_VAD_tolerance_ms / audio_frame_duration : audio_frame_duration;
return retu; return retu;
} }
@ -308,10 +309,6 @@ void codec_terminate_session ( CodecState *cs )
if ( cs->audio_decoder ) if ( cs->audio_decoder )
opus_decoder_destroy(cs->audio_decoder); opus_decoder_destroy(cs->audio_decoder);
/* TODO: Terminate video
* Do what?
*/
if ( cs->capabilities & v_decoding ) if ( cs->capabilities & v_decoding )
vpx_codec_destroy(&cs->v_decoder); vpx_codec_destroy(&cs->v_decoder);
@ -319,27 +316,24 @@ void codec_terminate_session ( CodecState *cs )
vpx_codec_destroy(&cs->v_encoder); vpx_codec_destroy(&cs->v_encoder);
} }
inline float calculate_sum_sq (int16_t* n, size_t k) inline float calculate_sum_sq (int16_t* n, uint16_t k)
{ {
float result = 0; float result = 0;
size_t i = 0; uint16_t i = 0;
for ( ; i < k; i ++) {
result += (float) (n[i] * n[i]);
}
for ( ; i < k; i ++) result += (float) (n[i] * n[i]);
return result; return result;
} }
int calculate_VAD_from_PCM( int16_t* PCM, size_t frame_size, float energy) int energy_VAD(CodecState* cs, int16_t* PCM, uint16_t frame_size, float energy)
{ {
// int i = 0;
// for (; i < frame_size; i ++) {
LOGGER_DEBUG("Frame size: %d ref: %f", frame_size, energy);
float frame_energy = sqrt(calculate_sum_sq(PCM, frame_size)) / frame_size; float frame_energy = sqrt(calculate_sum_sq(PCM, frame_size)) / frame_size;
LOGGER_DEBUG("Frame energy calculated: %f", frame_energy);
if ( frame_energy > energy) return 1;
// }
if ( frame_energy > energy) {
cs->EVAD_tolerance_cr = cs->EVAD_tolerance; /* Reset counter */
return 1;
}
if ( cs->EVAD_tolerance_cr ) { cs->EVAD_tolerance_cr --; return 1; }
return 0; return 0;
} }

View File

@ -46,7 +46,7 @@ typedef enum _Capabilities {
v_decoding = 1 << 3 v_decoding = 1 << 3
} Capabilities; } Capabilities;
extern const uint16_t min_jbuf_size;
typedef struct _CodecState { typedef struct _CodecState {
@ -68,7 +68,8 @@ typedef struct _CodecState {
uint64_t capabilities; /* supports*/ uint64_t capabilities; /* supports*/
/* Voice activity detection */ /* Voice activity detection */
float samples_per_frame; uint32_t EVAD_tolerance; /* In frames */
uint32_t EVAD_tolerance_cr;
} CodecState; } CodecState;
@ -94,6 +95,7 @@ CodecState *codec_init_session ( uint32_t audio_bitrate,
uint16_t audio_frame_duration, uint16_t audio_frame_duration,
uint32_t audio_sample_rate, uint32_t audio_sample_rate,
uint32_t audio_channels, uint32_t audio_channels,
uint32_t audio_VAD_tolerance_ms,
uint16_t video_width, uint16_t video_width,
uint16_t video_height, uint16_t video_height,
uint32_t video_bitrate); uint32_t video_bitrate);
@ -101,7 +103,7 @@ CodecState *codec_init_session ( uint32_t audio_bitrate,
void codec_terminate_session(CodecState *cs); void codec_terminate_session(CodecState *cs);
/* return 1 if has voice, 0 if not */ /* Calculate energy and return 1 if has voice, 0 if not */
int calculate_VAD_from_PCM(int16_t* PCM, size_t frame_size, float energy); int energy_VAD(CodecState *cs, int16_t* PCM, uint16_t frame_size, float energy);
#endif /* _CODEC_H_ */ #endif /* _CODEC_H_ */

View File

@ -257,7 +257,7 @@ iterator = iterator + 2 + _value_size; /* set iterator at new header or end_byte
_it += 3; /* place it at the field value beginning */ _it += 3; /* place it at the field value beginning */
size_max -= 3; size_max -= 3;
switch ( _size ) { /* Compare the size of the hardcoded values ( vary fast and convenient ) */ switch ( _size ) { /* Compare the size of the hardcoded values ( very convenient ) */
case 4: { /* INFO header */ case 4: { /* INFO header */
if ON_HEADER ( _it, size_max, msg->info, INFO_FIELD, 4 ) if ON_HEADER ( _it, size_max, msg->info, INFO_FIELD, 4 )

View File

@ -77,7 +77,9 @@ const ToxAvCodecSettings av_DefaultSettings = {
20, 20,
48000, 48000,
1, 1,
20 600,
10
}; };
@ -330,6 +332,7 @@ int toxav_prepare_transmission ( ToxAv *av, int32_t call_index, ToxAvCodecSettin
codec_settings->audio_frame_duration, codec_settings->audio_frame_duration,
codec_settings->audio_sample_rate, codec_settings->audio_sample_rate,
codec_settings->audio_channels, codec_settings->audio_channels,
codec_settings->audio_VAD_tolerance,
codec_settings->video_width, codec_settings->video_width,
codec_settings->video_height, codec_settings->video_height,
codec_settings->video_bitrate); codec_settings->video_bitrate);
@ -733,7 +736,8 @@ inline__ Tox *toxav_get_tox(ToxAv *av)
return (Tox *)av->messenger; return (Tox *)av->messenger;
} }
int toxav_has_activity(int16_t* PCM, uint16_t frame_size, float ref_energy) int toxav_has_activity(ToxAv* av, int32_t call_index, int16_t* PCM, uint16_t frame_size, float ref_energy)
{ {
return calculate_VAD_from_PCM(PCM, frame_size, ref_energy); if ( !av->calls[call_index].cs ) return ErrorInvalidCodecState;
return energy_VAD(av->calls[call_index].cs, PCM, frame_size, ref_energy);
} }

View File

@ -118,6 +118,7 @@ typedef struct _ToxAvCodecSettings {
uint16_t audio_frame_duration; /* In ms */ uint16_t audio_frame_duration; /* In ms */
uint32_t audio_sample_rate; /* In Hz */ uint32_t audio_sample_rate; /* In Hz */
uint32_t audio_channels; uint32_t audio_channels;
uint32_t audio_VAD_tolerance; /* In ms */
uint32_t jbuf_capacity; /* Size of jitter buffer */ uint32_t jbuf_capacity; /* Size of jitter buffer */
} ToxAvCodecSettings; } ToxAvCodecSettings;
@ -374,7 +375,7 @@ int toxav_set_video_queue_limit ( ToxAv *av, int32_t call_index, uint64_t limit
Tox *toxav_get_tox(ToxAv *av); Tox *toxav_get_tox(ToxAv *av);
int toxav_has_activity ( int16_t* PCM, uint16_t frame_size, float ref_energy ); int toxav_has_activity ( ToxAv *av, int32_t call_index, int16_t* PCM, uint16_t frame_size, float ref_energy );
#ifdef __cplusplus #ifdef __cplusplus
} }