Added tolerance to VAD and lowered Jitter buffer default value

2024-03-22 13:30:51 +08:00 · 2014-06-21 19:04:00 +02:00 · 2014-06-21 19:04:00 +02:00 · 5c0b6c8117
commit 5c0b6c8117
parent d413fef66f
5 changed files with 37 additions and 36 deletions
--- a/toxav/codec.c
+++ b/toxav/codec.c
@ -36,6 +36,8 @@
 #include "rtp.h"
 #include "codec.h"

+const uint16_t min_jbuf_size = 10;
+const uint16_t min_readiness_idx = 6; /* queue size at which the buffer is marked ready for dequeuing */

 int empty_queue(JitterBuffer *q)
 {
@ -66,7 +68,7 @@ JitterBuffer *create_queue(int capacity)
    }

    q->size = 0;
-    q->capacity = capacity;
+    q->capacity = capacity >= min_jbuf_size ? capacity : min_jbuf_size;
    q->front = 0;
    q->rear = -1;
    q->queue_ready = 0;
@ -142,14 +144,12 @@ void queue(JitterBuffer *q, RTPMessage *pk)
        empty_queue(q);
    }

-    if (q->size > 8)
-        q->queue_ready = 1;
+    if (q->size >= min_readiness_idx) q->queue_ready = 1;

    ++q->size;
    ++q->rear;

-    if (q->rear == q->capacity)
-        q->rear = 0;
+    if (q->rear == q->capacity) q->rear = 0;

    q->queue[q->rear] = pk;

@ -177,8 +177,7 @@ void queue(JitterBuffer *q, RTPMessage *pk)

        a -= 1;

-        if (a < 0)
-            a += q->capacity;
+        if (a < 0) a += q->capacity;
    }
 }

@ -266,6 +265,7 @@ CodecState *codec_init_session ( uint32_t audio_bitrate,
                                 uint16_t audio_frame_duration,
                                 uint32_t audio_sample_rate,
                                 uint32_t audio_channels,
+                                 uint32_t audio_VAD_tolerance_ms,
                                 uint16_t video_width,
                                 uint16_t video_height,
                                 uint32_t video_bitrate )
@ -294,8 +294,9 @@ CodecState *codec_init_session ( uint32_t audio_bitrate,
        return NULL;
    }
    
-    float frame_duration_sec = audio_frame_duration / 1000;
-    retu->samples_per_frame = audio_sample_rate * frame_duration_sec;    
+    
+    retu->EVAD_tolerance = audio_VAD_tolerance_ms > audio_frame_duration ? 
+            audio_VAD_tolerance_ms / audio_frame_duration : audio_frame_duration;
    
    return retu;
 }
@ -308,10 +309,6 @@ void codec_terminate_session ( CodecState *cs )
    if ( cs->audio_decoder )
        opus_decoder_destroy(cs->audio_decoder);

-
-    /* TODO: Terminate video
-     *           Do what?
-     */
    if ( cs->capabilities & v_decoding )
        vpx_codec_destroy(&cs->v_decoder);

@ -319,27 +316,24 @@ void codec_terminate_session ( CodecState *cs )
        vpx_codec_destroy(&cs->v_encoder);
 }

-inline float calculate_sum_sq (int16_t* n, size_t k)
+inline float calculate_sum_sq (int16_t* n, uint16_t k)
 {
    float result = 0;
-    size_t i = 0;
-    
-    for ( ; i < k; i ++) {
-        result += (float) (n[i] * n[i]);
-    }
+    uint16_t i = 0;
    
+    for ( ; i < k; i ++) result += (float) (n[i] * n[i]);
    return result;
 }

-int calculate_VAD_from_PCM( int16_t* PCM, size_t frame_size, float energy)
+int energy_VAD(CodecState* cs, int16_t* PCM, uint16_t frame_size, float energy)
 {
-//     int i = 0;
-//     for (; i < frame_size; i ++) {
-    LOGGER_DEBUG("Frame size: %d ref: %f", frame_size, energy);
    float frame_energy = sqrt(calculate_sum_sq(PCM, frame_size)) / frame_size;
-    LOGGER_DEBUG("Frame energy calculated: %f", frame_energy);
-    if ( frame_energy > energy) return 1;
-//     }
-
+    
+    if ( frame_energy > energy) { 
+        cs->EVAD_tolerance_cr = cs->EVAD_tolerance; /* Reset counter */
+        return 1;
+    }
+    
+    if ( cs->EVAD_tolerance_cr ) { cs->EVAD_tolerance_cr --; return 1; }    
    return 0;
 }
--- a/toxav/codec.h
+++ b/toxav/codec.h
@ -46,7 +46,7 @@ typedef enum _Capabilities {
    v_decoding = 1 << 3
 } Capabilities;

-
+extern const uint16_t min_jbuf_size;

 typedef struct _CodecState {

@ -68,7 +68,8 @@ typedef struct _CodecState {
    uint64_t capabilities; /* supports*/
    
    /* Voice activity detection */
-    float samples_per_frame;
+    uint32_t EVAD_tolerance; /* In frames */
+    uint32_t EVAD_tolerance_cr;
 } CodecState;


@ -94,6 +95,7 @@ CodecState *codec_init_session ( uint32_t audio_bitrate,
                                 uint16_t audio_frame_duration,
                                 uint32_t audio_sample_rate,
                                 uint32_t audio_channels,
+                                 uint32_t audio_VAD_tolerance_ms,
                                 uint16_t video_width,
                                 uint16_t video_height,
                                 uint32_t video_bitrate);
@ -101,7 +103,7 @@ CodecState *codec_init_session ( uint32_t audio_bitrate,
 void codec_terminate_session(CodecState *cs);


-/* return 1 if has voice, 0 if not */
-int calculate_VAD_from_PCM(int16_t* PCM, size_t frame_size, float energy);
+/* Calculate frame energy; return 1 if voice is detected (or the tolerance countdown is still running), 0 otherwise */
+int energy_VAD(CodecState *cs, int16_t* PCM, uint16_t frame_size, float energy);

 #endif /* _CODEC_H_ */
--- a/toxav/msi.c
+++ b/toxav/msi.c
@ -257,7 +257,7 @@ iterator = iterator + 2 + _value_size; /* set iterator at new header or end_byte
            _it += 3; /* place it at the field value beginning */
            size_max -= 3;

-            switch ( _size ) { /* Compare the size of the hardcoded values ( vary fast and convenient ) */
+            switch ( _size ) { /* Compare the size of the hardcoded values ( very convenient ) */

                case 4: { /* INFO header */
                    if ON_HEADER ( _it, size_max, msg->info, INFO_FIELD, 4 )
--- a/toxav/toxav.c
+++ b/toxav/toxav.c
@ -77,7 +77,9 @@ const ToxAvCodecSettings av_DefaultSettings = {
    20,
    48000,
    1,
-    20
+    600,
+    
+    10
 };


@ -330,6 +332,7 @@ int toxav_prepare_transmission ( ToxAv *av, int32_t call_index, ToxAvCodecSettin
                                  codec_settings->audio_frame_duration,
                                  codec_settings->audio_sample_rate,
                                  codec_settings->audio_channels,
+                                  codec_settings->audio_VAD_tolerance,
                                  codec_settings->video_width,
                                  codec_settings->video_height,
                                  codec_settings->video_bitrate);
@ -733,7 +736,8 @@ inline__ Tox *toxav_get_tox(ToxAv *av)
    return (Tox *)av->messenger;
 }

-int toxav_has_activity(int16_t* PCM, uint16_t frame_size, float ref_energy)
+int toxav_has_activity(ToxAv* av, int32_t call_index, int16_t* PCM, uint16_t frame_size, float ref_energy)
 {
-    return calculate_VAD_from_PCM(PCM, frame_size, ref_energy);
+    if ( !av->calls[call_index].cs ) return ErrorInvalidCodecState;
+    return energy_VAD(av->calls[call_index].cs, PCM, frame_size, ref_energy);
 }
--- a/toxav/toxav.h
+++ b/toxav/toxav.h
@ -118,6 +118,7 @@ typedef struct _ToxAvCodecSettings {
    uint16_t audio_frame_duration; /* In ms */
    uint32_t audio_sample_rate; /* In Hz */
    uint32_t audio_channels;
+    uint32_t audio_VAD_tolerance; /* In ms */

    uint32_t jbuf_capacity; /* Size of jitter buffer */
 } ToxAvCodecSettings;
@ -374,7 +375,7 @@ int toxav_set_video_queue_limit ( ToxAv *av, int32_t call_index, uint64_t limit

 Tox *toxav_get_tox(ToxAv *av);

-int toxav_has_activity ( int16_t* PCM, uint16_t frame_size, float ref_energy );
+int toxav_has_activity ( ToxAv *av, int32_t call_index, int16_t* PCM, uint16_t frame_size, float ref_energy );

 #ifdef __cplusplus
 }