/** codec.c
*
* Audio and video codec initialization, encoding/decoding and playback
*
* Copyright (C) 2013 Tox project All Rights Reserved.
*
* This file is part of Tox.
*
* Tox is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Tox is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Tox. If not, see <http://www.gnu.org/licenses/>.
*
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif /* HAVE_CONFIG_H */
#include "../toxcore/logger.h"
#include "../toxcore/util.h"
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <math.h>
#include <assert.h>
#include <time.h>
#include "msi.h"
#include "rtp.h"
#include "codec.h"
#define DEFAULT_JBUF 6
/* Good quality encode. */
#define MAX_ENCODE_TIME_US VPX_DL_GOOD_QUALITY
#define MAX_DECODE_TIME_US 0
// TODO this has to be exchanged in msi
#define MAX_VIDEOFRAME_SIZE 0x40000 /* 256KiB */
#define VIDEOFRAME_PIECE_SIZE 0x500 /* 1.25 KiB*/
#define VIDEOFRAME_HEADER_SIZE 0x2
/* FIXME: Might not be enough */
#define VIDEO_DECODE_BUFFER_SIZE 20
#define ARRAY(TYPE__) struct { uint16_t size; TYPE__ data[]; }
typedef ARRAY(uint8_t) Payload;
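/* Ring buffer of assembled video payloads waiting to be decoded. One slot is
* kept empty so that a full buffer can be told apart from an empty one.
*/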
typedef struct {
uint16_t size; /* Max size */
uint16_t start;
uint16_t end;
Payload **packets;
} PayloadBuffer;
static bool buffer_full(const PayloadBuffer *b)
{
return (b->end + 1) % b->size == b->start;
}
static bool buffer_empty(const PayloadBuffer *b)
{
return b->end == b->start;
}
static void buffer_write(PayloadBuffer *b, Payload *p)
{
b->packets[b->end] = p;
b->end = (b->end + 1) % b->size;
if (b->end == b->start) b->start = (b->start + 1) % b->size; /* full, overwrite */
}
static void buffer_read(PayloadBuffer *b, Payload **p)
{
*p = b->packets[b->start];
b->start = (b->start + 1) % b->size;
}
static void buffer_clear(PayloadBuffer *b)
{
while (!buffer_empty(b)) {
Payload *p;
buffer_read(b, &p);
free(p);
}
}
static PayloadBuffer *buffer_new(int size)
{
PayloadBuffer *buf = calloc(sizeof(PayloadBuffer), 1);
if (!buf) return NULL;
buf->size = size + 1; /* include empty elem */
if (!(buf->packets = calloc(buf->size, sizeof(Payload *)))) {
free(buf);
return NULL;
}
return buf;
}
static void buffer_free(PayloadBuffer *b)
{
if (b) {
buffer_clear(b);
free(b->packets);
free(b);
}
}
/* JITTER BUFFER WORK */
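/* The jitter buffer reorders incoming audio RTP messages by sequence number.
* Messages are stored at (sequence number % size); reading advances the bottom
* of the window and reports a packet as lost once the gap between top and
* bottom exceeds the configured capacity.
*/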
typedef struct {
RTPMessage **queue;
uint32_t size;
uint32_t capacity;
uint16_t bottom;
uint16_t top;
} JitterBuffer;
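/* The backing array is sized to the smallest power of two greater than
* 4 * capacity so that indexing by sequence number wraps cleanly.
*/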
static JitterBuffer *jbuf_new(uint32_t capacity)
{
unsigned int size = 1;
while (size <= (capacity * 4)) {
size *= 2;
}
JitterBuffer *q;
if ( !(q = calloc(sizeof(JitterBuffer), 1)) ) return NULL;
if (!(q->queue = calloc(sizeof(RTPMessage *), size))) {
free(q);
return NULL;
}
q->size = size;
q->capacity = capacity;
return q;
}
static void jbuf_clear(JitterBuffer *q)
{
for (; q->bottom != q->top; ++q->bottom) {
if (q->queue[q->bottom % q->size]) {
rtp_free_msg(NULL, q->queue[q->bottom % q->size]);
q->queue[q->bottom % q->size] = NULL;
}
}
}
static void jbuf_free(JitterBuffer *q)
{
if (!q) return;
jbuf_clear(q);
free(q->queue);
free(q);
}
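/* Insert a message at the slot given by its sequence number. If the sequence
* number falls outside the current window the buffer is flushed and restarted
* around it. Returns 0 on success, -1 if the slot is already occupied
* (duplicate packet); on -1 the caller keeps ownership of the message.
*/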
static int jbuf_write(JitterBuffer *q, RTPMessage *m)
{
uint16_t sequnum = m->header->sequnum;
unsigned int num = sequnum % q->size;
if ((uint32_t)(sequnum - q->bottom) > q->size) {
jbuf_clear(q);
q->bottom = sequnum - q->capacity;
q->queue[num] = m;
q->top = sequnum + 1;
return 0;
}
if (q->queue[num])
return -1;
q->queue[num] = m;
if ((sequnum - q->bottom) >= (q->top - q->bottom))
q->top = sequnum + 1;
return 0;
}
/* Success is 0 when there is nothing to dequeue,
* 1 when there's a good packet,
* 2 when there's a lost packet */
static RTPMessage *jbuf_read(JitterBuffer *q, int32_t *success)
{
if (q->top == q->bottom) {
*success = 0;
return NULL;
}
unsigned int num = q->bottom % q->size;
if (q->queue[num]) {
RTPMessage *ret = q->queue[num];
q->queue[num] = NULL;
++q->bottom;
*success = 1;
return ret;
}
if ((uint32_t)(q->top - q->bottom) > q->capacity) {
++q->bottom;
*success = 2;
return NULL;
}
*success = 0;
return NULL;
}
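/* Map an Opus bandwidth constant to its sampling rate in Hz, or -1 for an
* unknown value (including OPUS_INVALID_PACKET from a malformed packet).
*/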
static int convert_bw_to_sampling_rate(int bw)
{
switch(bw)
{
case OPUS_BANDWIDTH_NARROWBAND: return 8000;
case OPUS_BANDWIDTH_MEDIUMBAND: return 12000;
case OPUS_BANDWIDTH_WIDEBAND: return 16000;
case OPUS_BANDWIDTH_SUPERWIDEBAND: return 24000;
case OPUS_BANDWIDTH_FULLBAND: return 48000;
default: return -1;
}
}
/* PUBLIC */
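/* Decode everything that has been queued so far: audio messages are pulled
* from the jitter buffer and decoded with Opus (with loss concealment for
* missing packets), complete video frames are decoded with vpx, and the
* results are handed to the registered ToxAV audio/video callbacks.
*/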
void cs_do(CSSession *cs)
{
/* The codec session should always be protected by the call mutex,
* so there is no need to check cs for validity.
*/
if (!cs)
return;
Payload *p;
int rc;
int success = 0;
pthread_mutex_lock(cs->queue_mutex);
RTPMessage *msg;
uint16_t fsize = 5760; /* Max frame size for 48 kHz */
int16_t tmp[fsize * 2];
while ((msg = jbuf_read(cs->j_buf, &success)) || success == 2) {
pthread_mutex_unlock(cs->queue_mutex);
if (success == 2) {
rc = opus_decode(cs->audio_decoder, 0, 0, tmp, fsize, 1);
} else {
/* Read the framing parameters from the packet before decoding;
* this also verifies that it is a well-formed Opus packet.
*/
rc = convert_bw_to_sampling_rate(opus_packet_get_bandwidth(msg->data));
if (rc != -1) {
cs->last_packet_sampling_rate = rc;
cs->last_pack_channels = opus_packet_get_nb_channels(msg->data);
cs->last_packet_frame_duration =
( opus_packet_get_samples_per_frame(msg->data, cs->last_packet_sampling_rate) * 1000 )
/ cs->last_packet_sampling_rate;
} else {
LOGGER_WARNING("Failed to load packet values!");
rtp_free_msg(NULL, msg);
continue;
}
rc = opus_decode(cs->audio_decoder, msg->data, msg->length, tmp, fsize, 0);
rtp_free_msg(NULL, msg);
}
if (rc < 0) {
LOGGER_WARNING("Decoding error: %s", opus_strerror(rc));
} else if (((ToxAV*)cs->agent)->acb.first) {
/* Play */
((ToxAV*)cs->agent)->acb.first(cs->agent, cs->call_idx, tmp, rc,
((ToxAV*)cs->agent)->acb.second);
}
pthread_mutex_lock(cs->queue_mutex);
}
if (cs->vbuf_raw && !buffer_empty(cs->vbuf_raw)) {
/* Decode video */
buffer_read(cs->vbuf_raw, &p);
/* Leave space for (possibly) other thread to queue more data after we read it here */
pthread_mutex_unlock(cs->queue_mutex);
rc = vpx_codec_decode(cs->v_decoder, p->data, p->size, NULL, MAX_DECODE_TIME_US);
free(p);
if (rc != VPX_CODEC_OK) {
LOGGER_ERROR("Error decoding video: %s", vpx_codec_err_to_string(rc));
} else {
vpx_codec_iter_t iter = NULL;
vpx_image_t *dest = vpx_codec_get_frame(cs->v_decoder, &iter);
/* Play decoded images */
for (; dest; dest = vpx_codec_get_frame(cs->v_decoder, &iter)) {
if (((ToxAV*)cs->agent)->vcb.first)
((ToxAV*)cs->agent)->vcb.first(cs->agent, cs->call_idx, dest,
((ToxAV*)cs->agent)->vcb.second);
vpx_img_free(dest);
}
}
return;
}
pthread_mutex_unlock(cs->queue_mutex);
}
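/* Create a codec session. Sending is enabled for a media type when its s_*
* bitrate is non-zero, receiving when its p_* bitrate is non-zero; on failure
* everything that was already enabled is torn down again.
*/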
CSSession *cs_new(uint32_t s_audio_b, uint32_t p_audio_b, uint32_t s_video_b, uint32_t p_video_b)
{
CSSession *cs = calloc(sizeof(CSSession), 1);
if (!cs) {
LOGGER_WARNING("Allocation failed! Application might misbehave!");
return NULL;
}
/* TODO this has to be exchanged in msi */
cs->max_video_frame_size = MAX_VIDEOFRAME_SIZE;
cs->video_frame_piece_size = VIDEOFRAME_PIECE_SIZE;
if (s_audio_b > 0 && 0 != cs_enable_audio_sending(cs, s_audio_b, 2)) { /* Sending audio enabled */
LOGGER_WARNING("Failed to enable audio sending!");
goto FAILURE;
}
if (p_audio_b > 0 && 0 != cs_enable_audio_receiving(cs)) { /* Receiving audio enabled */
LOGGER_WARNING("Failed to enable audio receiving!");
goto FAILURE;
}
if (s_video_b > 0 && 0 != cs_enable_video_sending(cs, s_video_b)) { /* Sending video enabled */
LOGGER_WARNING("Failed to enable video sending!");
goto FAILURE;
}
if (p_video_b > 0 && 0 != cs_enable_video_receiving(cs)) { /* Receiving video enabled */
LOGGER_WARNING("Failed to enable video receiving!");
goto FAILURE;
}
return cs;
FAILURE:
LOGGER_WARNING("Error initializing codec session! Application might misbehave!");
cs_disable_audio_sending(cs);
cs_disable_audio_receiving(cs);
cs_disable_video_sending(cs);
cs_disable_video_receiving(cs);
free(cs);
return NULL;
}
void cs_kill(CSSession *cs)
{
if (!cs)
return;
/* NOTE: queue_message() will not be called since
* the callback is unregistered before cs_kill is called.
*/
cs_disable_audio_sending(cs);
cs_disable_audio_receiving(cs);
cs_disable_video_sending(cs);
cs_disable_video_receiving(cs);
LOGGER_DEBUG("Terminated codec state: %p", cs);
free(cs);
}
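/* Outgoing video frames are split into pieces of at most video_frame_piece_size
* bytes, each prefixed with a 2-byte header: [frame id, piece index].
* A sketch of how the sender is expected to drive these functions:
*
* cs_init_video_splitter_cycle(cs);
* int parts = cs_update_video_splitter_cycle(cs, payload, length);
* int i;
* for (i = 0; i < parts; i++) {
* uint16_t size;
* const uint8_t *piece = cs_iterate_split_video_frame(cs, &size);
* // send piece (size bytes) over the video RTP session
* }
*/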
void cs_init_video_splitter_cycle(CSSession* cs)
{
cs->split_video_frame[0] = cs->frameid_out++;
cs->split_video_frame[1] = 0;
}
int cs_update_video_splitter_cycle(CSSession *cs, const uint8_t *payload, uint16_t length)
{
cs->processing_video_frame = payload;
cs->processing_video_frame_size = length;
return ((length - 1) / cs->video_frame_piece_size) + 1;
}
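/* Copy the next piece of the frame currently being split into the send buffer
* behind the 2-byte header and increment the piece index. Returns the buffer
* to transmit; *size receives the number of bytes to send.
*/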
const uint8_t *cs_iterate_split_video_frame(CSSession *cs, uint16_t *size)
{
if (!cs || !size) return NULL;
if (cs->processing_video_frame_size > cs->video_frame_piece_size) {
memcpy(cs->split_video_frame + VIDEOFRAME_HEADER_SIZE,
cs->processing_video_frame,
cs->video_frame_piece_size);
cs->processing_video_frame += cs->video_frame_piece_size;
cs->processing_video_frame_size -= cs->video_frame_piece_size;
*size = cs->video_frame_piece_size + VIDEOFRAME_HEADER_SIZE;
} else {
memcpy(cs->split_video_frame + VIDEOFRAME_HEADER_SIZE,
cs->processing_video_frame,
cs->processing_video_frame_size);
*size = cs->processing_video_frame_size + VIDEOFRAME_HEADER_SIZE;
}
cs->split_video_frame[1]++;
return cs->split_video_frame;
}
int cs_set_sending_video_resolution(CSSession *cs, uint16_t width, uint16_t height)
{
if (!cs->v_encoding)
return -1;
/* TODO/FIXME: is it safe to read the encoder configuration through this internal pointer? */
vpx_codec_enc_cfg_t cfg = *cs->v_encoder[0].config.enc;
if (cfg.g_w == width && cfg.g_h == height)
return 0;
/*
if (width * height > cs->max_width * cs->max_height) {
vpx_codec_ctx_t v_encoder = cs->v_encoder;
if (init_video_encoder(cs, width, height, cs->video_bitrate) == -1) {
cs->v_encoder = v_encoder;
return cs_ErrorSettingVideoResolution;
}
vpx_codec_destroy(&v_encoder);
return 0;
}*/
LOGGER_DEBUG("New video resolution: %u %u", width, height);
cfg.g_w = width;
cfg.g_h = height;
int rc = vpx_codec_enc_config_set(cs->v_encoder, &cfg);
if ( rc != VPX_CODEC_OK) {
LOGGER_ERROR("Failed to set encoder control setting: %s", vpx_codec_err_to_string(rc));
return cs_ErrorSettingVideoResolution;
}
return 0;
}
int cs_set_sending_video_bitrate(CSSession *cs, uint32_t bitrate)
{
if (!cs->v_encoding)
return -1;
/* TODO/FIXME: is it safe to read the encoder configuration through this internal pointer? */
vpx_codec_enc_cfg_t cfg = *cs->v_encoder[0].config.enc;
if (cfg.rc_target_bitrate == bitrate)
return 0;
LOGGER_DEBUG("New video bitrate: %u", video_bitrate);
cfg.rc_target_bitrate = bitrate;
int rc = vpx_codec_enc_config_set(cs->v_encoder, &cfg);
if ( rc != VPX_CODEC_OK) {
LOGGER_ERROR("Failed to set encoder control setting: %s", vpx_codec_err_to_string(rc));
return cs_ErrorSettingVideoBitrate;
}
return 0;
}
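/* Initialize the vpx video encoder for sending: one-pass, error-resilient,
* no lag, keyframe interval of up to 48 frames. The resolution defaults to
* 800x600 and is adjusted later through cs_set_sending_video_resolution().
*/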
int cs_enable_video_sending(CSSession* cs, uint32_t bitrate)
{
if (cs->v_encoding)
return 0;
vpx_codec_enc_cfg_t cfg;
int rc = vpx_codec_enc_config_default(VIDEO_CODEC_ENCODER_INTERFACE, &cfg, 0);
if (rc != VPX_CODEC_OK) {
LOGGER_ERROR("Failed to get config: %s", vpx_codec_err_to_string(rc));
return -1;
}
/* Configure before initializing the encoder; changes made to cfg after
* vpx_codec_enc_init_ver() would otherwise never be applied.
*/
cfg.rc_target_bitrate = bitrate;
cfg.g_w = 800;
cfg.g_h = 600;
cfg.g_pass = VPX_RC_ONE_PASS;
cfg.g_error_resilient = VPX_ERROR_RESILIENT_DEFAULT | VPX_ERROR_RESILIENT_PARTITIONS;
cfg.g_lag_in_frames = 0;
cfg.kf_min_dist = 0;
cfg.kf_max_dist = 48;
cfg.kf_mode = VPX_KF_AUTO;
rc = vpx_codec_enc_init_ver(cs->v_encoder, VIDEO_CODEC_ENCODER_INTERFACE, &cfg, 0,
VPX_ENCODER_ABI_VERSION);
if ( rc != VPX_CODEC_OK) {
LOGGER_ERROR("Failed to initialize encoder: %s", vpx_codec_err_to_string(rc));
return -1;
}
/* So that we can use cs_disable_video_sending to clean up */
cs->v_encoding = true;
if ( !(cs->split_video_frame = calloc(cs->video_frame_piece_size + VIDEOFRAME_HEADER_SIZE, 1)) )
goto FAILURE;
rc = vpx_codec_control(cs->v_encoder, VP8E_SET_CPUUSED, 8);
if ( rc != VPX_CODEC_OK) {
LOGGER_ERROR("Failed to set encoder control setting: %s", vpx_codec_err_to_string(rc));
goto FAILURE;
}
return 0;
FAILURE:
cs_disable_video_sending(cs);
return -1;
}
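/* Initialize the vpx decoder together with the reassembly buffer for incoming
* frame pieces (frame_buf), the queue of completed frames awaiting decode
* (vbuf_raw) and the mutex protecting the receive queues.
*/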
int cs_enable_video_receiving(CSSession* cs)
{
if (cs->v_decoding)
return 0;
if (create_recursive_mutex(cs->queue_mutex) != 0) {
LOGGER_WARNING("Failed to create recursive mutex!");
return -1;
}
int rc = vpx_codec_dec_init_ver(cs->v_decoder, VIDEO_CODEC_DECODER_INTERFACE,
NULL, 0, VPX_DECODER_ABI_VERSION);
if ( rc != VPX_CODEC_OK) {
LOGGER_ERROR("Init video_decoder failed: %s", vpx_codec_err_to_string(rc));
pthread_mutex_destroy(cs->queue_mutex);
return -1;
}
/* So that we can use cs_disable_video_receiving to clean up */
cs->v_decoding = true;
if ( !(cs->frame_buf = calloc(cs->max_video_frame_size, 1)) )
goto FAILURE;
if ( !(cs->vbuf_raw = buffer_new(VIDEO_DECODE_BUFFER_SIZE)) )
goto FAILURE;
return 0;
FAILURE:
cs_disable_video_receiving(cs);
return -1;
}
void cs_disable_video_sending(CSSession* cs)
{
if (cs->v_encoding) {
cs->v_encoding = false;
free(cs->split_video_frame);
cs->split_video_frame = NULL;
vpx_codec_destroy(cs->v_encoder);
}
}
void cs_disable_video_receiving(CSSession* cs)
{
if (cs->v_decoding) {
cs->v_decoding = false;
buffer_free(cs->vbuf_raw);
cs->vbuf_raw = NULL;
free(cs->frame_buf);
cs->frame_buf = NULL;
vpx_codec_destroy(cs->v_decoder);
pthread_mutex_destroy(cs->queue_mutex);
}
}
int cs_set_sending_audio_bitrate(CSSession *cs, int32_t rate)
{
if (cs->audio_encoder == NULL)
return -1;
int rc = opus_encoder_ctl(cs->audio_encoder, OPUS_SET_BITRATE(rate));
if ( rc != OPUS_OK ) {
LOGGER_ERROR("Error while setting encoder ctl: %s", opus_strerror(rc));
return -1;
}
return 0;
}
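/* The sampling rate and channel count of an existing Opus encoder cannot be
* changed, so this and cs_set_sending_audio_channels() recreate the encoder,
* preserving the currently configured bitrate.
*/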
int cs_set_sending_audio_sampling_rate(CSSession* cs, int32_t rate)
{
/* TODO Find a better way? */
if (cs->audio_encoder == NULL)
return -1;
int rc = OPUS_OK;
int bitrate = 0;
int channels = cs->encoder_channels;
rc = opus_encoder_ctl(cs->audio_encoder, OPUS_GET_BITRATE(&bitrate));
if ( rc != OPUS_OK ) {
LOGGER_ERROR("Error while getting encoder ctl: %s", opus_strerror(rc));
return -1;
}
cs_disable_audio_sending(cs);
return cs_enable_audio_sending(cs, bitrate, channels);
}
int cs_set_sending_audio_channels(CSSession* cs, int32_t count)
{
/* TODO Find a better way? */
if (cs->audio_encoder == NULL)
return -1;
if (cs->encoder_channels == count)
return 0;
int rc = OPUS_OK;
int bitrate = 0;
rc = opus_encoder_ctl(cs->audio_encoder, OPUS_GET_BITRATE(&bitrate));
if ( rc != OPUS_OK ) {
LOGGER_ERROR("Error while getting encoder ctl: %s", opus_strerror(rc));
return -1;
}
cs_disable_audio_sending(cs);
return cs_enable_audio_sending(cs, bitrate, count);
}
void cs_disable_audio_sending(CSSession* cs)
{
if ( cs->audio_encoder ) {
opus_encoder_destroy(cs->audio_encoder);
cs->audio_encoder = NULL;
cs->encoder_channels = 0;
}
}
void cs_disable_audio_receiving(CSSession* cs)
{
if ( cs->audio_decoder ) {
opus_decoder_destroy(cs->audio_decoder);
cs->audio_decoder = NULL;
jbuf_free(cs->j_buf);
cs->j_buf = NULL;
/* This value is used for measuring the iteration interval, so it has to be set
* to something; to avoid unnecessary checking we set it to 500.
*/
cs->last_packet_frame_duration = 500;
}
}
int cs_enable_audio_sending(CSSession* cs, uint32_t bitrate, int channels)
{
if (cs->audio_encoder)
return 0;
/**
* The encoder is initialized with default values; the sampling rate and
* channel count are changed on the fly by toxav.
*/
int rc = OPUS_OK;
cs->audio_encoder = opus_encoder_create(48000, channels, OPUS_APPLICATION_AUDIO, &rc);
if ( rc != OPUS_OK ) {
LOGGER_ERROR("Error while starting audio encoder: %s", opus_strerror(rc));
return -1;
}
rc = opus_encoder_ctl(cs->audio_encoder, OPUS_SET_BITRATE(bitrate));
if ( rc != OPUS_OK ) {
LOGGER_ERROR("Error while setting encoder ctl: %s", opus_strerror(rc));
goto FAILURE;
}
rc = opus_encoder_ctl(cs->audio_encoder, OPUS_SET_COMPLEXITY(10));
if ( rc != OPUS_OK ) {
LOGGER_ERROR("Error while setting encoder ctl: %s", opus_strerror(rc));
goto FAILURE;
}
cs->encoder_channels = channels;
return 0;
FAILURE:
cs_disable_audio_sending(cs);
return -1;
}
int cs_enable_audio_receiving(CSSession* cs)
{
if (cs->audio_decoder)
return 0;
/**
* The decoder is initialized with default values; the sampling rate and
* channel count are changed on the fly by toxav.
*/
int rc;
cs->audio_decoder = opus_decoder_create(48000, 2, &rc );
if ( rc != OPUS_OK ) {
LOGGER_ERROR("Error while starting audio decoder: %s", opus_strerror(rc));
return -1;
}
if ( !(cs->j_buf = jbuf_new(DEFAULT_JBUF)) ) {
LOGGER_WARNING("Jitter buffer creaton failed!");
opus_decoder_destroy(cs->audio_decoder);
cs->audio_decoder = NULL;
return -1;
}
/* This value is used for measuring the iteration interval, so it has to be set
* to something; to avoid unnecessary checking we set it to 500.
*/
cs->last_packet_frame_duration = 500;
return 0;
}
/* Called from RTP */
void queue_message(RTPSession *session, RTPMessage *msg)
{
/* This function is unregistered during call termination before the codec
* session is destroyed, so cs does not need to be checked for validity.
*/
CSSession *cs = session->cs;
if (!cs) return;
/* Audio */
if (session->payload_type == msi_TypeAudio % 128) {
pthread_mutex_lock(cs->queue_mutex);
int ret = jbuf_write(cs->j_buf, msg);
pthread_mutex_unlock(cs->queue_mutex);
if (ret == -1) {
rtp_free_msg(NULL, msg);
}
}
/* Video */
else {
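/* Each video packet carries the 2-byte header written by the splitter:
* packet[0] is the frame id, packet[1] the piece index. Pieces are copied
* into frame_buf at their offset; when a packet with a new frame id arrives
* the previously assembled frame is queued for decoding.
*/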
uint8_t *packet = msg->data;
uint32_t packet_size = msg->length;
if (packet_size < VIDEOFRAME_HEADER_SIZE)
goto end;
uint8_t diff = packet[0] - cs->frameid_in;
if (diff != 0) {
if (diff < 225) { /* New frame */
/* Flush last frames' data and get ready for this frame */
Payload *p = malloc(sizeof(Payload) + cs->frame_size);
if (p) {
pthread_mutex_lock(cs->queue_mutex);
if (buffer_full(cs->vbuf_raw)) {
/* Drop the oldest queued frame to make room for the new one */
LOGGER_DEBUG("Dropped video frame");
Payload *tp;
buffer_read(cs->vbuf_raw, &tp);
free(tp);
}
p->size = cs->frame_size;
memcpy(p->data, cs->frame_buf, cs->frame_size);
buffer_write(cs->vbuf_raw, p);
pthread_mutex_unlock(cs->queue_mutex);
} else {
LOGGER_WARNING("Allocation failed! Program might misbehave!");
goto end;
}
cs->last_timestamp = msg->header->timestamp;
cs->frameid_in = packet[0];
memset(cs->frame_buf, 0, cs->frame_size);
cs->frame_size = 0;
} else { /* Old frame; drop */
LOGGER_DEBUG("Old packet: %u", packet[0]);
goto end;
}
}
uint8_t piece_number = packet[1];
uint32_t length_before_piece = ((piece_number - 1) * cs->video_frame_piece_size);
uint32_t framebuf_new_length = length_before_piece + (packet_size - VIDEOFRAME_HEADER_SIZE);
if (framebuf_new_length > cs->max_video_frame_size) {
goto end;
}
/* Otherwise it's part of the frame so just process */
/* LOGGER_DEBUG("Video Packet: %u %u", packet[0], packet[1]); */
memcpy(cs->frame_buf + length_before_piece,
packet + VIDEOFRAME_HEADER_SIZE,
packet_size - VIDEOFRAME_HEADER_SIZE);
if (framebuf_new_length > cs->frame_size) {
cs->frame_size = framebuf_new_length;
}
end:
rtp_free_msg(NULL, msg);
}
}