--- /dev/null
+Subject: func_jitterbuffer
+
+The JITTERBUFFER dialplan function now has an option to enable video synchronization
+support. When enabled and used with a compatible channel driver (chan_sip, chan_pjsip),
+incoming video is buffered according to the size of the audio jitterbuffer and is
+released in sync with the audio.
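+
+For example, a fixed jitterbuffer with a 200ms max size and video synchronization
+enabled can be requested from the dialplan as follows (extension and priority are
+illustrative):
+
+    exten => 1,1,Set(JITTERBUFFER(fixed)=200,,,yes)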
</syntax>
<description>
<para>Jitterbuffers are constructed in two different ways.
- The first always take three arguments: <replaceable>max_size</replaceable>,
- <replaceable>resync_threshold</replaceable>, and <replaceable>target_extra</replaceable>.
+			The first always takes four arguments: <replaceable>max_size</replaceable>,
+ <replaceable>resync_threshold</replaceable>, <replaceable>target_extra</replaceable>,
+ and <replaceable>sync_video</replaceable>.
Alternatively, a single argument of <literal>default</literal> can be provided,
which will construct the default jitterbuffer for the given
<replaceable>jitterbuffer type</replaceable>.</para>
<para>target_extra: This option only affects the adaptive jitterbuffer. It represents
			the amount of time in milliseconds by which the new jitter buffer will pad its size.
Defaults to 40ms.</para>
+			<para>sync_video: This option enables video synchronization with the audio stream. When
+			enabled, video frames that arrive ahead of the audio are buffered until the audio catches
+			up to them. It can be turned on or off. Defaults to off.</para>
<example title="Fixed with defaults" language="text">
exten => 1,1,Set(JITTERBUFFER(fixed)=default)
</example>
<example title="Fixed with 200ms max size" language="text">
exten => 1,1,Set(JITTERBUFFER(fixed)=200)
</example>
+ <example title="Fixed with 200ms max size and video sync support" language="text">
+ exten => 1,1,Set(JITTERBUFFER(fixed)=200,,,yes)
+ </example>
<example title="Fixed with 200ms max size, resync threshold 1500" language="text">
exten => 1,1,Set(JITTERBUFFER(fixed)=200,1500)
</example>
<example title="Adaptive with 200ms max size, 60ms target extra" language="text">
exten => 1,1,Set(JITTERBUFFER(adaptive)=200,,60)
</example>
+ <example title="Adaptive with 200ms max size and video sync support" language="text">
+ exten => 1,1,Set(JITTERBUFFER(adaptive)=200,,,yes)
+ </example>
<example title="Set a fixed jitterbuffer with defaults; then remove it" language="text">
exten => 1,1,Set(JITTERBUFFER(fixed)=default)
exten => 1,n,Set(JITTERBUFFER(disabled)=)
AST_APP_ARG(max_size);
AST_APP_ARG(resync_threshold);
AST_APP_ARG(target_extra);
+ AST_APP_ARG(sync_video);
);
AST_STANDARD_APP_ARGS(args, parse);
"jbtargetextra",
args.target_extra);
}
+ if (!ast_strlen_zero(args.sync_video)) {
+ res |= ast_jb_read_conf(&jb_conf,
+ "jbsyncvideo",
+ args.sync_video);
+ }
if (res) {
ast_log(LOG_WARNING, "Invalid jitterbuffer parameters %s\n", value);
}
enum {
AST_JB_ENABLED = (1 << 0),
AST_JB_FORCED = (1 << 1),
- AST_JB_LOG = (1 << 2)
+ AST_JB_LOG = (1 << 2),
+ AST_JB_SYNC_VIDEO = (1 << 3)
};
enum ast_jb_type {
#define AST_JB_CONF_TARGET_EXTRA "targetextra"
#define AST_JB_CONF_IMPL "impl"
#define AST_JB_CONF_LOG "log"
+#define AST_JB_CONF_SYNC_VIDEO "syncvideo"
/* Hooks for the abstract jb implementation */
/*! \brief Create */
*/
struct ast_json *ast_rtp_instance_get_stats_all_json(struct ast_rtp_instance *instance);
+/*!
+ * \brief Retrieve the sample rate of a format according to RTP specifications
+ * \since 16.7.0
+ * \since 17.1.0
+ *
+ * \param format The media format
+ *
+ * \return The sample rate
+ */
+int ast_rtp_get_rate(const struct ast_format *format);
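+/* For example (illustrative): ast_rtp_get_rate(ast_format_g722) returns 8000,
+ * even though ast_format_get_sample_rate(ast_format_g722) returns 16000; all
+ * other formats report their actual sample rate.
+ */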
+
/*!
* \since 12
* \brief \ref stasis topic for RTP and RTCP related messages
#include "asterisk/utils.h"
#include "asterisk/pbx.h"
#include "asterisk/timing.h"
+#include "asterisk/rtp_engine.h"
+#include "asterisk/format_cache.h"
#include "asterisk/abstract_jb.h"
#include "fixedjitterbuf.h"
JB_CREATED = (1 << 2)
};
+/*! The maximum size we allow the early frame buffer to get */
+#define MAXIMUM_EARLY_FRAME_COUNT 200
+
/* Implementation functions */
/* fixed */
}
} else if (!strcasecmp(name, AST_JB_CONF_LOG)) {
ast_set2_flag(conf, ast_true(value), AST_JB_LOG);
+ } else if (!strcasecmp(name, AST_JB_CONF_SYNC_VIDEO)) {
+ ast_set2_flag(conf, ast_true(value), AST_JB_SYNC_VIDEO);
} else {
return -1;
}
#define DEFAULT_RESYNC 1000
#define DEFAULT_TYPE AST_JB_FIXED
+struct jb_stream_sync {
+	/*! The RTP timestamp from the last received RTCP SR report */
+	unsigned int timestamp;
+	/*! The NTP timestamp from the last received RTCP SR report */
+	struct timeval ntp;
+};
+
struct jb_framedata {
const struct ast_jb_impl *jb_impl;
struct ast_jb_conf jb_conf;
int timer_interval; /* ms between deliveries */
int timer_fd;
int first;
+	int audio_stream_id;                     /* Stream identifier of the audio stream */
+	struct jb_stream_sync audio_stream_sync; /* RTP/NTP mapping from the audio RTCP SR */
+	int video_stream_id;                     /* Stream identifier of the video stream */
+	struct jb_stream_sync video_stream_sync; /* RTP/NTP mapping from the video RTCP SR */
+	AST_LIST_HEAD_NOLOCK(, ast_frame) early_frames; /* Video frames received ahead of the audio */
+	unsigned int early_frame_count;          /* Number of frames in early_frames */
+	struct timeval last_audio_ntp_timestamp; /* NTP timestamp of the last audio frame given to the core */
+	int audio_flowing;                       /* Whether audio has started flowing to the core */
void *jb_obj;
};
static void jb_framedata_destroy(struct jb_framedata *framedata)
{
+ struct ast_frame *frame;
+
if (framedata->timer) {
ast_timer_close(framedata->timer);
framedata->timer = NULL;
framedata->jb_obj = NULL;
}
ao2_cleanup(framedata->last_format);
+ while ((frame = AST_LIST_REMOVE_HEAD(&framedata->early_frames, frame_list))) {
+ ast_frfree(frame);
+ }
ast_free(framedata);
}
void ast_jb_conf_default(struct ast_jb_conf *conf)
{
+ ast_clear_flag(conf, AST_FLAGS_ALL);
conf->max_size = DEFAULT_SIZE;
conf->resync_threshold = DEFAULT_RESYNC;
ast_copy_string(conf->impl, "fixed", sizeof(conf->impl));
jb_framedata_destroy((struct jb_framedata *) framedata);
}
+static struct timeval jitterbuffer_frame_get_ntp_timestamp(const struct jb_stream_sync *stream_sync, const struct ast_frame *frame)
+{
+ int timestamp_diff;
+ unsigned int rate;
+
+	/* It's possible for us to receive frames before we receive the information allowing
+	 * us to do NTP/RTP timestamp calculations. Since the information isn't available we
+	 * can't generate a timestamp, so we return an empty one.
+	 */
+ if (ast_tvzero(stream_sync->ntp)) {
+ return ast_tv(0, 0);
+ }
+
+ /* Convert the Asterisk timestamp into an RTP timestamp, and then based on the difference we can
+ * determine how many samples are in the frame and how long has elapsed since the synchronization
+ * RTP and NTP timestamps were received giving us the NTP timestamp for this frame.
+ */
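+	/* Worked example, with hypothetical numbers: suppose the last RTCP SR for
+	 * a ulaw (8kHz) audio stream mapped RTP timestamp 8000 to NTP time T. A
+	 * voice frame with a ts of 1020 (milliseconds) converts to RTP timestamp
+	 * 8160, so the difference is 160 samples and the frame's NTP timestamp
+	 * is T + 160/8000 sec = T + 20ms.
+	 */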
+ if (frame->frametype == AST_FRAME_VOICE) {
+ rate = ast_rtp_get_rate(frame->subclass.format);
+ timestamp_diff = (frame->ts * (rate / 1000)) - stream_sync->timestamp;
+ } else {
+		/* Video is special - internally we reference it at a rate of 1000 to preserve the
+		 * RTP timestamp, but it is actually 90000, which is why we can directly subtract
+		 * the timestamps.
+		 */
+ rate = 90000;
+ timestamp_diff = frame->ts - stream_sync->timestamp;
+ }
+
+ if (timestamp_diff < 0) {
+ /* It's possible for us to be asked for an NTP timestamp from before our latest
+ * RTCP SR report. To handle this we subtract so we go back in time.
+ */
+ return ast_tvsub(stream_sync->ntp, ast_samp2tv(abs(timestamp_diff), rate));
+ } else {
+ return ast_tvadd(stream_sync->ntp, ast_samp2tv(timestamp_diff, rate));
+ }
+}
+
static struct ast_frame *hook_event_cb(struct ast_channel *chan, struct ast_frame *frame, enum ast_framehook_event event, void *data)
{
struct jb_framedata *framedata = data;
return frame;
}
+ if (ast_test_flag(&framedata->jb_conf, AST_JB_SYNC_VIDEO)) {
+ if (frame->frametype == AST_FRAME_VOICE) {
+ /* Store the stream identifier for the audio stream so we can associate the incoming RTCP SR
+ * with the correct stream sync structure.
+ */
+ framedata->audio_stream_id = frame->stream_num;
+ } else if (frame->frametype == AST_FRAME_RTCP && frame->subclass.integer == AST_RTP_RTCP_SR) {
+ struct ast_rtp_rtcp_report *rtcp_report = frame->data.ptr;
+ struct jb_stream_sync *stream_sync = NULL;
+
+			/* Determine which stream this RTCP report pertains to */
+ if (framedata->audio_stream_id == frame->stream_num) {
+ stream_sync = &framedata->audio_stream_sync;
+ } else if (framedata->video_stream_id == frame->stream_num) {
+ stream_sync = &framedata->video_stream_sync;
+ }
+
+ if (stream_sync) {
+ /* Store the RTP and NTP timestamp mapping so we can derive an NTP timestamp for each frame */
+ stream_sync->timestamp = rtcp_report->sender_information.rtp_timestamp;
+ stream_sync->ntp = rtcp_report->sender_information.ntp_timestamp;
+ }
+ } else if (frame->frametype == AST_FRAME_VIDEO) {
+			/* If a video frame is late according to the audio timestamp, don't stash it away; just return it.
+			 * If, however, it is ahead, keep it until the audio catches up.
+			 */
+ struct ast_frame *jbframe;
+
+ framedata->video_stream_id = frame->stream_num;
+
+ /* If no timing information is available we can't store this away, so just let it through now */
+ if (!ast_test_flag(frame, AST_FRFLAG_HAS_TIMING_INFO)) {
+ return frame;
+ }
+
+ /* To ensure that the video starts when the audio starts we only start allowing frames through once
+ * audio starts flowing.
+ */
+ if (framedata->audio_flowing) {
+ struct timeval video_timestamp;
+
+ video_timestamp = jitterbuffer_frame_get_ntp_timestamp(&framedata->video_stream_sync, frame);
+ if (ast_tvdiff_ms(framedata->last_audio_ntp_timestamp, video_timestamp) >= 0) {
+ return frame;
+ }
+ }
+
+			/* To prevent the early frame buffer from growing uncontrollably we impose a maximum count that it can
+			 * reach. If this is reached then we drop the oldest video frame, which should cause the receiver to ask
+			 * for a new key frame.
+			 */
+ if (framedata->early_frame_count == MAXIMUM_EARLY_FRAME_COUNT) {
+ jbframe = AST_LIST_REMOVE_HEAD(&framedata->early_frames, frame_list);
+ framedata->early_frame_count--;
+ ast_frfree(jbframe);
+ }
+
+ jbframe = ast_frisolate(frame);
+ if (!jbframe) {
+ /* If we can't isolate the frame the safest thing we can do is return it, even if the A/V sync
+ * may be off.
+ */
+ return frame;
+ }
+
+ AST_LIST_INSERT_TAIL(&framedata->early_frames, jbframe, frame_list);
+ framedata->early_frame_count++;
+ return &ast_null_frame;
+ }
+ }
+
now_tv = ast_tvnow();
now = ast_tvdiff_ms(now_tv, framedata->start_tv);
}
if (frame->frametype == AST_FRAME_CONTROL) {
+ struct ast_frame *early_frame;
+
switch(frame->subclass.integer) {
case AST_CONTROL_HOLD:
case AST_CONTROL_UNHOLD:
case AST_CONTROL_SRCUPDATE:
case AST_CONTROL_SRCCHANGE:
framedata->jb_impl->force_resync(framedata->jb_obj);
+ /* Since we are resyncing go ahead and clear out the video frames too */
+ while ((early_frame = AST_LIST_REMOVE_HEAD(&framedata->early_frames, frame_list))) {
+ ast_frfree(early_frame);
+ }
+ framedata->audio_flowing = 0;
+ framedata->early_frame_count = 0;
break;
default:
break;
}
}
+ /* If a voice frame is being passed through see if we need to add any additional frames to it */
+ if (ast_test_flag(&framedata->jb_conf, AST_JB_SYNC_VIDEO) && frame->frametype == AST_FRAME_VOICE) {
+ AST_LIST_HEAD_NOLOCK(, ast_frame) additional_frames;
+ struct ast_frame *early_frame;
+
+		/* We store the last NTP timestamp of the audio given to the core so that subsequent frames
+		 * which are late can be passed through immediately (this will occur for the video frames
+		 * which are returned here)
+		 */
+ framedata->last_audio_ntp_timestamp = jitterbuffer_frame_get_ntp_timestamp(&framedata->audio_stream_sync, frame);
+ framedata->audio_flowing = 1;
+
+ AST_LIST_HEAD_INIT_NOLOCK(&additional_frames);
+
+ AST_LIST_TRAVERSE_SAFE_BEGIN(&framedata->early_frames, early_frame, frame_list) {
+ struct timeval early_timestamp = jitterbuffer_frame_get_ntp_timestamp(&framedata->video_stream_sync, early_frame);
+ int diff = ast_tvdiff_ms(framedata->last_audio_ntp_timestamp, early_timestamp);
+
+ /* If this frame is from the past we need to include it with the audio frame that is going
+ * out.
+ */
+ if (diff >= 0) {
+ AST_LIST_REMOVE_CURRENT(frame_list);
+ framedata->early_frame_count--;
+ AST_LIST_INSERT_TAIL(&additional_frames, early_frame, frame_list);
+ }
+ }
+ AST_LIST_TRAVERSE_SAFE_END;
+
+ /* Append any additional frames we may want to include (such as video) */
+ AST_LIST_NEXT(frame, frame_list) = AST_LIST_FIRST(&additional_frames);
+ }
+
return frame;
}
return -1;
}
+ framedata->audio_stream_id = -1;
+ framedata->video_stream_id = -1;
+ AST_LIST_HEAD_INIT_NOLOCK(&framedata->early_frames);
framedata->timer_fd = ast_timer_fd(framedata->timer);
framedata->timer_interval = DEFAULT_TIMER_INTERVAL;
ast_timer_set_rate(framedata->timer, 1000 / framedata->timer_interval);
return ast_rtp_convert_stats_json(&stats);
}
+
+int ast_rtp_get_rate(const struct ast_format *format)
+{
+ /* For those wondering: due to a fluke in RFC publication, G.722 is advertised
+ * as having a sample rate of 8kHz, while implementations must know that its
+ * real rate is 16kHz. Seriously.
+ */
+ return (ast_format_cmp(format, ast_format_g722) == AST_FORMAT_CMP_EQUAL) ? 8000 : (int)ast_format_get_sample_rate(format);
+}
return res;
}
-static int rtp_get_rate(struct ast_format *format)
-{
- /* For those wondering: due to a fluke in RFC publication, G.722 is advertised
- * as having a sample rate of 8kHz, while implementations must know that its
- * real rate is 16kHz. Seriously.
- */
- return (ast_format_cmp(format, ast_format_g722) == AST_FORMAT_CMP_EQUAL) ? 8000 : (int)ast_format_get_sample_rate(format);
-}
-
static unsigned int ast_rtcp_calc_interval(struct ast_rtp *rtp)
{
unsigned int interval;
rtp->dtmfmute = ast_tvadd(ast_tvnow(), ast_tv(0, 500000));
- if (duration > 0 && (measured_samples = duration * rtp_get_rate(rtp->f.subclass.format) / 1000) > rtp->send_duration) {
+ if (duration > 0 && (measured_samples = duration * ast_rtp_get_rate(rtp->f.subclass.format) / 1000) > rtp->send_duration) {
ast_debug(2, "Adjusting final end duration from %d to %u\n", rtp->send_duration, measured_samples);
rtp->send_duration = measured_samples;
}
report_block->lost_count.fraction = (fraction_lost & 0xff);
report_block->lost_count.packets = (lost_packets & 0xffffff);
report_block->highest_seq_no = (rtp->cycles | (rtp->lastrxseqno & 0xffff));
- report_block->ia_jitter = (unsigned int)(rtp->rxjitter * rtp_get_rate(rtp->f.subclass.format));
+ report_block->ia_jitter = (unsigned int)(rtp->rxjitter * ast_rtp_get_rate(rtp->f.subclass.format));
report_block->lsr = rtp->rtcp->themrxlsr;
/* If we haven't received an SR report, DLSR should be 0 */
if (!ast_tvzero(rtp->rtcp->rxlsr)) {
ast_verbose(" Fraction lost: %d\n", report_block->lost_count.fraction);
ast_verbose(" Cumulative loss: %u\n", report_block->lost_count.packets);
ast_verbose(" Highest seq no: %u\n", report_block->highest_seq_no);
- ast_verbose(" IA jitter: %.4f\n", (double)report_block->ia_jitter / rtp_get_rate(rtp->f.subclass.format));
+ ast_verbose(" IA jitter: %.4f\n", (double)report_block->ia_jitter / ast_rtp_get_rate(rtp->f.subclass.format));
ast_verbose(" Their last SR: %u\n", report_block->lsr);
ast_verbose(" DLSR: %4.4f (sec)\n\n", (double)(report_block->dlsr / 65536.0));
}
int pred, mark = 0;
unsigned int ms = calc_txstamp(rtp, &frame->delivery);
struct ast_sockaddr remote_address = { {0,} };
- int rate = rtp_get_rate(frame->subclass.format) / 1000;
+ int rate = ast_rtp_get_rate(frame->subclass.format) / 1000;
unsigned int seqno;
#ifdef TEST_FRAMEWORK
struct ast_rtp_engine_test *test = ast_rtp_instance_get_test(instance);
double d;
double dtv;
double prog;
- int rate = rtp_get_rate(rtp->f.subclass.format);
+ int rate = ast_rtp_get_rate(rtp->f.subclass.format);
double normdev_rxjitter_current;
if ((!rtp->rxcore.tv_sec && !rtp->rxcore.tv_usec) || mark) {
rtp->dtmf_duration = new_duration;
rtp->resp = resp;
f = ast_frdup(create_dtmf_frame(instance, AST_FRAME_DTMF_END, 0));
- f->len = ast_tvdiff_ms(ast_samp2tv(rtp->dtmf_duration, rtp_get_rate(f->subclass.format)), ast_tv(0, 0));
+ f->len = ast_tvdiff_ms(ast_samp2tv(rtp->dtmf_duration, ast_rtp_get_rate(f->subclass.format)), ast_tv(0, 0));
rtp->resp = 0;
rtp->dtmf_duration = rtp->dtmf_timeout = 0;
AST_LIST_INSERT_TAIL(frames, f, frame_list);
if (rtp->resp && rtp->resp != resp) {
/* Another digit already began. End it */
f = ast_frdup(create_dtmf_frame(instance, AST_FRAME_DTMF_END, 0));
- f->len = ast_tvdiff_ms(ast_samp2tv(rtp->dtmf_duration, rtp_get_rate(f->subclass.format)), ast_tv(0, 0));
+ f->len = ast_tvdiff_ms(ast_samp2tv(rtp->dtmf_duration, ast_rtp_get_rate(f->subclass.format)), ast_tv(0, 0));
rtp->resp = 0;
rtp->dtmf_duration = rtp->dtmf_timeout = 0;
AST_LIST_INSERT_TAIL(frames, f, frame_list);
}
} else if ((rtp->resp == resp) && !power) {
f = create_dtmf_frame(instance, AST_FRAME_DTMF_END, ast_rtp_instance_get_prop(instance, AST_RTP_PROPERTY_DTMF_COMPENSATE));
- f->samples = rtp->dtmfsamples * (rtp_get_rate(rtp->lastrxformat) / 1000);
+ f->samples = rtp->dtmfsamples * (ast_rtp_get_rate(rtp->lastrxformat) / 1000);
rtp->resp = 0;
} else if (rtp->resp == resp) {
- rtp->dtmfsamples += 20 * (rtp_get_rate(rtp->lastrxformat) / 1000);
+ rtp->dtmfsamples += 20 * (ast_rtp_get_rate(rtp->lastrxformat) / 1000);
}
rtp->dtmf_timeout = 0;
transport_rtp->f.delivery.tv_sec = 0;
transport_rtp->f.delivery.tv_usec = 0;
transport_rtp->f.src = "RTP";
+ transport_rtp->f.stream_num = rtp->stream_num;
f = &transport_rtp->f;
break;
case AST_RTP_RTCP_RTPFB:
if (rtp->resp) {
struct ast_frame *f;
f = create_dtmf_frame(instance, AST_FRAME_DTMF_END, 0);
- f->len = ast_tvdiff_ms(ast_samp2tv(rtp->dtmf_duration, rtp_get_rate(f->subclass.format)), ast_tv(0, 0));
+ f->len = ast_tvdiff_ms(ast_samp2tv(rtp->dtmf_duration, ast_rtp_get_rate(f->subclass.format)), ast_tv(0, 0));
rtp->resp = 0;
rtp->dtmf_timeout = rtp->dtmf_duration = 0;
AST_LIST_INSERT_TAIL(&frames, f, frame_list);
calc_rxstamp(&rtp->f.delivery, rtp, timestamp, mark);
/* Add timing data to let ast_generic_bridge() put the frame into a jitterbuf */
ast_set_flag(&rtp->f, AST_FRFLAG_HAS_TIMING_INFO);
- rtp->f.ts = timestamp / (rtp_get_rate(rtp->f.subclass.format) / 1000);
+ rtp->f.ts = timestamp / (ast_rtp_get_rate(rtp->f.subclass.format) / 1000);
rtp->f.len = rtp->f.samples / ((ast_format_get_sample_rate(rtp->f.subclass.format) / 1000));
} else if (ast_format_get_type(rtp->f.subclass.format) == AST_MEDIA_TYPE_VIDEO) {
/* Video -- samples is # of samples vs. 90000 */
rtp->lastividtimestamp = timestamp;
calc_rxstamp(&rtp->f.delivery, rtp, timestamp, mark);
ast_set_flag(&rtp->f, AST_FRFLAG_HAS_TIMING_INFO);
- rtp->f.ts = timestamp / (rtp_get_rate(rtp->f.subclass.format) / 1000);
+ rtp->f.ts = timestamp / (ast_rtp_get_rate(rtp->f.subclass.format) / 1000);
rtp->f.samples = timestamp - rtp->lastividtimestamp;
rtp->lastividtimestamp = timestamp;
rtp->f.delivery.tv_sec = 0;