- tune.pool-high-fd-ratio
- tune.pool-low-fd-ratio
- tune.pt.zero-copy-forwarding
+ - tune.quic.cc-hystart
- tune.quic.frontend.conn-tx-buffers.limit
- tune.quic.frontend.max-idle-timeout
- tune.quic.frontend.max-streams-bidi
See also: tune.disable-zero-copy-forwarding, option splice-auto,
option splice-request and option splice-response
+tune.quic.cc-hystart { on | off }
+  Enables ('on') or disables ('off') the HyStart++ (RFC 9406) algorithm for
+  QUIC connections. It is used as a replacement for the slow start phase of
+  congestion control algorithms, which may cause high packet loss. It is
+  disabled by default.
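+  For example, it can be enabled from the "global" section with
+  "tune.quic.cc-hystart on".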
+
tune.quic.frontend.conn-tx-buffers.limit <number>
This setting defines the maximum number of buffers allocated for a QUIC
connection on data emission. By default, it is set to 30. QUIC buffers are
#define GTUNE_LISTENER_MQ_FAIR (1<<27)
#define GTUNE_LISTENER_MQ_OPT (1<<28)
#define GTUNE_LISTENER_MQ_ANY (GTUNE_LISTENER_MQ_FAIR | GTUNE_LISTENER_MQ_OPT)
+#define GTUNE_QUIC_CC_HYSTART (1<<29)
#define NO_ZERO_COPY_FWD 0x0001 /* Globally disable zero-copy FF */
#define NO_ZERO_COPY_FWD_PT 0x0002 /* disable zero-copy FF for PT (recv & send are disabled automatically) */
enum quic_cc_algo_state_type {
/* Slow start. */
QUIC_CC_ST_SS,
+ /* Conservative slow start (HyStart++ only) */
+ QUIC_CC_ST_CS,
/* Congestion avoidance. */
QUIC_CC_ST_CA,
/* Recovery period. */
union {
struct ack {
uint64_t acked;
+ /* Packet number of the newly acknowledged packet. */
+ uint64_t pn;
unsigned int time_sent;
} ack;
struct loss {
/* <qc> is there only for debugging purposes. */
struct quic_conn *qc;
struct quic_cc_algo *algo;
- uint32_t priv[16];
+ uint32_t priv[18];
};
struct quic_cc_path {
void (*event)(struct quic_cc *cc, struct quic_cc_event *ev);
void (*slow_start)(struct quic_cc *cc);
void (*state_trace)(struct buffer *buf, const struct quic_cc *cc);
+ /* Notify the algorithm that packet <pn> was sent (used to open HyStart++ rounds). */
+ void (*hystart_start_round)(struct quic_cc *cc, uint64_t pn);
};
#endif /* USE_QUIC */
--- /dev/null
+/* RFC 9406: HyStart++: Modified Slow Start for TCP. */
+
+/* HyStart++ constants */
+#define HYSTART_MIN_RTT_THRESH 4U /* ms */
+#define HYSTART_MAX_RTT_THRESH 16U /* ms */
+#define HYSTART_MIN_RTT_DIVISOR 8
+#define HYSTART_N_RTT_SAMPLE 8
+#define HYSTART_CSS_GROWTH_DIVISOR 4
+#define HYSTART_CSS_ROUNDS 5
+#define HYSTART_LIMIT 8 /* Must be infinite if paced */
+
+#define QUIC_CLAMP(a, b, c) ({ \
+ typeof(a) _a = (a); \
+ typeof(b) _b = (b); \
+ typeof(c) _c = (c); \
+ (void) (&_a == &_b); \
+ (void) (&_b == &_c); \
+ _b < _a ? _a : _b > _c ? _c : _b; })
+
+struct quic_hystart {
+ /* Current round minimum RTT. */
+ uint32_t curr_rnd_min_rtt;
+ /* Last round minimum RTT. */
+ uint32_t last_rnd_min_rtt;
+ /* Conservative Slow Start (CSS) baseline minimum RTT. */
+ uint32_t css_baseline_min_rtt;
+ /* Number of RTT samples collected during the current round. */
+ uint32_t rtt_sample_count;
+ /* Number of CSS rounds completed. */
+ uint32_t css_rnd_count;
+ /* Packet number which ends the current round (UINT64_MAX when no round is in progress). */
+ uint64_t wnd_end;
+};
+
+/* Reset <h> HyStart++ algorithm state.
+ * Never fails.
+ */
+static inline void quic_cc_hystart_reset(struct quic_hystart *h)
+{
+ h->curr_rnd_min_rtt = UINT32_MAX;
+ h->last_rnd_min_rtt = UINT32_MAX;
+ h->css_baseline_min_rtt = UINT32_MAX;
+ h->rtt_sample_count = 0;
+ h->css_rnd_count = 0;
+ h->wnd_end = UINT64_MAX;
+}
+
+/* Track the minimum RTT. */
+static inline void quic_cc_hystart_track_min_rtt(struct quic_cc *cc,
+ struct quic_hystart *h,
+ unsigned int latest_rtt)
+{
+ if (h->wnd_end == UINT64_MAX)
+ return;
+
+ h->curr_rnd_min_rtt = QUIC_MIN(h->curr_rnd_min_rtt, latest_rtt);
+ h->rtt_sample_count++;
+}
+
+/* RFC 9406 4.2. Algorithm Details
+ * At the start of each round during standard slow start [RFC5681] and CSS,
+ * initialize the variables used to compute the last round's and current round's
+ * minimum RTT.
+ *
+ * Never fails.
+ */
+static inline void quic_cc_hystart_start_round(struct quic_hystart *h, uint64_t pn)
+{
+ if (h->wnd_end != UINT64_MAX) {
+ /* Round already started */
+ return;
+ }
+
+ h->wnd_end = pn;
+ h->last_rnd_min_rtt = h->curr_rnd_min_rtt;
+ h->curr_rnd_min_rtt = UINT32_MAX;
+ h->rtt_sample_count = 0;
+}
+
+/* RFC 9406 4.2. Algorithm Details
+ * For rounds where at least N_RTT_SAMPLE RTT samples have been obtained and
+ * currentRoundMinRTT and lastRoundMinRTT are valid, check to see if delay
+ * increase triggers slow start exit.
+ *
+ * Depending on <h> HyStart++ algorithm state, returns 1 if the underlying
+ * congestion control algorithm may enter the Conservative Slow Start (CSS)
+ * state, 0 if not.
+ */
+static inline int quic_cc_hystart_may_enter_cs(struct quic_hystart *h)
+{
+ uint32_t rtt_thresh;
+
+ if (h->rtt_sample_count < HYSTART_N_RTT_SAMPLE ||
+ h->curr_rnd_min_rtt == UINT32_MAX || h->last_rnd_min_rtt == UINT32_MAX)
+ return 0;
+
+ rtt_thresh = QUIC_CLAMP(HYSTART_MIN_RTT_THRESH,
+ h->last_rnd_min_rtt / HYSTART_MIN_RTT_DIVISOR,
+ HYSTART_MAX_RTT_THRESH);
+ if (h->curr_rnd_min_rtt >= h->last_rnd_min_rtt + rtt_thresh) {
+ h->css_baseline_min_rtt = h->curr_rnd_min_rtt;
+ h->rtt_sample_count = 0;
+ return 1;
+ }
+
+ return 0;
+}
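As a worked example of this check (illustrative numbers, not taken from the patch): with lastRoundMinRTT = 40 ms, the threshold is QUIC_CLAMP(4, 40 / 8, 16) = 5 ms, so per RFC 9406 a current-round minimum RTT of 45 ms or more counts as a significant delay increase and the controller may switch to conservative slow start.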
+
+
+/* RFC 9406 4.2. Algorithm Details
+ * For CSS rounds where at least N_RTT_SAMPLE RTT samples have been obtained,
+ * check to see if the current round's minRTT drops below baseline (cssBaselineMinRtt)
+ * indicating that slow start exit was spurious.
+ *
+ * Return 1 if slow start exit was spurious, 0 if not. If the slow start
+ * exit was spurious, the caller must update the underlying congestion control
+ * algorithm to make it re-enter slow start state.
+ */
+static inline int quic_cc_hystart_may_reenter_ss(struct quic_hystart *h)
+{
+ if (h->rtt_sample_count < HYSTART_N_RTT_SAMPLE)
+ return 0;
+
+ h->css_rnd_count++;
+ h->rtt_sample_count = 0;
+
+ if (h->curr_rnd_min_rtt >= h->css_baseline_min_rtt) {
+ return 0;
+ }
+
+ h->css_baseline_min_rtt = UINT32_MAX;
+ return 1;
+}
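
Taken together, these helpers are meant to be driven from a congestion controller's send and ACK paths. The sketch below is illustrative only and not part of the patch: the function name, packet numbers and RTT values are invented, the real call sites are the cubic callbacks further down, and it simply uses the <haproxy/quic_cc_hystart.h> header added above.

#include <stddef.h>
#include <stdint.h>
#include <haproxy/quic_cc_hystart.h>

/* Illustrative only: walk one HyStart++ round and the start of a second one. */
static void hystart_round_sketch(void)
{
	struct quic_hystart h;
	int i;

	quic_cc_hystart_reset(&h);

	/* A round opens when a packet is emitted while no round is in progress
	 * and ends once a packet number >= wnd_end has been acknowledged.
	 */
	quic_cc_hystart_start_round(&h, 10);
	for (i = 0; i < HYSTART_N_RTT_SAMPLE; i++)
		quic_cc_hystart_track_min_rtt(NULL, &h, 40); /* 40 ms samples; the cc argument is unused */
	h.wnd_end = UINT64_MAX; /* packet #10 acknowledged: round over */

	/* Next round: lastRoundMinRTT becomes 40 ms, so the RFC 9406 threshold
	 * is QUIC_CLAMP(4, 40 / 8, 16) = 5 ms.
	 */
	quic_cc_hystart_start_round(&h, 20);
	for (i = 0; i < HYSTART_N_RTT_SAMPLE; i++)
		quic_cc_hystart_track_min_rtt(NULL, &h, 46); /* per RFC 9406, 46 ms exceeds 40 + 5 */

	if (quic_cc_hystart_may_enter_cs(&h)) {
		/* The controller would leave slow start for conservative slow
		 * start (QUIC_CC_ST_CS); quic_cc_hystart_may_reenter_ss() later
		 * decides whether that exit was spurious.
		 */
	}
}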
return 0;
}
-/* config parser for global "tune.quic.zero-copy-fwd-send" */
-static int cfg_parse_quic_zero_copy_fwd_snd(char **args, int section_type, struct proxy *curpx,
- const struct proxy *defpx, const char *file, int line,
- char **err)
+/* config parser for global "tune.quic.* {on|off}" */
+static int cfg_parse_quic_tune_on_off(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
{
+ int on;
+ int prefix_len = strlen("tune.quic.");
+ const char *suffix;
+
if (too_many_args(1, args, err, NULL))
return -1;
if (strcmp(args[1], "on") == 0)
- global.tune.no_zero_copy_fwd &= ~NO_ZERO_COPY_FWD_QUIC_SND;
+ on = 1;
else if (strcmp(args[1], "off") == 0)
- global.tune.no_zero_copy_fwd |= NO_ZERO_COPY_FWD_QUIC_SND;
+ on = 0;
else {
memprintf(err, "'%s' expects 'on' or 'off'.", args[0]);
return -1;
}
+
+ suffix = args[0] + prefix_len;
+ if (strcmp(suffix, "zero-copy-fwd-send") == 0) {
+ if (on)
+ global.tune.no_zero_copy_fwd &= ~NO_ZERO_COPY_FWD_QUIC_SND;
+ else
+ global.tune.no_zero_copy_fwd |= NO_ZERO_COPY_FWD_QUIC_SND;
+ }
+ else if (strcmp(suffix, "cc-hystart") == 0) {
+ if (on)
+ global.tune.options |= GTUNE_QUIC_CC_HYSTART;
+ else
+ global.tune.options &= ~GTUNE_QUIC_CC_HYSTART;
+ }
+
return 0;
}
static struct cfg_kw_list cfg_kws = {ILH, {
{ CFG_GLOBAL, "tune.quic.socket-owner", cfg_parse_quic_tune_socket_owner },
{ CFG_GLOBAL, "tune.quic.backend.max-idle-timeou", cfg_parse_quic_time },
+ { CFG_GLOBAL, "tune.quic.cc-hystart", cfg_parse_quic_tune_on_off },
{ CFG_GLOBAL, "tune.quic.frontend.conn-tx-buffers.limit", cfg_parse_quic_tune_setting },
{ CFG_GLOBAL, "tune.quic.frontend.max-streams-bidi", cfg_parse_quic_tune_setting },
{ CFG_GLOBAL, "tune.quic.frontend.max-idle-timeout", cfg_parse_quic_time },
{ CFG_GLOBAL, "tune.quic.max-frame-loss", cfg_parse_quic_tune_setting },
{ CFG_GLOBAL, "tune.quic.reorder-ratio", cfg_parse_quic_tune_setting },
{ CFG_GLOBAL, "tune.quic.retry-threshold", cfg_parse_quic_tune_setting },
- { CFG_GLOBAL, "tune.quic.zero-copy-fwd-send", cfg_parse_quic_zero_copy_fwd_snd },
+ { CFG_GLOBAL, "tune.quic.zero-copy-fwd-send", cfg_parse_quic_tune_on_off },
{ 0, NULL, NULL }
}};
+#include <haproxy/global-t.h>
#include <haproxy/quic_cc.h>
+#include <haproxy/quic_cc_hystart.h>
#include <haproxy/quic_trace.h>
#include <haproxy/ticks.h>
#include <haproxy/trace.h>
* in recovery period) (in ms).
*/
uint32_t recovery_start_time;
+ /* HyStart++ state. */
+ struct quic_hystart hystart;
};
static void quic_cc_cubic_reset(struct quic_cc *cc)
c->last_w_max = 0;
c->W_est = 0;
c->recovery_start_time = 0;
+ if (global.tune.options & GTUNE_QUIC_CC_HYSTART)
+ quic_cc_hystart_reset(&c->hystart);
TRACE_LEAVE(QUIC_EV_CONN_CC, cc->qc);
}
TRACE_PROTO("CC cubic", QUIC_EV_CONN_CC, cc->qc, ev);
switch (ev->type) {
case QUIC_CC_EVT_ACK:
- if (path->cwnd < QUIC_CC_INFINITE_SSTHESH - ev->ack.acked) {
+ if (global.tune.options & GTUNE_QUIC_CC_HYSTART) {
+ struct quic_hystart *h = &c->hystart;
+ unsigned int acked = QUIC_MIN(ev->ack.acked, HYSTART_LIMIT * path->mtu);
+
+ if (path->cwnd >= QUIC_CC_INFINITE_SSTHESH - acked)
+ goto out;
+
+ path->cwnd += acked;
+ path->mcwnd = QUIC_MAX(path->cwnd, path->mcwnd);
+ quic_cc_hystart_track_min_rtt(cc, h, path->loss.latest_rtt);
+ if (ev->ack.pn >= h->wnd_end)
+ h->wnd_end = UINT64_MAX;
+ if (quic_cc_hystart_may_enter_cs(&c->hystart)) {
+ /* Exit slow start and enter conservative slow start */
+ c->state = QUIC_CC_ST_CS;
+ goto out;
+ }
+ }
+ else if (path->cwnd < QUIC_CC_INFINITE_SSTHESH - ev->ack.acked) {
path->cwnd += ev->ack.acked;
path->cwnd = QUIC_MIN(path->max_cwnd, path->cwnd);
}
TRACE_LEAVE(QUIC_EV_CONN_CC, cc->qc);
}
+/* Conservative slow start callback. */
+static void quic_cc_cubic_cs_cb(struct quic_cc *cc, struct quic_cc_event *ev)
+{
+ struct quic_cc_path *path = container_of(cc, struct quic_cc_path, cc);
+
+ TRACE_ENTER(QUIC_EV_CONN_CC, cc->qc);
+ TRACE_PROTO("CC cubic", QUIC_EV_CONN_CC, cc->qc, ev);
+
+ switch (ev->type) {
+ case QUIC_CC_EVT_ACK:
+ {
+ struct cubic *c = quic_cc_priv(cc);
+ struct quic_hystart *h = &c->hystart;
+ unsigned int acked =
+ QUIC_MIN(ev->ack.acked, HYSTART_LIMIT * path->mtu) / HYSTART_CSS_GROWTH_DIVISOR;
+
+ if (path->cwnd >= QUIC_CC_INFINITE_SSTHESH - acked)
+ goto out;
+
+ path->cwnd += acked;
+ path->mcwnd = QUIC_MAX(path->cwnd, path->mcwnd);
+ quic_cc_hystart_track_min_rtt(cc, h, path->loss.latest_rtt);
+ if (quic_cc_hystart_may_reenter_ss(h)) {
+ /* Exit to slow start */
+ c->state = QUIC_CC_ST_SS;
+ goto out;
+ }
+
+ if (h->css_rnd_count >= HYSTART_CSS_ROUNDS) {
+ /* Exit to congestion avoidance
+ *
+ * RFC 9438 4.10. Slow start
+ *
+ * When CUBIC uses HyStart++ [RFC9406], it may exit the first slow start
+ * without incurring any packet loss and thus _W_max_ is undefined. In
+ * this special case, CUBIC sets _cwnd_prior = cwnd_ and switches to
+ * congestion avoidance. It then increases its congestion window size
+ * using Figure 1, where _t_ is the elapsed time since the beginning of
+ * the current congestion avoidance stage, _K_ is set to 0, and _W_max_
+ * is set to the congestion window size at the beginning of the current
+ * congestion avoidance stage.
+ */
+ c->last_w_max = path->cwnd;
+ c->t_epoch = 0;
+ c->state = QUIC_CC_ST_CA;
+ }
+
+ break;
+ }
+
+ case QUIC_CC_EVT_LOSS:
+ quic_enter_recovery(cc);
+ break;
+ case QUIC_CC_EVT_ECN_CE:
+ /* TODO */
+ break;
+ }
+
+ out:
+ TRACE_PROTO("CC cubic", QUIC_EV_CONN_CC, cc->qc, NULL, cc);
+ TRACE_LEAVE(QUIC_EV_CONN_CC, cc->qc);
+}
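Concretely, RFC 9438 Figure 1 gives W_cubic(t) = C*(t - K)^3 + W_max; with K = 0 and W_max set to the congestion window at the start of the congestion avoidance stage, the window resumes growing cubically from the value reached at the end of conservative slow start.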
+
/* Recovery period callback */
static void quic_cc_cubic_rp_cb(struct quic_cc *cc, struct quic_cc_event *ev)
{
static void (*quic_cc_cubic_state_cbs[])(struct quic_cc *cc,
struct quic_cc_event *ev) = {
[QUIC_CC_ST_SS] = quic_cc_cubic_ss_cb,
+ [QUIC_CC_ST_CS] = quic_cc_cubic_cs_cb,
[QUIC_CC_ST_CA] = quic_cc_cubic_ca_cb,
[QUIC_CC_ST_RP] = quic_cc_cubic_rp_cb,
};
return quic_cc_cubic_state_cbs[c->state](cc, ev);
}
+static void quic_cc_cubic_hystart_start_round(struct quic_cc *cc, uint64_t pn)
+{
+ struct cubic *c = quic_cc_priv(cc);
+ struct quic_hystart *h = &c->hystart;
+
+ if (c->state != QUIC_CC_ST_SS && c->state != QUIC_CC_ST_CS)
+ return;
+
+ quic_cc_hystart_start_round(h, pn);
+}
+
static void quic_cc_cubic_state_trace(struct buffer *buf, const struct quic_cc *cc)
{
struct quic_cc_path *path;
.init = quic_cc_cubic_init,
.event = quic_cc_cubic_event,
.slow_start = quic_cc_cubic_slow_start,
+ .hystart_start_round = quic_cc_cubic_hystart_start_round,
.state_trace = quic_cc_cubic_state_trace,
};
(unsigned long long)path->loss.nb_lost_pkt);
}
+/* HyStart++ is not implemented for NewReno: this callback is a no-op. */
+static void quic_cc_nr_hystart_start_round(struct quic_cc *cc, uint64_t pn)
+{
+}
+
static void (*quic_cc_nr_state_cbs[])(struct quic_cc *cc,
struct quic_cc_event *ev) = {
[QUIC_CC_ST_SS] = quic_cc_nr_ss_cb,
.init = quic_cc_nr_init,
.event = quic_cc_nr_event,
.slow_start = quic_cc_nr_slow_start,
+ .hystart_start_round = quic_cc_nr_hystart_start_round,
.state_trace = quic_cc_nr_state_trace,
};
qc_treat_ack_of_ack(qc, &pkt->pktns->rx.arngs, pkt->largest_acked_pn);
ev.ack.acked = pkt->in_flight_len;
ev.ack.time_sent = pkt->time_sent;
+ ev.ack.pn = pkt->pn_node.key;
quic_cc_event(&qc->path->cc, &ev);
LIST_DEL_INIT(&pkt->list);
quic_tx_packet_refdec(pkt);
time_sent = now_ms;
for (pkt = first_pkt; pkt; pkt = next_pkt) {
+ struct quic_cc *cc = &qc->path->cc;
/* RFC 9000 14.1 Initial datagram size
* a server MUST expand the payload of all UDP datagrams carrying ack-eliciting
* Initial packets to at least the smallest allowed maximum datagram size of
}
qc->path->in_flight += pkt->in_flight_len;
pkt->pktns->tx.in_flight += pkt->in_flight_len;
+ if ((global.tune.options & GTUNE_QUIC_CC_HYSTART) && pkt->pktns == qc->apktns)
+ cc->algo->hystart_start_round(cc, pkt->pn_node.key);
if (pkt->in_flight_len)
qc_set_timer(qc);
TRACE_PROTO("TX pkt", QUIC_EV_CONN_SPPKTS, qc, pkt);