From: Frederic Lecaille
Date: Tue, 5 Mar 2024 17:30:41 +0000 (+0100)
Subject: MINOR: quic: HyStart++ implementation (RFC 9406)
X-Git-Tag: v3.0-dev7~36
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=a305bb92b990e505b9cc08ba6c5f2aab1649135b;p=thirdparty%2Fhaproxy.git

MINOR: quic: HyStart++ implementation (RFC 9406)

This is a simple algorithm to replace the classic slow start phase of
congestion control algorithms. It should reduce the high packet loss that
may occur during this phase. Implemented only for Cubic.
---
diff --git a/doc/configuration.txt b/doc/configuration.txt
index dd2b7ff58e..68af0a844a 100644
--- a/doc/configuration.txt
+++ b/doc/configuration.txt
@@ -1400,6 +1400,7 @@ The following keywords are supported in the "global" section :
   - tune.pool-high-fd-ratio
   - tune.pool-low-fd-ratio
   - tune.pt.zero-copy-forwarding
+  - tune.quic.cc-hystart
   - tune.quic.frontend.conn-tx-buffers.limit
   - tune.quic.frontend.max-idle-timeout
   - tune.quic.frontend.max-streams-bidi
@@ -3650,6 +3651,11 @@ tune.pt.zero-copy-forwarding { on | off }
   See also: tune.disable-zero-copy-forwarding, option splice-auto,
             option splice-request and option splice-response
 
+tune.quic.cc-hystart { on | off }
+  Enables ('on') or disables ('off') the HyStart++ (RFC 9406) algorithm for
+  QUIC connections, used as a replacement for the slow start phase of congestion
+  control algorithms, which may cause high packet loss. It is disabled by default.
+
 tune.quic.frontend.conn-tx-buffers.limit
   This settings defines the maximum number of buffers allocated for a QUIC
   connection on data emission. By default, it is set to 30. QUIC buffers are
diff --git a/include/haproxy/global-t.h b/include/haproxy/global-t.h
index f26b13f21b..b6d8c0723d 100644
--- a/include/haproxy/global-t.h
+++ b/include/haproxy/global-t.h
@@ -84,6 +84,7 @@
 #define GTUNE_LISTENER_MQ_FAIR (1<<27)
 #define GTUNE_LISTENER_MQ_OPT (1<<28)
 #define GTUNE_LISTENER_MQ_ANY (GTUNE_LISTENER_MQ_FAIR | GTUNE_LISTENER_MQ_OPT)
+#define GTUNE_QUIC_CC_HYSTART (1<<29)
 
 #define NO_ZERO_COPY_FWD 0x0001 /* Globally disable zero-copy FF */
 #define NO_ZERO_COPY_FWD_PT 0x0002 /* disable zero-copy FF for PT (recv & send are disabled automatically) */
diff --git a/include/haproxy/quic_cc-t.h b/include/haproxy/quic_cc-t.h
index 888efca971..e67817239a 100644
--- a/include/haproxy/quic_cc-t.h
+++ b/include/haproxy/quic_cc-t.h
@@ -46,6 +46,8 @@ extern unsigned long long last_ts;
 enum quic_cc_algo_state_type {
	/* Slow start. */
	QUIC_CC_ST_SS,
+	/* Conservative slow start (HyStart++ only) */
+	QUIC_CC_ST_CS,
	/* Congestion avoidance. */
	QUIC_CC_ST_CA,
	/* Recovery period. */
@@ -66,6 +68,7 @@ struct quic_cc_event {
	union {
		struct ack {
			uint64_t acked;
+			uint64_t pn;
			unsigned int time_sent;
		} ack;
		struct loss {
@@ -84,7 +87,7 @@ struct quic_cc {
	/* is there only for debugging purpose. */
	struct quic_conn *qc;
	struct quic_cc_algo *algo;
-	uint32_t priv[16];
+	uint32_t priv[18];
 };
 
 struct quic_cc_path {
@@ -117,6 +120,7 @@ struct quic_cc_algo {
	void (*event)(struct quic_cc *cc, struct quic_cc_event *ev);
	void (*slow_start)(struct quic_cc *cc);
	void (*state_trace)(struct buffer *buf, const struct quic_cc *cc);
+	void (*hystart_start_round)(struct quic_cc *cc, uint64_t pn);
 };
 
 #endif /* USE_QUIC */
diff --git a/include/haproxy/quic_cc_hystart.h b/include/haproxy/quic_cc_hystart.h
new file mode 100644
index 0000000000..4ed122cdad
--- /dev/null
+++ b/include/haproxy/quic_cc_hystart.h
@@ -0,0 +1,129 @@
+/* RFC 9406: HyStart++: Modified Slow Start for TCP.
+ */
+
+/* HyStart++ constants */
+#define HYSTART_MIN_RTT_THRESH 4U /* ms */
+#define HYSTART_MAX_RTT_THRESH 16U /* ms */
+#define HYSTART_MIN_RTT_DIVISOR 8
+#define HYSTART_N_RTT_SAMPLE 8
+#define HYSTART_CSS_GROWTH_DIVISOR 4
+#define HYSTART_CSS_ROUNDS 5
+#define HYSTART_LIMIT 8 /* Must be infinite if paced */
+
+#define QUIC_CLAMP(a, b, c) ({ \
+	typeof(a) _a = (a);    \
+	typeof(b) _b = (b);    \
+	typeof(c) _c = (c);    \
+	(void) (&_a == &_b);   \
+	(void) (&_b == &_c);   \
+	_b < _a ? _a : _b > _c ? _c : _b; })
+
+struct quic_hystart {
+	/* Current round minimum RTT. */
+	uint32_t curr_rnd_min_rtt;
+	/* Last round minimum RTT. */
+	uint32_t last_rnd_min_rtt;
+	/* Conservative Slow Start (CSS) baseline minimum RTT. */
+	uint32_t css_baseline_min_rtt;
+	uint32_t rtt_sample_count;
+	uint32_t css_rnd_count;
+	uint64_t wnd_end;
+};
+
+/* Reset HyStart++ algorithm state.
+ * Never fail.
+ */
+static inline void quic_cc_hystart_reset(struct quic_hystart *h)
+{
+	h->curr_rnd_min_rtt = UINT32_MAX;
+	h->last_rnd_min_rtt = UINT32_MAX;
+	h->css_baseline_min_rtt = UINT32_MAX;
+	h->rtt_sample_count = 0;
+	h->css_rnd_count = 0;
+	h->wnd_end = UINT64_MAX;
+}
+
+/* Track the minimum RTT. */
+static inline void quic_cc_hystart_track_min_rtt(struct quic_cc *cc,
+                                                 struct quic_hystart *h,
+                                                 unsigned int latest_rtt)
+{
+	if (h->wnd_end == UINT64_MAX)
+		return;
+
+	h->curr_rnd_min_rtt = QUIC_MIN(h->curr_rnd_min_rtt, latest_rtt);
+	h->rtt_sample_count++;
+}
+
+/* RFC 9406 4.2. Algorithm Details
+ * At the start of each round during standard slow start [RFC5681] and CSS,
+ * initialize the variables used to compute the last round's and current round's
+ * minimum RTT.
+ *
+ * Never fail.
+ */
+static inline void quic_cc_hystart_start_round(struct quic_hystart *h, uint64_t pn)
+{
+	if (h->wnd_end != UINT64_MAX) {
+		/* Round already started */
+		return;
+	}
+
+	h->wnd_end = pn;
+	h->last_rnd_min_rtt = h->curr_rnd_min_rtt;
+	h->rtt_sample_count = 0;
+}
+
+/* RFC 9406 4.2. Algorithm Details
+ * For rounds where at least N_RTT_SAMPLE RTT samples have been obtained and
+ * currentRoundMinRTT and lastRoundMinRTT are valid, check to see if delay
+ * increase triggers slow start exit.
+ *
+ * Depending on HyStart++ algorithm state, returns 1 if the underlying
+ * congestion control algorithm may enter the Conservative Slow Start (CSS)
+ * state, 0 if not.
+ */
+static inline int quic_cc_hystart_may_enter_cs(struct quic_hystart *h)
+{
+	uint32_t rtt_thresh;
+
+	if (h->rtt_sample_count < HYSTART_N_RTT_SAMPLE ||
+	    h->curr_rnd_min_rtt == UINT32_MAX || h->last_rnd_min_rtt == UINT32_MAX)
+		return 0;
+
+	rtt_thresh = QUIC_CLAMP(HYSTART_MIN_RTT_THRESH,
+	                        h->last_rnd_min_rtt / HYSTART_MIN_RTT_DIVISOR,
+	                        HYSTART_MAX_RTT_THRESH);
+	if (h->curr_rnd_min_rtt + rtt_thresh >= h->last_rnd_min_rtt) {
+		h->css_baseline_min_rtt = h->curr_rnd_min_rtt;
+		h->rtt_sample_count = 0;
+		return 1;
+	}
+
+	return 0;
+}
+
+
+/* RFC 9406 4.2. Algorithm Details
+ * For CSS rounds where at least N_RTT_SAMPLE RTT samples have been obtained,
+ * check to see if the current round's minRTT drops below baseline (cssBaselineMinRtt),
+ * indicating that slow start exit was spurious.
+ *
+ * Return 1 if slow start exit was spurious, 0 if not. If the slow start
+ * exit was spurious, the caller must update the underlying congestion control
+ * algorithm to make it re-enter slow start state.
+ */ +static inline int quic_cc_hystart_may_reenter_ss(struct quic_hystart *h) +{ + if (h->rtt_sample_count < HYSTART_N_RTT_SAMPLE) + return 0; + + h->css_rnd_count++; + h->rtt_sample_count = 0; + + if (h->curr_rnd_min_rtt >= h->css_baseline_min_rtt) { + return 0; + } + + h->css_baseline_min_rtt = UINT32_MAX; + return 1; +} diff --git a/src/cfgparse-quic.c b/src/cfgparse-quic.c index 3b38efa720..b2ab934d73 100644 --- a/src/cfgparse-quic.c +++ b/src/cfgparse-quic.c @@ -257,35 +257,55 @@ static int cfg_parse_quic_tune_setting(char **args, int section_type, return 0; } -/* config parser for global "tune.quic.zero-copy-fwd-send" */ -static int cfg_parse_quic_zero_copy_fwd_snd(char **args, int section_type, struct proxy *curpx, - const struct proxy *defpx, const char *file, int line, - char **err) +/* config parser for global "tune.quic.* {on|off}" */ +static int cfg_parse_quic_tune_on_off(char **args, int section_type, struct proxy *curpx, + const struct proxy *defpx, const char *file, int line, + char **err) { + int on; + int prefix_len = strlen("tune.quic."); + const char *suffix; + if (too_many_args(1, args, err, NULL)) return -1; if (strcmp(args[1], "on") == 0) - global.tune.no_zero_copy_fwd &= ~NO_ZERO_COPY_FWD_QUIC_SND; + on = 1; else if (strcmp(args[1], "off") == 0) - global.tune.no_zero_copy_fwd |= NO_ZERO_COPY_FWD_QUIC_SND; + on = 0; else { memprintf(err, "'%s' expects 'on' or 'off'.", args[0]); return -1; } + + suffix = args[0] + prefix_len; + if (strcmp(suffix, "zero-copy-fwd-send") == 0 ) { + if (on) + global.tune.no_zero_copy_fwd &= ~NO_ZERO_COPY_FWD_QUIC_SND; + else + global.tune.no_zero_copy_fwd |= NO_ZERO_COPY_FWD_QUIC_SND; + } + else if (strcmp(suffix, "cc-hystart") == 0) { + if (on) + global.tune.options |= GTUNE_QUIC_CC_HYSTART; + else + global.tune.options &= ~GTUNE_QUIC_CC_HYSTART; + } + return 0; } static struct cfg_kw_list cfg_kws = {ILH, { { CFG_GLOBAL, "tune.quic.socket-owner", cfg_parse_quic_tune_socket_owner }, { CFG_GLOBAL, "tune.quic.backend.max-idle-timeou", cfg_parse_quic_time }, + { CFG_GLOBAL, "tune.quic.cc-hystart", cfg_parse_quic_tune_on_off }, { CFG_GLOBAL, "tune.quic.frontend.conn-tx-buffers.limit", cfg_parse_quic_tune_setting }, { CFG_GLOBAL, "tune.quic.frontend.max-streams-bidi", cfg_parse_quic_tune_setting }, { CFG_GLOBAL, "tune.quic.frontend.max-idle-timeout", cfg_parse_quic_time }, { CFG_GLOBAL, "tune.quic.max-frame-loss", cfg_parse_quic_tune_setting }, { CFG_GLOBAL, "tune.quic.reorder-ratio", cfg_parse_quic_tune_setting }, { CFG_GLOBAL, "tune.quic.retry-threshold", cfg_parse_quic_tune_setting }, - { CFG_GLOBAL, "tune.quic.zero-copy-fwd-send", cfg_parse_quic_zero_copy_fwd_snd }, + { CFG_GLOBAL, "tune.quic.zero-copy-fwd-send", cfg_parse_quic_tune_on_off }, { 0, NULL, NULL } }}; diff --git a/src/quic_cc_cubic.c b/src/quic_cc_cubic.c index d121bdb117..cc10a01c85 100644 --- a/src/quic_cc_cubic.c +++ b/src/quic_cc_cubic.c @@ -1,4 +1,6 @@ +#include #include +#include #include #include #include @@ -79,6 +81,8 @@ struct cubic { * in recovery period) (in ms). */ uint32_t recovery_start_time; + /* HyStart++ state. 
+	 */
+	struct quic_hystart hystart;
 };
 
 static void quic_cc_cubic_reset(struct quic_cc *cc)
@@ -96,6 +100,8 @@ static void quic_cc_cubic_reset(struct quic_cc *cc)
	c->last_w_max = 0;
	c->W_est = 0;
	c->recovery_start_time = 0;
+	if (global.tune.options & GTUNE_QUIC_CC_HYSTART)
+		quic_cc_hystart_reset(&c->hystart);
 
	TRACE_LEAVE(QUIC_EV_CONN_CC, cc->qc);
 }
@@ -424,7 +430,25 @@ static void quic_cc_cubic_ss_cb(struct quic_cc *cc, struct quic_cc_event *ev)
	TRACE_PROTO("CC cubic", QUIC_EV_CONN_CC, cc->qc, ev);
	switch (ev->type) {
	case QUIC_CC_EVT_ACK:
-		if (path->cwnd < QUIC_CC_INFINITE_SSTHESH - ev->ack.acked) {
+		if (global.tune.options & GTUNE_QUIC_CC_HYSTART) {
+			struct quic_hystart *h = &c->hystart;
+			unsigned int acked = QUIC_MIN(ev->ack.acked, HYSTART_LIMIT * path->mtu);
+
+			if (path->cwnd >= QUIC_CC_INFINITE_SSTHESH - acked)
+				goto out;
+
+			path->cwnd += acked;
+			path->mcwnd = QUIC_MAX(path->cwnd, path->mcwnd);
+			quic_cc_hystart_track_min_rtt(cc, h, path->loss.latest_rtt);
+			if (ev->ack.pn >= h->wnd_end)
+				h->wnd_end = UINT64_MAX;
+			if (quic_cc_hystart_may_enter_cs(&c->hystart)) {
+				/* Exit slow start and enter conservative slow start */
+				c->state = QUIC_CC_ST_CS;
+				goto out;
+			}
+		}
+		else if (path->cwnd < QUIC_CC_INFINITE_SSTHESH - ev->ack.acked) {
			path->cwnd += ev->ack.acked;
			path->cwnd = QUIC_MIN(path->max_cwnd, path->cwnd);
		}
@@ -470,6 +494,69 @@ static void quic_cc_cubic_ca_cb(struct quic_cc *cc, struct quic_cc_event *ev)
	TRACE_LEAVE(QUIC_EV_CONN_CC, cc->qc);
 }
 
+/* Conservative slow start callback. */
+static void quic_cc_cubic_cs_cb(struct quic_cc *cc, struct quic_cc_event *ev)
+{
+	struct quic_cc_path *path = container_of(cc, struct quic_cc_path, cc);
+
+	TRACE_ENTER(QUIC_EV_CONN_CC, cc->qc);
+	TRACE_PROTO("CC cubic", QUIC_EV_CONN_CC, cc->qc, ev);
+
+	switch (ev->type) {
+	case QUIC_CC_EVT_ACK:
+	{
+		struct cubic *c = quic_cc_priv(cc);
+		struct quic_hystart *h = &c->hystart;
+		unsigned int acked =
+			QUIC_MIN(ev->ack.acked, HYSTART_LIMIT * path->mtu) / HYSTART_CSS_GROWTH_DIVISOR;
+
+		if (path->cwnd >= QUIC_CC_INFINITE_SSTHESH - acked)
+			goto out;
+
+		path->cwnd += acked;
+		path->mcwnd = QUIC_MAX(path->cwnd, path->mcwnd);
+		quic_cc_hystart_track_min_rtt(cc, h, path->loss.latest_rtt);
+		if (quic_cc_hystart_may_reenter_ss(h)) {
+			/* Exit to slow start */
+			c->state = QUIC_CC_ST_SS;
+			goto out;
+		}
+
+		if (h->css_rnd_count >= HYSTART_CSS_ROUNDS) {
+			/* Exit to congestion avoidance
+			 *
+			 * RFC 9438 4.10. Slow start
+			 *
+			 * When CUBIC uses HyStart++ [RFC9406], it may exit the first slow start
+			 * without incurring any packet loss and thus _W_max_ is undefined. In
+			 * this special case, CUBIC sets _cwnd_prior = cwnd_ and switches to
+			 * congestion avoidance. It then increases its congestion window size
+			 * using Figure 1, where _t_ is the elapsed time since the beginning of
+			 * the current congestion avoidance stage, _K_ is set to 0, and _W_max_
+			 * is set to the congestion window size at the beginning of the current
+			 * congestion avoidance stage.
+ */ + c->last_w_max = path->cwnd; + c->t_epoch = 0; + c->state = QUIC_CC_ST_CA; + } + + break; + } + + case QUIC_CC_EVT_LOSS: + quic_enter_recovery(cc); + break; + case QUIC_CC_EVT_ECN_CE: + /* TODO */ + break; + } + + out: + TRACE_PROTO("CC cubic", QUIC_EV_CONN_CC, cc->qc, NULL, cc); + TRACE_LEAVE(QUIC_EV_CONN_CC, cc->qc); +} + /* Recovery period callback */ static void quic_cc_cubic_rp_cb(struct quic_cc *cc, struct quic_cc_event *ev) { @@ -507,6 +594,7 @@ static void quic_cc_cubic_rp_cb(struct quic_cc *cc, struct quic_cc_event *ev) static void (*quic_cc_cubic_state_cbs[])(struct quic_cc *cc, struct quic_cc_event *ev) = { [QUIC_CC_ST_SS] = quic_cc_cubic_ss_cb, + [QUIC_CC_ST_CS] = quic_cc_cubic_cs_cb, [QUIC_CC_ST_CA] = quic_cc_cubic_ca_cb, [QUIC_CC_ST_RP] = quic_cc_cubic_rp_cb, }; @@ -518,6 +606,17 @@ static void quic_cc_cubic_event(struct quic_cc *cc, struct quic_cc_event *ev) return quic_cc_cubic_state_cbs[c->state](cc, ev); } +static void quic_cc_cubic_hystart_start_round(struct quic_cc *cc, uint64_t pn) +{ + struct cubic *c = quic_cc_priv(cc); + struct quic_hystart *h = &c->hystart; + + if (c->state != QUIC_CC_ST_SS && c->state != QUIC_CC_ST_CS) + return; + + quic_cc_hystart_start_round(h, pn); +} + static void quic_cc_cubic_state_trace(struct buffer *buf, const struct quic_cc *cc) { struct quic_cc_path *path; @@ -538,5 +637,6 @@ struct quic_cc_algo quic_cc_algo_cubic = { .init = quic_cc_cubic_init, .event = quic_cc_cubic_event, .slow_start = quic_cc_cubic_slow_start, + .hystart_start_round = quic_cc_cubic_hystart_start_round, .state_trace = quic_cc_cubic_state_trace, }; diff --git a/src/quic_cc_newreno.c b/src/quic_cc_newreno.c index 405b0babcc..ca298776c4 100644 --- a/src/quic_cc_newreno.c +++ b/src/quic_cc_newreno.c @@ -196,6 +196,10 @@ static void quic_cc_nr_state_trace(struct buffer *buf, const struct quic_cc *cc) (unsigned long long)path->loss.nb_lost_pkt); } +static void quic_cc_nr_hystart_start_round(struct quic_cc *cc, uint64_t pn) +{ +} + static void (*quic_cc_nr_state_cbs[])(struct quic_cc *cc, struct quic_cc_event *ev) = { [QUIC_CC_ST_SS] = quic_cc_nr_ss_cb, @@ -215,6 +219,7 @@ struct quic_cc_algo quic_cc_algo_nr = { .init = quic_cc_nr_init, .event = quic_cc_nr_event, .slow_start = quic_cc_nr_slow_start, + .hystart_start_round = quic_cc_nr_hystart_start_round, .state_trace = quic_cc_nr_state_trace, }; diff --git a/src/quic_rx.c b/src/quic_rx.c index c6a23f3ce6..8612c3f007 100644 --- a/src/quic_rx.c +++ b/src/quic_rx.c @@ -506,6 +506,7 @@ static void qc_notify_cc_of_newly_acked_pkts(struct quic_conn *qc, qc_treat_ack_of_ack(qc, &pkt->pktns->rx.arngs, pkt->largest_acked_pn); ev.ack.acked = pkt->in_flight_len; ev.ack.time_sent = pkt->time_sent; + ev.ack.pn = pkt->pn_node.key; quic_cc_event(&qc->path->cc, &ev); LIST_DEL_INIT(&pkt->list); quic_tx_packet_refdec(pkt); diff --git a/src/quic_tx.c b/src/quic_tx.c index 306b4c268c..f9f021cfce 100644 --- a/src/quic_tx.c +++ b/src/quic_tx.c @@ -427,6 +427,7 @@ int qc_send_ppkts(struct buffer *buf, struct ssl_sock_ctx *ctx) time_sent = now_ms; for (pkt = first_pkt; pkt; pkt = next_pkt) { + struct quic_cc *cc = &qc->path->cc; /* RFC 9000 14.1 Initial datagram size * a server MUST expand the payload of all UDP datagrams carrying ack-eliciting * Initial packets to at least the smallest allowed maximum datagram size of @@ -466,6 +467,8 @@ int qc_send_ppkts(struct buffer *buf, struct ssl_sock_ctx *ctx) } qc->path->in_flight += pkt->in_flight_len; pkt->pktns->tx.in_flight += pkt->in_flight_len; + if ((global.tune.options & 
+			if ((global.tune.options & GTUNE_QUIC_CC_HYSTART) && pkt->pktns == qc->apktns)
+				cc->algo->hystart_start_round(cc, pkt->pn_node.key);
			if (pkt->in_flight_len)
				qc_set_timer(qc);
			TRACE_PROTO("TX pkt", QUIC_EV_CONN_SPPKTS, qc, pkt);
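
A note on the constants used above: quic_cc_hystart_may_enter_cs() compares the current round's minimum RTT against the previous round's minimum RTT using a threshold clamped between HYSTART_MIN_RTT_THRESH and HYSTART_MAX_RTT_THRESH (4 to 16 ms per RFC 9406), with the unclamped value being lastRoundMinRTT / HYSTART_MIN_RTT_DIVISOR. The feature itself is opt-in via "tune.quic.cc-hystart on" in the global section. The standalone sketch below (illustrative only, not HAProxy code; the RTT values are made up) shows how that threshold behaves for a few previous-round RTTs.

/* Standalone illustration of the RttThresh computation from RFC 9406, using
 * the same constants as quic_cc_hystart.h. Not HAProxy code.
 */
#include <stdio.h>

#define HYSTART_MIN_RTT_THRESH 4U /* ms */
#define HYSTART_MAX_RTT_THRESH 16U /* ms */
#define HYSTART_MIN_RTT_DIVISOR 8

/* Same computation as QUIC_CLAMP(MIN, last / DIVISOR, MAX) in the patch,
 * written without the type-checking trick for readability.
 */
static unsigned int rtt_thresh(unsigned int last_rnd_min_rtt)
{
	unsigned int b = last_rnd_min_rtt / HYSTART_MIN_RTT_DIVISOR;

	if (b < HYSTART_MIN_RTT_THRESH)
		return HYSTART_MIN_RTT_THRESH;
	if (b > HYSTART_MAX_RTT_THRESH)
		return HYSTART_MAX_RTT_THRESH;
	return b;
}

int main(void)
{
	/* 10 ms -> 4 ms (clamped up), 40 ms -> 5 ms, 200 ms -> 16 ms (clamped down) */
	unsigned int last_rtts[] = { 10, 40, 200 };
	unsigned int i;

	for (i = 0; i < sizeof(last_rtts) / sizeof(*last_rtts); i++)
		printf("lastRoundMinRTT = %3u ms -> RttThresh = %2u ms\n",
		       last_rtts[i], rtt_thresh(last_rtts[i]));
	return 0;
}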
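Similarly, the new Cubic callbacks bound window growth while HyStart++ is active: per ACK event the acknowledged byte count is capped at HYSTART_LIMIT packets worth of data, and once the connection has moved to conservative slow start the increment is further divided by HYSTART_CSS_GROWTH_DIVISOR. The second standalone sketch below illustrates the arithmetic; the mtu and acked values are made-up examples, not values taken from the patch.

/* Standalone illustration (not HAProxy code) of the congestion window growth
 * applied on an ACK by quic_cc_cubic_ss_cb() vs. quic_cc_cubic_cs_cb() when
 * HyStart++ is enabled.
 */
#include <stdio.h>

#define HYSTART_CSS_GROWTH_DIVISOR 4
#define HYSTART_LIMIT 8 /* Must be infinite if paced */

#define MIN(a, b) ((a) < (b) ? (a) : (b))

int main(void)
{
	unsigned int mtu = 1252;       /* example QUIC datagram size */
	unsigned int acked = 16 * mtu; /* example newly acknowledged bytes */
	unsigned int ss_inc, css_inc;

	/* Slow start: grow by the acked bytes, capped to 8 datagrams per event. */
	ss_inc = MIN(acked, HYSTART_LIMIT * mtu);
	/* Conservative slow start: same cap, but growth divided by 4. */
	css_inc = MIN(acked, HYSTART_LIMIT * mtu) / HYSTART_CSS_GROWTH_DIVISOR;

	printf("acked=%u bytes: cwnd += %u in slow start, += %u in CSS\n",
	       acked, ss_inc, css_inc);
	return 0;
}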