]> git.ipfire.org Git - thirdparty/haproxy.git/commitdiff
MINOR: quic: HyStart++ implementation (RFC 9406)
authorFrederic Lecaille <flecaille@haproxy.com>
Tue, 5 Mar 2024 17:30:41 +0000 (18:30 +0100)
committerFrederic Lecaille <flecaille@haproxy.com>
Tue, 2 Apr 2024 16:47:19 +0000 (18:47 +0200)
This is a simple algorithm to replace the classic slow start phase of the
congestion control algorithms. It should reduce the high packet loss during
this step.

Implemented only for Cubic.

doc/configuration.txt
include/haproxy/global-t.h
include/haproxy/quic_cc-t.h
include/haproxy/quic_cc_hystart.h [new file with mode: 0644]
src/cfgparse-quic.c
src/quic_cc_cubic.c
src/quic_cc_newreno.c
src/quic_rx.c
src/quic_tx.c

index dd2b7ff58e580cb435c984cf10a7e6fc87fa7dff..68af0a844adcad180afaee755222b541d8477b54 100644 (file)
@@ -1400,6 +1400,7 @@ The following keywords are supported in the "global" section :
    - tune.pool-high-fd-ratio
    - tune.pool-low-fd-ratio
    - tune.pt.zero-copy-forwarding
+   - tune.quic.cc-hystart
    - tune.quic.frontend.conn-tx-buffers.limit
    - tune.quic.frontend.max-idle-timeout
    - tune.quic.frontend.max-streams-bidi
@@ -3650,6 +3651,11 @@ tune.pt.zero-copy-forwarding { on | off }
   See also: tune.disable-zero-copy-forwarding, option splice-auto,
             option splice-request and option splice-response
 
+tune.quic.cc-hystart { on | off }
+  Enables ('on') or disables ('off') the HyStart++ (RFC 9406) algorithm for
+  QUIC connections used as a replacement for the slow start phase of congestion
+  control algorithms which may cause high packet loss. It is disabled by default.
+
 tune.quic.frontend.conn-tx-buffers.limit <number>
   This settings defines the maximum number of buffers allocated for a QUIC
   connection on data emission. By default, it is set to 30. QUIC buffers are
index f26b13f21b7d208138bd0eefb90b01c004b9fbca..b6d8c0723d78e3501af07698f04086632a949a7b 100644 (file)
@@ -84,6 +84,7 @@
 #define GTUNE_LISTENER_MQ_FAIR   (1<<27)
 #define GTUNE_LISTENER_MQ_OPT    (1<<28)
 #define GTUNE_LISTENER_MQ_ANY    (GTUNE_LISTENER_MQ_FAIR | GTUNE_LISTENER_MQ_OPT)
+#define GTUNE_QUIC_CC_HYSTART    (1<<29)
 
 #define NO_ZERO_COPY_FWD             0x0001 /* Globally disable zero-copy FF */
 #define NO_ZERO_COPY_FWD_PT          0x0002 /* disable zero-copy FF for PT (recv & send are disabled automatically) */
index 888efca9716cd17f36ae6aaf2c03a2e08a03fd2f..e67817239ad8ec835cef32573e7506fb8d0c0d3a 100644 (file)
@@ -46,6 +46,8 @@ extern unsigned long long last_ts;
 enum quic_cc_algo_state_type {
        /* Slow start. */
        QUIC_CC_ST_SS,
+       /* Conservative slow start (HyStart++ only) */
+       QUIC_CC_ST_CS,
        /* Congestion avoidance. */
        QUIC_CC_ST_CA,
        /* Recovery period. */
@@ -66,6 +68,7 @@ struct quic_cc_event {
        union {
                struct ack {
                        uint64_t acked;
+                       uint64_t pn;
                        unsigned int time_sent;
                } ack;
                struct loss {
@@ -84,7 +87,7 @@ struct quic_cc {
        /* <conn> is there only for debugging purpose. */
        struct quic_conn *qc;
        struct quic_cc_algo *algo;
-       uint32_t priv[16];
+       uint32_t priv[18];
 };
 
 struct quic_cc_path {
@@ -117,6 +120,7 @@ struct quic_cc_algo {
        void (*event)(struct quic_cc *cc, struct quic_cc_event *ev);
        void (*slow_start)(struct quic_cc *cc);
        void (*state_trace)(struct buffer *buf, const struct quic_cc *cc);
+       void (*hystart_start_round)(struct quic_cc *cc, uint64_t pn);
 };
 
 #endif /* USE_QUIC */
diff --git a/include/haproxy/quic_cc_hystart.h b/include/haproxy/quic_cc_hystart.h
new file mode 100644 (file)
index 0000000..4ed122c
--- /dev/null
@@ -0,0 +1,129 @@
+/* RFC 9406: HyStart++: Modified Slow Start for TCP. */
+
+/* HyStart++ constants */
+#define HYSTART_MIN_RTT_THRESH      4U /* ms */
+#define HYSTART_MAX_RTT_THRESH     16U /* ms */
+#define HYSTART_MIN_RTT_DIVISOR      8
+#define HYSTART_N_RTT_SAMPLE         8
+#define HYSTART_CSS_GROWTH_DIVISOR   4
+#define HYSTART_CSS_ROUNDS           5
+#define HYSTART_LIMIT                8 /* Must be infinite if paced */
+
+#define QUIC_CLAMP(a, b, c) ({ \
+    typeof(a) _a = (a);   \
+    typeof(b) _b = (b);   \
+    typeof(c) _c = (c);   \
+    (void) (&_a == &_b);  \
+    (void) (&_b == &_c);  \
+    _b < _a ? _a : _b > _c ? _c : _b; })
+
+struct quic_hystart {
+       /* Current round minimum RTT. */
+       uint32_t curr_rnd_min_rtt;
+       /* Last round minimum RTT. */
+       uint32_t last_rnd_min_rtt;
+       /* Conservative Slow Start (CSS) baseline minimum RTT */
+       uint32_t css_baseline_min_rtt;
+       uint32_t rtt_sample_count;
+       uint32_t css_rnd_count;
+       uint64_t wnd_end;
+};
+
+/* Reset <h> Hystart++ algorithm state.
+ * Never fail.
+ */
+static inline void quic_cc_hystart_reset(struct quic_hystart *h)
+{
+       h->curr_rnd_min_rtt = UINT32_MAX;
+       h->last_rnd_min_rtt = UINT32_MAX;
+       h->css_baseline_min_rtt = UINT32_MAX;
+       h->rtt_sample_count = 0;
+       h->css_rnd_count = 0;
+       h->wnd_end = UINT64_MAX;
+}
+
+/* Track the minimum RTT. */
+static inline void quic_cc_hystart_track_min_rtt(struct quic_cc *cc,
+                                                 struct quic_hystart *h,
+                                                 unsigned int latest_rtt)
+{
+       if (h->wnd_end == UINT64_MAX)
+               return;
+
+       h->curr_rnd_min_rtt = QUIC_MIN(h->curr_rnd_min_rtt, latest_rtt);
+       h->rtt_sample_count++;
+}
+
+/* RFC 9406 4.2. Algorithm Details
+ * At the start of each round during standard slow start [RFC5681] and CSS,
+ * initialize the variables used to compute the last round's and current round's
+ * minimum RTT.
+ *
+ * Never fail.
+ */
+static inline void quic_cc_hystart_start_round(struct quic_hystart *h, uint64_t pn)
+{
+       if (h->wnd_end != UINT64_MAX) {
+               /* Round already started */
+               return;
+       }
+
+       h->wnd_end = pn;
+       h->last_rnd_min_rtt = h->curr_rnd_min_rtt;
+       h->rtt_sample_count = 0;
+}
+
+/* RFC 9406 4.2. Algorithm Details
+ * For rounds where at least N_RTT_SAMPLE RTT samples have been obtained and
+ * currentRoundMinRTT and lastRoundMinRTT are valid, check to see if delay
+ * increase triggers slow start exit.
+ *
+ * Depending on <h> HyStart++ algorithm state, returns 1 if the underlying
+ * congestion control algorithm may enter the Conservative Slow Start (CSS)
+ * state, 0 if not.
+ */
+static inline int quic_cc_hystart_may_enter_cs(struct quic_hystart *h)
+{
+       uint32_t rtt_thresh;
+
+       if (h->rtt_sample_count < HYSTART_N_RTT_SAMPLE ||
+           h->curr_rnd_min_rtt == UINT32_MAX || h->last_rnd_min_rtt == UINT32_MAX)
+               return 0;
+
+       rtt_thresh = QUIC_CLAMP(HYSTART_MIN_RTT_THRESH,
+                               h->last_rnd_min_rtt / HYSTART_MIN_RTT_DIVISOR,
+                               HYSTART_MAX_RTT_THRESH);
+       if (h->curr_rnd_min_rtt + rtt_thresh >= h->last_rnd_min_rtt) {
+               h->css_baseline_min_rtt = h->curr_rnd_min_rtt;
+               h->rtt_sample_count = 0;
+               return 1;
+       }
+
+       return 0;
+}
+
+
+/* RFC 9406 4.2. Algorithm Details
+ * For CSS rounds where at least N_RTT_SAMPLE RTT samples have been obtained,
+ * check to see if the current round's minRTT drops below baseline (cssBaselineMinRtt)
+ * indicating that slow start exit was spurious.
+ *
+ * Return 1 if slow start exit was spurious, 0 if not. If the slow start
+ * exit was spurious, the caller must update the underlying congestion control
+ * algorithm to make it re-enter slow start state.
+ */
+static inline int quic_cc_hystart_may_reenter_ss(struct quic_hystart *h)
+{
+       if (h->rtt_sample_count < HYSTART_N_RTT_SAMPLE)
+               return 0;
+
+       h->css_rnd_count++;
+       h->rtt_sample_count = 0;
+
+       if (h->curr_rnd_min_rtt >= h->css_baseline_min_rtt) {
+               return 0;
+       }
+
+       h->css_baseline_min_rtt = UINT32_MAX;
+       return 1;
+}
index 3b38efa72081012cd3c261502647791de088ee91..b2ab934d7387059d6f56301082dfae829d0944b6 100644 (file)
@@ -257,35 +257,55 @@ static int cfg_parse_quic_tune_setting(char **args, int section_type,
        return 0;
 }
 
-/* config parser for global "tune.quic.zero-copy-fwd-send" */
-static int cfg_parse_quic_zero_copy_fwd_snd(char **args, int section_type, struct proxy *curpx,
-                                           const struct proxy *defpx, const char *file, int line,
-                                           char **err)
+/* config parser for global "tune.quic.* {on|off}" */
+static int cfg_parse_quic_tune_on_off(char **args, int section_type, struct proxy *curpx,
+                                      const struct proxy *defpx, const char *file, int line,
+                                      char **err)
 {
+       int on;
+       int prefix_len = strlen("tune.quic.");
+       const char *suffix;
+
        if (too_many_args(1, args, err, NULL))
                return -1;
 
        if (strcmp(args[1], "on") == 0)
-               global.tune.no_zero_copy_fwd &= ~NO_ZERO_COPY_FWD_QUIC_SND;
+               on = 1;
        else if (strcmp(args[1], "off") == 0)
-               global.tune.no_zero_copy_fwd |= NO_ZERO_COPY_FWD_QUIC_SND;
+               on = 0;
        else {
                memprintf(err, "'%s' expects 'on' or 'off'.", args[0]);
                return -1;
        }
+
+       suffix = args[0] + prefix_len;
+       if (strcmp(suffix, "zero-copy-fwd-send") == 0 ) {
+               if (on)
+                       global.tune.no_zero_copy_fwd &= ~NO_ZERO_COPY_FWD_QUIC_SND;
+               else
+                       global.tune.no_zero_copy_fwd |= NO_ZERO_COPY_FWD_QUIC_SND;
+       }
+       else if (strcmp(suffix, "cc-hystart") == 0) {
+               if (on)
+                       global.tune.options |= GTUNE_QUIC_CC_HYSTART;
+               else
+                       global.tune.options &= ~GTUNE_QUIC_CC_HYSTART;
+       }
+
        return 0;
 }
 
 static struct cfg_kw_list cfg_kws = {ILH, {
        { CFG_GLOBAL, "tune.quic.socket-owner", cfg_parse_quic_tune_socket_owner },
        { CFG_GLOBAL, "tune.quic.backend.max-idle-timeou", cfg_parse_quic_time },
+       { CFG_GLOBAL, "tune.quic.cc-hystart", cfg_parse_quic_tune_on_off },
        { CFG_GLOBAL, "tune.quic.frontend.conn-tx-buffers.limit", cfg_parse_quic_tune_setting },
        { CFG_GLOBAL, "tune.quic.frontend.max-streams-bidi", cfg_parse_quic_tune_setting },
        { CFG_GLOBAL, "tune.quic.frontend.max-idle-timeout", cfg_parse_quic_time },
        { CFG_GLOBAL, "tune.quic.max-frame-loss", cfg_parse_quic_tune_setting },
        { CFG_GLOBAL, "tune.quic.reorder-ratio", cfg_parse_quic_tune_setting },
        { CFG_GLOBAL, "tune.quic.retry-threshold", cfg_parse_quic_tune_setting },
-       { CFG_GLOBAL, "tune.quic.zero-copy-fwd-send", cfg_parse_quic_zero_copy_fwd_snd },
+       { CFG_GLOBAL, "tune.quic.zero-copy-fwd-send", cfg_parse_quic_tune_on_off },
        { 0, NULL, NULL }
 }};
 
index d121bdb117b64089527c65e7e87a7e3f3926ab0a..cc10a01c85ea37b5f8f196f8823e822e60892813 100644 (file)
@@ -1,4 +1,6 @@
+#include <haproxy/global-t.h>
 #include <haproxy/quic_cc.h>
+#include <haproxy/quic_cc_hystart.h>
 #include <haproxy/quic_trace.h>
 #include <haproxy/ticks.h>
 #include <haproxy/trace.h>
@@ -79,6 +81,8 @@ struct cubic {
         * in recovery period) (in ms).
         */
        uint32_t recovery_start_time;
+       /* HyStart++ state. */
+       struct quic_hystart hystart;
 };
 
 static void quic_cc_cubic_reset(struct quic_cc *cc)
@@ -96,6 +100,8 @@ static void quic_cc_cubic_reset(struct quic_cc *cc)
        c->last_w_max = 0;
        c->W_est = 0;
        c->recovery_start_time = 0;
+       if (global.tune.options & GTUNE_QUIC_CC_HYSTART)
+               quic_cc_hystart_reset(&c->hystart);
        TRACE_LEAVE(QUIC_EV_CONN_CC, cc->qc);
 }
 
@@ -424,7 +430,25 @@ static void quic_cc_cubic_ss_cb(struct quic_cc *cc, struct quic_cc_event *ev)
        TRACE_PROTO("CC cubic", QUIC_EV_CONN_CC, cc->qc, ev);
        switch (ev->type) {
        case QUIC_CC_EVT_ACK:
-               if (path->cwnd < QUIC_CC_INFINITE_SSTHESH - ev->ack.acked) {
+               if (global.tune.options & GTUNE_QUIC_CC_HYSTART) {
+                       struct quic_hystart *h = &c->hystart;
+                       unsigned int acked = QUIC_MIN(ev->ack.acked, HYSTART_LIMIT * path->mtu);
+
+                       if (path->cwnd >= QUIC_CC_INFINITE_SSTHESH - acked)
+                               goto out;
+
+                       path->cwnd += acked;
+                       path->mcwnd = QUIC_MAX(path->cwnd, path->mcwnd);
+                       quic_cc_hystart_track_min_rtt(cc, h, path->loss.latest_rtt);
+                       if (ev->ack.pn >= h->wnd_end)
+                               h->wnd_end = UINT64_MAX;
+                       if (quic_cc_hystart_may_enter_cs(&c->hystart)) {
+                               /* Exit slow start and enter conservative slow start */
+                               c->state = QUIC_CC_ST_CS;
+                               goto out;
+                       }
+               }
+               else if (path->cwnd < QUIC_CC_INFINITE_SSTHESH - ev->ack.acked) {
                        path->cwnd += ev->ack.acked;
                        path->cwnd = QUIC_MIN(path->max_cwnd, path->cwnd);
                }
@@ -470,6 +494,69 @@ static void quic_cc_cubic_ca_cb(struct quic_cc *cc, struct quic_cc_event *ev)
        TRACE_LEAVE(QUIC_EV_CONN_CC, cc->qc);
 }
 
+/* Conservative slow start callback. */
+static void quic_cc_cubic_cs_cb(struct quic_cc *cc, struct quic_cc_event *ev)
+{
+       struct quic_cc_path *path = container_of(cc, struct quic_cc_path, cc);
+
+       TRACE_ENTER(QUIC_EV_CONN_CC, cc->qc);
+       TRACE_PROTO("CC cubic", QUIC_EV_CONN_CC, cc->qc, ev);
+
+       switch (ev->type) {
+       case QUIC_CC_EVT_ACK:
+       {
+               struct cubic *c = quic_cc_priv(cc);
+               struct quic_hystart *h = &c->hystart;
+               unsigned int acked =
+                       QUIC_MIN(ev->ack.acked, HYSTART_LIMIT * path->mtu) / HYSTART_CSS_GROWTH_DIVISOR;
+
+               if (path->cwnd >= QUIC_CC_INFINITE_SSTHESH - acked)
+                       goto out;
+
+               path->cwnd += acked;
+               path->mcwnd = QUIC_MAX(path->cwnd, path->mcwnd);
+               quic_cc_hystart_track_min_rtt(cc, h, path->loss.latest_rtt);
+               if (quic_cc_hystart_may_reenter_ss(h)) {
+                       /* Exit to slow start */
+                       c->state = QUIC_CC_ST_SS;
+                       goto out;
+               }
+
+               if (h->css_rnd_count >= HYSTART_CSS_ROUNDS) {
+                       /* Exit to congestion avoidance
+                        *
+                        * RFC 9438 4.10. Slow start
+                        *
+                        * When CUBIC uses HyStart++ [RFC9406], it may exit the first slow start
+                        * without incurring any packet loss and thus _W_max_ is undefined. In
+                        * this special case, CUBIC sets _cwnd_prior = cwnd_ and switches to
+                        * congestion avoidance. It then increases its congestion window size
+                        * using Figure 1, where _t_ is the elapsed time since the beginning of
+                        * the current congestion avoidance stage, _K_ is set to 0, and _W_max_
+                        * is set to the congestion window size at the beginning of the current
+                        * congestion avoidance stage.
+                        */
+                       c->last_w_max = path->cwnd;
+                       c->t_epoch = 0;
+                       c->state = QUIC_CC_ST_CA;
+               }
+
+               break;
+       }
+
+       case QUIC_CC_EVT_LOSS:
+               quic_enter_recovery(cc);
+               break;
+       case QUIC_CC_EVT_ECN_CE:
+               /* TODO */
+               break;
+       }
+
+ out:
+       TRACE_PROTO("CC cubic", QUIC_EV_CONN_CC, cc->qc, NULL, cc);
+       TRACE_LEAVE(QUIC_EV_CONN_CC, cc->qc);
+}
+
 /* Recovery period callback */
 static void quic_cc_cubic_rp_cb(struct quic_cc *cc, struct quic_cc_event *ev)
 {
@@ -507,6 +594,7 @@ static void quic_cc_cubic_rp_cb(struct quic_cc *cc, struct quic_cc_event *ev)
 static void (*quic_cc_cubic_state_cbs[])(struct quic_cc *cc,
                                       struct quic_cc_event *ev) = {
        [QUIC_CC_ST_SS] = quic_cc_cubic_ss_cb,
+       [QUIC_CC_ST_CS] = quic_cc_cubic_cs_cb,
        [QUIC_CC_ST_CA] = quic_cc_cubic_ca_cb,
        [QUIC_CC_ST_RP] = quic_cc_cubic_rp_cb,
 };
@@ -518,6 +606,17 @@ static void quic_cc_cubic_event(struct quic_cc *cc, struct quic_cc_event *ev)
        return quic_cc_cubic_state_cbs[c->state](cc, ev);
 }
 
+static void quic_cc_cubic_hystart_start_round(struct quic_cc *cc, uint64_t pn)
+{
+       struct cubic *c = quic_cc_priv(cc);
+       struct quic_hystart *h = &c->hystart;
+
+       if (c->state != QUIC_CC_ST_SS && c->state != QUIC_CC_ST_CS)
+               return;
+
+       quic_cc_hystart_start_round(h, pn);
+}
+
 static void quic_cc_cubic_state_trace(struct buffer *buf, const struct quic_cc *cc)
 {
        struct quic_cc_path *path;
@@ -538,5 +637,6 @@ struct quic_cc_algo quic_cc_algo_cubic = {
        .init        = quic_cc_cubic_init,
        .event       = quic_cc_cubic_event,
        .slow_start  = quic_cc_cubic_slow_start,
+       .hystart_start_round = quic_cc_cubic_hystart_start_round,
        .state_trace = quic_cc_cubic_state_trace,
 };
index 405b0babccbe524e26b7c3752230e60105d35325..ca298776c43d93ba3cab33fa32e83a73f0decc21 100644 (file)
@@ -196,6 +196,10 @@ static void quic_cc_nr_state_trace(struct buffer *buf, const struct quic_cc *cc)
                      (unsigned long long)path->loss.nb_lost_pkt);
 }
 
+static void quic_cc_nr_hystart_start_round(struct quic_cc *cc, uint64_t pn)
+{
+}
+
 static void (*quic_cc_nr_state_cbs[])(struct quic_cc *cc,
                                       struct quic_cc_event *ev) = {
        [QUIC_CC_ST_SS] = quic_cc_nr_ss_cb,
@@ -215,6 +219,7 @@ struct quic_cc_algo quic_cc_algo_nr = {
        .init        = quic_cc_nr_init,
        .event       = quic_cc_nr_event,
        .slow_start  = quic_cc_nr_slow_start,
+       .hystart_start_round = quic_cc_nr_hystart_start_round,
        .state_trace = quic_cc_nr_state_trace,
 };
 
index c6a23f3ce6e846f0257420351dd4b6cc9ba85c1a..8612c3f00707974658e35b90a151b4b9e31a2993 100644 (file)
@@ -506,6 +506,7 @@ static void qc_notify_cc_of_newly_acked_pkts(struct quic_conn *qc,
                        qc_treat_ack_of_ack(qc, &pkt->pktns->rx.arngs, pkt->largest_acked_pn);
                ev.ack.acked = pkt->in_flight_len;
                ev.ack.time_sent = pkt->time_sent;
+               ev.ack.pn = pkt->pn_node.key;
                quic_cc_event(&qc->path->cc, &ev);
                LIST_DEL_INIT(&pkt->list);
                quic_tx_packet_refdec(pkt);
index 306b4c268c7ab8f598e87305066da87a35ad0845..f9f021cfceeac2abc10bdcb58bb3652c3d530573 100644 (file)
@@ -427,6 +427,7 @@ int qc_send_ppkts(struct buffer *buf, struct ssl_sock_ctx *ctx)
                time_sent = now_ms;
 
                for (pkt = first_pkt; pkt; pkt = next_pkt) {
+                       struct quic_cc *cc = &qc->path->cc;
                        /* RFC 9000 14.1 Initial datagram size
                         * a server MUST expand the payload of all UDP datagrams carrying ack-eliciting
                         * Initial packets to at least the smallest allowed maximum datagram size of
@@ -466,6 +467,8 @@ int qc_send_ppkts(struct buffer *buf, struct ssl_sock_ctx *ctx)
                        }
                        qc->path->in_flight += pkt->in_flight_len;
                        pkt->pktns->tx.in_flight += pkt->in_flight_len;
+                       if ((global.tune.options & GTUNE_QUIC_CC_HYSTART) && pkt->pktns == qc->apktns)
+                               cc->algo->hystart_start_round(cc, pkt->pn_node.key);
                        if (pkt->in_flight_len)
                                qc_set_timer(qc);
                        TRACE_PROTO("TX pkt", QUIC_EV_CONN_SPPKTS, qc, pkt);