BUG/MINOR: quic: do not increase congestion window if app limited
author    Amaury Denoyelle <adenoyelle@haproxy.com>
          Thu, 9 Jan 2025 16:26:37 +0000 (17:26 +0100)
committer Amaury Denoyelle <adenoyelle@haproxy.com>
          Thu, 23 Jan 2025 13:49:35 +0000 (14:49 +0100)
Previously, the congestion window was increased each time a new
acknowledgement was received, without taking the window filling level
into account. Under network conditions with negligible loss, this
causes the window to be incremented up to the maximum value (480k by
default), even though the application does not have enough data to
fill it.

In most cases, this issue is not noticeable. However, it may lead to
excessive memory consumption when a QUIC connection is suddenly
interrupted, as in this case haproxy will fill the window with
retransmissions. It has even caused OOM crashes when thousands of
clients were interrupted at once in a local network benchmark.

Fix this by checking the window filling level prior to every increment,
via a new helper function quic_cwnd_may_increase(). It was arbitrarily
decided that the window must be at least 50% full when the ACK is
handled for the increment to be applied. This value is a good
compromise to keep the window in check while still allowing fast
growth when needed.
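
As an illustration with hypothetical numbers (a minimal standalone
sketch; the struct is a stand-in for the two quic_cc_path fields the
check reads, and only the condition mirrors the patch): a path with a
480k window and only 10k in flight fails the check, while the same
window with 300k in flight passes it.

    #include <stdio.h>

    /* Hypothetical stand-in for the quic_cc_path fields used by the check. */
    struct path_stub {
        unsigned long long in_flight; /* bytes currently in flight */
        unsigned long long cwnd;      /* congestion window, in bytes */
    };

    /* Same condition as quic_cwnd_may_increase() in the patch below. */
    static int cwnd_may_increase(const struct path_stub *p)
    {
        return 2 * p->in_flight >= p->cwnd || p->cwnd < 16384;
    }

    int main(void)
    {
        struct path_stub app_limited = { .in_flight = 10000,  .cwnd = 480000 };
        struct path_stub saturated   = { .in_flight = 300000, .cwnd = 480000 };
        struct path_stub small       = { .in_flight = 1000,   .cwnd = 14000  };

        printf("app_limited: %d\n", cwnd_may_increase(&app_limited)); /* 0: skip growth */
        printf("saturated:   %d\n", cwnd_may_increase(&saturated));   /* 1: >= 50% full */
        printf("small:       %d\n", cwnd_may_increase(&small));       /* 1: below 16k floor */
        return 0;
    }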

Note that this patch only concerns the cubic and newreno algorithms.
BBR already has its own notion of application limitation, which
ensures the window is only incremented when necessary.

This should be backported up to 2.6.

include/haproxy/quic_cc.h
src/quic_cc.c
src/quic_cc_cubic.c
src/quic_cc_newreno.c
src/quic_rx.c

diff --git a/include/haproxy/quic_cc.h b/include/haproxy/quic_cc.h
index edc22ee989d27b99b1b3415a08262348091177ec..b16996d0c60a4146d7bbfae700952fea7f6982d6 100644
@@ -117,5 +117,7 @@ static inline size_t quic_cc_path_prep_data(struct quic_cc_path *path)
        return path->cwnd - path->prep_in_flight;
 }
 
+int quic_cwnd_may_increase(const struct quic_cc_path *path);
+
 #endif /* USE_QUIC */
 #endif /* _PROTO_QUIC_CC_H */
diff --git a/src/quic_cc.c b/src/quic_cc.c
index 811a766dd98377c118fe37351e25394a85a0df01..603914cd455a87a57e1bd78a7f6f0e7ba3199b09 100644
@@ -65,3 +65,22 @@ uint quic_cc_default_pacing_burst(const struct quic_cc *cc)
        struct quic_cc_path *path = container_of(cc, struct quic_cc_path, cc);
        return path->pacing_burst;
 }
+
+/* Returns true if congestion window on path ought to be increased. */
+int quic_cwnd_may_increase(const struct quic_cc_path *path)
+{
+       /* RFC 9002 7.8. Underutilizing the Congestion Window
+        *
+        * When bytes in flight is smaller than the congestion window and
+        * sending is not pacing limited, the congestion window is
+        * underutilized. This can happen due to insufficient application data
+        * or flow control limits. When this occurs, the congestion window
+        * SHOULD NOT be increased in either slow start or congestion avoidance.
+        */
+
+       /* Consider that the congestion window can be increased if it is at
+        * least half full, or if the window is smaller than 16k. These
+        * conditions must not be too restrictive, to avoid slowing down
+        * window growth.
+        */
+       return 2 * path->in_flight >= path->cwnd || path->cwnd < 16384;
+}
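
Note that this check reads path->in_flight at the moment the ACK is
processed. This is why the quic_rx.c hunk at the bottom moves the
in_flight decrement after quic_cc_event(): the newly acknowledged
bytes must still be counted as in flight when the congestion callbacks
evaluate this condition (see the sketch after that hunk).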
diff --git a/src/quic_cc_cubic.c b/src/quic_cc_cubic.c
index a1768a151930d7eff7013753073ec61ba6dbcb5a..9783d8f857ba67b5110e3574565dd8dd7ebb8195 100644
@@ -382,9 +382,11 @@ static inline void quic_cubic_update(struct quic_cc *cc, uint32_t acked)
                        inc = W_est_inc;
        }
 
-       path->cwnd += inc;
-       path->cwnd = QUIC_MIN(path->max_cwnd, path->cwnd);
-       path->mcwnd = QUIC_MAX(path->cwnd, path->mcwnd);
+       if (quic_cwnd_may_increase(path)) {
+               path->cwnd += inc;
+               path->cwnd = QUIC_MIN(path->max_cwnd, path->cwnd);
+               path->mcwnd = QUIC_MAX(path->cwnd, path->mcwnd);
+       }
  leave:
        TRACE_LEAVE(QUIC_EV_CONN_CC, cc->qc);
 }
@@ -453,8 +455,10 @@ static void quic_cc_cubic_ss_cb(struct quic_cc *cc, struct quic_cc_event *ev)
                        if (path->cwnd >= QUIC_CC_INFINITE_SSTHESH - acked)
                                goto out;
 
-                       path->cwnd += acked;
-                       path->mcwnd = QUIC_MAX(path->cwnd, path->mcwnd);
+                       if (quic_cwnd_may_increase(path)) {
+                               path->cwnd += acked;
+                               path->mcwnd = QUIC_MAX(path->cwnd, path->mcwnd);
+                       }
                        quic_cc_hystart_track_min_rtt(cc, h, path->loss.latest_rtt);
                        if (ev->ack.pn >= h->wnd_end)
                                h->wnd_end = UINT64_MAX;
@@ -465,8 +469,10 @@ static void quic_cc_cubic_ss_cb(struct quic_cc *cc, struct quic_cc_event *ev)
                        }
                }
                else if (path->cwnd < QUIC_CC_INFINITE_SSTHESH - ev->ack.acked) {
-                       path->cwnd += ev->ack.acked;
-                       path->cwnd = QUIC_MIN(path->max_cwnd, path->cwnd);
+                       if (quic_cwnd_may_increase(path)) {
+                               path->cwnd += ev->ack.acked;
+                               path->cwnd = QUIC_MIN(path->max_cwnd, path->cwnd);
+                       }
                }
                /* Exit to congestion avoidance if slow start threshold is reached. */
                if (path->cwnd >= c->ssthresh)
@@ -544,8 +550,10 @@ static void quic_cc_cubic_cs_cb(struct quic_cc *cc, struct quic_cc_event *ev)
                if (path->cwnd >= QUIC_CC_INFINITE_SSTHESH - acked)
                        goto out;
 
-               path->cwnd += acked;
-               path->mcwnd = QUIC_MAX(path->cwnd, path->mcwnd);
+               if (quic_cwnd_may_increase(path)) {
+                       path->cwnd += acked;
+                       path->mcwnd = QUIC_MAX(path->cwnd, path->mcwnd);
+               }
                quic_cc_hystart_track_min_rtt(cc, h, path->loss.latest_rtt);
                if (quic_cc_hystart_may_reenter_ss(h)) {
                        /* Exit to slow start */
diff --git a/src/quic_cc_newreno.c b/src/quic_cc_newreno.c
index ec63bc3e8e680a9e4449514ebd532d5540564c97..989a7c1057e6574f77168fe1495bee643eab002c 100644
@@ -86,9 +86,11 @@ static void quic_cc_nr_ss_cb(struct quic_cc *cc, struct quic_cc_event *ev)
        path = container_of(cc, struct quic_cc_path, cc);
        switch (ev->type) {
        case QUIC_CC_EVT_ACK:
-               path->cwnd += ev->ack.acked;
-               path->cwnd = QUIC_MIN(path->max_cwnd, path->cwnd);
-               path->mcwnd = QUIC_MAX(path->cwnd, path->mcwnd);
+               if (quic_cwnd_may_increase(path)) {
+                       path->cwnd += ev->ack.acked;
+                       path->cwnd = QUIC_MIN(path->max_cwnd, path->cwnd);
+                       path->mcwnd = QUIC_MAX(path->cwnd, path->mcwnd);
+               }
                /* Exit to congestion avoidance if slow start threshold is reached. */
                if (path->cwnd > nr->ssthresh)
                        nr->state = QUIC_CC_ST_CA;
@@ -124,9 +126,11 @@ static void quic_cc_nr_ca_cb(struct quic_cc *cc, struct quic_cc_event *ev)
                 */
                acked = ev->ack.acked * path->mtu + nr->remain_acked;
                nr->remain_acked = acked % path->cwnd;
-               path->cwnd += acked / path->cwnd;
-               path->cwnd = QUIC_MIN(path->max_cwnd, path->cwnd);
-               path->mcwnd = QUIC_MAX(path->cwnd, path->mcwnd);
+               if (quic_cwnd_may_increase(path)) {
+                       path->cwnd += acked / path->cwnd;
+                       path->cwnd = QUIC_MIN(path->max_cwnd, path->cwnd);
+                       path->mcwnd = QUIC_MAX(path->cwnd, path->mcwnd);
+               }
                break;
        }
 
diff --git a/src/quic_rx.c b/src/quic_rx.c
index c3510975c72ad3ff4c127e71675cb4d8acca3ff6..27eebec0dbfd4f748db77f1b46b2e04e2565724a 100644
@@ -439,7 +439,6 @@ static void qc_notify_cc_of_newly_acked_pkts(struct quic_conn *qc,
        list_for_each_entry_safe(pkt, tmp, newly_acked_pkts, list) {
                pkt->pktns->tx.in_flight -= pkt->in_flight_len;
                p->prep_in_flight -= pkt->in_flight_len;
-               p->in_flight -= pkt->in_flight_len;
                if (pkt->flags & QUIC_FL_TX_PACKET_ACK_ELICITING)
                        p->ifae_pkts--;
                /* If this packet contained an ACK frame, proceed to the
@@ -455,6 +454,7 @@ static void qc_notify_cc_of_newly_acked_pkts(struct quic_conn *qc,
                ev.ack.pn = pkt->pn_node.key;
                /* Note that this event is not emitted for BBR. */
                quic_cc_event(&p->cc, &ev);
+               p->in_flight -= pkt->in_flight_len;
                if (drs && (pkt->flags & QUIC_FL_TX_PACKET_ACK_ELICITING))
                        quic_cc_drs_update_rate_sample(drs, pkt, time_ns);
                LIST_DEL_INIT(&pkt->list);
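
The reordering above is the subtle part of the fix: decrementing
in_flight before quic_cc_event() would make the path look emptier than
it actually was when the ACK arrived, and the new check could then
wrongly veto growth on a fully utilized path. A minimal sketch of the
two orderings (hypothetical stand-in struct, same condition as the
patch):

    #include <assert.h>

    /* Hypothetical stand-in for the relevant quic_cc_path fields. */
    struct path_stub {
        unsigned long long in_flight;
        unsigned long long cwnd;
    };

    static int cwnd_may_increase(const struct path_stub *p)
    {
        return 2 * p->in_flight >= p->cwnd || p->cwnd < 16384;
    }

    int main(void)
    {
        /* A fully utilized 64k window whose content is acked at once. */
        struct path_stub p = { .in_flight = 65536, .cwnd = 65536 };
        unsigned long long acked = 65536;

        /* New order: run the congestion event first, then release the bytes. */
        assert(cwnd_may_increase(&p)); /* window was full: growth allowed */
        p.in_flight -= acked;

        /* The old order (decrement first) would have evaluated the check
         * with in_flight == 0: 2 * 0 >= 65536 is false and 65536 < 16384
         * is false, wrongly blocking growth on a saturated path.
         */
        assert(!cwnd_may_increase(&p));
        return 0;
    }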