]> git.ipfire.org Git - thirdparty/haproxy.git/commitdiff
BUG/MINOR: improve BBR throughput on very fast links
authorFrederic Lecaille <flecaille@haproxy.com>
Wed, 27 Nov 2024 18:39:34 +0000 (19:39 +0100)
committerFrederic Lecaille <flecaille@haproxy.com>
Thu, 28 Nov 2024 20:39:05 +0000 (21:39 +0100)
This patch fixes the loss of information when computing the delivery rate
(quic_cc_drs.c) on links with very low latency due to usage of 32bits
variables with the millisecond as precision.

Initialize the quic_conn task with TASK_F_WANTS_TIME flag ask it to ask
the scheduler to update the call date of this task. This allows this task to get
a nanosecond resolution on the call date calling task_mono_time(). This is enabled
only for congestion control algorithms with delivery rate estimation support
(BBR only at this time).

Store the send date with nanosecond precision of each TX packet into
->time_sent_ns new quic_tx_packet struct member to store the date a packet was
sent in nanoseconds thanks to task_mono_time().

Make use of this new timestamp by the delivery rate estimation algorithm (quic_cc_drs.c).

Rename current ->time_sent member from quic_tx_packet struct to ->time_sent_ms to
distinguish the unit used by this variable (millisecond) and update the code which
uses this variable. The logic found in quic_loss.c is not modified at all.

Must be backported to 3.1.

include/haproxy/quic_cc-t.h
include/haproxy/quic_cc_drs.h
include/haproxy/quic_tx-t.h
src/quic_cc_drs.c
src/quic_conn.c
src/quic_loss.c
src/quic_rx.c
src/quic_tx.c

index c49707533cda2111bcca3bf8eb5990a179f21191..a0dee3918429dbca1659636a6f9591873e83f064 100644 (file)
@@ -91,7 +91,7 @@ struct quic_cc {
        /* <conn> is there only for debugging purpose. */
        struct quic_conn *qc;
        struct quic_cc_algo *algo;
-       uint32_t priv[158];
+       uint32_t priv[160];
 };
 
 struct quic_cc_path {
index fd44078bb073770ef6b021b41a771964760c745b..a910306d5632f8ca20cee114de03d235dfce0b27 100644 (file)
@@ -10,10 +10,10 @@ struct quic_cc_rs {
        uint64_t lost;
        uint64_t prior_lost;
        int64_t last_end_seq;
-       uint32_t interval;
-       uint32_t prior_time;
-       uint32_t send_elapsed;
-       uint32_t ack_elapsed;
+       uint64_t prior_time_ns;
+       uint32_t interval_us;
+       uint32_t send_elapsed_us;
+       uint32_t ack_elapsed_us;
        uint32_t is_app_limited;
 };
 
@@ -26,8 +26,8 @@ struct quic_cc_drs {
        uint64_t delivered;
        uint64_t lost;
        int64_t last_seq;
-       uint32_t delivered_time;
-       uint32_t first_sent_time;
+       uint64_t delivered_time_ns;
+       uint64_t first_sent_time_ns;
        int is_cwnd_limited; /* boolean */
        int app_limited; /* boolean */
 };
@@ -36,6 +36,6 @@ void quic_cc_drs_init(struct quic_cc_drs *drs);
 void quic_cc_drs_on_pkt_sent(struct quic_cc_path *path,
                              struct quic_tx_packet *pkt, struct quic_cc_drs *drs);
 void quic_cc_drs_update_rate_sample(struct quic_cc_drs *drs,
-                                    struct quic_tx_packet *pkt);
+                                    struct quic_tx_packet *pkt, uint64_t time_ns);
 void quic_cc_drs_on_ack_recv(struct quic_cc_drs *drs, struct quic_cc_path *path,
                              uint64_t pkt_delivered);
index cf2d98c587705f0a7f25a5db74955d592d867c74..19ad777edce15ec91479081e8ac093eb51838b77 100644 (file)
@@ -51,7 +51,8 @@ struct quic_tx_packet {
        /* The list of frames of this packet. */
        struct list frms;
        /* The time this packet was sent (ms). */
-       unsigned int time_sent;
+       unsigned int time_sent_ms;
+       uint64_t time_sent_ns;
        /* Packet number spakce. */
        struct quic_pktns *pktns;
        /* Flags. */
@@ -70,8 +71,8 @@ struct quic_tx_packet {
                uint64_t tx_in_flight;
                uint64_t lost;
                int64_t end_seq;
-               uint32_t delivered_time;
-               uint32_t first_sent_time;
+               uint64_t delivered_time_ns;
+               uint64_t first_sent_time_ns;
                int is_app_limited;
        } rs;
        unsigned char type;
index 8187de50a7a7a6334bce49b956c7e7cd429ec762..8e9d3112f336cb85a3c46a92c113548c9c5e9434 100644 (file)
@@ -4,20 +4,21 @@
 #include <haproxy/quic_cc-t.h>
 #include <haproxy/quic_cc_drs.h>
 #include <haproxy/quic_tx-t.h>
+#include <haproxy/task.h>
 #include <haproxy/ticks.h>
 #include <haproxy/window_filter.h>
 
 static void quic_cc_rs_init(struct quic_cc_rs *rs)
 {
-       rs->interval = UINT32_MAX;
+       rs->interval_us = UINT32_MAX;
        rs->delivered = 0;
        rs->prior_delivered = 0;
-       rs->prior_time = TICK_ETERNITY;
+       rs->prior_time_ns = 0;
        rs->tx_in_flight = 0;
        rs->lost = 0;
        rs->prior_lost = 0;
-       rs->send_elapsed = 0;
-       rs->ack_elapsed = 0;
+       rs->send_elapsed_us = 0;
+       rs->ack_elapsed_us = 0;
        rs->last_end_seq = -1;
        rs->is_app_limited = 0;
 }
@@ -31,8 +32,8 @@ void quic_cc_drs_init(struct quic_cc_drs *drs)
        drs->delivered = 0;
        drs->lost = 0;
        drs->last_seq = -1;
-       drs->delivered_time = TICK_ETERNITY;
-       drs->first_sent_time = TICK_ETERNITY;
+       drs->delivered_time_ns = 0;
+       drs->first_sent_time_ns = 0;
        drs->app_limited = 0;
        drs->is_cwnd_limited = 0;
 }
@@ -44,12 +45,12 @@ void quic_cc_drs_on_pkt_sent(struct quic_cc_path *path,
                              struct quic_tx_packet *pkt, struct quic_cc_drs *drs)
 {
        if (!path->in_flight)
-               drs->first_sent_time  = drs->delivered_time = pkt->time_sent;
+               drs->first_sent_time_ns = drs->delivered_time_ns = pkt->time_sent_ns;
 
-       pkt->rs.first_sent_time = drs->first_sent_time;
-       pkt->rs.delivered_time  = drs->delivered_time;
-       pkt->rs.delivered       = drs->delivered;
-       pkt->rs.is_app_limited  = drs->app_limited != 0;
+       pkt->rs.first_sent_time_ns = drs->first_sent_time_ns;
+       pkt->rs.delivered_time_ns  = drs->delivered_time_ns;
+       pkt->rs.delivered          = drs->delivered;
+       pkt->rs.is_app_limited     = drs->app_limited != 0;
 
        pkt->rs.tx_in_flight = path->in_flight + pkt->len;
        pkt->rs.lost = drs->lost;
@@ -62,8 +63,8 @@ void quic_cc_drs_on_pkt_sent(struct quic_cc_path *path,
 static inline int quic_cc_drs_is_newest_packet(struct quic_cc_drs *drs,
                                                struct quic_tx_packet *pkt)
 {
-       return tick_is_lt(drs->first_sent_time, pkt->time_sent) ||
-               (pkt->time_sent == drs->first_sent_time &&
+       return drs->first_sent_time_ns < pkt->time_sent_ns ||
+               (pkt->time_sent_ns == drs->first_sent_time_ns &&
                 pkt->rs.end_seq > drs->rs.last_end_seq);
 }
 
@@ -83,32 +84,32 @@ static inline int quic_cc_drs_is_newest_packet(struct quic_cc_drs *drs,
  * RFC UpdateRateSample() called from first part of GenerateRateSample().
  */
 void quic_cc_drs_update_rate_sample(struct quic_cc_drs *drs,
-                                    struct quic_tx_packet *pkt)
+                                    struct quic_tx_packet *pkt, uint64_t time_ns)
 {
        struct quic_cc_rs *rs = &drs->rs;
 
-       if (!tick_isset(pkt->rs.delivered_time))
+       if (!pkt->rs.delivered_time_ns)
                return;
 
        drs->delivered += pkt->len;
-       drs->delivered_time = now_ms;
+       drs->delivered_time_ns = time_ns;
        /* Update info using the newest packet. */
-       if (tick_isset(rs->prior_time) && !quic_cc_drs_is_newest_packet(drs, pkt))
+       if (rs->prior_time_ns && !quic_cc_drs_is_newest_packet(drs, pkt))
                return;
 
        rs->prior_delivered  = pkt->rs.delivered;
-       rs->prior_time       = pkt->rs.delivered_time;
+       rs->prior_time_ns    = pkt->rs.delivered_time_ns;
        rs->is_app_limited   = pkt->rs.is_app_limited;
-       rs->send_elapsed     = pkt->time_sent - pkt->rs.first_sent_time;
-       rs->ack_elapsed      = drs->delivered_time - pkt->rs.delivered_time;
+       rs->send_elapsed_us  = (pkt->time_sent_ns - pkt->rs.first_sent_time_ns) / 1000;
+       rs->ack_elapsed_us   = (drs->delivered_time_ns - pkt->rs.delivered_time_ns) / 1000;
        rs->tx_in_flight     = pkt->rs.tx_in_flight;
        rs->prior_lost       = pkt->rs.lost;
        rs->last_end_seq     = pkt->rs.end_seq;
-       drs->first_sent_time = pkt->time_sent;
+       drs->first_sent_time_ns = pkt->time_sent_ns;
        /* Mark the packet as delivered once it's SACKed to
         * avoid being used again when it's cumulatively acked.
         */
-       pkt->rs.delivered_time = TICK_ETERNITY;
+       pkt->rs.delivered_time_ns = 0;
 }
 
 /* RFC https://datatracker.ietf.org/doc/draft-ietf-ccwg-bbr/
@@ -131,25 +132,26 @@ void quic_cc_drs_on_ack_recv(struct quic_cc_drs *drs, struct quic_cc_path *path,
                ++drs->round_count;
        }
 
-       if (!tick_isset(rs->prior_time))
+       if (!rs->prior_time_ns)
                return;
 
-       rs->interval = MAX(rs->send_elapsed, rs->ack_elapsed);
+       rs->interval_us = MAX(rs->send_elapsed_us, rs->ack_elapsed_us);
 
        BUG_ON(drs->delivered <= rs->prior_delivered);
        rs->delivered = drs->delivered - rs->prior_delivered;
        BUG_ON(drs->lost < rs->prior_lost);
        rs->lost = drs->lost - rs->prior_lost;
 
-       if (rs->interval < path->loss.rtt_min) {
-               rs->interval = UINT32_MAX;
+       if (rs->interval_us < path->loss.rtt_min * 1000) {
+               rs->interval_us = UINT32_MAX;
                return;
        }
 
-       if (!rs->interval)
+       if (!rs->interval_us)
                return;
 
-       rate = rs->delivered * 1000 / rs->interval;
+       /* <rate> is in bytes/s. */
+       rate = rs->delivered * 1000000 / rs->interval_us;
        if (rate >= wf_get_max(&drs->wf) || !drs->app_limited)
                path->delivery_rate = wf_max_update(&drs->wf, rate, drs->round_count);
 }
index a0ff0c331a22909ae2268852eddd0a7a2a58f704..901a57a423abe264f8a3206db4c1a8aa7f8e6b4d 100644 (file)
@@ -1247,6 +1247,11 @@ struct quic_conn *qc_new_conn(const struct quic_version *qv, int ipv4,
        }
        qc->wait_event.tasklet->process = quic_conn_io_cb;
        qc->wait_event.tasklet->context = qc;
+       /* Enable TASK_F_WANTS_TIME task flag for congestion control algorithms with
+        * delivery rate estimation only.
+        */
+       if (qc->path->cc.algo->get_drs)
+               qc->wait_event.tasklet->state |= TASK_F_WANTS_TIME;
        qc->wait_event.events = 0;
        qc->subs = NULL;
 
index 592b613c750bffc5bfc51c83c4d7e7f52def2e99..affa6f2064097c9c1cd2075d1642be4e68ad1e9e 100644 (file)
@@ -206,7 +206,7 @@ void qc_packet_loss_lookup(struct quic_pktns *pktns, struct quic_conn *qc,
                if ((int64_t)pkt->pn_node.key > largest_acked_pn)
                        break;
 
-               time_sent = pkt->time_sent;
+               time_sent = pkt->time_sent_ms;
                loss_time_limit = tick_add(time_sent, loss_delay);
 
                reordered = (int64_t)largest_acked_pn >= pkt->pn_node.key + pktthresh;
@@ -290,12 +290,12 @@ int qc_release_lost_pkts(struct quic_conn *qc, struct quic_pktns *pktns,
                        struct quic_cc_event ev = { };
 
                        ev.type = QUIC_CC_EVT_LOSS;
-                       ev.loss.time_sent = newest_lost->time_sent;
+                       ev.loss.time_sent = newest_lost->time_sent_ms;
                        ev.loss.count = tot_lost;
 
                        quic_cc_event(cc, &ev);
                        if (cc->algo->congestion_event)
-                           cc->algo->congestion_event(cc, newest_lost->time_sent);
+                           cc->algo->congestion_event(cc, newest_lost->time_sent_ms);
                }
 
                /* If an RTT have been already sampled, <rtt_min> has been set.
@@ -304,7 +304,7 @@ int qc_release_lost_pkts(struct quic_conn *qc, struct quic_pktns *pktns,
                 * slow start state.
                 */
                if (qc->path->loss.rtt_min && newest_lost != oldest_lost) {
-                       unsigned int period = newest_lost->time_sent - oldest_lost->time_sent;
+                       unsigned int period = newest_lost->time_sent_ms - oldest_lost->time_sent_ms;
 
                        if (quic_loss_persistent_congestion(&qc->path->loss, period,
                                                            now_ms, qc->max_ack_delay) &&
index 9d138555d6572bf6509c08b09e4aaa21b818f6f2..b675e720b8574dfa3e8055065910b928902cb067 100644 (file)
@@ -432,9 +432,13 @@ static void qc_notify_cc_of_newly_acked_pkts(struct quic_conn *qc,
        struct quic_cc_drs *drs =
                p->cc.algo->get_drs ? p->cc.algo->get_drs(&p->cc) : NULL;
        unsigned int bytes_delivered = 0, pkt_delivered = 0;
+       uint64_t time_ns;
 
        TRACE_ENTER(QUIC_EV_CONN_PRSAFRM, qc);
 
+       if (drs)
+               time_ns = task_mono_time();
+
        list_for_each_entry_safe(pkt, tmp, newly_acked_pkts, list) {
                pkt->pktns->tx.in_flight -= pkt->in_flight_len;
                p->prep_in_flight -= pkt->in_flight_len;
@@ -450,12 +454,12 @@ static void qc_notify_cc_of_newly_acked_pkts(struct quic_conn *qc,
                bytes_delivered += pkt->len;
                pkt_delivered = pkt->rs.delivered;
                ev.ack.acked = pkt->in_flight_len;
-               ev.ack.time_sent = pkt->time_sent;
+               ev.ack.time_sent = pkt->time_sent_ms;
                ev.ack.pn = pkt->pn_node.key;
                /* Note that this event is not emitted for BBR. */
                quic_cc_event(&p->cc, &ev);
                if (drs && (pkt->flags & QUIC_FL_TX_PACKET_ACK_ELICITING))
-                       quic_cc_drs_update_rate_sample(drs, pkt);
+                       quic_cc_drs_update_rate_sample(drs, pkt, time_ns);
                LIST_DEL_INIT(&pkt->list);
                quic_tx_packet_refdec(pkt);
        }
@@ -523,7 +527,7 @@ static int qc_parse_ack_frm(struct quic_conn *qc,
                }
                else {
                        time_sent = eb64_entry(largest_node,
-                                              struct quic_tx_packet, pn_node)->time_sent;
+                                              struct quic_tx_packet, pn_node)->time_sent_ms;
                        new_largest_acked_pn = 1;
                }
        }
index 14cf2bb43508a118f7a3ac4a173dfef5d094e1bd..19bcad018a4fe90c4abf755814d9971e5ebfc4e8 100644 (file)
@@ -295,7 +295,8 @@ static int qc_send_ppkts(struct buffer *buf, struct ssl_sock_ctx *ctx)
                struct buffer tmpbuf = { };
                struct quic_tx_packet *first_pkt, *pkt, *next_pkt;
                uint16_t dglen, gso = 0, gso_fallback = 0;
-               unsigned int time_sent;
+               uint64_t time_sent_ns;
+               unsigned int time_sent_ms;
 
                pos = (unsigned char *)b_head(buf);
                dglen = read_u16(pos);
@@ -360,7 +361,8 @@ static int qc_send_ppkts(struct buffer *buf, struct ssl_sock_ctx *ctx)
 
                b_del(buf, dglen + QUIC_DGRAM_HEADLEN);
                qc->bytes.tx += tmpbuf.data;
-               time_sent = now_ms;
+               time_sent_ms = now_ms;
+               time_sent_ns = task_mono_time();
 
                for (pkt = first_pkt; pkt; pkt = next_pkt) {
                        struct quic_cc *cc = &qc->path->cc;
@@ -391,9 +393,10 @@ static int qc_send_ppkts(struct buffer *buf, struct ssl_sock_ctx *ctx)
 
                        qc->cntrs.sent_pkt++;
 
-                       pkt->time_sent = time_sent;
+                       pkt->time_sent_ns = time_sent_ns;
+                       pkt->time_sent_ms = time_sent_ms;
                        if (pkt->flags & QUIC_FL_TX_PACKET_ACK_ELICITING) {
-                               pkt->pktns->tx.time_of_last_eliciting = time_sent;
+                               pkt->pktns->tx.time_of_last_eliciting = time_sent_ms;
                                qc->path->ifae_pkts++;
                                if (qc->flags & QUIC_FL_CONN_IDLE_TIMER_RESTARTED_AFTER_READ)
                                        qc_idle_timer_rearm(qc, 0, 0);
@@ -2202,7 +2205,8 @@ static inline void quic_tx_packet_init(struct quic_tx_packet *pkt, int type)
        pkt->in_flight_len = 0;
        pkt->pn_node.key = (uint64_t)-1;
        LIST_INIT(&pkt->frms);
-       pkt->time_sent = TICK_ETERNITY;
+       pkt->time_sent_ms = TICK_ETERNITY;
+       pkt->time_sent_ns = 0;
        pkt->next = NULL;
        pkt->prev = NULL;
        pkt->largest_acked_pn = -1;