]> git.ipfire.org Git - thirdparty/haproxy.git/commitdiff
MEDIUM: mux-h2: do not needlessly refrain from sending data early
authorWilly Tarreau <w@1wt.eu>
Wed, 29 Oct 2025 16:38:07 +0000 (17:38 +0100)
committerWilly Tarreau <w@1wt.eu>
Thu, 30 Oct 2025 17:16:54 +0000 (18:16 +0100)
The mux currently refrains from sending data before H2_CS_FRAME_H, i.e.
before the peer's SETTINGS frame was received. While it makes sense on
the frontend, it's causing harm on the backend because it forces the
first request to be sent in two halves over an extra RTT: first the
preface and settings, second the request once the settings are received.
This is totally contrary to the philosophy of the H2 protocol, consisting
in permitting the client to send as soon as possible.

Actually what happens is the following:
  - process_stream() calls connect_server()
  - connect_server() creates a connection, and if the proto/alpn is guessed
    or known, the mux is instantiated for the current request.
  - the H2 init code wakes the h2 tasklet up and returns
  - process_stream() tries to send the request using h2_snd_buf(), but that
    one sees that we're before H2_CS_FRAME_H, refrains from doing so and
    returns.
  - process_stream() subscribes and quits
  - the h2 tasklet can now execute to send the preface and settings, which
    leave as a first TCP segment. The connection is ready.
  - the iocb is woken again once the server's SETTINGS frame is received,
    turning the connection to the H2_CS_FRAME_H state, and the iocb wake
    up process_stream().
  - process_stream() executes again and can try to send again.
  - h2_snd_buf() is called and finally sends the request as a second TCP
    segment.

Not only this is inefficient, but it also renders 0-RTT and TFO impossible
on H2 connections. When 0-RTT is used, only the preface and settings leave
as early data (the very first data of that connection), which is totally
pointless.

In order to fix this, we have to go through a few steps:
  - first we need to let data be sent to a server immediately after the
    SETTINGS frame was sent (i.e. in H2_CS_SETTINGS1 state instead of
    H2_CS_FRAME_H). However, some protocol extensions are advertised by
    the server using SETTINGS (e.g. RFC8441) and some requests might need
    to know the existence of such extensions. For this reason we're adding
    a new h2c flag, H2_CF_SETTINGS_NEEDED, which indicates that some
    operations were not done because a server's SETTINGS frame is needed.
    This is set when trying to send a protocol upgrade or extended CONNECT
    during H2_CS_SETTINGS1, indicating that it's needed to wait for
    H2_CS_FRAME_H in this case. The flag is always set on frontend
    connections. This is what is being done in this patch.

  - second, we need to be able to push the preface opportunistically with
    the first h2_snd_buf() so that it's not needed to wake the tasklet up
    just to send that and wake process_stream() again. This will be in a
    separate patch.

By doing the first step, we're at least saving one needless tasklet
wakeup per connection (~9%), which results in ~5% backend connection
rate increase.

include/haproxy/mux_h2-t.h
src/mux_h2.c

index a16085987d82a8a174c8ce430efc6b4deb18796d..fd8f5abd8bbe2d75ed17e24064e4ed7ecdbcefe0 100644 (file)
@@ -62,6 +62,7 @@
 #define H2_CF_RCVD_SHUT         0x00020000  // a recv() attempt already failed on a shutdown
 #define H2_CF_END_REACHED       0x00040000  // pending data too short with RCVD_SHUT present
 
+#define H2_CF_SETTINGS_NEEDED   0x00080000  // can't proceed without knowing settings (frontend or extensions)
 #define H2_CF_RCVD_RFC8441      0x00100000  // settings from RFC8441 has been received indicating support for Extended CONNECT
 #define H2_CF_SHTS_UPDATED      0x00200000  // SETTINGS_HEADER_TABLE_SIZE updated
 #define H2_CF_DTSU_EMITTED      0x00400000  // HPACK Dynamic Table Size Update opcode emitted
index 406a953bdfd6e07e8a0fa9ee422edaeb2fcd3381..f8b1ca661cbc1a0ba23c1ecd9a5f528aabdf7f41 100644 (file)
@@ -1315,6 +1315,7 @@ static int h2_init(struct connection *conn, struct proxy *prx, struct session *s
                                                      &h2_stats_module);
        } else {
                h2c->flags = H2_CF_NONE;
+               h2c->flags |= H2_CF_SETTINGS_NEEDED;
                h2c->shut_timeout = h2c->timeout = prx->timeout.client;
                if (tick_isset(prx->timeout.clientfin))
                        h2c->shut_timeout = prx->timeout.clientfin;
@@ -2847,6 +2848,7 @@ static int h2c_handle_settings(struct h2c *h2c)
 
        /* need to ACK this frame now */
        h2c->st0 = H2_CS_FRAME_A;
+       h2c->flags &= ~H2_CF_SETTINGS_NEEDED;
  done:
        TRACE_LEAVE(H2_EV_RX_FRAME|H2_EV_RX_SETTINGS, h2c->conn);
        return 1;
@@ -4648,7 +4650,7 @@ static int h2_process_mux(struct h2c *h2c)
 {
        TRACE_ENTER(H2_EV_H2C_WAKE, h2c->conn);
 
-       if (unlikely(h2c->st0 < H2_CS_FRAME_H)) {
+       if (unlikely(h2c->st0 < (h2c->flags & H2_CF_SETTINGS_NEEDED ? H2_CS_FRAME_H : H2_CS_SETTINGS1))) {
                if (unlikely(h2c->st0 == H2_CS_PREFACE && (h2c->flags & H2_CF_IS_BACK))) {
                        if (unlikely(h2c_bck_send_preface(h2c) <= 0)) {
                                /* RFC7540#3.5: a GOAWAY frame MAY be omitted */
@@ -4659,7 +4661,7 @@ static int h2_process_mux(struct h2c *h2c)
                        h2c->st0 = H2_CS_SETTINGS1;
                }
                /* need to wait for the other side */
-               if (h2c->st0 < H2_CS_FRAME_H)
+               if (h2c->st0 < (h2c->flags & H2_CF_SETTINGS_NEEDED ? H2_CS_FRAME_H : H2_CS_SETTINGS1))
                        goto done;
        }
 
@@ -6698,6 +6700,14 @@ static size_t h2s_snd_bhdrs(struct h2s *h2s, struct htx *htx)
                                /* rfc 7230 #6.1 Connection = list of tokens */
                                struct ist connection_ist = list[hdr].v;
 
+                               if (h2c->st0 < H2_CS_FRAME_H) {
+                                       /* this feature is negotiated, can't proceed without
+                                        * the server's settings.
+                                        */
+                                       h2c->flags |= H2_CF_SETTINGS_NEEDED;
+                                       goto end;
+                               }
+
                                if (!(sl->flags & HTX_SL_F_BODYLESS)) {
                                        TRACE_STATE("cannot convert upgrade for request with payload", H2_EV_TX_FRAME|H2_EV_TX_HDR, h2c->conn, h2s);
                                        goto fail;
@@ -7845,7 +7855,7 @@ static size_t h2_snd_buf(struct stconn *sc, struct buffer *buf, size_t count, in
        }
        h2s->flags &= ~H2_SF_NOTIFIED;
 
-       if (h2s->h2c->st0 < H2_CS_FRAME_H) {
+       if (h2s->h2c->st0 < ((h2s->h2c->flags & H2_CF_SETTINGS_NEEDED) ? H2_CS_FRAME_H : H2_CS_SETTINGS1)) {
                TRACE_DEVEL("connection not ready, leaving", H2_EV_H2S_SEND|H2_EV_H2S_BLK, h2s->h2c->conn, h2s);
                return 0;
        }
@@ -7894,6 +7904,10 @@ static size_t h2_snd_buf(struct stconn *sc, struct buffer *buf, size_t count, in
                                        if (ret < bsize)
                                                goto done;
                                }
+                               else if ((h2s->h2c->flags & H2_CF_SETTINGS_NEEDED) && h2s->h2c->st0 < H2_CS_FRAME_H) {
+                                       /* cannot proceed further */
+                                       goto done;
+                               }
                                break;
 
                        case HTX_BLK_RES_SL: