From b0e8edaef2fd37dcb9df7b03be7cac90996c59bc Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Wed, 29 Oct 2025 17:38:07 +0100 Subject: [PATCH] MEDIUM: mux-h2: do not needlessly refrain from sending data early The mux currently refrains from sending data before H2_CS_FRAME_H, i.e. before the peer's SETTINGS frame was received. While it makes sense on the frontend, it's causing harm on the backend because it forces the first request to be sent in two halves over an extra RTT: first the preface and settings, second the request once the settings are received. This is totally contrary to the philosophy of the H2 protocol, consisting in permitting the client to send as soon as possible. Actually what happens is the following: - process_stream() calls connect_server() - connect_server() creates a connection, and if the proto/alpn is guessed or known, the mux is instantiated for the current request. - the H2 init code wakes the h2 tasklet up and returns - process_stream() tries to send the request using h2_snd_buf(), but that one sees that we're before H2_CS_FRAME_H, refrains from doing so and returns. - process_stream() subscribes and quits - the h2 tasklet can now execute to send the preface and settings, which leave as a first TCP segment. The connection is ready. - the iocb is woken again once the server's SETTINGS frame is received, turning the connection to the H2_CS_FRAME_H state, and the iocb wake up process_stream(). - process_stream() executes again and can try to send again. - h2_snd_buf() is called and finally sends the request as a second TCP segment. Not only this is inefficient, but it also renders 0-RTT and TFO impossible on H2 connections. When 0-RTT is used, only the preface and settings leave as early data (the very first data of that connection), which is totally pointless. In order to fix this, we have to go through a few steps: - first we need to let data be sent to a server immediately after the SETTINGS frame was sent (i.e. in H2_CS_SETTINGS1 state instead of H2_CS_FRAME_H). However, some protocol extensions are advertised by the server using SETTINGS (e.g. RFC8441) and some requests might need to know the existence of such extensions. For this reason we're adding a new h2c flag, H2_CF_SETTINGS_NEEDED, which indicates that some operations were not done because a server's SETTINGS frame is needed. This is set when trying to send a protocol upgrade or extended CONNECT during H2_CS_SETTINGS1, indicating that it's needed to wait for H2_CS_FRAME_H in this case. The flag is always set on frontend connections. This is what is being done in this patch. - second, we need to be able to push the preface opportunistically with the first h2_snd_buf() so that it's not needed to wake the tasklet up just to send that and wake process_stream() again. This will be in a separate patch. By doing the first step, we're at least saving one needless tasklet wakeup per connection (~9%), which results in ~5% backend connection rate increase. --- include/haproxy/mux_h2-t.h | 1 + src/mux_h2.c | 20 +++++++++++++++++--- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/include/haproxy/mux_h2-t.h b/include/haproxy/mux_h2-t.h index a16085987..fd8f5abd8 100644 --- a/include/haproxy/mux_h2-t.h +++ b/include/haproxy/mux_h2-t.h @@ -62,6 +62,7 @@ #define H2_CF_RCVD_SHUT 0x00020000 // a recv() attempt already failed on a shutdown #define H2_CF_END_REACHED 0x00040000 // pending data too short with RCVD_SHUT present +#define H2_CF_SETTINGS_NEEDED 0x00080000 // can't proceed without knowing settings (frontend or extensions) #define H2_CF_RCVD_RFC8441 0x00100000 // settings from RFC8441 has been received indicating support for Extended CONNECT #define H2_CF_SHTS_UPDATED 0x00200000 // SETTINGS_HEADER_TABLE_SIZE updated #define H2_CF_DTSU_EMITTED 0x00400000 // HPACK Dynamic Table Size Update opcode emitted diff --git a/src/mux_h2.c b/src/mux_h2.c index 406a953bd..f8b1ca661 100644 --- a/src/mux_h2.c +++ b/src/mux_h2.c @@ -1315,6 +1315,7 @@ static int h2_init(struct connection *conn, struct proxy *prx, struct session *s &h2_stats_module); } else { h2c->flags = H2_CF_NONE; + h2c->flags |= H2_CF_SETTINGS_NEEDED; h2c->shut_timeout = h2c->timeout = prx->timeout.client; if (tick_isset(prx->timeout.clientfin)) h2c->shut_timeout = prx->timeout.clientfin; @@ -2847,6 +2848,7 @@ static int h2c_handle_settings(struct h2c *h2c) /* need to ACK this frame now */ h2c->st0 = H2_CS_FRAME_A; + h2c->flags &= ~H2_CF_SETTINGS_NEEDED; done: TRACE_LEAVE(H2_EV_RX_FRAME|H2_EV_RX_SETTINGS, h2c->conn); return 1; @@ -4648,7 +4650,7 @@ static int h2_process_mux(struct h2c *h2c) { TRACE_ENTER(H2_EV_H2C_WAKE, h2c->conn); - if (unlikely(h2c->st0 < H2_CS_FRAME_H)) { + if (unlikely(h2c->st0 < (h2c->flags & H2_CF_SETTINGS_NEEDED ? H2_CS_FRAME_H : H2_CS_SETTINGS1))) { if (unlikely(h2c->st0 == H2_CS_PREFACE && (h2c->flags & H2_CF_IS_BACK))) { if (unlikely(h2c_bck_send_preface(h2c) <= 0)) { /* RFC7540#3.5: a GOAWAY frame MAY be omitted */ @@ -4659,7 +4661,7 @@ static int h2_process_mux(struct h2c *h2c) h2c->st0 = H2_CS_SETTINGS1; } /* need to wait for the other side */ - if (h2c->st0 < H2_CS_FRAME_H) + if (h2c->st0 < (h2c->flags & H2_CF_SETTINGS_NEEDED ? H2_CS_FRAME_H : H2_CS_SETTINGS1)) goto done; } @@ -6698,6 +6700,14 @@ static size_t h2s_snd_bhdrs(struct h2s *h2s, struct htx *htx) /* rfc 7230 #6.1 Connection = list of tokens */ struct ist connection_ist = list[hdr].v; + if (h2c->st0 < H2_CS_FRAME_H) { + /* this feature is negotiated, can't proceed without + * the server's settings. + */ + h2c->flags |= H2_CF_SETTINGS_NEEDED; + goto end; + } + if (!(sl->flags & HTX_SL_F_BODYLESS)) { TRACE_STATE("cannot convert upgrade for request with payload", H2_EV_TX_FRAME|H2_EV_TX_HDR, h2c->conn, h2s); goto fail; @@ -7845,7 +7855,7 @@ static size_t h2_snd_buf(struct stconn *sc, struct buffer *buf, size_t count, in } h2s->flags &= ~H2_SF_NOTIFIED; - if (h2s->h2c->st0 < H2_CS_FRAME_H) { + if (h2s->h2c->st0 < ((h2s->h2c->flags & H2_CF_SETTINGS_NEEDED) ? H2_CS_FRAME_H : H2_CS_SETTINGS1)) { TRACE_DEVEL("connection not ready, leaving", H2_EV_H2S_SEND|H2_EV_H2S_BLK, h2s->h2c->conn, h2s); return 0; } @@ -7894,6 +7904,10 @@ static size_t h2_snd_buf(struct stconn *sc, struct buffer *buf, size_t count, in if (ret < bsize) goto done; } + else if ((h2s->h2c->flags & H2_CF_SETTINGS_NEEDED) && h2s->h2c->st0 < H2_CS_FRAME_H) { + /* cannot proceed further */ + goto done; + } break; case HTX_BLK_RES_SL: -- 2.47.3