From 2d7e3ddd4a28ef9ffefef44e6aa931d555053a3d Mon Sep 17 00:00:00 2001
From: Willy Tarreau
Date: Mon, 27 Oct 2025 15:48:59 +0100
Subject: [PATCH] BUG/MEDIUM: cli: do not return ACKs one char at a time

Since 3.0, where the CLI started to use rcv_buf, it appears that some
external tools sending chained commands are randomly experiencing
failures. Each time, the failure happens when the whole command is sent
as a single packet, immediately followed by a close. This is not a
correct way to use the CLI, but it has been working for ages for simple
netcat-based scripts, so we should at least try to preserve it.

The cause of the failure is that the first LF that acks a command is
immediately sent back to the client and rejected due to the closed
connection. This in turn forwards the error back to the applet, which
aborts its processing. Before 3.0, the responses would be queued into
the buffer, then sent back to the channel, and would all fail at once.
This changed when snd_buf/rcv_buf were implemented, because the applets
are much more responsive: since they yield between each command, they
can deliver one ACK at a time that is immediately forwarded down the
chain.

An easy way to observe the problem is to send 5 map updates, a
shutdown, and an immediate close via tcploop, and in parallel run a
periodic "show map" to count the number of elements:

  $ tcploop -U /tmp/sock1 C S:"add map #0 1 1; add map #0 2 2; add map #0 3 3; add map #0 4 4; add map #0 5 5\n" F K

Before 3.0, there would always be 5 elements. Since 3.0 and before
20ec1de214 ("MAJOR: cli: Refacor parsing and execution of pipelined
commands"), almost always 2. And since that commit above, in 3.2,
almost always one. Doing the same using socat or netcat shows almost
always 5... It's entirely timing-dependent, and might even vary based
on the RTT between the client and haproxy!

The approach taken here consists in applying the same principle as
MSG_MORE or Nagle, but on the response buffer: the applet doesn't need
to send each command's ACK individually when it has already been woken
up and is scheduled to come back to work. It's fine (and even
desirable) that ACKs are grouped into a single packet as much as
possible.

For this reason, this patch implements APPCTX_CLI_ST1_YIELD, a new CLI
flag which indicates that the applet left in a yielding state, i.e. it
has not finished its work. This flag is used by .rcv_buf to hold
pending data. This way we won't return partial responses for no
reason, and we can continue to emulate the previous behavior.

One very nice benefit to this is that it saves huge amounts of CPU on
the client.
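As a side note for readers less familiar with the applet API, the idea
above can be illustrated with a tiny standalone sketch (purely
illustrative, not haproxy code: the struct, the rcv_buf() helper and
the "yielding" field below are all invented for the example). While
the yielding flag is set, the reader-side callback returns nothing, so
the ACKs queued by each command are only handed out once the last
command has been processed, all in one transfer:

  #include <stdio.h>
  #include <string.h>

  struct ctx {
      char out[256];    /* pending response bytes (one '\n' ACK per command) */
      size_t out_len;
      int yielding;     /* set while more pipelined commands remain to process */
  };

  /* hypothetical rcv_buf-style callback: refuses to hand out partial
   * responses while the producer side is only yielding between commands
   */
  static size_t rcv_buf(struct ctx *c, char *dst, size_t count)
  {
      if (c->yielding)
          return 0;                 /* hold data, like MSG_MORE/Nagle */
      if (count > c->out_len)
          count = c->out_len;
      memcpy(dst, c->out, count);
      memmove(c->out, c->out + count, c->out_len - count);
      c->out_len -= count;
      return count;
  }

  int main(void)
  {
      struct ctx c = { .out_len = 0, .yielding = 1 };
      char buf[256];
      int i;

      /* process three pipelined commands: each queues its ACK, but the
       * reader gets nothing back while the applet still has work to do
       */
      for (i = 0; i < 3; i++) {
          c.out[c.out_len++] = '\n';
          printf("after cmd %d: got %zu byte(s)\n", i + 1, rcv_buf(&c, buf, sizeof(buf)));
      }

      c.yielding = 0;    /* last command handled: flush all ACKs at once */
      printf("final flush: got %zu byte(s)\n", rcv_buf(&c, buf, sizeof(buf)));
      return 0;
  }

Compiled and run, this prints 0 bytes after each of the three commands
and 3 bytes on the final flush, which is the grouping effect expected
from the real .rcv_buf change in the patch below.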
In the test below, which tries to update 1M map entries, the CPU used
by socat went from 100% to 0% and the total transfer time dropped by
28%:

before:
  $ time awk 'BEGIN{ printf "prompt i\n"; for (i=0;i<1000000;i++) { \
        printf "add map #0 %d %d\n",i,i,i }}' | socat /tmp/sock1 - >/dev/null

  real    0m2.407s
  user    0m1.485s
  sys     0m1.682s

after:
  $ time awk 'BEGIN{ printf "prompt i\n"; for (i=0;i<1000000;i++) { \
        printf "add map #0 %d %d\n",i,i,i }}' | socat /tmp/sock1 - >/dev/null

  real    0m1.721s
  user    0m0.952s
  sys     0m0.057s

The difference is also quite visible in the number of syscalls made
during the test (for 1k updates):

before:
  % time     seconds  usecs/call     calls    errors syscall
  ------ ----------- ----------- --------- --------- ----------------
  100.00    0.071691           0    100001           sendmsg

after:
  % time     seconds  usecs/call     calls    errors syscall
  ------ ----------- ----------- --------- --------- ----------------
  100.00    0.000011           1         9           sendmsg

This patch will need to be backported to 3.0, and depends on these two
patches being backported as well:

    MINOR: applet: do not put SE_FL_WANT_ROOM on rcv_buf() if the channel is empty
    MINOR: cli: create cli_raw_rcv_buf() from the generic applet_raw_rcv_buf()
---
 include/haproxy/cli-t.h | 1 +
 src/cli.c               | 9 ++++++++-
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/include/haproxy/cli-t.h b/include/haproxy/cli-t.h
index 45eed35bc..a581f2a94 100644
--- a/include/haproxy/cli-t.h
+++ b/include/haproxy/cli-t.h
@@ -47,6 +47,7 @@
 #define APPCTX_CLI_ST1_INTER    (1 << 3)  /* interactive mode (i.e. don't close after 1st cmd) */
 #define APPCTX_CLI_ST1_PROMPT   (1 << 4)  /* display prompt */
 #define APPCTX_CLI_ST1_TIMED    (1 << 5)  /* display timer in prompt */
+#define APPCTX_CLI_ST1_YIELD    (1 << 6)  /* forced yield between commands */
 
 #define CLI_PREFIX_KW_NB 5
 #define CLI_MAX_MATCHES 5
diff --git a/src/cli.c b/src/cli.c
index 059e343d7..dea6c36f1 100644
--- a/src/cli.c
+++ b/src/cli.c
@@ -1087,6 +1087,7 @@ int cli_parse_cmdline(struct appctx *appctx)
  */
 void cli_io_handler(struct appctx *appctx)
 {
+	appctx->st1 &= ~APPCTX_CLI_ST1_YIELD;
 
 	if (unlikely(applet_fl_test(appctx, APPCTX_FL_EOS|APPCTX_FL_ERROR))) {
 		appctx->st0 = CLI_ST_END;
@@ -1282,8 +1283,10 @@
 		}
 	}
 
-	if ((b_data(&appctx->inbuf) && appctx->st0 == CLI_ST_PARSE_CMDLINE) || appctx->st0 == CLI_ST_PROCESS_CMDLINE)
+	if ((b_data(&appctx->inbuf) && appctx->st0 == CLI_ST_PARSE_CMDLINE) || appctx->st0 == CLI_ST_PROCESS_CMDLINE) {
+		appctx->st1 |= APPCTX_CLI_ST1_YIELD;
 		appctx_wakeup(appctx);
+	}
 
 	/* this forces us to yield between pipelined commands and
 	 * avoid extremely long latencies (e.g. "del map" etc). In
@@ -3904,6 +3907,10 @@ error:
 
 size_t cli_raw_rcv_buf(struct appctx *appctx, struct buffer *buf, size_t count, unsigned int flags)
 {
+	/* don't send partial responses, we're just yielding to avoid CPU spikes */
+	if (appctx->st1 & APPCTX_CLI_ST1_YIELD)
+		return 0;
+
 	return b_xfer(buf, &appctx->outbuf, MIN(count, b_data(&appctx->outbuf)));
 }
-- 
2.47.3