]> git.ipfire.org Git - thirdparty/haproxy.git/commitdiff
MEDIUM: h1: strictly verify quoting in chunk extensions
authorWilly Tarreau <w@1wt.eu>
Wed, 28 Jan 2026 16:32:36 +0000 (17:32 +0100)
committerWilly Tarreau <w@1wt.eu>
Wed, 28 Jan 2026 17:54:23 +0000 (18:54 +0100)
As reported by Ben Kallus in the following thread:

   https://www.mail-archive.com/haproxy@formilux.org/msg46471.html

there exist some agents which mistakenly accept CRLF inside quoted
chunk extensions, making it possible to fool them by injecting one
extra chunk they won't see for example, or making them miss the end
of the body depending on how it's done. Haproxy, like most other
agents nowadays, doesn't care at all about chunk extensions and just
drops them, in agreement with the spec.

However, as discussed, since chunk extensions are basically never used
except for attacks, and that the cost of just matching quote pairs and
checking backslashed quotes is escape consistency remains relatively
low, it can make sense to add such a check to abort the message parsing
when this situation is encountered. Note that it has to be done at two
places, because there is a fast path and a slow path for chunk parsing.

Also note that it *will* cause transfers using improperly formatted chunk
extensions to fail, but since these are really not used, and that the
likelihood of them being used but improperly quoted certainly is much
lower than the risk of crossing a broken parser on the client's request
path or on the server's response path, we consider the risk as
acceptable. The test is not subject to the configurable parser exceptions
and it's very unlikely that it will ever be needed.

Since this is done in 3.4 which will be LTS, this patch will have to be
backported to 3.3 so that any unlikely trouble gets a chance to be
detected before users upgrade to 3.4.

Thanks to Ben for the discussion, and to Rajat Raghav for sparking it
in the first place even though the original report was mistaken.

Cc: Ben Kallus <benjamin.p.kallus.gr@dartmouth.edu>
Cc: Rajat Raghav <xclow3n@gmail.com>
Cc: Christopher Faulet <cfaulet@haproxy.com>
include/haproxy/h1.h
src/h1_htx.c

index a01b53855d8ab437a4ade401dd8a02007fc5ed36..d77ed1e1f175be1809e1013381bbbeb518fbcb39 100644 (file)
@@ -263,6 +263,8 @@ static inline int h1_parse_chunk_size(const struct buffer *buf, int start, int s
        const char *ptr_old = ptr;
        const char *end = b_wrap(buf);
        uint64_t chunk = 0;
+       int backslash = 0;
+       int quote = 0;
 
        stop -= start; // bytes left
        start = stop;  // bytes to transfer
@@ -327,13 +329,37 @@ static inline int h1_parse_chunk_size(const struct buffer *buf, int start, int s
                        if (--stop == 0)
                                return 0;
 
-                       while (!HTTP_IS_CRLF(*ptr)) {
+                       /* The loop seeks the first CRLF or non-tab CTL char
+                        * and stops there. If a backslash/quote is active,
+                        * it's an error. If none, we assume it's the CRLF
+                        * and go back to the top of the loop checking for
+                        * CR then LF. This way CTLs, lone LF etc are handled
+                        * in the fallback path. This allows to protect
+                        * remotes against their own possibly non-compliant
+                        * chunk-ext parser which could mistakenly skip a
+                        * quoted CRLF. Chunk-ext are not used anyway, except
+                        * by attacks.
+                        */
+                       while (!HTTP_IS_CTL(*ptr) || HTTP_IS_SPHT(*ptr)) {
+                               if (backslash)
+                                       backslash = 0; // escaped char
+                               else if (*ptr == '\\' && quote)
+                                       backslash = 1;
+                               else if (*ptr == '\\') // backslash not permitted outside quotes
+                                       goto error;
+                               else if (*ptr == '"')  // begin/end of quoted-pair
+                                       quote = !quote;
                                if (++ptr >= end)
                                        ptr = b_orig(buf);
                                if (--stop == 0)
                                        return 0;
                        }
-                       /* we have a CRLF now, loop above */
+
+                       /* mismatched quotes / backslashes end here */
+                       if (quote || backslash)
+                               goto error;
+
+                       /* CTLs (CRLF) fall to the common check */
                        continue;
                }
                else
index 1b387d519ae7b4100db7a08b646ad7274dc0e367..1f806a76e37d4714ca9f6c9c1d0c1ca0471a2220 100644 (file)
@@ -724,14 +724,42 @@ static size_t h1_parse_full_contig_chunks(struct h1m *h1m, struct htx **dsthtx,
                                break;
                        }
                        else if (likely(end[ridx] == ';')) {
+                               int backslash = 0;
+                               int quote = 0;
+
                                /* chunk extension, ends at next CRLF */
                                if (!++ridx)
                                        goto end_parsing;
-                               while (!HTTP_IS_CRLF(end[ridx])) {
+
+                               /* The loop seeks the first CRLF or non-tab CTL char
+                                * and stops there. If a backslash/quote is active,
+                                * it's an error. If none, we assume it's the CRLF
+                                * and go back to the top of the loop checking for
+                                * CR then LF. This way CTLs, lone LF etc are handled
+                                * in the fallback path. This allows to protect
+                                * remotes against their own possibly non-compliant
+                                * chunk-ext parser which could mistakenly skip a
+                                * quoted CRLF. Chunk-ext are not used anyway, except
+                                * by attacks.
+                                */
+                               while (!HTTP_IS_CTL(end[ridx]) || HTTP_IS_SPHT(end[ridx])) {
+                                       if (backslash)
+                                               backslash = 0; // escaped char
+                                       else if (end[ridx] == '\\' && quote)
+                                               backslash = 1;
+                                       else if (end[ridx] == '\\') // backslash not permitted outside quotes
+                                               goto parsing_error;
+                                       else if (end[ridx] == '"')  // begin/end of quoted-pair
+                                               quote = !quote;
                                        if (!++ridx)
                                                goto end_parsing;
                                }
-                               /* we have a CRLF now, loop above */
+
+                               /* mismatched quotes / backslashes end here */
+                               if (quote || backslash)
+                                       goto parsing_error;
+
+                               /* CTLs (CRLF) fall to the common check */
                                continue;
                        }
                        else {