git.ipfire.org Git - thirdparty/rspamd.git/commitdiff
[Feature] http: optional insertion-ordered header emission
author: Vsevolod Stakhov <vsevolod@rspamd.com>
Fri, 22 May 2026 09:36:36 +0000 (10:36 +0100)
committer: Vsevolod Stakhov <vsevolod@rspamd.com>
Fri, 22 May 2026 09:36:36 +0000 (10:36 +0100)
The HTTP client stores headers in a khash and emits them in bucket
order, so the on-the-wire header order is unpredictable. Add an opt-in
RSPAMD_HTTP_FLAG_ORDERED_HEADERS flag: each header is stamped with a
monotonic `order` at insertion time, and when the flag is set the
client serialises headers sorted by that order instead of hash order.

lua_http now accepts a list form for the headers table
({{'name', 'value'}, ...}) which preserves order and sets the flag;
the existing map form and every other caller are byte-identical.

This lets callers reproduce a real browser's exact header order, used
by the url_redirector stealth fingerprint profiles.

src/libserver/http/http_connection.c
src/libserver/http/http_connection.h
src/libserver/http/http_message.c
src/libserver/http/http_private.h
src/lua/lua_http.c

index 2b49711b204c052ed2a502f6f610baf13f32f25d..d9bbce8dae2f81c9de58bfccaa9cbd6961cb49de 100644 (file)
@@ -1565,6 +1565,7 @@ rspamd_http_connection_copy_msg(struct rspamd_http_message *msg, GError **err)
        new_msg->port = msg->port;
        new_msg->date = msg->date;
        new_msg->last_modified = msg->last_modified;
+       new_msg->header_cnt = msg->header_cnt;
 
        kh_foreach_value(msg->headers, hdr, {
                nhdrs = NULL;
@@ -1581,6 +1582,7 @@ rspamd_http_connection_copy_msg(struct rspamd_http_message *msg, GError **err)
                        nhdr->value.begin = nhdr->combined->str +
                                                                (hcur->value.begin - hcur->combined->str);
                        nhdr->value.len = hcur->value.len;
+                       nhdr->order = hcur->order;
                        DL_APPEND(nhdrs, nhdr);
                }
 
@@ -1692,6 +1694,28 @@ void rspamd_http_connection_read_message_shared(struct rspamd_http_connection *c
                                                                                           RSPAMD_HTTP_FLAG_SHMEM);
 }
 
+/*
+ * Comparator for sorting header nodes by their insertion order. Used when
+ * RSPAMD_HTTP_FLAG_ORDERED_HEADERS is set so headers leave on the wire in the
+ * exact order the caller added them, instead of hash bucket order. Each header
+ * gets a unique `order` value, so qsort's instability never bites here.
+ */
+static int
+rspamd_http_header_cmp_order(const void *a, const void *b)
+{
+       const struct rspamd_http_header *ha = *(struct rspamd_http_header *const *) a;
+       const struct rspamd_http_header *hb = *(struct rspamd_http_header *const *) b;
+
+       if (ha->order < hb->order) {
+               return -1;
+       }
+       else if (ha->order > hb->order) {
+               return 1;
+       }
+
+       return 0;
+}
+
 static void
 rspamd_http_connection_encrypt_message(
        struct rspamd_http_connection *conn,
@@ -1746,12 +1770,37 @@ rspamd_http_connection_encrypt_message(
        }
 
 
-       kh_foreach_value (msg->headers, hdr, {
-               DL_FOREACH (hdr, hcur) {
-                       segments[i].data = hcur->combined->str;
-                       segments[i++].len = hcur->combined->len;
+       if (msg->flags & RSPAMD_HTTP_FLAG_ORDERED_HEADERS) {
+               struct rspamd_http_header **hdrs_sorted;
+               unsigned int nhdrs = 0;
+
+               hdrs_sorted = g_malloc(sizeof(*hdrs_sorted) * (hdrcount + 1));
+
+               kh_foreach_value (msg->headers, hdr, {
+                       DL_FOREACH (hdr, hcur) {
+                               hdrs_sorted[nhdrs++] = hcur;
+       }
+});
+
+qsort(hdrs_sorted, nhdrs, sizeof(*hdrs_sorted),
+         rspamd_http_header_cmp_order);
+
+for (unsigned int j = 0; j < nhdrs; j++) {
+       segments[i].data = hdrs_sorted[j]->combined->str;
+       segments[i++].len = hdrs_sorted[j]->combined->len;
+}
+
+g_free(hdrs_sorted);
+}
+else
+{
+               kh_foreach_value (msg->headers, hdr, {
+                       DL_FOREACH (hdr, hcur) {
+                               segments[i].data = hcur->combined->str;
+                               segments[i++].len = hcur->combined->len;
 }
 });
+}
 
 /* crlfp should point now at the second crlf */
 segments[i].data = crlfp;
@@ -2282,6 +2331,7 @@ rspamd_http_connection_write_message_common(struct rspamd_http_connection *conn,
                        hdr->name.len = srch.len;
                        hdr->value.begin = hdr->combined->str + srch.len + 2;
                        hdr->value.len = vlen;
+                       hdr->order = msg->header_cnt++;
                        hdr->prev = hdr; /* for utlists */
 
                        kh_value(msg->headers, k) = hdr;
@@ -2502,12 +2552,37 @@ if (encrypted) {
 else {
        i = 1;
        if (msg->method < HTTP_SYMBOLS) {
+               if (msg->flags & RSPAMD_HTTP_FLAG_ORDERED_HEADERS) {
+                       struct rspamd_http_header **hdrs_sorted;
+                       unsigned int nhdrs = 0;
+
+                       hdrs_sorted = g_malloc(sizeof(*hdrs_sorted) * (hdrcount + 1));
+
+                       kh_foreach_value (msg->headers, hdr, {
+                               DL_FOREACH (hdr, hcur) {
+                                       hdrs_sorted[nhdrs++] = hcur;
+               }
+       });
+
+       qsort(hdrs_sorted, nhdrs, sizeof(*hdrs_sorted),
+                 rspamd_http_header_cmp_order);
+
+       for (unsigned int j = 0; j < nhdrs; j++) {
+               priv->out[i].iov_base = hdrs_sorted[j]->combined->str;
+               priv->out[i++].iov_len = hdrs_sorted[j]->combined->len;
+       }
+
+       g_free(hdrs_sorted);
+}
+else
+{
                        kh_foreach_value (msg->headers, hdr, {
                                DL_FOREACH (hdr, hcur) {
                                        priv->out[i].iov_base = hcur->combined->str;
                                        priv->out[i++].iov_len = hcur->combined->len;
-       }
+}
 });
+}
 
 priv->out[i].iov_base = "\r\n";
 priv->out[i++].iov_len = 2;
index 32a902d5bf7727059b64d457dba65c1d4cfba539..5a5243de7469e5fc4f1ff4014df9391366f224d6 100644 (file)
@@ -87,6 +87,10 @@ struct rspamd_storage_shmem {
  * Message is intended for SSL connection
  */
 #define RSPAMD_HTTP_FLAG_WANT_SSL (1 << 9)
+/**
+ * Emit headers on the wire in insertion order instead of hash order
+ */
+#define RSPAMD_HTTP_FLAG_ORDERED_HEADERS (1 << 10)
 /**
  * Options for HTTP connection
  */
index 92ca8334ca338e3bfdd626b319fab44a2555e240..f1aac919af6bfcf8c892130e3c57f8aa4bf319a2 100644 (file)
@@ -572,6 +572,7 @@ void rspamd_http_message_add_header_len(struct rspamd_http_message *msg,
                hdr->name.len = nlen;
                hdr->value.begin = hdr->combined->str + nlen + 2;
                hdr->value.len = vlen;
+               hdr->order = msg->header_cnt++;
 
                k = kh_put(rspamd_http_headers_hash, msg->headers, &hdr->name,
                                   &r);
@@ -616,6 +617,7 @@ void rspamd_http_message_add_header_fstr(struct rspamd_http_message *msg,
                hdr->name.len = nlen;
                hdr->value.begin = hdr->combined->str + nlen + 2;
                hdr->value.len = vlen;
+               hdr->order = msg->header_cnt++;
 
                k = kh_put(rspamd_http_headers_hash, msg->headers, &hdr->name,
                                   &r);
index bbdeb7e0a44187c3f955c6a3a873afe5e6cbefe6..8613a4366b71a42b176892eeb37c090acb1d6b7e 100644 (file)
@@ -36,6 +36,9 @@ struct rspamd_http_header {
        rspamd_fstring_t *combined;
        rspamd_ftok_t name;
        rspamd_ftok_t value;
+       /* Insertion order, used to emit headers deterministically when
+        * RSPAMD_HTTP_FLAG_ORDERED_HEADERS is set on the message */
+       unsigned int order;
        struct rspamd_http_header *prev, *next;
 };
 
@@ -86,6 +89,8 @@ struct rspamd_http_message {
        int code;
        enum http_method method;
        int flags;
+       /* Monotonic counter stamped onto each header as it is added */
+       unsigned int header_cnt;
        ref_entry_t ref;
 };
 
index 05990544d98668c1c0c655cd631f4114a0657457..5bf43769a2823f4402187f62a469ed7e3b67f9dd 100644 (file)
@@ -686,30 +686,90 @@ lua_http_push_headers(lua_State *L, struct rspamd_http_message *msg)
 {
        const char *name, *value;
        int i, sz;
+       int tbl = lua_gettop(L);
+
+       /*
+        * Two accepted shapes for the headers table:
+        *  - map:  { ['Name'] = 'value', ['Name'] = {'v1', 'v2'} }  -- order undefined
+        *  - list: { {'Name', 'value'}, {'Name', {'v1', 'v2'}} }    -- order preserved
+        * The list shape sets RSPAMD_HTTP_FLAG_ORDERED_HEADERS so the HTTP client
+        * emits headers on the wire in exactly the order they are listed (used to
+        * mimic a real browser's header order). It is detected by a non-nil
+        * integer key 1 whose value is itself a table.
+        */
+       lua_rawgeti(L, tbl, 1);
+       if (lua_type(L, -1) == LUA_TTABLE) {
+               lua_pop(L, 1);
 
-       lua_pushnil(L);
-       while (lua_next(L, -2) != 0) {
-
-               lua_pushvalue(L, -2);
-               name = lua_tostring(L, -1);
-               sz = rspamd_lua_table_size(L, -2);
-               if (sz != 0 && name != NULL) {
-                       for (i = 1; i <= sz; i++) {
-                               lua_rawgeti(L, -2, i);
-                               value = lua_tostring(L, -1);
-                               if (value != NULL) {
-                                       rspamd_http_message_add_header(msg, name, value);
-                               }
+               msg->flags |= RSPAMD_HTTP_FLAG_ORDERED_HEADERS;
+               sz = rspamd_lua_table_size(L, tbl);
+
+               for (i = 1; i <= sz; i++) {
+                       lua_rawgeti(L, tbl, i); /* pair { name, value } */
+
+                       if (lua_type(L, -1) == LUA_TTABLE) {
+                               int pair = lua_gettop(L);
+                               int vsz, j;
+
+                               lua_rawgeti(L, pair, 1);
+                               name = lua_tostring(L, -1);
                                lua_pop(L, 1);
+
+                               lua_rawgeti(L, pair, 2);
+                               vsz = rspamd_lua_table_size(L, -1);
+
+                               if (name != NULL) {
+                                       if (vsz != 0) {
+                                               /* Duplicated header: value is a list of strings */
+                                               for (j = 1; j <= vsz; j++) {
+                                                       lua_rawgeti(L, -1, j);
+                                                       value = lua_tostring(L, -1);
+                                                       if (value != NULL) {
+                                                               rspamd_http_message_add_header(msg, name, value);
+                                                       }
+                                                       lua_pop(L, 1);
+                                               }
+                                       }
+                                       else {
+                                               value = lua_tostring(L, -1);
+                                               if (value != NULL) {
+                                                       rspamd_http_message_add_header(msg, name, value);
+                                               }
+                                       }
+                               }
+                               lua_pop(L, 1); /* value */
                        }
+
+                       lua_pop(L, 1); /* pair */
                }
-               else {
-                       value = lua_tostring(L, -2);
-                       if (name != NULL && value != NULL) {
-                               rspamd_http_message_add_header(msg, name, value);
+       }
+       else {
+               lua_pop(L, 1);
+
+               lua_pushnil(L);
+               while (lua_next(L, tbl) != 0) {
+
+                       lua_pushvalue(L, -2);
+                       name = lua_tostring(L, -1);
+                       sz = rspamd_lua_table_size(L, -2);
+                       if (sz != 0 && name != NULL) {
+                               for (i = 1; i <= sz; i++) {
+                                       lua_rawgeti(L, -2, i);
+                                       value = lua_tostring(L, -1);
+                                       if (value != NULL) {
+                                               rspamd_http_message_add_header(msg, name, value);
+                                       }
+                                       lua_pop(L, 1);
+                               }
+                       }
+                       else {
+                               value = lua_tostring(L, -2);
+                               if (name != NULL && value != NULL) {
+                                       rspamd_http_message_add_header(msg, name, value);
+                               }
                        }
+                       lua_pop(L, 2);
                }
-               lua_pop(L, 2);
        }
 }
 
@@ -729,7 +789,7 @@ lua_http_push_headers(lua_State *L, struct rspamd_http_message *msg)
  * @param {string} url specifies URL for a request in the standard URI form (e.g. 'http://example.com/path')
  * @param {function} callback specifies callback function in format  `function (err_message, code, body, headers)` that is called on HTTP request completion. if this parameter is missing, the function performs "pseudo-synchronous" call (see [Synchronous and Asynchronous API overview](/doc/developers/sync_async.html#API-example-http-module)
  * @param {task} task if called from symbol handler it is generally a good idea to use the common task objects: event base, DNS resolver and events session
- * @param {table} headers optional headers in form `[name='value']` or `[name=['value1', 'value2']]` to duplicate a header with multiple values
+ * @param {table} headers optional headers in form `[name='value']` or `[name=['value1', 'value2']]` to duplicate a header with multiple values. A list form `{{'name', 'value'}, {'name', {'value1', 'value2'}}}` is also accepted and preserves the header order on the wire
  * @param {string} mime_type MIME type of the HTTP content (for example, `text/html`)
  * @param {string/text} body full body content, can be opaque `rspamd{text}` to avoid data copying
  * @param {number} timeout floating point request timeout value in seconds (default is 5.0 seconds)