From: Vsevolod Stakhov Date: Fri, 22 May 2026 09:36:36 +0000 (+0100) Subject: [Feature] http: optional insertion-ordered header emission X-Git-Tag: 4.1.0~33^2~1 X-Git-Url: http://git.ipfire.org/gitweb/index.cgi?a=commitdiff_plain;h=f7720651eddaed87626753bf7f7283bd5efd9902;p=thirdparty%2Frspamd.git [Feature] http: optional insertion-ordered header emission The HTTP client stores headers in a khash and emits them in bucket order, so the on-the-wire header order is unpredictable. Add an opt-in RSPAMD_HTTP_FLAG_ORDERED_HEADERS flag: each header is stamped with a monotonic `order` at insertion time, and when the flag is set the client serialises headers sorted by that order instead of hash order. lua_http now accepts a list form for the headers table ({{'name', 'value'}, ...}) which preserves order and sets the flag; the existing map form and every other caller are byte-identical. This lets callers reproduce a real browser's exact header order, used by the url_redirector stealth fingerprint profiles. --- diff --git a/src/libserver/http/http_connection.c b/src/libserver/http/http_connection.c index 2b49711b20..d9bbce8dae 100644 --- a/src/libserver/http/http_connection.c +++ b/src/libserver/http/http_connection.c @@ -1565,6 +1565,7 @@ rspamd_http_connection_copy_msg(struct rspamd_http_message *msg, GError **err) new_msg->port = msg->port; new_msg->date = msg->date; new_msg->last_modified = msg->last_modified; + new_msg->header_cnt = msg->header_cnt; kh_foreach_value(msg->headers, hdr, { nhdrs = NULL; @@ -1581,6 +1582,7 @@ rspamd_http_connection_copy_msg(struct rspamd_http_message *msg, GError **err) nhdr->value.begin = nhdr->combined->str + (hcur->value.begin - hcur->combined->str); nhdr->value.len = hcur->value.len; + nhdr->order = hcur->order; DL_APPEND(nhdrs, nhdr); } @@ -1692,6 +1694,28 @@ void rspamd_http_connection_read_message_shared(struct rspamd_http_connection *c RSPAMD_HTTP_FLAG_SHMEM); } +/* + * Comparator for sorting header nodes by their insertion order. Used when + * RSPAMD_HTTP_FLAG_ORDERED_HEADERS is set so headers leave on the wire in the + * exact order the caller added them, instead of hash bucket order. Each header + * gets a unique `order` value, so qsort's instability never bites here. + */ +static int +rspamd_http_header_cmp_order(const void *a, const void *b) +{ + const struct rspamd_http_header *ha = *(struct rspamd_http_header *const *) a; + const struct rspamd_http_header *hb = *(struct rspamd_http_header *const *) b; + + if (ha->order < hb->order) { + return -1; + } + else if (ha->order > hb->order) { + return 1; + } + + return 0; +} + static void rspamd_http_connection_encrypt_message( struct rspamd_http_connection *conn, @@ -1746,12 +1770,37 @@ rspamd_http_connection_encrypt_message( } - kh_foreach_value (msg->headers, hdr, { - DL_FOREACH (hdr, hcur) { - segments[i].data = hcur->combined->str; - segments[i++].len = hcur->combined->len; + if (msg->flags & RSPAMD_HTTP_FLAG_ORDERED_HEADERS) { + struct rspamd_http_header **hdrs_sorted; + unsigned int nhdrs = 0; + + hdrs_sorted = g_malloc(sizeof(*hdrs_sorted) * (hdrcount + 1)); + + kh_foreach_value (msg->headers, hdr, { + DL_FOREACH (hdr, hcur) { + hdrs_sorted[nhdrs++] = hcur; + } +}); + +qsort(hdrs_sorted, nhdrs, sizeof(*hdrs_sorted), + rspamd_http_header_cmp_order); + +for (unsigned int j = 0; j < nhdrs; j++) { + segments[i].data = hdrs_sorted[j]->combined->str; + segments[i++].len = hdrs_sorted[j]->combined->len; +} + +g_free(hdrs_sorted); +} +else +{ + kh_foreach_value (msg->headers, hdr, { + DL_FOREACH (hdr, hcur) { + segments[i].data = hcur->combined->str; + segments[i++].len = hcur->combined->len; } }); +} /* crlfp should point now at the second crlf */ segments[i].data = crlfp; @@ -2282,6 +2331,7 @@ rspamd_http_connection_write_message_common(struct rspamd_http_connection *conn, hdr->name.len = srch.len; hdr->value.begin = hdr->combined->str + srch.len + 2; hdr->value.len = vlen; + hdr->order = msg->header_cnt++; hdr->prev = hdr; /* for utlists */ kh_value(msg->headers, k) = hdr; @@ -2502,12 +2552,37 @@ if (encrypted) { else { i = 1; if (msg->method < HTTP_SYMBOLS) { + if (msg->flags & RSPAMD_HTTP_FLAG_ORDERED_HEADERS) { + struct rspamd_http_header **hdrs_sorted; + unsigned int nhdrs = 0; + + hdrs_sorted = g_malloc(sizeof(*hdrs_sorted) * (hdrcount + 1)); + + kh_foreach_value (msg->headers, hdr, { + DL_FOREACH (hdr, hcur) { + hdrs_sorted[nhdrs++] = hcur; + } + }); + + qsort(hdrs_sorted, nhdrs, sizeof(*hdrs_sorted), + rspamd_http_header_cmp_order); + + for (unsigned int j = 0; j < nhdrs; j++) { + priv->out[i].iov_base = hdrs_sorted[j]->combined->str; + priv->out[i++].iov_len = hdrs_sorted[j]->combined->len; + } + + g_free(hdrs_sorted); +} +else +{ kh_foreach_value (msg->headers, hdr, { DL_FOREACH (hdr, hcur) { priv->out[i].iov_base = hcur->combined->str; priv->out[i++].iov_len = hcur->combined->len; - } +} }); +} priv->out[i].iov_base = "\r\n"; priv->out[i++].iov_len = 2; diff --git a/src/libserver/http/http_connection.h b/src/libserver/http/http_connection.h index 32a902d5bf..5a5243de74 100644 --- a/src/libserver/http/http_connection.h +++ b/src/libserver/http/http_connection.h @@ -87,6 +87,10 @@ struct rspamd_storage_shmem { * Message is intended for SSL connection */ #define RSPAMD_HTTP_FLAG_WANT_SSL (1 << 9) +/** + * Emit headers on the wire in insertion order instead of hash order + */ +#define RSPAMD_HTTP_FLAG_ORDERED_HEADERS (1 << 10) /** * Options for HTTP connection */ diff --git a/src/libserver/http/http_message.c b/src/libserver/http/http_message.c index 92ca8334ca..f1aac919af 100644 --- a/src/libserver/http/http_message.c +++ b/src/libserver/http/http_message.c @@ -572,6 +572,7 @@ void rspamd_http_message_add_header_len(struct rspamd_http_message *msg, hdr->name.len = nlen; hdr->value.begin = hdr->combined->str + nlen + 2; hdr->value.len = vlen; + hdr->order = msg->header_cnt++; k = kh_put(rspamd_http_headers_hash, msg->headers, &hdr->name, &r); @@ -616,6 +617,7 @@ void rspamd_http_message_add_header_fstr(struct rspamd_http_message *msg, hdr->name.len = nlen; hdr->value.begin = hdr->combined->str + nlen + 2; hdr->value.len = vlen; + hdr->order = msg->header_cnt++; k = kh_put(rspamd_http_headers_hash, msg->headers, &hdr->name, &r); diff --git a/src/libserver/http/http_private.h b/src/libserver/http/http_private.h index bbdeb7e0a4..8613a4366b 100644 --- a/src/libserver/http/http_private.h +++ b/src/libserver/http/http_private.h @@ -36,6 +36,9 @@ struct rspamd_http_header { rspamd_fstring_t *combined; rspamd_ftok_t name; rspamd_ftok_t value; + /* Insertion order, used to emit headers deterministically when + * RSPAMD_HTTP_FLAG_ORDERED_HEADERS is set on the message */ + unsigned int order; struct rspamd_http_header *prev, *next; }; @@ -86,6 +89,8 @@ struct rspamd_http_message { int code; enum http_method method; int flags; + /* Monotonic counter stamped onto each header as it is added */ + unsigned int header_cnt; ref_entry_t ref; }; diff --git a/src/lua/lua_http.c b/src/lua/lua_http.c index 05990544d9..5bf43769a2 100644 --- a/src/lua/lua_http.c +++ b/src/lua/lua_http.c @@ -686,30 +686,90 @@ lua_http_push_headers(lua_State *L, struct rspamd_http_message *msg) { const char *name, *value; int i, sz; + int tbl = lua_gettop(L); + + /* + * Two accepted shapes for the headers table: + * - map: { ['Name'] = 'value', ['Name'] = {'v1', 'v2'} } -- order undefined + * - list: { {'Name', 'value'}, {'Name', {'v1', 'v2'}} } -- order preserved + * The list shape sets RSPAMD_HTTP_FLAG_ORDERED_HEADERS so the HTTP client + * emits headers on the wire in exactly the order they are listed (used to + * mimic a real browser's header order). It is detected by a non-nil + * integer key 1 whose value is itself a table. + */ + lua_rawgeti(L, tbl, 1); + if (lua_type(L, -1) == LUA_TTABLE) { + lua_pop(L, 1); - lua_pushnil(L); - while (lua_next(L, -2) != 0) { - - lua_pushvalue(L, -2); - name = lua_tostring(L, -1); - sz = rspamd_lua_table_size(L, -2); - if (sz != 0 && name != NULL) { - for (i = 1; i <= sz; i++) { - lua_rawgeti(L, -2, i); - value = lua_tostring(L, -1); - if (value != NULL) { - rspamd_http_message_add_header(msg, name, value); - } + msg->flags |= RSPAMD_HTTP_FLAG_ORDERED_HEADERS; + sz = rspamd_lua_table_size(L, tbl); + + for (i = 1; i <= sz; i++) { + lua_rawgeti(L, tbl, i); /* pair { name, value } */ + + if (lua_type(L, -1) == LUA_TTABLE) { + int pair = lua_gettop(L); + int vsz, j; + + lua_rawgeti(L, pair, 1); + name = lua_tostring(L, -1); lua_pop(L, 1); + + lua_rawgeti(L, pair, 2); + vsz = rspamd_lua_table_size(L, -1); + + if (name != NULL) { + if (vsz != 0) { + /* Duplicated header: value is a list of strings */ + for (j = 1; j <= vsz; j++) { + lua_rawgeti(L, -1, j); + value = lua_tostring(L, -1); + if (value != NULL) { + rspamd_http_message_add_header(msg, name, value); + } + lua_pop(L, 1); + } + } + else { + value = lua_tostring(L, -1); + if (value != NULL) { + rspamd_http_message_add_header(msg, name, value); + } + } + } + lua_pop(L, 1); /* value */ } + + lua_pop(L, 1); /* pair */ } - else { - value = lua_tostring(L, -2); - if (name != NULL && value != NULL) { - rspamd_http_message_add_header(msg, name, value); + } + else { + lua_pop(L, 1); + + lua_pushnil(L); + while (lua_next(L, tbl) != 0) { + + lua_pushvalue(L, -2); + name = lua_tostring(L, -1); + sz = rspamd_lua_table_size(L, -2); + if (sz != 0 && name != NULL) { + for (i = 1; i <= sz; i++) { + lua_rawgeti(L, -2, i); + value = lua_tostring(L, -1); + if (value != NULL) { + rspamd_http_message_add_header(msg, name, value); + } + lua_pop(L, 1); + } + } + else { + value = lua_tostring(L, -2); + if (name != NULL && value != NULL) { + rspamd_http_message_add_header(msg, name, value); + } } + lua_pop(L, 2); } - lua_pop(L, 2); } } @@ -729,7 +789,7 @@ lua_http_push_headers(lua_State *L, struct rspamd_http_message *msg) * @param {string} url specifies URL for a request in the standard URI form (e.g. 'http://example.com/path') * @param {function} callback specifies callback function in format `function (err_message, code, body, headers)` that is called on HTTP request completion. if this parameter is missing, the function performs "pseudo-synchronous" call (see [Synchronous and Asynchronous API overview](/doc/developers/sync_async.html#API-example-http-module) * @param {task} task if called from symbol handler it is generally a good idea to use the common task objects: event base, DNS resolver and events session - * @param {table} headers optional headers in form `[name='value']` or `[name=['value1', 'value2']]` to duplicate a header with multiple values + * @param {table} headers optional headers in form `[name='value']` or `[name=['value1', 'value2']]` to duplicate a header with multiple values. A list form `{{'name', 'value'}, {'name', {'value1', 'value2'}}}` is also accepted and preserves the header order on the wire * @param {string} mime_type MIME type of the HTTP content (for example, `text/html`) * @param {string/text} body full body content, can be opaque `rspamd{text}` to avoid data copying * @param {number} timeout floating point request timeout value in seconds (default is 5.0 seconds)