From: Vsevolod Stakhov Date: Sat, 2 May 2026 10:41:37 +0000 (+0100) Subject: [Fix] lua_upstream: retire inflight on __gc when caller forgets ok/fail X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=2041f9e2b5449a22108689c0ec7d4df3c64a7db8;p=thirdparty%2Frspamd.git [Fix] lua_upstream: retire inflight on __gc when caller forgets ok/fail Lua plugin code can drop a get_upstream_*() wrapper without ever calling :ok or :fail (e.g. when an async callback never fires or is written incorrectly). Without retirement, the C-side inflight counter introduced for P2C scoring leaks indefinitely and biases selection away from the affected upstream. Add acquired/retired bookkeeping on the Lua wrapper: - lua_push_upstream() takes an explicit acquired flag. The three get_upstream_* bindings pass TRUE; all_upstreams() inserter passes FALSE since it returns a view, not a fresh inflight reference. - The watcher path inlines lua_newuserdata; explicitly zero the new fields there so uninitialised stack memory doesn't trigger spurious retire calls. - :ok and :fail set retired = TRUE so the destructor doesn't double retire when the caller did pair properly. - The __gc destructor calls rspamd_upstream_release when acquired && !retired, decrementing inflight without affecting error counts or latency. Lua GC is non-deterministic, so retirement may lag for some time; that's acceptable noise for a load comparator and strictly better than an unbounded leak. Tests in test/lua/unit/upstream.lua cover smoke-level API usage, the abandoned-wrapper path, view safety from all_upstreams(), and double-retirement protection. --- diff --git a/src/lua/lua_common.h b/src/lua/lua_common.h index c8dc336857..a3b9030b04 100644 --- a/src/lua/lua_common.h +++ b/src/lua/lua_common.h @@ -170,6 +170,18 @@ struct rspamd_lua_map { struct rspamd_lua_upstream { struct upstream *up; int upref; + /* + * Inflight bookkeeping for the C-side P2C load comparator. acquired is + * set when this wrapper holds the inflight reference produced by a + * get_* call (round-robin / hashed / master-slave). retired is set the + * first time :ok or :fail is called. If acquired && !retired at __gc + * time, the destructor calls rspamd_upstream_release so abandoned + * selections don't permanently skew P2C scoring. Wrappers handed out + * by all_upstreams() or watch callbacks set acquired = FALSE and the + * destructor leaves inflight alone. + */ + gboolean acquired; + gboolean retired; }; /* Common utility functions */ diff --git a/src/lua/lua_upstream.c b/src/lua/lua_upstream.c index 6cab4e7e43..31e0717982 100644 --- a/src/lua/lua_upstream.c +++ b/src/lua/lua_upstream.c @@ -197,6 +197,7 @@ lua_upstream_fail(lua_State *L) } rspamd_upstream_fail(up->up, fail_addr, reason); + up->retired = TRUE; } return 0; @@ -214,6 +215,7 @@ lua_upstream_ok(lua_State *L) if (up) { rspamd_upstream_ok(up->up); + up->retired = TRUE; } return 0; @@ -226,6 +228,15 @@ lua_upstream_destroy(lua_State *L) struct rspamd_lua_upstream *up = lua_check_upstream(L, 1); if (up) { + /* + * Lua callers can forget to pair :get_upstream_*() with :ok()/:fail(). + * Retire the inflight reference here so abandoned selections don't + * permanently skew P2C scoring. Wrappers from all_upstreams() and + * watch callbacks set acquired = FALSE and are skipped. + */ + if (up->acquired && !up->retired) { + rspamd_upstream_release(up->up); + } /* Remove reference to the parent */ luaL_unref(L, LUA_REGISTRYINDEX, up->upref); /* Upstream belongs to the upstream list, so no free here */ @@ -246,7 +257,8 @@ lua_check_upstream_list(lua_State *L) } static struct rspamd_lua_upstream * -lua_push_upstream(lua_State *L, int up_idx, struct upstream *up) +lua_push_upstream(lua_State *L, int up_idx, struct upstream *up, + gboolean acquired) { struct rspamd_lua_upstream *lua_ups; @@ -256,6 +268,8 @@ lua_push_upstream(lua_State *L, int up_idx, struct upstream *up) lua_ups = lua_newuserdata(L, sizeof(*lua_ups)); lua_ups->up = up; + lua_ups->acquired = acquired; + lua_ups->retired = FALSE; rspamd_lua_setclass(L, rspamd_upstream_classname, -1); /* Store parent in the upstream to prevent gc */ lua_pushvalue(L, up_idx); @@ -374,7 +388,7 @@ lua_upstream_list_get_upstream_by_hash(lua_State *L) (unsigned int) keyl); if (selected) { - lua_push_upstream(L, 1, selected); + lua_push_upstream(L, 1, selected, TRUE); } else { lua_pushnil(L); @@ -408,7 +422,7 @@ lua_upstream_list_get_upstream_round_robin(lua_State *L) selected = rspamd_upstream_get(upl, RSPAMD_UPSTREAM_ROUND_ROBIN, NULL, 0); if (selected) { - lua_push_upstream(L, 1, selected); + lua_push_upstream(L, 1, selected, TRUE); } else { lua_pushnil(L); @@ -440,7 +454,7 @@ lua_upstream_list_get_upstream_master_slave(lua_State *L) NULL, 0); if (selected) { - lua_push_upstream(L, 1, selected); + lua_push_upstream(L, 1, selected, TRUE); } else { lua_pushnil(L); @@ -462,7 +476,8 @@ static void lua_upstream_inserter(struct upstream *up, unsigned int idx, void *u { struct upstream_foreach_cbdata *cbd = (struct upstream_foreach_cbdata *) ud; - lua_push_upstream(cbd->L, cbd->ups_pos, up); + /* all_upstreams() is a pure view; no inflight to retire on __gc. */ + lua_push_upstream(cbd->L, cbd->ups_pos, up, FALSE); lua_rawseti(cbd->L, -2, idx + 1); } /*** @@ -570,6 +585,9 @@ lua_upstream_watch_func(struct upstream *up, struct rspamd_lua_upstream *lua_ups = lua_newuserdata(L, sizeof(*lua_ups)); lua_ups->up = up; + /* Watcher event: no inflight reference, leave it that way on __gc. */ + lua_ups->acquired = FALSE; + lua_ups->retired = FALSE; rspamd_lua_setclass(L, rspamd_upstream_classname, -1); /* Store parent in the upstream to prevent gc */ lua_rawgeti(L, LUA_REGISTRYINDEX, cdata->parent_cbref); diff --git a/test/lua/unit/upstream.lua b/test/lua/unit/upstream.lua new file mode 100644 index 0000000000..5c69e5f948 --- /dev/null +++ b/test/lua/unit/upstream.lua @@ -0,0 +1,82 @@ +-- Upstream list / upstream object tests + +context("Upstream lua API", function() + local upstream_list = require "rspamd_upstream_list" + + test("create from comma-separated string", function() + local ups = upstream_list.create('127.0.0.1,127.0.0.2,127.0.0.3', 11333) + assert_not_nil(ups) + local all = ups:all_upstreams() + assert_equal(#all, 3) + end) + + test("get_upstream_round_robin returns a usable upstream", function() + local ups = upstream_list.create('127.0.0.1,127.0.0.2', 11333) + assert_not_nil(ups) + local up = ups:get_upstream_round_robin() + assert_not_nil(up) + assert_not_nil(up:get_name()) + assert_not_nil(up:get_port()) + up:ok() + end) + + test("get_upstream_by_hash with the same key is stable", function() + local ups = upstream_list.create('127.0.0.1,127.0.0.2,127.0.0.3', 11333) + local first = ups:get_upstream_by_hash('hello') + local second = ups:get_upstream_by_hash('hello') + assert_not_nil(first) + assert_not_nil(second) + assert_equal(first:get_name(), second:get_name()) + first:ok() + second:ok() + end) + + test("dropping a wrapper without ok/fail does not crash", function() + -- Smoke test for the __gc retire-on-drop fallback. Repeatedly acquire + -- and immediately abandon wrappers; the destructor must release the + -- inflight reference without blowing up. We then verify we can still + -- get usable wrappers from the same list. + local ups = upstream_list.create('127.0.0.1,127.0.0.2,127.0.0.3', 11333) + for _ = 1, 50 do + local up = ups:get_upstream_round_robin() + assert_not_nil(up) + up = nil + end + collectgarbage('collect') + + local survivor = ups:get_upstream_round_robin() + assert_not_nil(survivor) + survivor:ok() + end) + + test("all_upstreams() wrappers are safe to drop", function() + -- all_upstreams() is a view, not an acquisition: dropping the table + -- must not retire any inflight reference (there is none to retire). + local ups = upstream_list.create('127.0.0.1,127.0.0.2', 11333) + for _ = 1, 20 do + local all = ups:all_upstreams() + assert_equal(#all, 2) + all = nil + end + collectgarbage('collect') + + -- Subsequent operations still work. + local up = ups:get_upstream_round_robin() + assert_not_nil(up) + up:ok() + end) + + test("calling :ok then :fail on the same wrapper is safe", function() + -- The retired flag prevents the __gc from also retiring; explicit + -- pairs of ok/fail still drive the underlying upstream, but the + -- per-wrapper inflight is decremented only once. + local ups = upstream_list.create('127.0.0.1', 11333) + local up = ups:get_upstream_round_robin() + assert_not_nil(up) + up:ok() + up:fail('test') + up:ok() + up = nil + collectgarbage('collect') + end) +end)