From: Aurelien DARRAGON Date: Thu, 13 Jul 2023 08:47:33 +0000 (+0200) Subject: BUG/MEDIUM: hlua_fcn/queue: bad pop_wait sequencing X-Git-Tag: v2.9-dev2~51 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=70e10ee5bc368e02496710793c19ccca891ffbc4;p=thirdparty%2Fhaproxy.git BUG/MEDIUM: hlua_fcn/queue: bad pop_wait sequencing I assumed that the hlua_yieldk() function used in queue:pop_wait() function would eventually return when the continuation function would return. But this is wrong, the continuation function is simply called back by the resume after the hlua_yieldk() which does not return in this case. The caller is no longer the initial calling function, but Lua, so when the continuation function eventually returns, it does not give the hand back to the C calling function (queue:pop_wait()), like we're used to, but directly to Lua which will continue the normal execution of the (Lua) function that triggered the C-function, effectively bypassing the end of the C calling function. Because of this, the queue waiting list cleanup never occurs! This causes some undesirable effects: - pop_wait() will slowly leak over the time, because the allocated queue waiting entry never gets deallocated when the function is finished - queue:push() will become slower and slower because the wait list will keep growing indefinitely as a result of the previous leak - the task that performed at least 1 pop_wait() could suffer from useless wakeups because it will stay indefinitely in the queue waiting list, so every queue:push() will try to wake the task, even if the task is not waiting for new queue items. - last but not least, if the task that performed at least 1 pop_wait ends or crashes, the next queue:push() will lead to invalid reads and process crash because it will try to wakeup a ghost task that doesn't exist anymore. To fix this, the pop_wait function was reworked with the assumption that the hlua_yieldk() with continuation function never returns. Indeed, it is now the continuation function that will take care of the cleanup, instead of the parent function. This must be backported in 2.8 with 86fb22c5 ("MINOR: hlua_fcn: add Queue class") --- diff --git a/src/hlua_fcn.c b/src/hlua_fcn.c index 63366694fd..06805feef9 100644 --- a/src/hlua_fcn.c +++ b/src/hlua_fcn.c @@ -639,10 +639,18 @@ static int hlua_queue_pop(lua_State *L) static int _hlua_queue_pop_wait(lua_State *L, int status, lua_KContext ctx) { struct hlua_queue *queue = hlua_check_queue(L, 1); + struct hlua_queue_wait *wait = lua_touserdata(L, 2); /* new pop attempt */ - if (!_hlua_queue_pop(L, queue)) + if (!_hlua_queue_pop(L, queue)) { hlua_yieldk(L, 0, 0, _hlua_queue_pop_wait, TICK_ETERNITY, 0); // wait retry + return 0; // never reached, yieldk won't return + } + + /* remove task from waiting list */ + MT_LIST_DELETE(&wait->entry); + pool_free(pool_head_hlua_queuew, wait); + return 1; // success } static int hlua_queue_pop_wait(lua_State *L) @@ -662,6 +670,12 @@ static int hlua_queue_pop_wait(lua_State *L) return 0; /* not reached */ } + /* try opportunistic pop (there could already be pending items) */ + if (_hlua_queue_pop(L, queue)) + return 1; // success + + /* no pending items, waiting required */ + wait = pool_alloc(pool_head_hlua_queuew); if (!wait) { lua_pushnil(L); @@ -674,20 +688,23 @@ static int hlua_queue_pop_wait(lua_State *L) /* add task to queue's wait list */ MT_LIST_TRY_APPEND(&queue->wait_tasks, &wait->entry); - /* push queue on the top of the stack */ - lua_pushlightuserdata(L, queue); - - /* try to pop without waiting (there could be already pending items) */ - if (!_hlua_queue_pop(L, queue)) { - /* no item immediately available, go to waiting loop */ - hlua_yieldk(L, 0, 0, _hlua_queue_pop_wait, TICK_ETERNITY, 0); - } - - /* remove task from waiting list */ - MT_LIST_DELETE(&wait->entry); - pool_free(pool_head_hlua_queuew, wait); + /* push wait entry at index 2 on the stack (queue is already there) */ + lua_pushlightuserdata(L, wait); - return 1; + /* Go to waiting loop which immediately performs a new attempt to make + * sure we didn't miss a push during the wait entry initialization. + * + * _hlua_queue_pop_wait() won't return to us if it has to yield, which + * is the most likely scenario. What happens in this case is that yieldk + * call never returns, and instead Lua will call the continuation + * function after a successful resume, so the calling function will + * no longer be us, but Lua instead. And when the continuation function + * eventually returns (because it succesfully popped an item), Lua will + * directly give the hand back to the Lua function that called us. + * + * More info here: https://www.lua.org/manual/5.4/manual.html#4.7 + */ + return _hlua_queue_pop_wait(L, LUA_OK, 0); } static int hlua_queue_new(lua_State *L)