From 13e86d947d1536dbe6d8f4057388727528546ae6 Mon Sep 17 00:00:00 2001 From: Christopher Faulet Date: Thu, 17 Nov 2022 14:40:20 +0100 Subject: [PATCH] BUG/MEDIUM: listener: Fix race condition when updating the global mngmt task It is pretty similar to fbb934da90 ("BUG/MEDIUM: stick-table: fix a race condition when updating the expiration task"). When the global management task is running, at the end of its process function, it resets the expire date by setting it to TICK_ETERNITY. In same time, a listener may reach a global limit and decides to schedule the task. Thus it is possible to queue the task and trigger the BUG_ON() on the expire date because its value was set to TICK_ETERNITY in the means time: FATAL: bug condition "task->expire == 0" matched at src/task.c:310 call trace(12): | 0x662de8 [b8 01 00 00 00 c6 00 00]: __task_queue+0xc7/0x11e | 0x63b03f [48 b8 04 00 00 00 05 00]: main+0x2535e | 0x63ed1a [e9 d2 fd ff ff 48 8b 45]: listener_accept+0xf72/0xfda | 0x6a36d3 [eb 01 90 c9 c3 55 48 89]: sock_accept_iocb+0x82/0x87 | 0x6af22f [48 8b 05 ca f9 13 00 8b]: fd_update_events+0x35a/0x497 | 0x42a7a8 [89 45 d8 83 7d d8 02 75]: main-0x1eb539 | 0x6158fb [48 8b 05 e6 06 1c 00 64]: run_poll_loop+0x2e7/0x319 | 0x615b6c [48 8b 05 ed 65 1d 00 48]: main-0x175 | 0x7ffff775bded [e9 69 fe ff ff 48 8b 4c]: libc:+0x8cded | 0x7ffff77e1370 [48 89 c7 b8 3c 00 00 00]: libc:+0x112370 To fix the bug, a RW lock is introduced. It is used to fix the race condition. A read lock is taken when the task is scheduled, in listener_accpet() and a write lock is used at the end of process function to set the expire date to TICK_ETERNITY. This lock should not be used very often and most of time by "readers". So, the impact should be really limited. This patch should fix the issue #1930. It must be backported as far as 1.8 with some cautions because the code has evolved a lot since then. --- src/listener.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/listener.c b/src/listener.c index 412af94a16..33a1a9995d 100644 --- a/src/listener.c +++ b/src/listener.c @@ -47,6 +47,7 @@ struct bind_kw_list bind_keywords = { /* list of the temporarily limited listeners because of lack of resource */ static struct mt_list global_listener_queue = MT_LIST_HEAD_INIT(global_listener_queue); static struct task *global_listener_queue_task; +static HA_RWLOCK_T global_listener_rwlock; /* listener status for stats */ const char* li_status_st[LI_STATE_COUNT] = { @@ -1207,7 +1208,9 @@ void listener_accept(struct listener *l) * later than ahead. The listener turns to LI_LIMITED. */ limit_listener(l, &global_listener_queue); + HA_RWLOCK_RDLOCK(LISTENER_LOCK, &global_listener_rwlock); task_schedule(global_listener_queue_task, expire); + HA_RWLOCK_RDUNLOCK(LISTENER_LOCK, &global_listener_rwlock); goto end; limit_proxy: @@ -1257,6 +1260,7 @@ static int listener_queue_init() /* very simple initialization, users will queue the task if needed */ global_listener_queue_task->context = NULL; /* not even a context! */ global_listener_queue_task->process = manage_global_listener_queue; + HA_RWLOCK_INIT(&global_listener_rwlock); return 0; } @@ -1294,7 +1298,9 @@ struct task *manage_global_listener_queue(struct task *t, void *context, unsigne dequeue_all_listeners(); out: + HA_RWLOCK_WRLOCK(LISTENER_LOCK, &global_listener_rwlock); t->expire = TICK_ETERNITY; + HA_RWLOCK_WRUNLOCK(LISTENER_LOCK, &global_listener_rwlock); task_queue(t); return t; } -- 2.39.5