]> git.ipfire.org Git - thirdparty/haproxy.git/commitdiff
BUG/MEDIUM: tasks: Make sure we don't schedule a task already running
author Olivier Houchard <ohouchard@haproxy.com>
Thu, 9 Apr 2026 13:33:39 +0000 (15:33 +0200)
committer Olivier Houchard <cognet@ci0.org>
Wed, 22 Apr 2026 14:05:23 +0000 (16:05 +0200)
In task_schedule(), before attempting to set the new task expiration
date, make sure the task is not running by trying to set the
TASK_RUNNING flag, and waiting if it is already set. Holding the flag
ensures that the task cannot run while we're modifying it.
There is a very rare race condition in which task_schedule() sets the
expiration date, then the running task sets it to something else. If the
running task sets it to TICK_ETERNITY before task_schedule() calls
__task_queue(), we hit a BUG_ON() there.
This is very hard to reproduce, but has been reported a few times,
including in GitHub issue #3327, which should now be fixed.

This should be backported as far back as 2.8.

WIP: Make sure the task is not running before changing expire

include/haproxy/task.h

index bb4b4e73c7016b726b6100b4eaca58906a9dda71..8422992c17db7a682334e63ccfeb6922aa00148b 100644 (file)
@@ -713,12 +713,27 @@ static inline void _task_schedule(struct task *task, int when, const struct ha_c
 
 #ifdef USE_THREAD
        if (task->tid < 0) {
+               int was_running;
+               /*
+                * Make sure the task is not already running before changing
+                * its expire, otherwise it could overwrite our modification
+                */
+               if (task == th_ctx->current)
+                       was_running = 1;
+               else {
+                       was_running = 0;
+                       while (HA_ATOMIC_FETCH_OR(&task->state, TASK_RUNNING) & TASK_RUNNING)
+                               __ha_cpu_relax();
+               }
+
                /* FIXME: is it really needed to lock the WQ during the check ? */
                HA_RWLOCK_WRLOCK(TASK_WQ_LOCK, &wq_lock);
                if (task_in_wq(task))
                        when = tick_first(when, task->expire);
 
                task->expire = when;
+               if (!was_running)
+                       task_drop_running(task, 0);
                if (!task_in_wq(task) || tick_is_lt(task->expire, task->wq.key)) {
                        if (likely(caller)) {
                                caller = HA_ATOMIC_XCHG(&task->caller, caller);