From: Willy Tarreau Date: Thu, 17 Apr 2025 13:24:08 +0000 (+0200) Subject: MINOR: sched: add a new function is_sched_alive() to report scheduler's health X-Git-Tag: v3.2-dev11~12 X-Git-Url: http://git.ipfire.org/gitweb/gitweb.cgi?a=commitdiff_plain;h=36ec70c526797267f337822468810225265fb44a;p=thirdparty%2Fhaproxy.git MINOR: sched: add a new function is_sched_alive() to report scheduler's health This verifies that the scheduler is still ticking without having to access the activity[] array or keep local copies of the ctxsw counter. It just tests and sets a flag that is reset after each return from a ->process() function. --- diff --git a/include/haproxy/task.h b/include/haproxy/task.h index a9e8763df..40cdea559 100644 --- a/include/haproxy/task.h +++ b/include/haproxy/task.h @@ -123,6 +123,12 @@ void wake_expired_tasks(void); */ int next_timer_expiry(void); +/* Pings the scheduler to verify that tasks continue running. + * Returns 1 if the scheduler made progress since last call, + * 0 if it looks stuck. + */ +int is_sched_alive(void); + /* * Delete every tasks before running the master polling loop */ diff --git a/src/task.c b/src/task.c index 02b4ef5bb..4f8eb7ef9 100644 --- a/src/task.c +++ b/src/task.c @@ -42,6 +42,9 @@ DECLARE_POOL(pool_head_notification, "notification", sizeof(struct notification) */ __decl_aligned_rwlock(wq_lock); +/* used to detect if the scheduler looks stuck (for warnings) */ +static THREAD_LOCAL int sched_stuck; + /* Flags the task for immediate destruction and puts it into its first * thread's shared tasklet list if not yet queued/running. This will bypass * the priority scheduling and make the task show up as fast as possible in @@ -605,6 +608,7 @@ unsigned int run_tasks_from_lists(unsigned int budgets[]) else { done++; th_ctx->current = NULL; + sched_stuck = 0; // scheduler is not stuck (don't warn) /* signal barrier to prevent thread dump helpers * from dumping a task currently being freed. 
*/ @@ -646,6 +650,7 @@ unsigned int run_tasks_from_lists(unsigned int budgets[]) task_unlink_wq(t); __task_free(t); th_ctx->current = NULL; + sched_stuck = 0; // scheduler is not stuck (don't warn) __ha_barrier_store(); /* We don't want max_processed to be decremented if * we're just freeing a destroyed task, we should only @@ -671,6 +676,7 @@ unsigned int run_tasks_from_lists(unsigned int budgets[]) } th_ctx->current = NULL; + sched_stuck = 0; // scheduler is not stuck (don't warn) __ha_barrier_store(); /* stats are only registered for non-zero wake dates */ @@ -894,6 +900,20 @@ void process_runnable_tasks() activity[tid].long_rq++; } +/* Pings the scheduler to verify that tasks continue running. + * Returns 1 if the scheduler made progress since last call, + * 0 if it looks stuck. + */ +int is_sched_alive(void) +{ + if (sched_stuck) + return 0; + + /* next time we'll know if any progress was made */ + sched_stuck = 1; + return 1; +} + /* * Delete every tasks before running the master polling loop */