]> git.ipfire.org Git - thirdparty/haproxy.git/commitdiff
MINOR: sched: add a new function is_sched_alive() to report scheduler's health
author: Willy Tarreau <w@1wt.eu>
Thu, 17 Apr 2025 13:24:08 +0000 (15:24 +0200)
committer: Willy Tarreau <w@1wt.eu>
Thu, 17 Apr 2025 14:25:47 +0000 (16:25 +0200)
This verifies that the scheduler is still ticking without having to
access the activity[] array or keep local copies of the ctxsw
counter. It just tests and sets a flag that is reset after each
return from a ->process() function.

include/haproxy/task.h
src/task.c

index a9e8763df27941b172b3d7f35be168ab26f2009a..40cdea559dcaaba2c69aedf1abaab28238523f28 100644 (file)
@@ -123,6 +123,12 @@ void wake_expired_tasks(void);
  */
 int next_timer_expiry(void);
 
+/* Pings the scheduler to verify that tasks continue running.
+ * Returns 1 if the scheduler made progress since last call,
+ * 0 if it looks stuck.
+ */
+int is_sched_alive(void);
+
 /*
  * Delete every tasks before running the master polling loop
  */
index 02b4ef5bb72415f6d5b9d3b97c6fff936579f0be..4f8eb7ef92d48c5ba18aeea7f86095934cd6e606 100644 (file)
@@ -42,6 +42,9 @@ DECLARE_POOL(pool_head_notification, "notification", sizeof(struct notification)
  */
 __decl_aligned_rwlock(wq_lock);
 
+/* armed by is_sched_alive(), reset in run_tasks_from_lists(); flags a stuck scheduler (for warnings) */
+static THREAD_LOCAL int sched_stuck;
+
 /* Flags the task <t> for immediate destruction and puts it into its first
  * thread's shared tasklet list if not yet queued/running. This will bypass
  * the priority scheduling and make the task show up as fast as possible in
@@ -605,6 +608,7 @@ unsigned int run_tasks_from_lists(unsigned int budgets[])
                        else {
                                done++;
                                th_ctx->current = NULL;
+                               sched_stuck = 0; // scheduler is not stuck (don't warn)
                                /* signal barrier to prevent thread dump helpers
                                 * from dumping a task currently being freed.
                                 */
@@ -646,6 +650,7 @@ unsigned int run_tasks_from_lists(unsigned int budgets[])
                                task_unlink_wq(t);
                                __task_free(t);
                                th_ctx->current = NULL;
+                               sched_stuck = 0; // scheduler is not stuck (don't warn)
                                __ha_barrier_store();
                                /* We don't want max_processed to be decremented if
                                 * we're just freeing a destroyed task, we should only
@@ -671,6 +676,7 @@ unsigned int run_tasks_from_lists(unsigned int budgets[])
                }
 
                th_ctx->current = NULL;
+               sched_stuck = 0; // scheduler is not stuck (don't warn)
                __ha_barrier_store();
 
                /* stats are only registered for non-zero wake dates */
@@ -894,6 +900,20 @@ void process_runnable_tasks()
                activity[tid].long_rq++;
 }
 
+/* Pings the scheduler to verify that tasks continue running on the calling
+ * thread (the flag is thread-local). Returns 1 if progress was made since
+ * the last call (or on the first call), 0 if the scheduler looks stuck.
+ */
+int is_sched_alive(void)
+{
+       if (sched_stuck)
+               return 0;
+
+       /* arm the flag: if still set on the next call, no progress was made */
+       sched_stuck = 1;
+       return 1;
+}
+
 /*
  * Delete every tasks before running the master polling loop
  */