From ce4e0aa7f387f589e293419269c5641c3966fbbc Mon Sep 17 00:00:00 2001
From: Willy Tarreau <w@1wt.eu>
Date: Sun, 5 Nov 2017 23:57:00 +0100
Subject: [PATCH] MEDIUM: task: change the construction of the loop in
 process_runnable_tasks()

This patch slightly rearranges the loop to pack the locked code a little
bit, and to concentrate the accesses to the tree together so that they
benefit more from the cache.

It also fixes how the loop handles the right margin: now that the
retrieved nodes are guaranteed to only match the current thread, we
don't need to rewind every 16 entries. Instead we can rewind each time
we reach the right margin again.

With this change, we now achieve the following performance for 10 H2
connections each carrying 100 streams:

  1 thread  : 550 kreq/s
  2 threads : 644 kreq/s
  3 threads : 598 kreq/s
---
 src/task.c | 60 ++++++++++++++++++++++++------------------------------
 1 file changed, 27 insertions(+), 33 deletions(-)

diff --git a/src/task.c b/src/task.c
index aa7f3f41f4..37994a3318 100644
--- a/src/task.c
+++ b/src/task.c
@@ -186,12 +186,12 @@ void process_runnable_tasks()
 	int i;
 	int max_processed;
 	struct eb32sc_node *rq_next;
-	int rewind;
 	struct task *local_tasks[16];
 	int local_tasks_count;
 
 	tasks_run_queue_cur = tasks_run_queue; /* keep a copy for reporting */
 	nb_tasks_cur = nb_tasks;
 	max_processed = tasks_run_queue;
+
 	if (!tasks_run_queue)
 		return;
 
@@ -202,44 +202,38 @@ void process_runnable_tasks()
 		max_processed = (max_processed + 3) / 4;
 
 	SPIN_LOCK(TASK_RQ_LOCK, &rq_lock);
-	while (max_processed > 0) {
+	rq_next = eb32sc_lookup_ge(&rqueue, rqueue_ticks - TIMER_LOOK_BACK, tid_bit);
+
+	do {
 		/* Note: this loop is one of the fastest code path in
 		 * the whole program. It should not be re-arranged
 		 * without a good reason.
 		 */
-
-		rewind = 0;
-		rq_next = eb32sc_lookup_ge(&rqueue, rqueue_ticks - TIMER_LOOK_BACK, tid_bit);
-		if (!rq_next) {
-			/* we might have reached the end of the tree, typically because
-			 * <rqueue_ticks> is in the first half and we're first scanning
-			 * the last half. Let's loop back to the beginning of the tree now.
-			 */
-			rq_next = eb32sc_first(&rqueue, tid_bit);
-			if (!rq_next) {
-				break;
+		for (local_tasks_count = 0; local_tasks_count < 16; local_tasks_count++) {
+			if (unlikely(!rq_next)) {
+				/* either we just started or we reached the end
+				 * of the tree, typically because <rqueue_ticks>
+				 * is in the first half and we're first scanning
+				 * the last half. Let's loop back to the beginning
+				 * of the tree now.
+				 */
+				rq_next = eb32sc_first(&rqueue, tid_bit);
+				if (!rq_next)
+					break;
 			}
-			rewind = 1;
-		}
 
-		local_tasks_count = 0;
-		while (local_tasks_count < 16) {
 			t = eb32sc_entry(rq_next, struct task, rq);
 			rq_next = eb32sc_next(rq_next, tid_bit);
-			if (t->thread_mask & tid_bit) {
-				/* detach the task from the queue */
-				__task_unlink_rq(t);
-				t->state |= TASK_RUNNING;
-				t->pending_state = 0;
-				t->calls++;
-				local_tasks[local_tasks_count++] = t;
-			}
-			if (!rq_next) {
-				if (rewind || !(rq_next = eb32sc_first(&rqueue, tid_bit))) {
-					break;
-				}
-				rewind = 1;
-			}
+
+			/* detach the task from the queue */
+			__task_unlink_rq(t);
+			local_tasks[local_tasks_count] = t;
+			t->state |= TASK_RUNNING;
+			t->pending_state = 0;
+			t->calls++;
+			max_processed--;
 		}
 
 		if (!local_tasks_count)
@@ -259,7 +253,6 @@ void process_runnable_tasks()
 			local_tasks[i] = t;
 		}
 
-		max_processed -= local_tasks_count;
 		SPIN_LOCK(TASK_RQ_LOCK, &rq_lock);
 		for (i = 0; i < local_tasks_count ; i++) {
 			t = local_tasks[i];
@@ -276,7 +269,8 @@ void process_runnable_tasks()
 				task_queue(t);
 			}
 		}
-	}
+	} while (max_processed > 0);
+
 	SPIN_UNLOCK(TASK_RQ_LOCK, &rq_lock);
 }
 
-- 
2.39.5
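
For readers who want to experiment with this control flow outside of
haproxy, below is a minimal, self-contained C sketch of the same
batching pattern. A plain sorted array stands in for the eb32sc tree,
and every name in it (fake_queue, picked, lookup_ge, BATCH_SIZE and the
key values) is invented for the illustration; the run-queue lock, the
task struct and the scheduler bookkeeping are deliberately left out.
Only the shape mirrors the patch: one lookup before the loop, batches of
up to 16 entries collected per pass, and a wrap back to the left of the
structure only when the right margin is actually reached.

#include <limits.h>
#include <stdio.h>

#define QUEUE_SIZE 40
#define BATCH_SIZE 16

static int fake_queue[QUEUE_SIZE];	/* sorted keys, standing in for the tree */
static int picked[QUEUE_SIZE];		/* non-zero once an entry was consumed */

/* first unconsumed entry with a key >= start, or -1 at the right
 * margin; plays the role of eb32sc_lookup_ge() in this sketch
 */
static int lookup_ge(int start)
{
	for (int i = 0; i < QUEUE_SIZE; i++)
		if (!picked[i] && fake_queue[i] >= start)
			return i;
	return -1;
}

int main(void)
{
	int local[BATCH_SIZE];
	int i, next, count;
	int max_processed = QUEUE_SIZE;

	for (i = 0; i < QUEUE_SIZE; i++)
		fake_queue[i] = i;

	/* single initial lookup, starting mid-range as if "now" sat there */
	next = lookup_ge(25);
	do {
		for (count = 0; count < BATCH_SIZE; count++) {
			if (next < 0) {
				/* right margin reached: wrap to the beginning */
				next = lookup_ge(INT_MIN);
				if (next < 0)
					break;	/* queue fully drained */
			}
			local[count] = fake_queue[next];
			picked[next] = 1;	/* "detach" the entry */
			next = lookup_ge(fake_queue[next]);	/* successor */
			max_processed--;
		}
		if (!count)
			break;

		/* the run-queue lock would be dropped here while the
		 * batch is executed
		 */
		for (i = 0; i < count; i++)
			printf("%d ", local[i]);
		putchar('\n');
	} while (max_processed > 0);
	return 0;
}

Compiled as-is, the sketch prints the keys in three batches: 25..39
followed by a wrapped 0, then 1..16, then 17..24. That is exactly the
behaviour the commit message describes: the rewind happens only when the
cursor falls off the right edge, rather than the lookup being restarted
for every 16-entry batch as the old rewind flag enforced.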