* cannot use that to store sorted information because that reference changes
* all the time.
*
- * So we cut the time in 3 ranges, only one of which <now> can be. The base of
- * the range holding <now> serves as a reference for as long as <now> remains
- * in this range :
- * - previous : those ones are expired by definition (all before <now>)
- * - current : some are expired, some are not (holds <now>)
- * - next : none are expired (all after <now>)
+ * We'll use the fact that the time wraps to sort timers. Timers above <now>
+ * are in the future, timers below <now> are in the past. Here, "above" and
+ * "below" are to be considered modulo 2^31.
*
- * We use the higher two bits of the timers expressed in ticks to determine
- * which range a timer is in, compared to <now> :
- *
- * now previous current next0 next1
- * [31:30] [31:30] [31:30] [31:30] [31:30]
- * 00 11 00 01 10
- * 01 00 01 10 11
- * 10 01 10 11 00
- * 11 10 11 00 01
- *
- * By definition, <current> is the range containing <now> as well as all timers
- * which have the same 2 high bits as <now>, <previous> is the range just
- * before, which contains all timers whose high bits equal those of <now> minus
- * 1. Last, <next> is composed of the two remaining ranges.
- *
- * For ease of implementation, the timers will then be stored into 4 queues 0-3
- * determined by the 2 higher bits of the timer. The expiration algorithm is
- * very simple :
- * - expire everything in <previous>=queue[((now>>30)-1)&3]
- * - expire from <current>=queue[(now>>30)&3] everything where timer >= now
- *
- * With this algorithm, it's possible to queue tasks meant to expire 24.8 days
- * in the future, and still be able to detect events remaining unprocessed for
- * the last 12.4 days! Note that the principle might be extended to any number
- * of higher bits as long as there is only one range for expired tasks. For
- * instance, using the 8 higher bits to index the range, we would have one past
- * range of 4.6 hours (24 bits in ms), and 254 ranges in the future totalizing
- * 49.3 days. This would eat more memory for very little added benefit though.
- *
- * Also, in order to maintain the ability to perform time comparisons, it is
- * preferable to avoid using the <next1> range above, as values in this range
- * may not easily be compared to <now> outside of these functions as it is the
- * opposite of the <current> range, and <timer>-<now> may randomly be positive
- * or negative. That means we're left with +/- 12.4 days timers.
- *
- * To keep timers ordered, we use 4 ebtrees [0..3]. We could have used instead
- * of ticks, (seconds*1024)+milliseconds, as well as 1024th of seconds, but
- * that makes comparisons with ticks more difficult, so in the end it's better
- * to stick to the ticks.
+ * Timers are stored sorted in an ebtree. We use the new ability of ebtrees to
+ * look up values starting from X to only expire tasks between <now> - 2^31 and
+ * <now>. If the end of the tree is reached while walking over it, we simply
+ * loop back to the beginning. That way, we have no problem keeping sorted
+ * wrapping timers in a tree, between (now - 24 days) and (now + 24 days). The
+ * keys in the tree always reflect their real position, none can be infinite.
+ * This reduces the number of checks to be performed.
*
* Another nice optimisation is to allow a timer to stay at an old place in the
* queue as long as it's not further than the real expiration date. That way,
* So, to summarize, we have :
* - node->key always defines current position in the wait queue
* - timer is the real expiration date (possibly infinite)
- * - node->key <= timer
+ * - node->key is always before or equal to timer
*
* The run queue works similarly to the wait queue except that the current date
* is replaced by an insertion counter which can also wrap without any problem.
*/
-/* the timers are stored as 32-bit values in the queues */
-#define TIMER_TICK_BITS 32
-#define TIMER_TREE_BITS 2
-#define TIMER_TREES (1 << TIMER_TREE_BITS)
-#define TIMER_TREE_SHIFT (TIMER_TICK_BITS - TIMER_TREE_BITS)
-#define TIMER_TREE_MASK (TIMER_TREES - 1)
-#define TIMER_TICK_MASK ((1U << (TIMER_TICK_BITS-1)) * 2 - 1)
-#define TIMER_SIGN_BIT (1 << (TIMER_TICK_BITS - 1))
+/* The farthest we can look back in a timer tree */
+#define TIMER_LOOK_BACK (1U << 31)
/* a few exported variables */
extern unsigned int run_queue; /* run queue size */
extern struct pool_head *pool2_task;
extern struct task *last_timer; /* optimization: last queued timer */
-/* Convert ticks to timers. Must not be called with TICK_ETERNITY, which is not
- * a problem inside tree scanning functions. Note that ticks are signed while
- * timers are not.
- */
-static inline unsigned int tick_to_timer(int tick)
-{
- return tick & TIMER_TICK_MASK;
-}
-
-/* Convert timer to ticks. This operation will be correct only as long as
- * timers are stored on a minimum of 32-bit. We take care of not returning zero
- * which would mean "eternity" for a tick. Also note that ticks are signed and
- * timers are not.
- */
-static inline int timer_to_tick(unsigned int timer)
-{
- return timer ? timer : 1;
-}
-
-/* returns a tree number based on a ticks value */
-static inline unsigned int timer_to_tree(unsigned int timer)
-{
- return (timer >> TIMER_TREE_SHIFT) & TIMER_TREE_MASK;
-}
-
/* return 0 if task is in run queue, otherwise non-zero */
static inline int task_in_rq(struct task *t)
{
if (!tick_isset(task->expire))
return;
- if (((tick_to_timer(task->expire) - task->wq.key) & TIMER_SIGN_BIT)
- || !task_in_wq(task))
+ if (!task_in_wq(task) || tick_is_lt(task->expire, task->wq.key))
__task_queue(task);
}
struct pool_head *pool2_task;
unsigned int run_queue = 0;
-unsigned int niced_tasks = 0; /* number of niced tasks in the run queue */
-struct task *last_timer = NULL; /* optimization: last queued timer */
+unsigned int niced_tasks = 0; /* number of niced tasks in the run queue */
+struct task *last_timer = NULL; /* optimization: last queued timer */
-static struct eb_root timers[TIMER_TREES]; /* trees with MSB 00, 01, 10 and 11 */
-static struct eb_root rqueue[TIMER_TREES]; /* trees constituting the run queue */
-static unsigned int rqueue_ticks; /* insertion count */
+static struct eb_root timers; /* sorted timers tree */
+static struct eb_root rqueue; /* tree constituting the run queue */
+static unsigned int rqueue_ticks; /* insertion count */
/* Puts the task <t> in run queue at a position depending on t->nice. <t> is
* returned. The nice value assigns boosts in 32th of the run queue size. A
/* clear state flags at the same time */
t->state &= ~TASK_WOKEN_ANY;
- eb32_insert(&rqueue[timer_to_tree(t->rq.key)], &t->rq);
+ eb32_insert(&rqueue, &t->rq);
return t;
}
* Inserts a task into the wait queue at the position given by its expiration
* date. It does not matter if the task was already in the wait queue or not,
* as it will be unlinked. The task must not have an infinite expiration timer.
- * Last, tasks must not be queued further than the end of the next tree, which
- * is between <now_ms> and <now_ms> + TIMER_SIGN_BIT ms (now+12days..24days in
- * 32bit).
+ * Last, tasks must not be queued further than the end of the tree, which is
+ * between <now_ms> and <now_ms> + 2^31 ms (now+24days in 32bit).
*
* This function should not be used directly, it is meant to be called by the
* inline version of task_queue() which performs a few cheap preliminary tests
__task_unlink_wq(task);
/* the task is not in the queue now */
- if (unlikely(!tick_isset(task->expire)))
- return;
-
- task->wq.key = tick_to_timer(task->expire);
+ task->wq.key = task->expire;
#ifdef DEBUG_CHECK_INVALID_EXPIRATION_DATES
- if ((task->wq.key - tick_to_timer(now_ms)) & TIMER_SIGN_BIT)
+ if (tick_is_lt(task->wq.key, now_ms))
/* we're queuing too far away or in the past (most likely) */
return;
#endif
eb_insert_dup(&last_timer->wq.node, &task->wq.node);
return;
}
- eb32_insert(&timers[timer_to_tree(task->wq.key)], &task->wq);
+ eb32_insert(&timers, &task->wq);
if (task->wq.node.bit == -1)
- last_timer = task; /* we only want dup a tree's root */
+ last_timer = task; /* we only want a dup tree's root */
return;
}
/*
* Extract all expired timers from the timer queue, and wakes up all
- * associated tasks. Returns the date of next event (or eternity).
+ * associated tasks. Returns the date of next event (or eternity) in <next>.
*/
void wake_expired_tasks(int *next)
{
struct task *task;
struct eb32_node *eb;
- unsigned int now_tree;
- unsigned int tree;
- /* In theory, we should :
- * - wake all tasks from the <previous> tree
- * - wake all expired tasks from the <current> tree
- * - scan <next> trees for next expiration date if not found earlier.
- * But we can do all this more easily : we scan all 3 trees before we
- * wrap, and wake everything expired from there, then stop on the first
- * non-expired entry.
- */
+ eb = eb32_lookup_ge(&timers, now_ms - TIMER_LOOK_BACK);
+ while (1) {
+ if (unlikely(!eb)) {
+ /* we might have reached the end of the tree, typically because
+ * <now_ms> is in the first half and we're first scanning the last
+ * half. Let's loop back to the beginning of the tree now.
+ */
+ eb = eb32_first(&timers);
+ if (likely(!eb))
+ break;
+ }
- now_tree = timer_to_tree(tick_to_timer(now_ms));
- tree = (now_tree - 1) & TIMER_TREE_MASK;
- do {
- eb = eb32_first(&timers[tree]);
- while (eb) {
- task = eb32_entry(eb, struct task, wq);
- if (likely((tick_to_timer(now_ms) - eb->key) & TIMER_SIGN_BIT)) {
- /* note that we don't need this check for the <previous>
- * tree, but it's cheaper than duplicating the code.
- */
- *next = timer_to_tick(eb->key);
- return;
- }
+ if (likely(tick_is_lt(now_ms, eb->key))) {
+ /* timer not expired yet, revisit it later */
+ *next = eb->key;
+ return;
+ }
- /* detach the task from the queue and add the task to the run queue */
- eb = eb32_next(eb);
- __task_unlink_wq(task);
+ /* timer looks expired, detach it from the queue */
+ task = eb32_entry(eb, struct task, wq);
+ eb = eb32_next(eb);
+ __task_unlink_wq(task);
- /* It is possible that this task was left at an earlier place in the
- * tree because a recent call to task_queue() has not moved it. This
- * happens when the new expiration date is later than the old one.
- * Since it is very unlikely that we reach a timeout anyway, it's a
- * lot cheaper to proceed like this because we almost never update
- * the tree. We may also find disabled expiration dates there. Since
- * we have detached the task from the tree, we simply call task_queue
- * to take care of this.
- */
- if (!tick_is_expired(task->expire, now_ms)) {
- task_queue(task);
- continue;
- }
- task_wakeup(task, TASK_WOKEN_TIMER);
+ /* It is possible that this task was left at an earlier place in the
+ * tree because a recent call to task_queue() has not moved it. This
+ * happens when the new expiration date is later than the old one.
+ * Since it is very unlikely that we reach a timeout anyway, it's a
+ * lot cheaper to proceed like this because we almost never update
+ * the tree. We may also find disabled expiration dates there. Since
+ * we have detached the task from the tree, we simply call task_queue
+ * to take care of this.
+ */
+ if (!tick_is_expired(task->expire, now_ms)) {
+ task_queue(task);
+ continue;
}
- tree = (tree + 1) & TIMER_TREE_MASK;
- } while (((tree - now_tree) & TIMER_TREE_MASK) < TIMER_TREES/2);
+ task_wakeup(task, TASK_WOKEN_TIMER);
+ }
/* We have found no task to expire in any tree */
*next = TICK_ETERNITY;
* counter may wrap without a problem, of course. We then limit the number of
* tasks processed at once to 1/4 of the number of tasks in the queue, and to
* 200 max in any case, so that general latency remains low and so that task
- * positions have a chance to be considered. It also reduces the number of
- * trees to be evaluated when no task remains.
- *
- * Just like with timers, we start with tree[(current - 1)], which holds past
- * values, and stop when we reach the middle of the list. In practise, we visit
- * 3 out of 4 trees.
+ * positions have a chance to be considered.
*
* The function adjusts <next> if a new event is closer.
*/
{
struct task *t;
struct eb32_node *eb;
- unsigned int tree, stop;
unsigned int max_processed;
int expire;
- if (!run_queue)
- return;
-
max_processed = run_queue;
if (max_processed > 200)
max_processed = 200;
if (likely(niced_tasks))
max_processed /= 4;
- tree = timer_to_tree(rqueue_ticks);
- stop = (tree + TIMER_TREES / 2) & TIMER_TREE_MASK;
- tree = (tree - 1) & TIMER_TREE_MASK;
-
expire = *next;
- do {
- eb = eb32_first(&rqueue[tree]);
- while (eb) {
- /* Note: this loop is one of the fastest code path in
- * the whole program. It should not be re-arranged
- * without a good reason.
- */
- t = eb32_entry(eb, struct task, rq);
+ eb = eb32_lookup_ge(&rqueue, rqueue_ticks - TIMER_LOOK_BACK);
+ while (max_processed--) {
+ /* Note: this loop is one of the fastest code paths in
+ * the whole program. It should not be re-arranged
+ * without a good reason.
+ */
+
+ if (unlikely(!eb)) {
+ /* we might have reached the end of the tree, typically because
+ * <rqueue_ticks> is in the first half and we're first scanning
+ * the last half. Let's loop back to the beginning of the tree now.
+ */
+ eb = eb32_first(&rqueue);
+ if (likely(!eb))
+ break;
+ }
- /* detach the task from the queue and add the task to the run queue */
- eb = eb32_next(eb);
- __task_unlink_rq(t);
+ /* detach the task from the queue */
+ t = eb32_entry(eb, struct task, rq);
+ eb = eb32_next(eb);
+ __task_unlink_rq(t);
- t->state |= TASK_RUNNING;
- /* This is an optimisation to help the processor's branch
- * predictor take this most common call.
- */
- if (likely(t->process == process_session))
- t = process_session(t);
- else
- t = t->process(t);
+ t->state |= TASK_RUNNING;
+ /* This is an optimisation to help the processor's branch
+ * predictor take this most common call.
+ */
+ if (likely(t->process == process_session))
+ t = process_session(t);
+ else
+ t = t->process(t);
- if (likely(t != NULL)) {
- t->state &= ~TASK_RUNNING;
- if (t->expire) {
- task_queue(t);
- expire = tick_first_2nz(expire, t->expire);
- }
+ if (likely(t != NULL)) {
+ t->state &= ~TASK_RUNNING;
+ if (t->expire) {
+ task_queue(t);
+ expire = tick_first_2nz(expire, t->expire);
}
-
- if (!--max_processed)
- goto out;
}
- tree = (tree + 1) & TIMER_TREE_MASK;
- } while (tree != stop);
- out:
+ }
*next = expire;
}