include/common/ticks.h
Functions and macros for manipulation of expiration timers
- Copyright (C) 2000-2008 Willy Tarreau - w@1wt.eu
+ Copyright (C) 2000-2009 Willy Tarreau - w@1wt.eu
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
/*
* Using a mix of milliseconds and timeval for internal timers is expensive and
* overkill, because we don't need such a precision to compute timeouts.
- * So we're converting them to "ticks". Right now, one tick equals one
- * millisecond, but that might change in the future. Ticks are stored as 32bit
- * values, and sorted in four 30bit-wide rotating arrays, which means that any
- * timer may be 2^30 ms in the future, or 12.4 days. The ticks are designed to
- * wrap after they pass 2^32. That means that we cannot directly compare them,
- * but we can check the sign of their difference.
+ * So we're converting them to "ticks".
*
+ * A tick is a representation of a date relative to another one, and is
+ * measured in milliseconds. The natural usage is to represent an absolute date
+ * relative to the current date. Since it is not practical to update all values
+ * each time the current date changes, instead we use the absolute date rounded
+ * down to fit in a tick. We then have to compare a tick to the current date to
+ * know whether it is in the future or in the past. If a tick is below the
+ * current date, it is in the past. If it is above, it is in the future. The
+ * values will wrap so we can't compare that easily, instead we check the sign
+ * of the difference between a tick and the current date.
+ *
+ * Proceeding like this allows us to manipulate dates that are stored in
+ * scalars with enough precision and range. For this reason, we store ticks in
+ * 32-bit integers. This is enough to handle dates that are between 24.85 days
+ * in the past and as much in the future.
+ *
* We must both support absolute dates (well in fact, dates relative to now+/-
* 12 days), and intervals (for timeouts). Both types need an "eternity" magic
* value. For optimal code generation, we'll use zero as the magic value
* indicating that an expiration timer or a timeout is not set. We have to
* check that we don't return this value when adding timeouts to <now>. If a
* computation returns 0, we must increase it to 1 (which will push the timeout
- * 1 ms further).
+ * 1 ms further). For this reason, timeouts must not be added by hand but via
+ * the dedicated tick_add() function.
*/
#ifndef _COMMON_TICKS_H
#include <common/memory.h>
#include <common/mini-clist.h>
#include <common/standard.h>
+#include <common/ticks.h>
#include <types/task.h>
+/* Principle of the wait queue.
+ *
+ * We want to be able to tell whether an expiration date is before of after the
+ * current time <now>. We KNOW that expiration dates are never too far apart,
+ * because they are measured in ticks (milliseconds). We also know that almost
+ * all dates will be in the future, and that a very small part of them will be
+ * in the past, they are the ones which have expired since last time we checked
+ * them. Using ticks, we know if a date is in the future or in the past, but we
+ * cannot use that to store sorted information because that reference changes
+ * all the time.
+ *
+ * So we cut the time in 3 ranges, only one of which <now> can be. The base of
+ * the range holding <now> serves as a reference for as long as <now> remains
+ * in this range :
+ * - previous : those ones are expired by definition (all before <now>)
+ * - current : some are expired, some are not (holds <now>)
+ * - next : none are expired (all after <now>)
+ *
+ * We use the higher two bits of the timers expressed in ticks to determine
+ * which range a timer is in, compared to <now> :
+ *
+ * now previous current next0 next1
+ * [31:30] [31:30] [31:30] [31:30] [31:30]
+ * 00 11 00 01 10
+ * 01 00 01 10 11
+ * 10 01 10 11 00
+ * 11 10 11 00 01
+ *
+ * By definition, <current> is the range containing <now> as well as all timers
+ * which have the same 2 high bits as <now>, <previous> is the range just
+ * before, which contains all timers whose high bits equal those of <now> minus
+ * 1. Last, <next> is composed of the two remaining ranges.
+ *
+ * For ease of implementation, the timers will then be stored into 4 queues 0-3
+ * determined by the 2 higher bits of the timer. The expiration algorithm is
+ * very simple :
+ * - expire everything in <previous>=queue[((now>>30)-1)&3]
+ * - expire from <current>=queue[(now>>30)&3] everything where timer >= now
+ *
+ * With this algorithm, it's possible to queue tasks meant to expire 24.8 days
+ * in the future, and still be able to detect events remaining unprocessed for
+ * the last 12.4 days! Note that the principle might be extended to any number
+ * of higher bits as long as there is only one range for expired tasks. For
+ * instance, using the 8 higher bits to index the range, we would have one past
+ * range of 4.6 hours (24 bits in ms), and 254 ranges in the future totalizing
+ * 49.3 days. This would eat more memory for very little added benefit though.
+ *
+ * Also, in order to maintain the ability to perform time comparisons, it is
+ * preferable to avoid using the <next1> range above, as values in this range
+ * may not easily be compared to <now> outside of these functions as it is the
+ * opposite of the <current> range, and <timer>-<now> may randomly be positive
+ * or negative. That means we're left with +/- 12.4 days timers.
+ *
+ * To keep timers ordered, we use 4 ebtrees [0..3]. We could have used instead
+ * of ticks, (seconds*1024)+milliseconds, as well as 1024th of seconds, but
+ * that makes comparisons with ticks more difficult, so in the end it's better
+ * to stick to the ticks.
+ *
+ * Another nice optimisation is to allow a timer to stay at an old place in the
+ * queue as long as it's not further than the real expiration date. That way,
+ * we use the tree as a place holder for a minorant of the real expiration
+ * date. Since we have a very low chance of hitting a timeout anyway, we can
+ * bounce the nodes to their right place when we scan the tree if we encounter
+ * a misplaced node once in a while. This even allows us not to remove the
+ * infinite timers from the wait queue.
+ *
+ * So, to summarize, we have :
+ * - node->key always defines current position in the wait queue
+ * - timer is the real expiration date (possibly infinite)
+ * - node->key <= timer
+ *
+ * The run queue works similarly to the wait queue except that the current date
+ * is replaced by an insertion counter which can also wrap without any problem.
+ */
+
+/* the timers are stored as 32-bit values in the queues */
+#define TIMER_TICK_BITS 32
+#define TIMER_TREE_BITS 2
+#define TIMER_TREES (1 << TIMER_TREE_BITS)
+#define TIMER_TREE_SHIFT (TIMER_TICK_BITS - TIMER_TREE_BITS)
+#define TIMER_TREE_MASK (TIMER_TREES - 1)
+#define TIMER_TICK_MASK ((1U << (TIMER_TICK_BITS-1)) * 2 - 1)
+#define TIMER_SIGN_BIT (1 << (TIMER_TICK_BITS - 1))
+
+/* a few exported variables */
extern unsigned int run_queue; /* run queue size */
extern unsigned int niced_tasks; /* number of niced tasks in the run queue */
extern struct pool_head *pool2_task;
extern struct task *last_timer; /* optimization: last queued timer */
-/* perform minimal initializations, report 0 in case of error, 1 if OK. */
-int init_task();
+/* Convert ticks to timers. Must not be called with TICK_ETERNITY, which is not
+ * a problem inside tree scanning functions. Note that ticks are signed while
+ * timers are not.
+ */
+static inline unsigned int tick_to_timer(int tick)
+{
+ return tick & TIMER_TICK_MASK;
+}
+
+/* Convert timer to ticks. This operation will be correct only as long as
+ * timers are stored on a minimum of 32-bit. We take care of not returning zero
+ * which would mean "eternity" for a tick. Also note that ticks are signed and
+ * timers are not.
+ */
+static inline int timer_to_tick(unsigned int timer)
+{
+ return timer ? timer : 1;
+}
+
+/* returns a tree number based on a ticks value */
+static inline unsigned int timer_to_tree(unsigned int timer)
+{
+ return (timer >> TIMER_TREE_SHIFT) & TIMER_TREE_MASK;
+}
/* return 0 if task is in run queue, otherwise non-zero */
static inline int task_in_rq(struct task *t)
*/
void wake_expired_tasks(int *next);
+/* Perform minimal initializations, report 0 in case of error, 1 if OK. */
+int init_task();
#endif /* _PROTO_TASK_H */
struct eb32_node wq; /* ebtree node used to hold the task in the wait queue */
struct eb32_node rq; /* ebtree node used to hold the task in the run queue */
int state; /* task state : bit field of TASK_* */
- unsigned int expire; /* next expiration time for this task */
+ int expire; /* next expiration date for this task, in ticks */
struct task * (*process)(struct task *t); /* the function which processes the task */
void *context; /* the task's context */
int nice; /* the task's current nice value from -1024 to +1024 */
#ifdef DEBUG_DEV
/* this may only happen when no timeout is set or in case of an FSM bug */
- if (!t->expire)
+ if (!tick_isset(t->expire))
ABORT_NOW();
#endif
return t; /* nothing more to do */
unsigned int niced_tasks = 0; /* number of niced tasks in the run queue */
struct task *last_timer = NULL; /* optimization: last queued timer */
-/* Principle of the wait queue.
- *
- * We want to be able to tell whether an expiration date is before of after the
- * current time <now>. We KNOW that expiration dates are never too far apart,
- * because they are already computed by adding integer numbers of milliseconds
- * to the current date.
- * We also know that almost all dates will be in the future, and that a very
- * small part of them will be in the past, they are the ones which have expired
- * since last time we checked them.
- *
- * The current implementation uses a wrapping time cut into 3 ranges :
- * - previous : those ones are expired by definition
- * - current : some are expired, some are not
- * - next : none are expired
- *
- * We use the higher two bits of the timers expressed in ticks (milliseconds)
- * to determine which range a timer is in, compared to <now> :
- *
- * now previous current next0 next1
- * [31:30] [31:30] [31:30] [31:30] [31:30]
- * 00 11 00 01 10
- * 01 00 01 10 11
- * 10 01 10 11 00
- * 11 10 11 00 01
- *
- * By definition, <current> is the range containing <now> as well as all timers
- * which have the same 2 high bits as <now>, <previous> is the range just
- * before, which contains all timers whose high bits equal those of <now> minus
- * 1. Last, <next> is composed of the two remaining ranges.
- *
- * For ease of implementation, the timers will then be stored into 4 queues 0-3
- * determined by the 2 higher bits of the timer. The expiration algorithm is
- * very simple :
- * - expire everything in <previous>=queue[((now>>30)-1)&3]
- * - expire from <current>=queue[(now>>30)&3] everything where timer >= now
- *
- * With this algorithm, it's possible to queue tasks meant to expire 24.8 days
- * in the future, and still be able to detect events remaining unprocessed for
- * the last 12.4 days! Note that the principle might be extended to any number
- * of higher bits as long as there is only one range for expired tasks. For
- * instance, using the 8 higher bits to index the range, we would have one past
- * range of 4.6 hours (24 bits in ms), and 254 ranges in the future totalizing
- * 49.3 days. This would eat more memory for a very little added benefit.
- *
- * Also, in order to maintain the ability to perform time comparisons, it is
- * recommended to avoid using the <next1> range above, as values in this range
- * may not easily be compared to <now> outside of these functions as it is the
- * opposite of the <current> range, and <timer>-<now> may randomly be positive
- * or negative. That means we're left with +/- 12 days timers.
- *
- * To keep timers ordered, we use 4 ebtrees [0..3]. To keep computation low, we
- * may use (seconds*1024)+milliseconds, which preserves ordering eventhough we
- * can't do real computations on it. Future evolutions could make use of 1024th
- * of seconds instead of milliseconds, with the special value 0 avoided (and
- * replaced with 1), so that zero indicates the timer is not set.
- *
- * Another nice optimisation is to allow a timer to stay at an old place in the
- * queue as long as it's not further than the real expected timeout. We really
- * use the tree as a place holder for a minorant of the real expiration date.
- * Since we have very low chance of hitting a timeout anyway, we can bounce the
- * nodes to their right place when we scan the tree and encounter a misplaced
- * node once in a while. This even allows us not to remove the infinite timers.
- *
- * So, to summarize, we have :
- * - node->key always defines current position in the tree
- * - timer is the real expiration date (possibly infinite)
- * - node->key <= timer
- */
-
-#define TIMER_TICK_BITS 32
-#define TIMER_TREE_BITS 2
-#define TIMER_TREES (1 << TIMER_TREE_BITS)
-#define TIMER_TREE_SHIFT (TIMER_TICK_BITS - TIMER_TREE_BITS)
-#define TIMER_TREE_MASK (TIMER_TREES - 1)
-#define TIMER_TICK_MASK ((1U << (TIMER_TICK_BITS-1)) * 2 - 1)
-#define TIMER_SIGN_BIT (1 << (TIMER_TICK_BITS - 1))
-
static struct eb_root timers[TIMER_TREES]; /* trees with MSB 00, 01, 10 and 11 */
static struct eb_root rqueue[TIMER_TREES]; /* trees constituting the run queue */
static unsigned int rqueue_ticks; /* insertion count */
-/* returns an ordered key based on an expiration date. */
-static inline unsigned int timeval_to_ticks(const struct timeval *t)
-{
- unsigned int key;
-
- key = ((unsigned int)t->tv_sec * 1000) + ((unsigned int)t->tv_usec / 1000);
- key &= TIMER_TICK_MASK;
- return key;
-}
-
-/* returns a tree number based on a ticks value */
-static inline unsigned int ticks_to_tree(unsigned int ticks)
-{
- return (ticks >> TIMER_TREE_SHIFT) & TIMER_TREE_MASK;
-}
-
-/* returns a tree number based on an expiration date. */
-static inline unsigned int timeval_to_tree(const struct timeval *t)
-{
- return ticks_to_tree(timeval_to_ticks(t));
-}
-
/* Puts the task <t> in run queue at a position depending on t->nice. <t> is
* returned. The nice value assigns boosts in 32th of the run queue size. A
* nice value of -1024 sets the task to -run_queue*32, while a nice value of
/* clear state flags at the same time */
t->state &= ~TASK_WOKEN_ANY;
- eb32_insert(&rqueue[ticks_to_tree(t->rq.key)], &t->rq);
+ eb32_insert(&rqueue[timer_to_tree(t->rq.key)], &t->rq);
return t;
}
* rely on wake_expired_tasks() to catch the node and move it to the
* proper place should it ever happen.
*/
- if (!task->expire || ((task->wq.key - task->expire) & TIMER_SIGN_BIT))
+ if (!tick_isset(task->expire) ||
+ ((task->wq.key - tick_to_timer(task->expire)) & TIMER_SIGN_BIT))
return;
__task_unlink_wq(task);
}
/* the task is not in the queue now */
- if (unlikely(!task->expire))
+ if (unlikely(!tick_isset(task->expire)))
return;
- task->wq.key = task->expire;
+ task->wq.key = tick_to_timer(task->expire);
#ifdef DEBUG_CHECK_INVALID_EXPIRATION_DATES
- if ((task->wq.key - now_ms) & TIMER_SIGN_BIT)
+ if ((task->wq.key - tick_to_timer(now_ms)) & TIMER_SIGN_BIT)
/* we're queuing too far away or in the past (most likely) */
return;
#endif
eb_insert_dup(&last_timer->wq.node, &task->wq.node);
return;
}
- eb32_insert(&timers[ticks_to_tree(task->wq.key)], &task->wq);
+ eb32_insert(&timers[timer_to_tree(task->wq.key)], &task->wq);
if (task->wq.node.bit == -1)
last_timer = task; /* we only want dup a tree's root */
return;
* non-expired entry.
*/
- now_tree = ticks_to_tree(now_ms);
+ now_tree = timer_to_tree(tick_to_timer(now_ms));
tree = (now_tree - 1) & TIMER_TREE_MASK;
do {
eb = eb32_first(&timers[tree]);
while (eb) {
task = eb32_entry(eb, struct task, wq);
- if ((now_ms - eb->key) & TIMER_SIGN_BIT) {
+ if ((tick_to_timer(now_ms) - eb->key) & TIMER_SIGN_BIT) {
/* note that we don't need this check for the <previous>
* tree, but it's cheaper than duplicating the code.
*/
- *next = eb->key; /* when we want to revisit the tree */
+ *next = timer_to_tick(eb->key);
return;
}
if (likely(niced_tasks))
max_processed /= 4;
- tree = ticks_to_tree(rqueue_ticks);
+ tree = timer_to_tree(rqueue_ticks);
stop = (tree + TIMER_TREES / 2) & TIMER_TREE_MASK;
tree = (tree - 1) & TIMER_TREE_MASK;