[CLEANUP] task: distinguish between clock ticks and timers

author Willy Tarreau <w@1wt.eu>

Sun, 8 Mar 2009 14:53:06 +0000 (15:53 +0100)

committer Willy Tarreau <w@1wt.eu>

Sun, 8 Mar 2009 14:58:07 +0000 (15:58 +0100)
author Willy Tarreau <w@1wt.eu>
Sun, 8 Mar 2009 14:53:06 +0000 (15:53 +0100)
committer Willy Tarreau <w@1wt.eu>
Sun, 8 Mar 2009 14:58:07 +0000 (15:58 +0100)
diff --git a/include/common/ticks.h b/include/common/ticks.h

index 0b3e102e13fb05bc453203d3e7a4aae302549d3a..f3c1a7de69b2bb90ceb26e443425a87980096a54 100644 (file)
--- a/include/common/ticks.h
+++ b/include/common/ticks.h
@@ -2,7 +2,7 @@
    include/common/ticks.h
    Functions and macros for manipulation of expiration timers
  
-  Copyright (C) 2000-2008 Willy Tarreau - w@1wt.eu
+  Copyright (C) 2000-2009 Willy Tarreau - w@1wt.eu
    
    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Lesser General Public
@@ -22,20 +22,31 @@
  /*
   * Using a mix of milliseconds and timeval for internal timers is expensive and
   * overkill, because we don't need such a precision to compute timeouts.
- * So we're converting them to "ticks". Right now, one tick equals one
- * millisecond, but that might change in the future. Ticks are stored as 32bit
- * values, and sorted in four 30bit-wide rotating arrays, which means that any
- * timer may be 2^30 ms in the future, or 12.4 days. The ticks are designed to
- * wrap after they pass 2^32. That means that we cannot directly compare them,
- * but we can check the sign of their difference.
+ * So we're converting them to "ticks".
   *
+ * A tick is a representation of a date relative to another one, and is
+ * measured in milliseconds. The natural usage is to represent an absolute date
+ * relative to the current date. Since it is not practical to update all values
+ * each time the current date changes, instead we use the absolute date rounded
+ * down to fit in a tick. We then have to compare a tick to the current date to
+ * know whether it is in the future or in the past. If a tick is below the
+ * current date, it is in the past. If it is above, it is in the future. The
+ * values will wrap so we can't compare that easily, instead we check the sign
+ * of the difference between a tick and the current date.
+ *
+ * Proceeding like this allows us to manipulate dates that are stored in
+ * scalars with enough precision and range. For this reason, we store ticks in
+ * 32-bit integers. This is enough to handle dates that are between 24.85 days
+ * in the past and as much in the future.
+ * 
   * We must both support absolute dates (well in fact, dates relative to now+/-
   * 12 days), and intervals (for timeouts). Both types need an "eternity" magic
   * value. For optimal code generation, we'll use zero as the magic value
   * indicating that an expiration timer or a timeout is not set. We have to
   * check that we don't return this value when adding timeouts to <now>. If a
   * computation returns 0, we must increase it to 1 (which will push the timeout
- * 1 ms further).
+ * 1 ms further). For this reason, timeouts must not be added by hand but via
+ * the dedicated tick_add() function.
   */
  
  #ifndef _COMMON_TICKS_H
diff --git a/include/proto/task.h b/include/proto/task.h

index 1db30805c0d4a11a4a848e0d16809e44c044f806..67eb92499cad1cf10ba850ab9fc08e0ee4a3486a 100644 (file)
--- a/include/proto/task.h
+++ b/include/proto/task.h
@@ -29,16 +29,124 @@
  #include <common/memory.h>
  #include <common/mini-clist.h>
  #include <common/standard.h>
+#include <common/ticks.h>
  
  #include <types/task.h>
  
+/* Principle of the wait queue.
+ *
+ * We want to be able to tell whether an expiration date is before of after the
+ * current time <now>. We KNOW that expiration dates are never too far apart,
+ * because they are measured in ticks (milliseconds). We also know that almost
+ * all dates will be in the future, and that a very small part of them will be
+ * in the past, they are the ones which have expired since last time we checked
+ * them. Using ticks, we know if a date is in the future or in the past, but we
+ * cannot use that to store sorted information because that reference changes
+ * all the time.
+ *
+ * So we cut the time in 3 ranges, only one of which <now> can be. The base of
+ * the range holding <now> serves as a reference for as long as <now> remains
+ * in this range :
+ *   - previous : those ones are expired by definition (all before <now>)
+ *   - current  : some are expired, some are not (holds <now>)
+ *   - next     : none are expired (all after <now>)
+ *
+ * We use the higher two bits of the timers expressed in ticks to determine
+ * which range a timer is in, compared to <now> :
+ *
+ *   now     previous     current      next0     next1
+ * [31:30]   [31:30]      [31:30]     [31:30]   [31:30]
+ *    00        11           00          01        10
+ *    01        00           01          10        11
+ *    10        01           10          11        00
+ *    11        10           11          00        01
+ *
+ * By definition, <current> is the range containing <now> as well as all timers
+ * which have the same 2 high bits as <now>, <previous> is the range just
+ * before, which contains all timers whose high bits equal those of <now> minus
+ * 1. Last, <next> is composed of the two remaining ranges.
+ *
+ * For ease of implementation, the timers will then be stored into 4 queues 0-3
+ * determined by the 2 higher bits of the timer. The expiration algorithm is
+ * very simple :
+ *  - expire everything in <previous>=queue[((now>>30)-1)&3]
+ *  - expire from <current>=queue[(now>>30)&3] everything where timer >= now
+ *
+ * With this algorithm, it's possible to queue tasks meant to expire 24.8 days
+ * in the future, and still be able to detect events remaining unprocessed for
+ * the last 12.4 days! Note that the principle might be extended to any number
+ * of higher bits as long as there is only one range for expired tasks. For
+ * instance, using the 8 higher bits to index the range, we would have one past
+ * range of 4.6 hours (24 bits in ms), and 254 ranges in the future totalizing
+ * 49.3 days. This would eat more memory for very little added benefit though.
+ *
+ * Also, in order to maintain the ability to perform time comparisons, it is
+ * preferable to avoid using the <next1> range above, as values in this range
+ * may not easily be compared to <now> outside of these functions as it is the
+ * opposite of the <current> range, and <timer>-<now> may randomly be positive
+ * or negative. That means we're left with +/- 12.4 days timers.
+ *
+ * To keep timers ordered, we use 4 ebtrees [0..3]. We could have used instead
+ * of ticks, (seconds*1024)+milliseconds, as well as 1024th of seconds, but
+ * that makes comparisons with ticks more difficult, so in the end it's better
+ * to stick to the ticks.
+ *
+ * Another nice optimisation is to allow a timer to stay at an old place in the
+ * queue as long as it's not further than the real expiration date. That way,
+ * we use the tree as a place holder for a minorant of the real expiration
+ * date. Since we have a very low chance of hitting a timeout anyway, we can
+ * bounce the nodes to their right place when we scan the tree if we encounter
+ * a misplaced node once in a while. This even allows us not to remove the
+ * infinite timers from the wait queue.
+ *
+ * So, to summarize, we have :
+ *   - node->key always defines current position in the wait queue
+ *   - timer is the real expiration date (possibly infinite)
+ *   - node->key <= timer
+ *
+ * The run queue works similarly to the wait queue except that the current date
+ * is replaced by an insertion counter which can also wrap without any problem.
+ */
+
+/* the timers are stored as 32-bit values in the queues */
+#define TIMER_TICK_BITS       32
+#define TIMER_TREE_BITS        2
+#define TIMER_TREES           (1 << TIMER_TREE_BITS)
+#define TIMER_TREE_SHIFT      (TIMER_TICK_BITS - TIMER_TREE_BITS)
+#define TIMER_TREE_MASK       (TIMER_TREES - 1)
+#define TIMER_TICK_MASK       ((1U << (TIMER_TICK_BITS-1)) * 2 - 1)
+#define TIMER_SIGN_BIT        (1 << (TIMER_TICK_BITS - 1))
+
+/* a few exported variables */
  extern unsigned int run_queue;    /* run queue size */
  extern unsigned int niced_tasks;  /* number of niced tasks in the run queue */
  extern struct pool_head *pool2_task;
  extern struct task *last_timer;   /* optimization: last queued timer */
  
-/* perform minimal initializations, report 0 in case of error, 1 if OK. */
-int init_task();
+/* Convert ticks to timers. Must not be called with TICK_ETERNITY, which is not
+ * a problem inside tree scanning functions. Note that ticks are signed while
+ * timers are not.
+ */
+static inline unsigned int tick_to_timer(int tick)
+{
+       return tick & TIMER_TICK_MASK;
+}
+
+/* Convert timer to ticks. This operation will be correct only as long as
+ * timers are stored on a minimum of 32-bit. We take care of not returning zero
+ * which would mean "eternity" for a tick. Also note that ticks are signed and
+ * timers are not.
+ */
+static inline int timer_to_tick(unsigned int timer)
+{
+       return timer ? timer : 1;
+}
+
+/* returns a tree number based on a ticks value */
+static inline unsigned int timer_to_tree(unsigned int timer)
+{
+       return (timer >> TIMER_TREE_SHIFT) & TIMER_TREE_MASK;
+}       
  
  /* return 0 if task is in run queue, otherwise non-zero */
  static inline int task_in_rq(struct task *t)
@@ -158,6 +266,8 @@ void process_runnable_tasks(int *next);
   */
  void wake_expired_tasks(int *next);
  
+/* Perform minimal initializations, report 0 in case of error, 1 if OK. */
+int init_task();
  
  #endif /* _PROTO_TASK_H */
  
diff --git a/include/types/task.h b/include/types/task.h

index 1cc12a9f8f16d59af3a6b96e73d947c1aadad0bd..13e2aacd1f5d174bc418482b550ba61f3f7aa852 100644 (file)
--- a/include/types/task.h
+++ b/include/types/task.h
@@ -49,7 +49,7 @@ struct task {
         struct eb32_node wq;            /* ebtree node used to hold the task in the wait queue */
         struct eb32_node rq;            /* ebtree node used to hold the task in the run queue */
         int state;                      /* task state : bit field of TASK_* */
-       unsigned int expire;            /* next expiration time for this task */
+       int expire;                     /* next expiration date for this task, in ticks */
         struct task * (*process)(struct task *t);  /* the function which processes the task */
         void *context;                  /* the task's context */
         int nice;                       /* the task's current nice value from -1024 to +1024 */
diff --git a/src/session.c b/src/session.c

index 7d7b4b6d4b4e989851b1a1061014d867fcd5f84d..34ed663a6fd8d6109c2bec3670a6f401f7818742 100644 (file)
--- a/src/session.c
+++ b/src/session.c
@@ -1032,7 +1032,7 @@ resync_stream_interface:
  
  #ifdef DEBUG_DEV
                 /* this may only happen when no timeout is set or in case of an FSM bug */
-               if (!t->expire)
+               if (!tick_isset(t->expire))
                         ABORT_NOW();
  #endif
                 return t; /* nothing more to do */
diff --git a/src/task.c b/src/task.c

index 844862e3d41782618eadd0fccc1ffee428384fd6..f054e3170749469c60e9a292feecca166ba01ed4 100644 (file)
--- a/src/task.c
+++ b/src/task.c
@@ -26,109 +26,10 @@ unsigned int run_queue = 0;
  unsigned int niced_tasks = 0; /* number of niced tasks in the run queue */
  struct task *last_timer = NULL;  /* optimization: last queued timer */
  
-/* Principle of the wait queue.
- *
- * We want to be able to tell whether an expiration date is before of after the
- * current time <now>. We KNOW that expiration dates are never too far apart,
- * because they are already computed by adding integer numbers of milliseconds
- * to the current date.
- * We also know that almost all dates will be in the future, and that a very
- * small part of them will be in the past, they are the ones which have expired
- * since last time we checked them.
- *
- * The current implementation uses a wrapping time cut into 3 ranges :
- *   - previous : those ones are expired by definition
- *   - current  : some are expired, some are not
- *   - next     : none are expired
- *
- * We use the higher two bits of the timers expressed in ticks (milliseconds)
- * to determine which range a timer is in, compared to <now> :
- *
- *   now     previous     current      next0     next1
- * [31:30]   [31:30]      [31:30]     [31:30]   [31:30]
- *    00        11           00          01        10
- *    01        00           01          10        11
- *    10        01           10          11        00
- *    11        10           11          00        01
- *
- * By definition, <current> is the range containing <now> as well as all timers
- * which have the same 2 high bits as <now>, <previous> is the range just
- * before, which contains all timers whose high bits equal those of <now> minus
- * 1. Last, <next> is composed of the two remaining ranges.
- *
- * For ease of implementation, the timers will then be stored into 4 queues 0-3
- * determined by the 2 higher bits of the timer. The expiration algorithm is
- * very simple :
- *  - expire everything in <previous>=queue[((now>>30)-1)&3]
- *  - expire from <current>=queue[(now>>30)&3] everything where timer >= now
- *
- * With this algorithm, it's possible to queue tasks meant to expire 24.8 days
- * in the future, and still be able to detect events remaining unprocessed for
- * the last 12.4 days! Note that the principle might be extended to any number
- * of higher bits as long as there is only one range for expired tasks. For
- * instance, using the 8 higher bits to index the range, we would have one past
- * range of 4.6 hours (24 bits in ms), and 254 ranges in the future totalizing
- * 49.3 days. This would eat more memory for a very little added benefit.
- *
- * Also, in order to maintain the ability to perform time comparisons, it is
- * recommended to avoid using the <next1> range above, as values in this range
- * may not easily be compared to <now> outside of these functions as it is the
- * opposite of the <current> range, and <timer>-<now> may randomly be positive
- * or negative. That means we're left with +/- 12 days timers.
- *
- * To keep timers ordered, we use 4 ebtrees [0..3]. To keep computation low, we
- * may use (seconds*1024)+milliseconds, which preserves ordering eventhough we
- * can't do real computations on it. Future evolutions could make use of 1024th
- * of seconds instead of milliseconds, with the special value 0 avoided (and
- * replaced with 1), so that zero indicates the timer is not set.
- *
- * Another nice optimisation is to allow a timer to stay at an old place in the
- * queue as long as it's not further than the real expected timeout. We really
- * use the tree as a place holder for a minorant of the real expiration date.
- * Since we have very low chance of hitting a timeout anyway, we can bounce the
- * nodes to their right place when we scan the tree and encounter a misplaced
- * node once in a while. This even allows us not to remove the infinite timers.
- *
- * So, to summarize, we have :
- *   - node->key always defines current position in the tree
- *   - timer is the real expiration date (possibly infinite)
- *   - node->key <= timer
- */
-
-#define TIMER_TICK_BITS       32
-#define TIMER_TREE_BITS        2
-#define TIMER_TREES           (1 << TIMER_TREE_BITS)
-#define TIMER_TREE_SHIFT      (TIMER_TICK_BITS - TIMER_TREE_BITS)
-#define TIMER_TREE_MASK       (TIMER_TREES - 1)
-#define TIMER_TICK_MASK       ((1U << (TIMER_TICK_BITS-1)) * 2 - 1)
-#define TIMER_SIGN_BIT        (1 << (TIMER_TICK_BITS - 1))
-
  static struct eb_root timers[TIMER_TREES];  /* trees with MSB 00, 01, 10 and 11 */
  static struct eb_root rqueue[TIMER_TREES];  /* trees constituting the run queue */
  static unsigned int rqueue_ticks;           /* insertion count */
  
-/* returns an ordered key based on an expiration date. */
-static inline unsigned int timeval_to_ticks(const struct timeval *t)
-{
-       unsigned int key;
-
-       key  = ((unsigned int)t->tv_sec  * 1000) + ((unsigned int)t->tv_usec / 1000);
-       key &= TIMER_TICK_MASK;
-       return key;
-}       
-
-/* returns a tree number based on a ticks value */
-static inline unsigned int ticks_to_tree(unsigned int ticks)
-{
-       return (ticks >> TIMER_TREE_SHIFT) & TIMER_TREE_MASK;
-}       
-
-/* returns a tree number based on an expiration date. */
-static inline unsigned int timeval_to_tree(const struct timeval *t)
-{
-       return ticks_to_tree(timeval_to_ticks(t));
-}       
-
  /* Puts the task <t> in run queue at a position depending on t->nice. <t> is
   * returned. The nice value assigns boosts in 32th of the run queue size. A
   * nice value of -1024 sets the task to -run_queue*32, while a nice value of
@@ -156,7 +57,7 @@ struct task *__task_wakeup(struct task *t)
         /* clear state flags at the same time */
         t->state &= ~TASK_WOKEN_ANY;
  
-       eb32_insert(&rqueue[ticks_to_tree(t->rq.key)], &t->rq);
+       eb32_insert(&rqueue[timer_to_tree(t->rq.key)], &t->rq);
         return t;
  }
  
@@ -184,18 +85,19 @@ void task_queue(struct task *task)
                  * rely on wake_expired_tasks() to catch the node and move it to the
                  * proper place should it ever happen.
                  */
-               if (!task->expire || ((task->wq.key - task->expire) & TIMER_SIGN_BIT))
+               if (!tick_isset(task->expire) ||
+                   ((task->wq.key - tick_to_timer(task->expire)) & TIMER_SIGN_BIT))
                         return;
                 __task_unlink_wq(task);
         }
  
         /* the task is not in the queue now */
-       if (unlikely(!task->expire))
+       if (unlikely(!tick_isset(task->expire)))
                 return;
  
-       task->wq.key = task->expire;
+       task->wq.key = tick_to_timer(task->expire);
  #ifdef DEBUG_CHECK_INVALID_EXPIRATION_DATES
-       if ((task->wq.key - now_ms) & TIMER_SIGN_BIT)
+       if ((task->wq.key - tick_to_timer(now_ms)) & TIMER_SIGN_BIT)
                 /* we're queuing too far away or in the past (most likely) */
                 return;
  #endif
@@ -211,7 +113,7 @@ void task_queue(struct task *task)
                 eb_insert_dup(&last_timer->wq.node, &task->wq.node);
                 return;
         }
-       eb32_insert(&timers[ticks_to_tree(task->wq.key)], &task->wq);
+       eb32_insert(&timers[timer_to_tree(task->wq.key)], &task->wq);
         if (task->wq.node.bit == -1)
                 last_timer = task; /* we only want dup a tree's root */
         return;
@@ -237,17 +139,17 @@ void wake_expired_tasks(int *next)
          * non-expired entry.
          */
  
-       now_tree = ticks_to_tree(now_ms);
+       now_tree = timer_to_tree(tick_to_timer(now_ms));
         tree = (now_tree - 1) & TIMER_TREE_MASK;
         do {
                 eb = eb32_first(&timers[tree]);
                 while (eb) {
                         task = eb32_entry(eb, struct task, wq);
-                       if ((now_ms - eb->key) & TIMER_SIGN_BIT) {
+                       if ((tick_to_timer(now_ms) - eb->key) & TIMER_SIGN_BIT) {
                                 /* note that we don't need this check for the <previous>
                                  * tree, but it's cheaper than duplicating the code.
                                  */
-                               *next = eb->key;  /* when we want to revisit the tree */
+                               *next = timer_to_tick(eb->key);
                                 return;
                         }
  
@@ -311,7 +213,7 @@ void process_runnable_tasks(int *next)
         if (likely(niced_tasks))
                 max_processed /= 4;
  
-       tree = ticks_to_tree(rqueue_ticks);
+       tree = timer_to_tree(rqueue_ticks);
         stop = (tree + TIMER_TREES / 2) & TIMER_TREE_MASK;
         tree = (tree - 1) & TIMER_TREE_MASK;
author	Willy Tarreau <w@1wt.eu>
	Sun, 8 Mar 2009 14:53:06 +0000 (15:53 +0100)
committer	Willy Tarreau <w@1wt.eu>
	Sun, 8 Mar 2009 14:58:07 +0000 (15:58 +0100)
include/common/ticks.h		patch \| blob \| blame \| history
include/proto/task.h		patch \| blob \| blame \| history
include/types/task.h		patch \| blob \| blame \| history
src/session.c		patch \| blob \| blame \| history
src/task.c		patch \| blob \| blame \| history