[CLEANUP] fwrr: ensure that we never overflow in placements

author Willy Tarreau <w@1wt.eu>

Sun, 2 Dec 2007 10:01:23 +0000 (11:01 +0100)

committer Willy Tarreau <w@1wt.eu>

Sun, 2 Dec 2007 10:01:23 +0000 (11:01 +0100)
author Willy Tarreau <w@1wt.eu>
Sun, 2 Dec 2007 10:01:23 +0000 (11:01 +0100)
committer Willy Tarreau <w@1wt.eu>
Sun, 2 Dec 2007 10:01:23 +0000 (11:01 +0100)
diff --git a/include/types/backend.h b/include/types/backend.h

index e392a7b0ab8507ea61f5cb585c9aa91fac5775dd..2b35860f11e8c9a02ce76d8584fff5494411611c 100644 (file)
--- a/include/types/backend.h
+++ b/include/types/backend.h
@@ -44,7 +44,16 @@
  #define BE_LB_ALGO_PH  (BE_LB_PROP_L7  | 0x04) /* balance on URL parameter hash */
  
  /* various constants */
-#define BE_WEIGHT_SCALE 256             /* scale between user weight and effective weight */
+
+/* The scale factor between user weight an effective weight allows smooth
+ * weight modulation even with small weights (eg: 1). It should not be too high
+ * though because it limits the number of servers in FWRR mode in order to
+ * prevent any integer overflow. The max number of servers per backend is
+ * limited to about 2^32/255^2/scale ~= 66051/scale. A scale of 16 looks like
+ * a good value, as it allows more than 4000 servers per backend while leaving
+ * modulation steps of about 6% for servers with the lowest weight (1).
+ */
+#define BE_WEIGHT_SCALE 16
  
  #endif /* _TYPES_BACKEND_H */
  
diff --git a/include/types/server.h b/include/types/server.h

index 36ae7cb404b4f16230e7e2c220566c645b0a06bf..614775bec060086cf07c6fdd8b5b3b7bf5cf37b6 100644 (file)
--- a/include/types/server.h
+++ b/include/types/server.h
@@ -62,6 +62,12 @@
  #define SRV_CHK_RUNNING 0x0002   /* server seen as running */
  #define SRV_CHK_DISABLE 0x0004   /* server returned a "disable" code */
  
+/* various constants */
+#define SRV_UWGHT_RANGE 256
+#define SRV_UWGHT_MAX   (SRV_UWGHT_RANGE - 1)
+#define SRV_EWGHT_RANGE (SRV_UWGHT_RANGE * BE_WEIGHT_SCALE)
+#define SRV_EWGHT_MAX   (SRV_UWGHT_MAX   * BE_WEIGHT_SCALE)
+
  struct server {
         struct server *next;
         int state;                              /* server state (SRV_*) */
diff --git a/src/backend.c b/src/backend.c

index 4dc2b9caf0d081d7c891471ce4d83711e9fa27cf..1eed377e9aff5f3b6740f05def14b4d357cf1909 100644 (file)
--- a/src/backend.c
+++ b/src/backend.c
@@ -511,8 +511,7 @@ static inline void fwrr_remove_from_tree(struct server *s)
   */
  static inline void fwrr_queue_by_weight(struct eb_root *root, struct server *s)
  {
-       /* eweight can be as high as 256*255 */
-       s->lb_node.key = BE_WEIGHT_SCALE*255 - s->eweight;
+       s->lb_node.key = SRV_EWGHT_MAX - s->eweight;
         eb32_insert(root, &s->lb_node);
         s->lb_tree = root;
  }
@@ -598,17 +597,17 @@ static void fwrr_queue_srv(struct server *s)
                 fwrr_queue_by_weight(grp->next, s);
         }
         else {
-               /* FIXME: we want to multiply by a constant to avoid overrides
-                * after weight changes, but this can easily overflow on 32-bit
-                * values. We need to change this for a 64-bit tree, and keep
-                * the 65536 factor for optimal smoothness (both rweight and
-                * eweight are 16 bit entities). s->npos is bound by the number
-                * of servers times the maximum eweight (~= nsrv << 16).
+               /* The sorting key is stored in units of s->npos * user_weight
+                * in order to avoid overflows. As stated in backend.h, the
+                * lower the scale, the rougher the weights modulation, and the
+                * higher the scale, the lower the number of servers without
+                * overflow. With this formula, the result is always positive,
+                * so we can use eb3é_insert().
                  */
-               //s->lb_node.key = grp->curr_weight * s->npos + s->rweight - s->eweight;
-               //s->lb_node.key = 65536 * s->npos + s->rweight - s->eweight;
-               s->lb_node.key = 16 * s->npos + (s->rweight - s->eweight) / 4096;
-               eb32i_insert(&grp->curr, &s->lb_node);
+               s->lb_node.key = SRV_UWGHT_RANGE * s->npos +
+                       (unsigned)(SRV_EWGHT_MAX + s->rweight - s->eweight) / BE_WEIGHT_SCALE;
+
+               eb32_insert(&grp->curr, &s->lb_node);
                 s->lb_tree = &grp->curr;
         }
  }
diff --git a/tests/filltab25.c b/tests/filltab25.c

index 8b983f4afbf21e2b33979fc8798d45c20ae168e8..02802bdb8bc2e2ce9aa3c1f5f97f112c89f4b6cd 100644 (file)
--- a/tests/filltab25.c
+++ b/tests/filltab25.c
@@ -35,7 +35,7 @@ int p;          /* current position, between sw..2sw-1 */
  /* queue a server in the weights tree */
  void queue_by_weight(struct eb_root *root, struct srv *s) {
         s->node.key = 255 - s->w;
-       eb32i_insert(root, &s->node);
+       eb32_insert(root, &s->node);
         s->tree = root;
  }
  
@@ -43,7 +43,7 @@ void queue_by_weight(struct eb_root *root, struct srv *s) {
  void queue_by_weight_0(struct eb_root *root, struct srv *s) {
         if (s->w) {
                 s->node.key = 255 - s->w;
-               eb32i_insert(root, &s->node);
+               eb32_insert(root, &s->node);
                 s->tree = root;
         } else {
                 s->tree = NULL;
@@ -62,13 +62,25 @@ void put_srv(struct srv *s) {
                 /* put into next tree */
                 s->next -= sw; // readjust next in case we could finally take this back to current.
                 queue_by_weight_0(next_tree, s);
-       } else { /* FIXME: sw can be smaller than s->rem! */
-               //s->node.key = sw * s->next + s->rem - s->w;
-               //s->node.key = 65536 * s->next + s->rem - s->w;
-               //s->node.key = 256*s->next + (s->rem / 256);
-               //s->node.key = 256*s->next + ((s->rem - s->w) / 256);
-               s->node.key = 16*s->next + ((s->rem - s->w) / 4096);
-               eb32i_insert(&tree_0, &s->node);
+       } else {
+               // The overflow problem is caused by the scale we want to apply to user weight
+               // to turn it into effective weight. Since this is only used to provide a smooth
+               // slowstart on very low weights (1), it is a pure waste. Thus, we just have to
+               // apply a small scaling factor and warn the user that slowstart is not very smooth
+               // on low weights.
+               // The max key is about ((scale*maxw)*(scale*maxw)*nbsrv)/ratio  (where the ratio is
+               // the arbitrary divide we perform in the examples above). Assuming that ratio==scale,
+               // this translates to maxkey=scale*maxw^2*nbsrv, so
+               //    max_nbsrv=2^32/255^2/scale ~= 66051/scale
+               // Using a scale of 16 is enough to support 4000 servers without overflow, providing
+               // 6% steps during slowstart.
+
+               s->node.key = 256 * s->next + (16*255 + s->rem - s->w) / 16;
+
+               /* check for overflows */
+               if ((int)s->node.key < 0)
+                       printf(" OV: srv=%p w=%d rem=%d next=%d key=%d", s, s->w, s->rem, s->next, s->node.key);
+               eb32_insert(&tree_0, &s->node);
                 s->tree = &tree_0;
         }
  }
@@ -122,6 +134,7 @@ struct srv *get_next_server() {
                         get_srv_init(s);
                         if (s->w == 0)
                                 node = NULL;
+                       s->node.key = 0; // do not display random values
                 }
         }
         if (node)
@@ -299,8 +312,8 @@ main(int argc, char **argv) {
         next_iteration:
                 p++;
                 conns++;
-               if (/*conns == 30*/ /**/random()%1000 == 0/**/) {
-                       int w = /*20*//**/random()%10000/**/;
+               if (/*conns == 30*/ /**/random()%100 == 0/**/) {
+                       int w = /*20*//**/random()%4096/**/;
                         int num = /*1*//**/random()%nsrv/**/;
                         struct srv *s = &srv[num];
author	Willy Tarreau <w@1wt.eu>
	Sun, 2 Dec 2007 10:01:23 +0000 (11:01 +0100)
committer	Willy Tarreau <w@1wt.eu>
	Sun, 2 Dec 2007 10:01:23 +0000 (11:01 +0100)
include/types/backend.h		patch \| blob \| blame \| history
include/types/server.h		patch \| blob \| blame \| history
src/backend.c		patch \| blob \| blame \| history
tests/filltab25.c		patch \| blob \| blame \| history