From: Olivier Houchard
Date: Thu, 17 Apr 2025 14:31:44 +0000 (+0200)
Subject: MEDIUM: lb_fwrr: Use one ebtree per thread group.
X-Git-Tag: v3.2-dev11~3
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=3758eab71c50180ca161e489789c9238688e778d;p=thirdparty%2Fhaproxy.git

MEDIUM: lb_fwrr: Use one ebtree per thread group.

When using the round-robin load balancer, the major source of contention
is the lbprm lock, which has to be held every time we pick a server. To
mitigate that, use one tree per thread group, and one lock per thread
group. This means there is now a lb_fwrr_per_tgrp structure that contains
the two fwrr_groups (active and backup) as well as the lock that protects
them, inside the per-thread-group lbprm struct, and the corresponding
fields of struct server move to its per-thread-group structure too.

Those changes are mostly mechanical, and bring a good performance
improvement: on a 64-core AMD CPU with 64 servers configured, we used to
process about 620000 requests per second, and we can now process around
1400000 requests per second.
---
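A note for readers skimming the diff below: the core of the change is that
the round-robin scheduler state becomes an array of independent shards, one
per thread group, each with its own tree and its own lock. A minimal,
self-contained sketch of that pattern (plain pthreads standing in for
HAProxy's HA_RWLOCK/ebtree machinery; NB_TGROUPS, my_tgid and pick_server()
are illustrative names, not HAProxy API):

	#include <pthread.h>
	#include <stddef.h>

	#define NB_TGROUPS 4                 /* assumed fixed for the sketch */

	struct srv;                          /* stands in for struct server */

	/* one scheduler shard per thread group: its own tree, its own lock */
	struct lb_shard {
		pthread_rwlock_t lock;       /* HAProxy uses HA_RWLOCK_T here */
		void *tree_root;             /* stands in for an eb_root */
	};

	/* call pthread_rwlock_init() on each shard at startup (omitted) */
	static struct lb_shard shards[NB_TGROUPS];
	static __thread int my_tgid = 1;     /* 1-based group id, like HAProxy's tgid */

	/* a thread only ever locks its own shard on the hot path, so threads
	 * in different groups no longer contend on a single lbprm lock */
	struct srv *pick_server(void)
	{
		struct lb_shard *sh = &shards[my_tgid - 1];
		struct srv *s;

		pthread_rwlock_wrlock(&sh->lock);
		s = NULL; /* real code: pick + reposition in sh->tree_root */
		pthread_rwlock_unlock(&sh->lock);
		return s;
	}

The cost of this split is that state changes that used to touch one tree
(weight changes, servers going up or down) must now be replayed on every
group's tree, as the loops over global.nbtgroups in the diff show.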
diff --git a/include/haproxy/backend-t.h b/include/haproxy/backend-t.h
index a27fad853..35f882f28 100644
--- a/include/haproxy/backend-t.h
+++ b/include/haproxy/backend-t.h
@@ -147,6 +147,7 @@
 /* LB parameters for all algorithms, with one instance per thread-group */
 struct lbprm_per_tgrp {
 	union {
+		struct lb_fwrr_per_tgrp fwrr;
 	};
 };
 /* LB parameters for all algorithms */
diff --git a/include/haproxy/lb_fwrr-t.h b/include/haproxy/lb_fwrr-t.h
index d9c3ef210..2e158b8b9 100644
--- a/include/haproxy/lb_fwrr-t.h
+++ b/include/haproxy/lb_fwrr-t.h
@@ -23,6 +23,7 @@
 #define _HAPROXY_LB_FWRR_T_H
 
 #include
+#include
 
 /* This structure is used to apply fast weighted round robin on a server group */
 struct fwrr_group {
@@ -34,9 +35,13 @@ struct fwrr_group {
 	int curr_weight;	/* total weight of the current time range */
 };
 
-struct lb_fwrr {
+struct lb_fwrr_per_tgrp {
 	struct fwrr_group act;	/* weighted round robin on the active servers */
 	struct fwrr_group bck;	/* weighted round robin on the backup servers */
+	__decl_thread(HA_RWLOCK_T lock);
+};
+
+struct lb_fwrr {
 	int next_weight_act;	/* total weight of the next time range on active servers, for all trees */
 	int next_weight_bck;	/* total weight of the next time range on backup servers, for all trees */
 };
diff --git a/include/haproxy/server-t.h b/include/haproxy/server-t.h
index 5253cfc72..19b84a5f4 100644
--- a/include/haproxy/server-t.h
+++ b/include/haproxy/server-t.h
@@ -275,10 +275,15 @@ struct srv_per_thread {
 struct srv_per_tgroup {
 	struct queue queue;			/* pending connections */
 	struct server *server;			/* pointer to the corresponding server */
+	struct eb32_node lb_node;		/* node used for tree-based load balancing */
+	struct server *next_full;		/* next server in the temporary full list */
 	unsigned int last_other_tgrp_served;	/* Last other tgrp we dequeued from */
 	unsigned int self_served;		/* Number of connection we dequeued from our own queue */
 	unsigned int dequeuing;			/* non-zero = dequeuing in progress (atomic) */
 	unsigned int next_takeover;		/* thread ID to try to steal connections from next time */
+	struct eb_root *lb_tree;		/* For LB algos with split between thread groups, the tree to be used, for each group */
+	unsigned npos, lpos;			/* next and last positions in the LB tree, protected by LB lock */
+	unsigned rweight;			/* remainder of weight in the current LB tree */
 } THREAD_ALIGNED(64);
 
 /* Configure the protocol selection for websocket */
@@ -348,7 +353,6 @@ struct server {
 	unsigned iweight, uweight, cur_eweight;	/* initial weight, user-specified weight, and effective weight */
 	unsigned wscore;			/* weight score, used during srv map computation */
 	unsigned next_eweight;			/* next pending eweight to commit */
-	unsigned rweight;			/* remainder of weight in the current LB tree */
 	unsigned cumulative_weight;		/* weight of servers prior to this one in the same group, for chash balancing */
 	int maxqueue;				/* maximum number of pending connections allowed */
 	unsigned int queueslength;		/* Sum of the length of each queue */
@@ -381,7 +385,6 @@ struct server {
 	 */
 	THREAD_PAD(63);
 	__decl_thread(HA_SPINLOCK_T lock);	/* may enclose the proxy's lock, must not be taken under */
-	unsigned npos, lpos;			/* next and last positions in the LB tree, protected by LB lock */
 	union {
 		struct eb32_node lb_node;	/* node used for tree-based load balancing */
 		struct list lb_list;		/* elem used for list-based load balancing */
@@ -392,7 +395,6 @@ struct server {
 		int lb_lock;			/* make sure we are the only one updating the server */
 	};
 	};
-	struct server *next_full;		/* next server in the temporary full list */
 
 	/* usually atomically updated by any thread during parsing or on end of request */
 	THREAD_PAD(63);
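One consequence of moving lb_node into srv_per_tgroup, visible in the lookup
paths further down, is that a tree node no longer resolves directly to a
struct server. The per-tgroup structure carries a back-pointer instead, so
the code goes node -> srv_per_tgroup -> server. A simplified sketch of that
indirection (eb32_entry() is essentially the usual container_of construct;
the struct bodies are trimmed to the relevant fields):

	#include <stddef.h>

	struct eb32_node { unsigned key; };	/* trimmed */
	struct server;				/* opaque here */

	struct srv_per_tgroup {
		struct server *server;		/* back-pointer, set at init */
		struct eb32_node lb_node;	/* per-group tree node */
	};

	#define container_of(ptr, type, member) \
		((type *)((char *)(ptr) - offsetof(type, member)))

	/* before: eb32_entry(node, struct server, lb_node)
	 * after:  go through the per-tgroup struct and its back-pointer */
	static struct server *server_from_node(struct eb32_node *node)
	{
		struct srv_per_tgroup *pt =
			container_of(node, struct srv_per_tgroup, lb_node);
		return pt->server;
	}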
diff --git a/src/lb_fwrr.c b/src/lb_fwrr.c
index 686d1ba57..395895161 100644
--- a/src/lb_fwrr.c
+++ b/src/lb_fwrr.c
@@ -15,13 +15,14 @@
 #include
 #include
 #include
+#include
 
-static inline void fwrr_remove_from_tree(struct server *s);
-static inline void fwrr_queue_by_weight(struct eb_root *root, struct server *s);
-static inline void fwrr_dequeue_srv(struct server *s);
-static void fwrr_get_srv(struct server *s);
-static void fwrr_queue_srv(struct server *s);
+static inline void fwrr_remove_from_tree(struct server *s, int tgid);
+static inline void fwrr_queue_by_weight(struct eb_root *root, struct server *s, int tgid);
+static inline void fwrr_dequeue_srv(struct server *s, int tgid);
+static void fwrr_get_srv(struct server *s, int tgid);
+static void fwrr_queue_srv(struct server *s, int tgid);
 
 
 /* This function updates the server trees according to server <srv>'s new
@@ -35,6 +36,7 @@ static void fwrr_queue_srv(struct server *s);
 static void fwrr_set_server_status_down(struct server *srv)
 {
 	struct proxy *p = srv->proxy;
+	int i;
 
 	if (!srv_lb_status_changed(srv))
 		return;
@@ -48,6 +50,12 @@ static void fwrr_set_server_status_down(struct server *srv)
 		/* server was already down */
 		goto out_update_backend;
 
+	for (i = 0; i < global.nbtgroups; i++) {
+		HA_RWLOCK_WRLOCK(LBPRM_LOCK, &p->per_tgrp[i].lbprm.fwrr.lock);
+		fwrr_dequeue_srv(srv, i + 1);
+		fwrr_remove_from_tree(srv, i + 1);
+		HA_RWLOCK_WRUNLOCK(LBPRM_LOCK, &p->per_tgrp[i].lbprm.fwrr.lock);
+	}
 
 	if (srv->flags & SRV_F_BACKUP) {
 		p->lbprm.fwrr.next_weight_bck -= srv->cur_eweight;
@@ -72,8 +80,6 @@ static void fwrr_set_server_status_down(struct server *srv)
 			p->srv_act--;
 	}
 
-	fwrr_dequeue_srv(srv);
-	fwrr_remove_from_tree(srv);
 
 out_update_backend:
 	/* check/update tot_used, tot_weight */
@@ -98,6 +104,7 @@ static void fwrr_set_server_status_up(struct server *srv)
 	struct proxy *p = srv->proxy;
 	struct fwrr_group *grp;
 	int next_weight;
+	int i;
 
 	if (!srv_lb_status_changed(srv))
 		return;
@@ -111,7 +118,6 @@ static void fwrr_set_server_status_up(struct server *srv)
 		/* server was already up */
 		goto out_update_backend;
 
-	grp = (srv->flags & SRV_F_BACKUP) ? &p->lbprm.fwrr.bck : &p->lbprm.fwrr.act;
 	if (srv->flags & SRV_F_BACKUP) {
 		p->lbprm.fwrr.next_weight_bck += srv->next_eweight;
@@ -141,9 +147,14 @@ static void fwrr_set_server_status_up(struct server *srv)
 	}
 
 	/* note that eweight cannot be 0 here */
-	fwrr_get_srv(srv);
-	srv->npos = grp->curr_pos + (next_weight + grp->curr_weight - grp->curr_pos) / srv->next_eweight;
-	fwrr_queue_srv(srv);
+	for (i = 0; i < global.nbtgroups; i++) {
+		HA_RWLOCK_WRLOCK(LBPRM_LOCK, &p->per_tgrp[i].lbprm.fwrr.lock);
+		grp = (srv->flags & SRV_F_BACKUP) ? &p->per_tgrp[i].lbprm.fwrr.bck : &p->per_tgrp[i].lbprm.fwrr.act;
+		fwrr_get_srv(srv, i + 1);
+		srv->per_tgrp[i].npos = grp->curr_pos + (next_weight + grp->curr_weight - grp->curr_pos) / srv->next_eweight;
+		fwrr_queue_srv(srv, i + 1);
+		HA_RWLOCK_WRUNLOCK(LBPRM_LOCK, &p->per_tgrp[i].lbprm.fwrr.lock);
+	}
 
 out_update_backend:
 	/* check/update tot_used, tot_weight */
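The pattern above repeats for every state change: the helpers now take a
1-based thread-group id (tgid) and index the per_tgrp[] arrays with
tgid - 1, and what used to be a single tree operation becomes a loop over
all groups, taking each group's write lock in turn. Condensed from
fwrr_set_server_status_down() above (not a new API, just the shape of the
loop):

	for (i = 0; i < global.nbtgroups; i++) {
		HA_RWLOCK_WRLOCK(LBPRM_LOCK, &p->per_tgrp[i].lbprm.fwrr.lock);
		fwrr_dequeue_srv(srv, i + 1);		/* tgid is 1-based */
		fwrr_remove_from_tree(srv, i + 1);
		HA_RWLOCK_WRUNLOCK(LBPRM_LOCK, &p->per_tgrp[i].lbprm.fwrr.lock);
	}

This makes up/down/weight events O(nbtgroups) instead of O(1), an
acceptable trade since they are rare compared to server picks.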
@@ -165,6 +176,7 @@ static void fwrr_update_server_weight(struct server *srv)
 	struct proxy *p = srv->proxy;
 	struct fwrr_group *grp;
 	int next_weight;
+	int i;
 
 	if (!srv_lb_status_changed(srv))
 		return;
@@ -198,50 +210,53 @@ static void fwrr_update_server_weight(struct server *srv)
 	if (srv->flags & SRV_F_BACKUP) {
 		p->lbprm.fwrr.next_weight_bck = p->lbprm.fwrr.next_weight_bck - srv->cur_eweight + srv->next_eweight;
 		next_weight = p->lbprm.tot_wbck = p->lbprm.fwrr.next_weight_bck;
-		grp = &p->lbprm.fwrr.bck;
 	} else {
 		p->lbprm.fwrr.next_weight_act = p->lbprm.fwrr.next_weight_act - srv->cur_eweight + srv->next_eweight;
 		next_weight = p->lbprm.tot_wact = p->lbprm.fwrr.next_weight_act;
-		grp = &p->lbprm.fwrr.act;
 	}
 
-	if (srv->lb_tree == grp->init) {
-		fwrr_dequeue_srv(srv);
-		fwrr_queue_by_weight(grp->init, srv);
-	}
-	else if (!srv->lb_tree) {
-		/* FIXME: server was down. This is not possible right now but
-		 * may be needed soon for slowstart or graceful shutdown.
-		 */
-		fwrr_dequeue_srv(srv);
-		fwrr_get_srv(srv);
-		srv->npos = grp->curr_pos + (next_weight + grp->curr_weight - grp->curr_pos) / srv->next_eweight;
-		fwrr_queue_srv(srv);
-	} else {
-		/* The server is either active or in the next queue. If it's
-		 * still in the active queue and it has not consumed all of its
-		 * places, let's adjust its next position.
-		 */
-		fwrr_get_srv(srv);
+	for (i = 0; i < global.nbtgroups; i++) {
+		HA_RWLOCK_WRLOCK(LBPRM_LOCK, &p->per_tgrp[i].lbprm.fwrr.lock);
+		grp = (srv->flags & SRV_F_BACKUP) ? &p->per_tgrp[i].lbprm.fwrr.bck : &p->per_tgrp[i].lbprm.fwrr.act;
+		if (srv->lb_tree == grp->init) {
+			fwrr_dequeue_srv(srv, i + 1);
+			fwrr_queue_by_weight(grp->init, srv, i + 1);
+		}
+		else if (!srv->lb_tree) {
+			/* FIXME: server was down. This is not possible right now but
+			 * may be needed soon for slowstart or graceful shutdown.
+			 */
+			fwrr_dequeue_srv(srv, i + 1);
+			fwrr_get_srv(srv, i + 1);
+			srv->per_tgrp[i].npos = grp->curr_pos + (next_weight + grp->curr_weight - grp->curr_pos) / srv->next_eweight;
+			fwrr_queue_srv(srv, i + 1);
+		} else {
+			/* The server is either active or in the next queue. If it's
+			 * still in the active queue and it has not consumed all of its
+			 * places, let's adjust its next position.
+			 */
+			fwrr_get_srv(srv, i + 1);
 
-	if (srv->next_eweight > 0) {
-		int prev_next = srv->npos;
-		int step = next_weight / srv->next_eweight;
+			if (srv->next_eweight > 0) {
+				int prev_next = srv->per_tgrp[i].npos;
+				int step = next_weight / srv->next_eweight;
 
-		srv->npos = srv->lpos + step;
-		srv->rweight = 0;
+				srv->per_tgrp[i].npos = srv->per_tgrp[i].lpos + step;
+				srv->per_tgrp[i].rweight = 0;
 
-		if (srv->npos > prev_next)
-			srv->npos = prev_next;
-		if (srv->npos < grp->curr_pos + 2)
-			srv->npos = grp->curr_pos + step;
-	} else {
-		/* push it into the next tree */
-		srv->npos = grp->curr_pos + grp->curr_weight;
-	}
+				if (srv->per_tgrp[i].npos > prev_next)
+					srv->per_tgrp[i].npos = prev_next;
+				if (srv->per_tgrp[i].npos < grp->curr_pos + 2)
+					srv->per_tgrp[i].npos = grp->curr_pos + step;
+			} else {
+				/* push it into the next tree */
+				srv->per_tgrp[i].npos = grp->curr_pos + grp->curr_weight;
+			}
 
-		fwrr_dequeue_srv(srv);
-		fwrr_queue_srv(srv);
+			fwrr_dequeue_srv(srv, i + 1);
+			fwrr_queue_srv(srv, i + 1);
+		}
+		HA_RWLOCK_WRUNLOCK(LBPRM_LOCK, &p->per_tgrp[i].lbprm.fwrr.lock);
 	}
 
 	update_backend_weight(p);
@@ -256,9 +271,9 @@ static void fwrr_update_server_weight(struct server *srv)
  *
  * The lbprm's lock must be held. The server's lock is not used.
  */
-static inline void fwrr_remove_from_tree(struct server *s)
+static inline void fwrr_remove_from_tree(struct server *s, int tgid)
 {
-	s->lb_tree = NULL;
+	s->per_tgrp[tgid - 1].lb_tree = NULL;
 }
 
 /* Queue a server in the weight tree <root>, assuming the weight is >0.
@@ -267,11 +282,11 @@ static inline void fwrr_remove_from_tree(struct server *s)
  *
  * The lbprm's lock must be held. The server's lock is not used.
  */
-static inline void fwrr_queue_by_weight(struct eb_root *root, struct server *s)
+static inline void fwrr_queue_by_weight(struct eb_root *root, struct server *s, int tgid)
 {
-	s->lb_node.key = SRV_EWGHT_MAX - s->next_eweight;
-	eb32_insert(root, &s->lb_node);
-	s->lb_tree = root;
+	s->per_tgrp[tgid - 1].lb_node.key = SRV_EWGHT_MAX - s->next_eweight;
+	eb32_insert(root, &s->per_tgrp[tgid - 1].lb_node);
+	s->per_tgrp[tgid - 1].lb_tree = root;
 }
 
 /* This function is responsible for building the weight trees in case of fast
@@ -282,6 +297,7 @@ void fwrr_init_server_groups(struct proxy *p)
 {
 	struct server *srv;
 	struct eb_root init_head = EB_ROOT;
+	int i;
 
 	p->lbprm.set_server_status_up   = fwrr_set_server_status_up;
 	p->lbprm.set_server_status_down = fwrr_set_server_status_down;
@@ -296,30 +312,36 @@ void fwrr_init_server_groups(struct proxy *p)
 	recount_servers(p);
 	update_backend_weight(p);
 
-	/* prepare the active servers group */
-	p->lbprm.fwrr.act.curr_pos = p->lbprm.fwrr.act.curr_weight =
-		p->lbprm.fwrr.next_weight_act = p->lbprm.tot_wact;
-	p->lbprm.fwrr.act.curr = p->lbprm.fwrr.act.t0 =
-		p->lbprm.fwrr.act.t1 = init_head;
-	p->lbprm.fwrr.act.init = &p->lbprm.fwrr.act.t0;
-	p->lbprm.fwrr.act.next = &p->lbprm.fwrr.act.t1;
-
-	/* prepare the backup servers group */
-	p->lbprm.fwrr.bck.curr_pos = p->lbprm.fwrr.bck.curr_weight =
-		p->lbprm.fwrr.next_weight_bck = p->lbprm.tot_wbck;
-	p->lbprm.fwrr.bck.curr = p->lbprm.fwrr.bck.t0 =
-		p->lbprm.fwrr.bck.t1 = init_head;
-	p->lbprm.fwrr.bck.init = &p->lbprm.fwrr.bck.t0;
-	p->lbprm.fwrr.bck.next = &p->lbprm.fwrr.bck.t1;
-
-	/* queue active and backup servers in two distinct groups */
-	for (srv = p->srv; srv; srv = srv->next) {
-		if (!srv_currently_usable(srv))
-			continue;
-		fwrr_queue_by_weight((srv->flags & SRV_F_BACKUP) ?
-				p->lbprm.fwrr.bck.init :
-				p->lbprm.fwrr.act.init,
-				srv);
+	for (i = 0; i < global.nbtgroups; i++) {
+		/* prepare the active servers group */
+		p->per_tgrp[i].lbprm.fwrr.act.curr_pos =
+			p->per_tgrp[i].lbprm.fwrr.act.curr_weight =
+			p->lbprm.fwrr.next_weight_act = p->lbprm.tot_wact;
+		p->per_tgrp[i].lbprm.fwrr.act.curr =
+			p->per_tgrp[i].lbprm.fwrr.act.t0 =
+			p->per_tgrp[i].lbprm.fwrr.act.t1 = init_head;
+		p->per_tgrp[i].lbprm.fwrr.act.init = &p->per_tgrp[i].lbprm.fwrr.act.t0;
+		p->per_tgrp[i].lbprm.fwrr.act.next = &p->per_tgrp[i].lbprm.fwrr.act.t1;
+
+		/* prepare the backup servers group */
+		p->per_tgrp[i].lbprm.fwrr.bck.curr_pos =
+			p->per_tgrp[i].lbprm.fwrr.bck.curr_weight =
+			p->lbprm.fwrr.next_weight_bck = p->lbprm.tot_wbck;
+		p->per_tgrp[i].lbprm.fwrr.bck.curr =
+			p->per_tgrp[i].lbprm.fwrr.bck.t0 =
+			p->per_tgrp[i].lbprm.fwrr.bck.t1 = init_head;
+		p->per_tgrp[i].lbprm.fwrr.bck.init = &p->per_tgrp[i].lbprm.fwrr.bck.t0;
+		p->per_tgrp[i].lbprm.fwrr.bck.next = &p->per_tgrp[i].lbprm.fwrr.bck.t1;
+
+		/* queue active and backup servers in two distinct groups */
+		for (srv = p->srv; srv; srv = srv->next) {
+			if (!srv_currently_usable(srv))
+				continue;
+			fwrr_queue_by_weight((srv->flags & SRV_F_BACKUP) ?
+					     p->per_tgrp[i].lbprm.fwrr.bck.init :
+					     p->per_tgrp[i].lbprm.fwrr.act.init,
+					     srv, i + 1);
+		}
 	}
 }
 
@@ -327,9 +349,9 @@ void fwrr_init_server_groups(struct proxy *p)
  *
  * The lbprm's lock must be held. The server's lock is not used.
  */
-static inline void fwrr_dequeue_srv(struct server *s)
+static inline void fwrr_dequeue_srv(struct server *s, int tgid)
 {
-	eb32_delete(&s->lb_node);
+	eb32_delete(&s->per_tgrp[tgid - 1].lb_node);
 }
 
 /* queues a server into the appropriate group and tree depending on its
@@ -338,16 +360,17 @@ static inline void fwrr_dequeue_srv(struct server *s)
  * backup status, and ->npos. If the server is disabled, simply assign
  * it to the NULL tree.
  *
 * The lbprm's lock must be held. The server's lock is not used.
 */
-static void fwrr_queue_srv(struct server *s)
+static void fwrr_queue_srv(struct server *s, int tgid)
 {
 	struct proxy *p = s->proxy;
 	struct fwrr_group *grp;
 	int next_weight;
 
-	grp = (s->flags & SRV_F_BACKUP) ? &p->lbprm.fwrr.bck : &p->lbprm.fwrr.act;
 	if (s->flags & SRV_F_BACKUP) {
+		grp = &p->per_tgrp[tgid - 1].lbprm.fwrr.bck;
 		next_weight = p->lbprm.fwrr.next_weight_bck;
 	} else {
+		grp = &p->per_tgrp[tgid - 1].lbprm.fwrr.act;
 		next_weight = p->lbprm.fwrr.next_weight_act;
 	}
 
@@ -355,15 +378,15 @@ static void fwrr_queue_srv(struct server *s)
 	 * which does not fit into the theoretical new window.
 	 */
 	if (!srv_willbe_usable(s)) {
-		fwrr_remove_from_tree(s);
+		fwrr_remove_from_tree(s, tgid);
 	}
 	else if (s->next_eweight <= 0 ||
-		 s->npos >= 2 * grp->curr_weight ||
-		 s->npos >= grp->curr_weight + next_weight) {
+		 s->per_tgrp[tgid - 1].npos >= 2 * grp->curr_weight ||
+		 s->per_tgrp[tgid - 1].npos >= grp->curr_weight + next_weight) {
 		/* put into next tree, and readjust npos in case we could
 		 * finally take this back to current. */
-		s->npos -= grp->curr_weight;
-		fwrr_queue_by_weight(grp->next, s);
+		s->per_tgrp[tgid - 1].npos -= grp->curr_weight;
+		fwrr_queue_by_weight(grp->next, s, tgid);
 	}
 	else {
 		/* The sorting key is stored in units of s->npos * user_weight
 		 * in order to avoid overflow. With this formula, the result is
 		 * always positive, so we can use eb32_insert().
 		 */
-		s->lb_node.key = SRV_UWGHT_RANGE * s->npos +
-			(unsigned)(SRV_EWGHT_MAX + s->rweight - s->next_eweight) / BE_WEIGHT_SCALE;
+		s->per_tgrp[tgid - 1].lb_node.key = SRV_UWGHT_RANGE * s->per_tgrp[tgid - 1].npos +
+			(unsigned)(SRV_EWGHT_MAX + s->per_tgrp[tgid - 1].rweight - s->next_eweight) / BE_WEIGHT_SCALE;
 
-		eb32_insert(&grp->curr, &s->lb_node);
-		s->lb_tree = &grp->curr;
+		eb32_insert(&grp->curr, &s->per_tgrp[tgid - 1].lb_node);
+		s->per_tgrp[tgid - 1].lb_tree = &grp->curr;
 	}
 }
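To see how the key computed in fwrr_queue_srv() orders servers: npos is the
primary criterion, and within one position a heavier server (larger
eweight) produces a smaller key, so it is picked first. A toy check of both
properties (the EX_* constants are stand-in values chosen for the example,
not HAProxy's actual definitions):

	#include <assert.h>

	#define EX_UWGHT_RANGE  256
	#define EX_EWGHT_MAX    4080
	#define EX_WEIGHT_SCALE 16

	static unsigned fwrr_key(unsigned npos, unsigned rweight, unsigned eweight)
	{
		return EX_UWGHT_RANGE * npos
		       + (EX_EWGHT_MAX + rweight - eweight) / EX_WEIGHT_SCALE;
	}

	int main(void)
	{
		/* same position: the heavier server gets the smaller key */
		assert(fwrr_key(10, 0, 160) < fwrr_key(10, 0, 16));
		/* the secondary term stays below EX_UWGHT_RANGE (rweight is
		 * kept below eweight), so an earlier position always sorts
		 * before a later one */
		assert(fwrr_key(9, 0, 16) < fwrr_key(10, 0, 160));
		return 0;
	}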
@@ -385,56 +408,57 @@ static void fwrr_queue_srv(struct server *s)
  *
  * The lbprm's lock must be held. The server's lock is not used.
  */
-static inline void fwrr_get_srv_init(struct server *s)
+static inline void fwrr_get_srv_init(struct server *s, int tgid)
 {
-	s->npos = s->rweight = 0;
+	s->per_tgrp[tgid - 1].npos = s->per_tgrp[tgid - 1].rweight = 0;
 }
 
 /* prepares a server when extracting it from the "next" tree.
  *
  * The lbprm's lock must be held. The server's lock is not used.
  */
-static inline void fwrr_get_srv_next(struct server *s)
+static inline void fwrr_get_srv_next(struct server *s, int tgid)
 {
 	struct fwrr_group *grp = (s->flags & SRV_F_BACKUP) ?
-		&s->proxy->lbprm.fwrr.bck :
-		&s->proxy->lbprm.fwrr.act;
+		&s->proxy->per_tgrp[tgid - 1].lbprm.fwrr.bck :
+		&s->proxy->per_tgrp[tgid - 1].lbprm.fwrr.act;
 
-	s->npos += grp->curr_weight;
+	s->per_tgrp[tgid - 1].npos += grp->curr_weight;
 }
 
 /* prepares a server when it was marked down.
  *
  * The lbprm's lock must be held. The server's lock is not used.
  */
-static inline void fwrr_get_srv_down(struct server *s)
+static inline void fwrr_get_srv_down(struct server *s, int tgid)
 {
 	struct fwrr_group *grp = (s->flags & SRV_F_BACKUP) ?
-		&s->proxy->lbprm.fwrr.bck :
-		&s->proxy->lbprm.fwrr.act;
+		&s->proxy->per_tgrp[tgid - 1].lbprm.fwrr.bck :
+		&s->proxy->per_tgrp[tgid - 1].lbprm.fwrr.act;
 
-	s->npos = grp->curr_pos;
+	s->per_tgrp[tgid - 1].npos = grp->curr_pos;
 }
 
 /* prepares a server when extracting it from its tree.
  *
 * The lbprm's lock must be held. The server's lock is not used.
 */
-static void fwrr_get_srv(struct server *s)
+static void fwrr_get_srv(struct server *s, int tgid)
 {
 	struct proxy *p = s->proxy;
+	struct fwrr_group *grp = (s->flags & SRV_F_BACKUP) ?
-		&p->lbprm.fwrr.bck :
-		&p->lbprm.fwrr.act;
+		&p->per_tgrp[tgid - 1].lbprm.fwrr.bck :
+		&p->per_tgrp[tgid - 1].lbprm.fwrr.act;
 
-	if (s->lb_tree == grp->init) {
-		fwrr_get_srv_init(s);
+	if (s->per_tgrp[tgid - 1].lb_tree == grp->init) {
+		fwrr_get_srv_init(s, tgid);
 	}
-	else if (s->lb_tree == grp->next) {
-		fwrr_get_srv_next(s);
+	else if (s->per_tgrp[tgid - 1].lb_tree == grp->next) {
+		fwrr_get_srv_next(s, tgid);
 	}
-	else if (s->lb_tree == NULL) {
-		fwrr_get_srv_down(s);
+	else if (s->per_tgrp[tgid - 1].lb_tree == NULL) {
+		fwrr_get_srv_down(s, tgid);
 	}
 }
@@ -462,13 +486,15 @@ static struct server *fwrr_get_server_from_group(struct fwrr_group *grp)
 {
 	struct eb32_node *node1;
 	struct eb32_node *node2;
+	struct srv_per_tgroup *per_tgrp;
 	struct server *s1 = NULL;
 	struct server *s2 = NULL;
 
 	node1 = eb32_first(&grp->curr);
 	if (node1) {
-		s1 = eb32_entry(node1, struct server, lb_node);
-		if (s1->cur_eweight && s1->npos <= grp->curr_pos)
+		per_tgrp = eb32_entry(node1, struct srv_per_tgroup, lb_node);
+		s1 = per_tgrp->server;
+		if (s1->cur_eweight && s1->per_tgrp[tgid - 1].npos <= grp->curr_pos)
 			return s1;
 	}
 
@@ -478,9 +504,10 @@ static struct server *fwrr_get_server_from_group(struct fwrr_group *grp)
 	 */
 	node2 = eb32_first(grp->init);
 	if (node2) {
-		s2 = eb32_entry(node2, struct server, lb_node);
+		per_tgrp = eb32_entry(node2, struct srv_per_tgroup, lb_node);
+		s2 = per_tgrp->server;
 		if (s2->cur_eweight) {
-			fwrr_get_srv_init(s2);
+			fwrr_get_srv_init(s2, tgid);
 			return s2;
 		}
 	}
@@ -499,18 +526,18 @@ static inline void fwrr_update_position(struct fwrr_group *grp, struct server *s
 	if (!eweight)
 		return;
 
-	if (!s->npos) {
+	if (!s->per_tgrp[tgid - 1].npos) {
 		/* first time ever for this server */
-		s->npos = grp->curr_pos;
+		s->per_tgrp[tgid - 1].npos = grp->curr_pos;
 	}
 
-	s->lpos = s->npos;
-	s->npos += next_weight / eweight;
-	s->rweight += next_weight % eweight;
+	s->per_tgrp[tgid - 1].lpos = s->per_tgrp[tgid - 1].npos;
+	s->per_tgrp[tgid - 1].npos += next_weight / eweight;
+	s->per_tgrp[tgid - 1].rweight += next_weight % eweight;
 
-	if (s->rweight >= eweight) {
-		s->rweight -= eweight;
-		s->npos++;
+	if (s->per_tgrp[tgid - 1].rweight >= eweight) {
+		s->per_tgrp[tgid - 1].rweight -= eweight;
+		s->per_tgrp[tgid - 1].npos++;
 	}
 }
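fwrr_update_position() above is the heart of the smooth weighting: each pick
advances the server's next position by next_weight / eweight and banks the
division remainder in rweight, bumping the position by one extra step
whenever a whole unit has accumulated, so no fraction of the weight is ever
lost. A standalone toy reproduction of that arithmetic (values picked purely
for illustration; it mirrors the three lines of the function):

	#include <assert.h>

	int main(void)
	{
		unsigned npos = 0, rweight = 0;
		unsigned next_weight = 10, eweight = 4;
		unsigned prev, steps[4], i;

		for (i = 0; i < 4; i++) {
			prev = npos;
			npos += next_weight / eweight;		/* stride: 2 */
			rweight += next_weight % eweight;	/* remainder: 2 */
			if (rweight >= eweight) {
				rweight -= eweight;
				npos++;
			}
			steps[i] = npos - prev;
		}
		/* strides come out as 2,3,2,3: 10 positions per 4 picks, i.e.
		 * exactly next_weight/eweight on average, with no drift */
		assert(steps[0] == 2 && steps[1] == 3 && steps[2] == 2 && steps[3] == 3);
		assert(npos == 10);
		return 0;
	}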
@@ -527,9 +554,9 @@ struct server *fwrr_get_next_server(struct proxy *p, struct server *srvtoavoid)
 	int switched;
 	int next_weight;
 
-	HA_RWLOCK_WRLOCK(LBPRM_LOCK, &p->lbprm.lock);
+	HA_RWLOCK_WRLOCK(LBPRM_LOCK, &p->per_tgrp[tgid - 1].lbprm.fwrr.lock);
 	if (p->srv_act) {
-		grp = &p->lbprm.fwrr.act;
+		grp = &p->per_tgrp[tgid - 1].lbprm.fwrr.act;
 		next_weight = p->lbprm.fwrr.next_weight_act;
 	} else if (p->lbprm.fbck) {
 		srv = p->lbprm.fbck;
@@ -537,7 +564,7 @@ struct server *fwrr_get_next_server(struct proxy *p, struct server *srvtoavoid)
 	} else if (p->srv_bck) {
 		next_weight = p->lbprm.fwrr.next_weight_bck;
-		grp = &p->lbprm.fwrr.bck;
+		grp = &p->per_tgrp[tgid - 1].lbprm.fwrr.bck;
 	} else {
 		srv = NULL;
 		goto out;
@@ -577,7 +604,7 @@ struct server *fwrr_get_next_server(struct proxy *p, struct server *srvtoavoid)
 	 * to a better place afterwards.
 	 */
 	fwrr_update_position(grp, srv, next_weight);
-	fwrr_dequeue_srv(srv);
+	fwrr_dequeue_srv(srv, tgid);
 	grp->curr_pos++;
 	if (!srv->maxconn || (!srv->queueslength && srv->served < srv_dynamic_maxconn(srv))) {
 		/* make sure it is not the server we are trying to exclude... */
@@ -589,13 +616,13 @@ struct server *fwrr_get_next_server(struct proxy *p, struct server *srvtoavoid)
 		/* the server is saturated or avoided, let's chain it for
 		 * later reinsertion.
 		 */
-		srv->next_full = full;
+		srv->per_tgrp[tgid - 1].next_full = full;
 		full = srv;
 	}
 
 take_this_one:
 	/* OK, we got the best server, let's update it */
-	fwrr_queue_srv(srv);
+	fwrr_queue_srv(srv, tgid);
 
 requeue_servers:
 	/* Requeue all extracted servers. If full==srv then it was
@@ -611,8 +638,8 @@ struct server *fwrr_get_next_server(struct proxy *p, struct server *srvtoavoid)
 		 */
 		do {
 			if (likely(full != srv))
-				fwrr_queue_by_weight(grp->init, full);
-			full = full->next_full;
+				fwrr_queue_by_weight(grp->init, full, tgid);
+			full = full->per_tgrp[tgid - 1].next_full;
 		} while (full);
 	} else {
 		/* requeue all extracted servers just as if they were consumed
@@ -620,13 +647,13 @@ struct server *fwrr_get_next_server(struct proxy *p, struct server *srvtoavoid)
 		 */
 		do {
 			if (likely(full != srv))
-				fwrr_queue_srv(full);
-			full = full->next_full;
+				fwrr_queue_srv(full, tgid);
+			full = full->per_tgrp[tgid - 1].next_full;
 		} while (full);
 	}
 
 out:
-	HA_RWLOCK_WRUNLOCK(LBPRM_LOCK, &p->lbprm.lock);
+	HA_RWLOCK_WRUNLOCK(LBPRM_LOCK, &p->per_tgrp[tgid - 1].lbprm.fwrr.lock);
 	return srv;
 }
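Worth noting on the reader side: fwrr_get_next_server() now takes only the
calling group's lock, which is the whole point of the patch. Condensed from
the hunks above (tgid here is HAProxy's 1-based id of the current thread's
group, available without being passed in):

	HA_RWLOCK_WRLOCK(LBPRM_LOCK, &p->per_tgrp[tgid - 1].lbprm.fwrr.lock);
	/* ... pick from this group's act/bck trees, requeue, reposition ... */
	HA_RWLOCK_WRUNLOCK(LBPRM_LOCK, &p->per_tgrp[tgid - 1].lbprm.fwrr.lock);

With 64 threads split across several groups, picks no longer serialize on a
single proxy-wide lbprm lock, which is where the roughly 2.2x request-rate
improvement quoted in the commit message comes from.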