From: Alan T. DeKok Date: Thu, 24 Jul 2025 12:50:39 +0000 (+0200) Subject: minor fixes X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=ff413b105edb73e6c7c931afaa4266c98b0e0038;p=thirdparty%2Ffreeradius-server.git minor fixes add "todo" for consistent-keyed-balance. This should be addressed before the next release. For non-redundant types, if the chosen home server is alive, then return that immediately without going through the loop. Otherwise fall back to load-balance. This is a change from earlier versions, and should help with better load balancing, instead of always picking the "next" home server. Update the documentation to match, and document keyed-load-balance --- diff --git a/raddb/proxy.conf b/raddb/proxy.conf index cf0697d408..f6413a14f9 100644 --- a/raddb/proxy.conf +++ b/raddb/proxy.conf @@ -614,8 +614,7 @@ home_server_pool my_auth_failover { # # client-balance - the home server is chosen by hashing the # source IP address of the packet. If that home server - # is down, the next one in the list is used, just as - # with "fail-over". + # is down, then the "load-balance" method is used. # # There is no way of predicting which source IP will map # to which home server. @@ -626,8 +625,8 @@ home_server_pool my_auth_failover { # # client-port-balance - the home server is chosen by hashing # the source IP address and source port of the packet. - # If that home server is down, the next one in the list - # is used, just as with "fail-over". + # If that home server is down, then the "load-balance" + # method is used. # # This method provides slightly better load balancing # for EAP sessions than "client-balance". However, it @@ -642,15 +641,37 @@ home_server_pool my_auth_failover { # server = (hash % num_servers_in_pool). # # If there is no Load-Balance-Key in the control items, - # the load balancing method is identical to "load-balance". + # or if the chosen server is down, then the + # "load-balance" method is used. 
# # For most non-EAP authentication methods, The User-Name # attribute provides a good key. An "unlang" policy can # be used to copy the User-Name to the Load-Balance-Key # attribute. This method may not work for EAP sessions, # as the User-Name outside of the TLS tunnel is often - # static, e.g. "anonymous@realm". - # + # static, e.g. "anonymous@realm". In that case, the key + # should include both the User-Name, and the + # Calling-Station-Id, which helps to make the hash more + # evenly distributed. + # + # consistent-keyed-balance - the server uses consistent + # hashing to pick a home server. If all home servers + # are up, then this method is equivalent to + # keyed-balance. + # + # If the first chosen home server is down, then a new + # home server is chosen using consistent hashing. The + # "consistent" portion means that the same key will map + # to the same "second chosen" home server. If that + # server is down the same key will map to the same + # "third chosen" home server, etc. + + # This method is most useful for EAP, where all packets + # for the same authentication session should take the + # same route through a proxy fabric. In the event of a + # failure of one home server, all packets for one EAP + # session will still be routed through the same home + # server. # # The default type is fail-over. type = fail-over diff --git a/src/main/realms.c b/src/main/realms.c index 41894fc477..8c2dadc74e 100644 --- a/src/main/realms.c +++ b/src/main/realms.c @@ -2957,6 +2957,13 @@ static bool home_server_active(REQUEST *request, home_server_t *home) * situation. * * @todo - if there's only one server alive, just pick that? + * + * @todo - move to a home_server_id_t structure, which contains a + * home_server_t* and a uint32_t id. We can then allocate 8-16 + * IDs per home server, which will help with load balancing. If + * each home server has only one ID, there is a chance that two + * will be randomly assigned right next to each other. 
That + * results in bad load balancing. */ static home_server_t *home_server_by_consistent_key(REQUEST *request, home_pool_t *pool, uint32_t hash) { @@ -3065,8 +3072,6 @@ home_server_t *home_server_ldb(char const *realmname, * Determine how to pick choose the home server. */ switch (pool->type) { - - /* * For load-balancing by client IP address, we * pick a home server by hashing the client IP. @@ -3091,7 +3096,23 @@ home_server_t *home_server_ldb(char const *realmname, hash = 0; break; } + + pick_matching_server: + /* + * Try the matching server first. If it's alive, we return it. + * + * Otherwise we fall back to just picking a random one. + */ start = hash % pool->num_home_servers; + found = pool->servers[start]; + if (home_server_active(request, found)) return found; + + /* + * The matching one is dead. We then use the + * "load-balance" algorithm to pick the server. + */ + found = NULL; + start = 0; break; case HOME_POOL_CLIENT_PORT_BALANCE: @@ -3112,19 +3133,18 @@ home_server_t *home_server_ldb(char const *realmname, } hash = fr_hash_update(&request->packet->src_port, sizeof(request->packet->src_port), hash); - start = hash % pool->num_home_servers; - break; + goto pick_matching_server; case HOME_POOL_KEYED_BALANCE: if ((vp = fr_pair_find_by_num(request->config, PW_LOAD_BALANCE_KEY, 0, TAG_ANY)) != NULL) { hash = fr_hash(vp->vp_strvalue, vp->vp_length); start = hash % pool->num_home_servers; - break; + goto pick_matching_server; } /* FALL-THROUGH */ - case HOME_POOL_LOAD_BALANCE: case HOME_POOL_FAIL_OVER: + case HOME_POOL_LOAD_BALANCE: start = 0; break; @@ -3151,7 +3171,6 @@ home_server_t *home_server_ldb(char const *realmname, default: /* this shouldn't happen... */ start = 0; break; - } /* @@ -3160,6 +3179,10 @@ home_server_t *home_server_ldb(char const *realmname, * it. If it is too busy, skip it. * * Otherwise, use it. 
+ * + * The difference between fail-over (i.e. redundant) and + * load balance is that redundant always starts at the + * first one, and load balance starts at a random one. */ for (count = 0; count < pool->num_home_servers; count++) { home_server_t *home = pool->servers[(start + count) % pool->num_home_servers]; @@ -3179,9 +3202,14 @@ home_server_t *home_server_ldb(char const *realmname, } /* - * We've found the first "live" one. Use that. + * For fail-over, we just pick the first one + * which is alive. + * + * For all other methods, we load balance among + * all servers, picking the least busy home + * server. */ - if (pool->type != HOME_POOL_LOAD_BALANCE) { + if (pool->type == HOME_POOL_FAIL_OVER) { found = home; break; }