git.ipfire.org Git - thirdparty/haproxy.git/commitdiff
MINOR: cpu-topo: use cpufreq before acpi cppc
author Willy Tarreau <w@1wt.eu>
Wed, 12 Mar 2025 12:28:24 +0000 (13:28 +0100)
committer Willy Tarreau <w@1wt.eu>
Fri, 14 Mar 2025 17:30:30 +0000 (18:30 +0100)
The acpi_cppc method was found to take about 5ms per CPU on a 64-core
EPYC system, which is plainly unacceptable as it delays the boot by half
a second. Let's use the less accurate cpufreq first, which should be
sufficient anyway since many systems do not have acpi_cppc. We'll only
fall back to acpi_cppc for systems without cpufreq. If it were to be
an issue over time, we could also automatically consider that all
threads of the same core or even of the same cluster run at the same
speed (when a cluster is known to be accurate).

src/cpu_topo.c

index 47a97aea61e69af317e73b6a30b764b2d74ca053..3a7253e51c966ab4c05dc3f71025b600817381e3 100644 (file)
@@ -413,22 +413,25 @@ int cpu_detect_topology(void)
                 * as high as 260 were seen there. Note that only nominal_perf
                 * is trustable, as nominal_freq may return zero. It's also
                 * more reliable than the max cpufreq values because it doesn't
-                * seem to take into account the die quality.
+                * seem to take into account the die quality. However, acpi_cppc
+                * can be super slow on some systems (5ms per access noticed on
+                * a 64-core EPYC), making haproxy literally take seconds to
+                * start just due to this. Thus we start with cpufreq and fall
+                * back to acpi_cppc. If it becomes an issue, we could imagine
+                * forcing the value to all members of the same core and even
+                * cluster.
                 */
                if (ha_cpu_topo[cpu].capa < 0 &&
-                   read_line_to_trash(NUMA_DETECT_SYSTEM_SYSFS_PATH "/cpu/cpu%d/acpi_cppc/nominal_perf", cpu) >= 0) {
+                   read_line_to_trash(NUMA_DETECT_SYSTEM_SYSFS_PATH "/cpu/cpu%d/cpufreq/scaling_max_freq", cpu) >= 0) {
+                       /* This is in kHz, turn it to MHz to stay below 32k */
                        if (trash.data)
-                               ha_cpu_topo[cpu].capa = str2uic(trash.area);
+                               ha_cpu_topo[cpu].capa = (str2uic(trash.area) + 999U) / 1000U;
                }
 
-               /* Finally if none of them is available we can have a look at
-                * cpufreq's max cpu frequency.
-                */
                if (ha_cpu_topo[cpu].capa < 0 &&
-                   read_line_to_trash(NUMA_DETECT_SYSTEM_SYSFS_PATH "/cpu/cpu%d/cpufreq/scaling_max_freq", cpu) >= 0) {
-                       /* This is in kHz turn it to MHz to stay below 32k */
+                   read_line_to_trash(NUMA_DETECT_SYSTEM_SYSFS_PATH "/cpu/cpu%d/acpi_cppc/nominal_perf", cpu) >= 0) {
                        if (trash.data)
-                               ha_cpu_topo[cpu].capa = (str2uic(trash.area) + 999U) / 1000U;
+                               ha_cpu_topo[cpu].capa = str2uic(trash.area);
                }
        }