]> git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
selftests/resctrl: Improve accuracy of cache occupancy test
authorReinette Chatre <reinette.chatre@intel.com>
Sat, 4 Apr 2026 01:56:00 +0000 (18:56 -0700)
committerShuah Khan <skhan@linuxfoundation.org>
Tue, 5 May 2026 00:40:02 +0000 (18:40 -0600)
Dave Martin reported inconsistent CMT test failures. In one experiment
the first run of the CMT test failed because of too large (24%) difference
between measured and achievable cache occupancy while the second run passed
with an acceptable 4% difference.

The CMT test is susceptible to interference from the rest of the system.
This can be demonstrated with a utility like stress-ng by running the CMT
test while introducing cache misses using:

   stress-ng --matrix-3d 0 --matrix-3d-zyx

Below shows an example of the CMT test failing because of a significant
difference between measured and achievable cache occupancy when run with
interference:
    # Starting CMT test ...
    # Mounting resctrl to "/sys/fs/resctrl"
    # Cache size :335544320
    # Writing benchmark parameters to resctrl FS
    # Benchmark PID: 7011
    # Checking for pass/fail
    # Fail: Check cache miss rate within 15%
    # Percent diff=99
    # Number of bits: 5
    # Average LLC val: 235929
    # Cache span (bytes): 83886080
    not ok 1 CMT: test

The CMT test creates a new control group that is also capable of monitoring
and assigns the workload to it. The workload allocates a buffer that by
default fills a portion of the L3 and keeps reading from the buffer,
measuring the L3 occupancy at intervals. The test passes if the workload's
L3 occupancy is within 15% of the buffer size.

By not adjusting any capacity bitmasks the workload shares the cache with
the rest of the system. Any other task that may be running could evict
the workload's data from the cache causing it to have low cache occupancy.

Reduce interference from the rest of the system by ensuring that the
workload's control group uses the capacity bitmask found in the user
parameters for L3 and that the rest of the system can only allocate into
the inverse of the workload's L3 cache portion. Other tasks can thus no
longer evict the workload's data from L3.

With the above adjustments the CMT test is more consistent. Repeating the
CMT test while generating interference with stress-ng on a sample
system after applying the fixes show significant improvement in test
accuracy:

    # Starting CMT test ...
    # Mounting resctrl to "/sys/fs/resctrl"
    # Cache size :335544320
    # Writing benchmark parameters to resctrl FS
    # Write schema "L3:0=fffe0" to resctrl FS
    # Write schema "L3:0=1f" to resctrl FS
    # Benchmark PID: 7089
    # Checking for pass/fail
    # Pass: Check cache miss rate within 15%
    # Percent diff=12
    # Number of bits: 5
    # Average LLC val: 73269248
    # Cache span (bytes): 83886080
    ok 1 CMT: test

Link: https://lore.kernel.org/r/b160592179f88069cdc679563e152007998a0d76.1775266384.git.reinette.chatre@intel.com
Reported-by: Dave Martin <Dave.Martin@arm.com>
Signed-off-by: Reinette Chatre <reinette.chatre@intel.com>
Tested-by: Chen Yu <yu.c.chen@intel.com>
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Link: https://lore.kernel.org/lkml/aO+7MeSMV29VdbQs@e133380.arm.com/
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
tools/testing/selftests/resctrl/cmt_test.c
tools/testing/selftests/resctrl/mba_test.c
tools/testing/selftests/resctrl/mbm_test.c
tools/testing/selftests/resctrl/resctrl.h
tools/testing/selftests/resctrl/resctrl_val.c

index d09e693dc739cb0dbf97acd39ffc9cf5c1545bc1..7bc6cf49c1c5b6da9dfb2df3bce9242d74513567 100644 (file)
 #define CON_MON_LCC_OCCUP_PATH         \
        "%s/%s/mon_data/mon_L3_%02d/llc_occupancy"
 
-static int cmt_init(const struct resctrl_val_param *param, int domain_id)
+/*
+ * Initialize capacity bitmasks (CBMs) of:
+ * - control group being tested per test parameters,
+ * - default resource group as inverse of control group being tested to prevent
+ *   other tasks from interfering with test.
+ */
+static int cmt_init(const struct resctrl_test *test,
+                   const struct user_params *uparams,
+                   const struct resctrl_val_param *param, int domain_id)
 {
+       unsigned long full_mask;
+       char schemata[64];
+       int ret;
+
        sprintf(llc_occup_path, CON_MON_LCC_OCCUP_PATH, RESCTRL_PATH,
                param->ctrlgrp, domain_id);
 
-       return 0;
+       ret = get_full_cbm(test->resource, &full_mask);
+       if (ret)
+               return ret;
+
+       snprintf(schemata, sizeof(schemata), "%lx", ~param->mask & full_mask);
+       ret = write_schemata("", schemata, uparams->cpu, test->resource);
+       if (ret)
+               return ret;
+
+       snprintf(schemata, sizeof(schemata), "%lx", param->mask);
+       return write_schemata(param->ctrlgrp, schemata, uparams->cpu, test->resource);
 }
 
 static int cmt_setup(const struct resctrl_test *test,
index c7e9adc0368f3bc0aa45f9cc8d4a6f01d54439c6..cd4c715b7ffd2ce66340e013511ecb850ab54ff6 100644 (file)
@@ -17,7 +17,9 @@
 #define ALLOCATION_MIN         10
 #define ALLOCATION_STEP                10
 
-static int mba_init(const struct resctrl_val_param *param, int domain_id)
+static int mba_init(const struct resctrl_test *test,
+                   const struct user_params *uparams,
+                   const struct resctrl_val_param *param, int domain_id)
 {
        int ret;
 
index 84d8bc2505392e921020789840efeefd457be7d8..58201f844740414f160a6cfc0010931f46306003 100644 (file)
@@ -83,7 +83,9 @@ static int check_results(size_t span)
        return ret;
 }
 
-static int mbm_init(const struct resctrl_val_param *param, int domain_id)
+static int mbm_init(const struct resctrl_test *test,
+                   const struct user_params *uparams,
+                   const struct resctrl_val_param *param, int domain_id)
 {
        int ret;
 
index afe635b6e48d639008feb3144e88e25f72165689..c72045c74ac4f91c91903518f99fc727d963b7aa 100644 (file)
@@ -135,7 +135,9 @@ struct resctrl_val_param {
        char                    filename[64];
        unsigned long           mask;
        int                     num_of_runs;
-       int                     (*init)(const struct resctrl_val_param *param,
+       int                     (*init)(const struct resctrl_test *test,
+                                       const struct user_params *uparams,
+                                       const struct resctrl_val_param *param,
                                        int domain_id);
        int                     (*setup)(const struct resctrl_test *test,
                                         const struct user_params *uparams,
index 7c08e936572d2ce53bc85f913017795f3ce2a049..a5a8badb83d45a912ae9a2c6507b0dee793ca98e 100644 (file)
@@ -569,7 +569,7 @@ int resctrl_val(const struct resctrl_test *test,
                goto reset_affinity;
 
        if (param->init) {
-               ret = param->init(param, domain_id);
+               ret = param->init(test, uparams, param, domain_id);
                if (ret)
                        goto reset_affinity;
        }