]> git.ipfire.org Git - thirdparty/linux.git/blobdiff - tools/testing/selftests/resctrl/cat_test.c
Merge tag 'timers-core-2024-03-10' of git://git.kernel.org/pub/scm/linux/kernel/git...
[thirdparty/linux.git] / tools / testing / selftests / resctrl / cat_test.c
index 242c4c6200aa23c26ad07b42142bd4c078f16076..4cb991be8e31be37032d9811a070fc68dce27f66 100644 (file)
 #include "resctrl.h"
 #include <unistd.h>
 
-#define RESULT_FILE_NAME1      "result_cat1"
-#define RESULT_FILE_NAME2      "result_cat2"
+#define RESULT_FILE_NAME       "result_cat"
 #define NUM_OF_RUNS            5
-#define MAX_DIFF_PERCENT       4
-#define MAX_DIFF               1000000
 
 /*
- * Change schemata. Write schemata to specified
- * con_mon grp, mon_grp in resctrl FS.
- * Run 5 times in order to get average values.
+ * Minimum difference in LLC misses between a test with n+1 bits CBM to the
+ * test with n bits is MIN_DIFF_PERCENT_PER_BIT * (n - 1). With e.g. 5 vs 4
+ * bits in the CBM mask, the minimum difference must be at least
+ * MIN_DIFF_PERCENT_PER_BIT * (4 - 1) = 3 percent.
+ *
+ * The relationship between number of used CBM bits and difference in LLC
+ * misses is not expected to be linear. With a small number of bits, the
+ * margin is smaller than with larger number of bits. For selftest purposes,
+ * however, linear approach is enough because ultimately only pass/fail
+ * decision has to be made and distinction between strong and stronger
+ * signal is irrelevant.
  */
-static int cat_setup(struct resctrl_val_param *p)
+#define MIN_DIFF_PERCENT_PER_BIT       1UL
+
+static int show_results_info(__u64 sum_llc_val, int no_of_bits,
+                            unsigned long cache_span,
+                            unsigned long min_diff_percent,
+                            unsigned long num_of_runs, bool platform,
+                            __s64 *prev_avg_llc_val)
 {
-       char schemata[64];
+       __u64 avg_llc_val = 0;
+       float avg_diff;
        int ret = 0;
 
-       /* Run NUM_OF_RUNS times */
-       if (p->num_of_runs >= NUM_OF_RUNS)
-               return END_OF_TESTS;
+       avg_llc_val = sum_llc_val / num_of_runs;
+       if (*prev_avg_llc_val) {
+               float delta = (__s64)(avg_llc_val - *prev_avg_llc_val);
+
+               avg_diff = delta / *prev_avg_llc_val;
+               ret = platform && (avg_diff * 100) < (float)min_diff_percent;
+
+               ksft_print_msg("%s Check cache miss rate changed more than %.1f%%\n",
+                              ret ? "Fail:" : "Pass:", (float)min_diff_percent);
 
-       if (p->num_of_runs == 0) {
-               sprintf(schemata, "%lx", p->mask);
-               ret = write_schemata(p->ctrlgrp, schemata, p->cpu_no,
-                                    p->resctrl_val);
+               ksft_print_msg("Percent diff=%.1f\n", avg_diff * 100);
        }
-       p->num_of_runs++;
+       *prev_avg_llc_val = avg_llc_val;
+
+       show_cache_info(no_of_bits, avg_llc_val, cache_span, true);
 
        return ret;
 }
 
-static int check_results(struct resctrl_val_param *param, size_t span)
+/* Remove the highest bit from CBM */
+static unsigned long next_mask(unsigned long current_mask)
+{
+       return current_mask & (current_mask >> 1);
+}
+
+static int check_results(struct resctrl_val_param *param, const char *cache_type,
+                        unsigned long cache_total_size, unsigned long full_cache_mask,
+                        unsigned long current_mask)
 {
        char *token_array[8], temp[512];
-       unsigned long sum_llc_perf_miss = 0;
-       int runs = 0, no_of_bits = 0;
+       __u64 sum_llc_perf_miss = 0;
+       __s64 prev_avg_llc_val = 0;
+       unsigned long alloc_size;
+       int runs = 0;
+       int fail = 0;
+       int ret;
        FILE *fp;
 
        ksft_print_msg("Checking for pass/fail\n");
@@ -59,55 +88,177 @@ static int check_results(struct resctrl_val_param *param, size_t span)
        while (fgets(temp, sizeof(temp), fp)) {
                char *token = strtok(temp, ":\t");
                int fields = 0;
+               int bits;
 
                while (token) {
                        token_array[fields++] = token;
                        token = strtok(NULL, ":\t");
                }
-               /*
-                * Discard the first value which is inaccurate due to monitoring
-                * setup transition phase.
-                */
-               if (runs > 0)
-                       sum_llc_perf_miss += strtoul(token_array[3], NULL, 0);
+
+               sum_llc_perf_miss += strtoull(token_array[3], NULL, 0);
                runs++;
+
+               if (runs < NUM_OF_RUNS)
+                       continue;
+
+               if (!current_mask) {
+                       ksft_print_msg("Unexpected empty cache mask\n");
+                       break;
+               }
+
+               alloc_size = cache_portion_size(cache_total_size, current_mask, full_cache_mask);
+
+               bits = count_bits(current_mask);
+
+               ret = show_results_info(sum_llc_perf_miss, bits,
+                                       alloc_size / 64,
+                                       MIN_DIFF_PERCENT_PER_BIT * (bits - 1),
+                                       runs, get_vendor() == ARCH_INTEL,
+                                       &prev_avg_llc_val);
+               if (ret)
+                       fail = 1;
+
+               runs = 0;
+               sum_llc_perf_miss = 0;
+               current_mask = next_mask(current_mask);
        }
 
        fclose(fp);
-       no_of_bits = count_bits(param->mask);
 
-       return show_cache_info(sum_llc_perf_miss, no_of_bits, span / 64,
-                              MAX_DIFF, MAX_DIFF_PERCENT, runs - 1,
-                              get_vendor() == ARCH_INTEL, false);
+       return fail;
 }
 
 void cat_test_cleanup(void)
 {
-       remove(RESULT_FILE_NAME1);
-       remove(RESULT_FILE_NAME2);
+       remove(RESULT_FILE_NAME);
+}
+
+/*
+ * cat_test - Execute CAT benchmark and measure cache misses
+ * @test:              Test information structure
+ * @uparams:           User supplied parameters
+ * @param:             Parameters passed to cat_test()
+ * @span:              Buffer size for the benchmark
+ * @current_mask       Start mask for the first iteration
+ *
+ * Run CAT selftest by varying the allocated cache portion and comparing the
+ * impact on cache misses (the result analysis is done in check_results()
+ * and show_results_info(), not in this function).
+ *
+ * One bit is removed from the CAT allocation bit mask (in current_mask) for
+ * each subsequent test which keeps reducing the size of the allocated cache
+ * portion. A single test flushes the buffer, reads it to warm up the cache,
+ * and reads the buffer again. The cache misses are measured during the last
+ * read pass.
+ *
+ * Return:             0 when the test was run, < 0 on error.
+ */
+static int cat_test(const struct resctrl_test *test,
+                   const struct user_params *uparams,
+                   struct resctrl_val_param *param,
+                   size_t span, unsigned long current_mask)
+{
+       char *resctrl_val = param->resctrl_val;
+       struct perf_event_read pe_read;
+       struct perf_event_attr pea;
+       cpu_set_t old_affinity;
+       unsigned char *buf;
+       char schemata[64];
+       int ret, i, pe_fd;
+       pid_t bm_pid;
+
+       if (strcmp(param->filename, "") == 0)
+               sprintf(param->filename, "stdio");
+
+       bm_pid = getpid();
+
+       /* Taskset benchmark to specified cpu */
+       ret = taskset_benchmark(bm_pid, uparams->cpu, &old_affinity);
+       if (ret)
+               return ret;
+
+       /* Write benchmark to specified con_mon grp, mon_grp in resctrl FS*/
+       ret = write_bm_pid_to_resctrl(bm_pid, param->ctrlgrp, param->mongrp,
+                                     resctrl_val);
+       if (ret)
+               goto reset_affinity;
+
+       perf_event_attr_initialize(&pea, PERF_COUNT_HW_CACHE_MISSES);
+       perf_event_initialize_read_format(&pe_read);
+       pe_fd = perf_open(&pea, bm_pid, uparams->cpu);
+       if (pe_fd < 0) {
+               ret = -1;
+               goto reset_affinity;
+       }
+
+       buf = alloc_buffer(span, 1);
+       if (!buf) {
+               ret = -1;
+               goto pe_close;
+       }
+
+       while (current_mask) {
+               snprintf(schemata, sizeof(schemata), "%lx", param->mask & ~current_mask);
+               ret = write_schemata("", schemata, uparams->cpu, test->resource);
+               if (ret)
+                       goto free_buf;
+               snprintf(schemata, sizeof(schemata), "%lx", current_mask);
+               ret = write_schemata(param->ctrlgrp, schemata, uparams->cpu, test->resource);
+               if (ret)
+                       goto free_buf;
+
+               for (i = 0; i < NUM_OF_RUNS; i++) {
+                       mem_flush(buf, span);
+                       fill_cache_read(buf, span, true);
+
+                       ret = perf_event_reset_enable(pe_fd);
+                       if (ret)
+                               goto free_buf;
+
+                       fill_cache_read(buf, span, true);
+
+                       ret = perf_event_measure(pe_fd, &pe_read, param->filename, bm_pid);
+                       if (ret)
+                               goto free_buf;
+               }
+               current_mask = next_mask(current_mask);
+       }
+
+free_buf:
+       free(buf);
+pe_close:
+       close(pe_fd);
+reset_affinity:
+       taskset_restore(bm_pid, &old_affinity);
+
+       return ret;
 }
 
-int cat_perf_miss_val(int cpu_no, int n, char *cache_type)
+static int cat_run_test(const struct resctrl_test *test, const struct user_params *uparams)
 {
-       unsigned long l_mask, l_mask_1;
-       int ret, pipefd[2], sibling_cpu_no;
-       unsigned long cache_size = 0;
-       unsigned long long_mask;
+       unsigned long long_mask, start_mask, full_cache_mask;
+       unsigned long cache_total_size = 0;
+       int n = uparams->bits;
+       unsigned int start;
        int count_of_bits;
-       char pipe_message;
        size_t span;
+       int ret;
 
-       ret = get_full_cbm(cache_type, &long_mask);
+       ret = get_full_cbm(test->resource, &full_cache_mask);
+       if (ret)
+               return ret;
+       /* Get the largest contiguous exclusive portion of the cache */
+       ret = get_mask_no_shareable(test->resource, &long_mask);
        if (ret)
                return ret;
 
        /* Get L3/L2 cache size */
-       ret = get_cache_size(cpu_no, cache_type, &cache_size);
+       ret = get_cache_size(uparams->cpu, test->resource, &cache_total_size);
        if (ret)
                return ret;
-       ksft_print_msg("Cache size :%lu\n", cache_size);
+       ksft_print_msg("Cache size :%lu\n", cache_total_size);
 
-       count_of_bits = count_bits(long_mask);
+       count_of_bits = count_contiguous_bits(long_mask, &start);
 
        if (!n)
                n = count_of_bits / 2;
@@ -118,89 +269,124 @@ int cat_perf_miss_val(int cpu_no, int n, char *cache_type)
                               count_of_bits - 1);
                return -1;
        }
-
-       /* Get core id from same socket for running another thread */
-       sibling_cpu_no = get_core_sibling(cpu_no);
-       if (sibling_cpu_no < 0)
-               return -1;
+       start_mask = create_bit_mask(start, n);
 
        struct resctrl_val_param param = {
                .resctrl_val    = CAT_STR,
-               .cpu_no         = cpu_no,
-               .setup          = cat_setup,
+               .ctrlgrp        = "c1",
+               .filename       = RESULT_FILE_NAME,
+               .num_of_runs    = 0,
        };
+       param.mask = long_mask;
+       span = cache_portion_size(cache_total_size, start_mask, full_cache_mask);
 
-       l_mask = long_mask >> n;
-       l_mask_1 = ~l_mask & long_mask;
+       remove(param.filename);
 
-       /* Set param values for parent thread which will be allocated bitmask
-        * with (max_bits - n) bits
-        */
-       span = cache_size * (count_of_bits - n) / count_of_bits;
-       strcpy(param.ctrlgrp, "c2");
-       strcpy(param.mongrp, "m2");
-       strcpy(param.filename, RESULT_FILE_NAME2);
-       param.mask = l_mask;
-       param.num_of_runs = 0;
-
-       if (pipe(pipefd)) {
-               ksft_perror("Unable to create pipe");
-               return -1;
-       }
+       ret = cat_test(test, uparams, &param, span, start_mask);
+       if (ret)
+               goto out;
 
-       fflush(stdout);
-       bm_pid = fork();
+       ret = check_results(&param, test->resource,
+                           cache_total_size, full_cache_mask, start_mask);
+out:
+       cat_test_cleanup();
 
-       /* Set param values for child thread which will be allocated bitmask
-        * with n bits
-        */
-       if (bm_pid == 0) {
-               param.mask = l_mask_1;
-               strcpy(param.ctrlgrp, "c1");
-               strcpy(param.mongrp, "m1");
-               span = cache_size * n / count_of_bits;
-               strcpy(param.filename, RESULT_FILE_NAME1);
-               param.num_of_runs = 0;
-               param.cpu_no = sibling_cpu_no;
+       return ret;
+}
+
+static int noncont_cat_run_test(const struct resctrl_test *test,
+                               const struct user_params *uparams)
+{
+       unsigned long full_cache_mask, cont_mask, noncont_mask;
+       unsigned int eax, ebx, ecx, edx, sparse_masks;
+       int bit_center, ret;
+       char schemata[64];
+
+       /* Check to compare sparse_masks content to CPUID output. */
+       ret = resource_info_unsigned_get(test->resource, "sparse_masks", &sparse_masks);
+       if (ret)
+               return ret;
+
+       if (!strcmp(test->resource, "L3"))
+               __cpuid_count(0x10, 1, eax, ebx, ecx, edx);
+       else if (!strcmp(test->resource, "L2"))
+               __cpuid_count(0x10, 2, eax, ebx, ecx, edx);
+       else
+               return -EINVAL;
+
+       if (sparse_masks != ((ecx >> 3) & 1)) {
+               ksft_print_msg("CPUID output doesn't match 'sparse_masks' file content!\n");
+               return 1;
        }
 
-       remove(param.filename);
+       /* Write checks initialization. */
+       ret = get_full_cbm(test->resource, &full_cache_mask);
+       if (ret < 0)
+               return ret;
+       bit_center = count_bits(full_cache_mask) / 2;
 
-       ret = cat_val(&param, span);
-       if (ret == 0)
-               ret = check_results(&param, span);
-
-       if (bm_pid == 0) {
-               /* Tell parent that child is ready */
-               close(pipefd[0]);
-               pipe_message = 1;
-               if (write(pipefd[1], &pipe_message, sizeof(pipe_message)) <
-                   sizeof(pipe_message))
-                       /*
-                        * Just print the error message.
-                        * Let while(1) run and wait for itself to be killed.
-                        */
-                       ksft_perror("Failed signaling parent process");
-
-               close(pipefd[1]);
-               while (1)
-                       ;
-       } else {
-               /* Parent waits for child to be ready. */
-               close(pipefd[1]);
-               pipe_message = 0;
-               while (pipe_message != 1) {
-                       if (read(pipefd[0], &pipe_message,
-                                sizeof(pipe_message)) < sizeof(pipe_message)) {
-                               ksft_perror("Failed reading from child process");
-                               break;
-                       }
-               }
-               close(pipefd[0]);
-               kill(bm_pid, SIGKILL);
+       /*
+        * The bit_center needs to be at least 3 to properly calculate the CBM
+        * hole in the noncont_mask. If it's smaller return an error since the
+        * cache mask is too short and that shouldn't happen.
+        */
+       if (bit_center < 3)
+               return -EINVAL;
+       cont_mask = full_cache_mask >> bit_center;
+
+       /* Contiguous mask write check. */
+       snprintf(schemata, sizeof(schemata), "%lx", cont_mask);
+       ret = write_schemata("", schemata, uparams->cpu, test->resource);
+       if (ret) {
+               ksft_print_msg("Write of contiguous CBM failed\n");
+               return 1;
        }
 
-       cat_test_cleanup();
+       /*
+        * Non-contiguous mask write check. CBM has a 0xf hole approximately in the middle.
+        * Output is compared with support information to catch any edge case errors.
+        */
+       noncont_mask = ~(0xfUL << (bit_center - 2)) & full_cache_mask;
+       snprintf(schemata, sizeof(schemata), "%lx", noncont_mask);
+       ret = write_schemata("", schemata, uparams->cpu, test->resource);
+       if (ret && sparse_masks)
+               ksft_print_msg("Non-contiguous CBMs supported but write of non-contiguous CBM failed\n");
+       else if (ret && !sparse_masks)
+               ksft_print_msg("Non-contiguous CBMs not supported and write of non-contiguous CBM failed as expected\n");
+       else if (!ret && !sparse_masks)
+               ksft_print_msg("Non-contiguous CBMs not supported but write of non-contiguous CBM succeeded\n");
+
+       return !ret == !sparse_masks;
+}
 
-       return ret;
+static bool noncont_cat_feature_check(const struct resctrl_test *test)
+{
+       if (!resctrl_resource_exists(test->resource))
+               return false;
+
+       return resource_info_file_exists(test->resource, "sparse_masks");
 }
+
+struct resctrl_test l3_cat_test = {
+       .name = "L3_CAT",
+       .group = "CAT",
+       .resource = "L3",
+       .feature_check = test_resource_feature_check,
+       .run_test = cat_run_test,
+};
+
+struct resctrl_test l3_noncont_cat_test = {
+       .name = "L3_NONCONT_CAT",
+       .group = "CAT",
+       .resource = "L3",
+       .feature_check = noncont_cat_feature_check,
+       .run_test = noncont_cat_run_test,
+};
+
+struct resctrl_test l2_noncont_cat_test = {
+       .name = "L2_NONCONT_CAT",
+       .group = "CAT",
+       .resource = "L2",
+       .feature_check = noncont_cat_feature_check,
+       .run_test = noncont_cat_run_test,
+};