// SPDX-License-Identifier: GPL-2.0-only
/*
 * Resource Director Technology (RDT)
 * - Monitoring code
 *
 * Copyright (C) 2017 Intel Corporation
 *
 * Author:
 *    Vikas Shivappa <vikas.shivappa@intel.com>
 *
 * This replaces the perf-based cqm.c, but we reuse a lot of
 * code and data structures originally from Peter Zijlstra and Matt Fleming.
 *
 * More information about RDT can be found in the Intel (R) x86 Architecture
 * Software Developer Manual June 2016, volume 3, section 17.17.
 */

#include <linux/module.h>
#include <linux/slab.h>
#include <asm/cpu_device_id.h>
#include "internal.h"

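/*
 * struct rmid_entry - bookkeeping for one RMID.
 * @rmid:	the RMID this entry describes
 * @busy:	number of domains on which this RMID may still be dirty;
 *		while non-zero the entry is in "limbo" and off the free list
 * @list:	membership on rmid_free_lru once the RMID is free
 */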
struct rmid_entry {
	u32			rmid;
	int			busy;
	struct list_head	list;
};

/**
 * @rmid_free_lru - A least recently used list of free RMIDs.
 *	These RMIDs are guaranteed to have an occupancy less than the
 *	threshold occupancy.
 */
static LIST_HEAD(rmid_free_lru);

/**
 * @rmid_limbo_count - count of currently unused but (potentially)
 *	dirty RMIDs.
 *	This counts RMIDs that no one is currently using but that
 *	may have an occupancy value > resctrl_cqm_threshold. User can
 *	change the threshold occupancy value.
 */
static unsigned int rmid_limbo_count;

/**
 * @rmid_ptrs - The entries used by the limbo and free lists.
 */
static struct rmid_entry	*rmid_ptrs;

/*
 * Global boolean for rdt_monitor which is true if any
 * resource monitoring is enabled.
 */
bool rdt_mon_capable;

/*
 * Global to indicate which monitoring events are enabled.
 */
unsigned int rdt_mon_features;

/*
 * This is the threshold cache occupancy at which we will consider an
 * RMID available for re-allocation.
 */
unsigned int resctrl_cqm_threshold;

static inline struct rmid_entry *__rmid_entry(u32 rmid)
{
	struct rmid_entry *entry;

	entry = &rmid_ptrs[rmid];
	WARN_ON(entry->rmid != rmid);

	return entry;
}

static u64 __rmid_read(u32 rmid, u32 eventid)
{
	u64 val;

	/*
	 * As per the SDM, when IA32_QM_EVTSEL.EvtID (bits 7:0) is configured
	 * with a valid event code for supported resource type and the bits
	 * IA32_QM_EVTSEL.RMID (bits 41:32) are configured with valid RMID,
	 * IA32_QM_CTR.data (bits 61:0) reports the monitored data.
	 * IA32_QM_CTR.Error (bit 63) and IA32_QM_CTR.Unavailable (bit 62)
	 * are error bits.
	 */
	wrmsr(MSR_IA32_QM_EVTSEL, eventid, rmid);
	rdmsrl(MSR_IA32_QM_CTR, val);

	return val;
}

static bool rmid_dirty(struct rmid_entry *entry)
{
	u64 val = __rmid_read(entry->rmid, QOS_L3_OCCUP_EVENT_ID);

	return val >= resctrl_cqm_threshold;
}

/*
 * Check the RMIDs that are marked as busy for this domain. If the
 * reported LLC occupancy is below the threshold, clear the busy bit
 * and decrement the count. If the busy count reaches zero on an RMID,
 * we free the RMID.
 */
void __check_limbo(struct rdt_domain *d, bool force_free)
{
	struct rmid_entry *entry;
	struct rdt_resource *r;
	u32 crmid = 1, nrmid;

	r = &rdt_resources_all[RDT_RESOURCE_L3];

	/*
	 * Skip RMID 0 and start from RMID 1; check all RMIDs that are
	 * marked as busy for occupancy < threshold. If the occupancy
	 * is less than the threshold, decrement the busy counter of the
	 * RMID and move it to the free list when the counter reaches 0.
	 */
	for (;;) {
		nrmid = find_next_bit(d->rmid_busy_llc, r->num_rmid, crmid);
		if (nrmid >= r->num_rmid)
			break;

		entry = __rmid_entry(nrmid);
		if (force_free || !rmid_dirty(entry)) {
			clear_bit(entry->rmid, d->rmid_busy_llc);
			if (!--entry->busy) {
				rmid_limbo_count--;
				list_add_tail(&entry->list, &rmid_free_lru);
			}
		}
		crmid = nrmid + 1;
	}
}

bool has_busy_rmid(struct rdt_resource *r, struct rdt_domain *d)
{
	return find_first_bit(d->rmid_busy_llc, r->num_rmid) != r->num_rmid;
}

/*
 * As of now the RMID allocation is global.
 * However, we keep track of which packages the RMIDs
 * are used on to optimize the limbo list management.
 */
int alloc_rmid(void)
{
	struct rmid_entry *entry;

	lockdep_assert_held(&rdtgroup_mutex);

	if (list_empty(&rmid_free_lru))
		return rmid_limbo_count ? -EBUSY : -ENOSPC;

	entry = list_first_entry(&rmid_free_lru,
				 struct rmid_entry, list);
	list_del(&entry->list);

	return entry->rmid;
}

static void add_rmid_to_limbo(struct rmid_entry *entry)
{
	struct rdt_resource *r;
	struct rdt_domain *d;
	int cpu;
	u64 val;

	r = &rdt_resources_all[RDT_RESOURCE_L3];

	entry->busy = 0;
	cpu = get_cpu();
	list_for_each_entry(d, &r->domains, list) {
		if (cpumask_test_cpu(cpu, &d->cpu_mask)) {
			val = __rmid_read(entry->rmid, QOS_L3_OCCUP_EVENT_ID);
			if (val <= resctrl_cqm_threshold)
				continue;
		}

		/*
		 * For the first limbo RMID in the domain,
		 * set up the limbo worker.
		 */
		if (!has_busy_rmid(r, d))
			cqm_setup_limbo_handler(d, CQM_LIMBOCHECK_INTERVAL);
		set_bit(entry->rmid, d->rmid_busy_llc);
		entry->busy++;
	}
	put_cpu();

	if (entry->busy)
		rmid_limbo_count++;
	else
		list_add_tail(&entry->list, &rmid_free_lru);
}

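/*
 * free_rmid() does not return an RMID straight to the free list: if
 * llc_occupancy monitoring is enabled, cache lines may still be tagged
 * with the RMID, so it is parked in limbo until its occupancy on every
 * domain has dropped below resctrl_cqm_threshold.
 */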
void free_rmid(u32 rmid)
{
	struct rmid_entry *entry;

	if (!rmid)
		return;

	lockdep_assert_held(&rdtgroup_mutex);

	entry = __rmid_entry(rmid);

	if (is_llc_occupancy_enabled())
		add_rmid_to_limbo(entry);
	else
		list_add_tail(&entry->list, &rmid_free_lru);
}

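/*
 * The hardware MBM counter is only MBM_CNTR_WIDTH bits wide and wraps
 * around. Shifting both readings up so that the counter's top bit
 * becomes bit 63 makes the u64 subtraction compute the difference
 * modulo 2^MBM_CNTR_WIDTH; shifting back down yields the number of
 * chunks counted since the previous read. E.g. assuming a 24-bit
 * counter, prev_msr = 0xfffffe and cur_msr = 0x000004 gives 6 chunks
 * rather than a huge bogus value.
 */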
static u64 mbm_overflow_count(u64 prev_msr, u64 cur_msr)
{
	u64 shift = 64 - MBM_CNTR_WIDTH, chunks;

	chunks = (cur_msr << shift) - (prev_msr << shift);
	return chunks >> shift;
}

static int __mon_event_count(u32 rmid, struct rmid_read *rr)
{
	struct mbm_state *m;
	u64 chunks, tval;

	tval = __rmid_read(rmid, rr->evtid);
	if (tval & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL)) {
		rr->val = tval;
		return -EINVAL;
	}
	switch (rr->evtid) {
	case QOS_L3_OCCUP_EVENT_ID:
		rr->val += tval;
		return 0;
	case QOS_L3_MBM_TOTAL_EVENT_ID:
		m = &rr->d->mbm_total[rmid];
		break;
	case QOS_L3_MBM_LOCAL_EVENT_ID:
		m = &rr->d->mbm_local[rmid];
		break;
	default:
		/*
		 * Code would never reach here because
		 * an invalid event id would fail the __rmid_read.
		 */
		return -EINVAL;
	}

	if (rr->first) {
		memset(m, 0, sizeof(struct mbm_state));
		m->prev_bw_msr = m->prev_msr = tval;
		return 0;
	}

	chunks = mbm_overflow_count(m->prev_msr, tval);
	m->chunks += chunks;
	m->prev_msr = tval;

	rr->val += m->chunks;
	return 0;
}

/*
 * Supporting function to calculate the memory bandwidth
 * and delta bandwidth in MBps.
 */
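/*
 * Each chunk represents mon_scale bytes, so "chunks * mon_scale" is a
 * byte count and ">> 20" converts it to MB. Since this is driven by
 * the 1s MBM overflow timer, the MB delta per invocation is
 * numerically the bandwidth in MBps.
 */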
static void mbm_bw_count(u32 rmid, struct rmid_read *rr)
{
	struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3];
	struct mbm_state *m = &rr->d->mbm_local[rmid];
	u64 tval, cur_bw, chunks;

	tval = __rmid_read(rmid, rr->evtid);
	if (tval & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL))
		return;

	chunks = mbm_overflow_count(m->prev_bw_msr, tval);
	m->chunks_bw += chunks;
	m->chunks = m->chunks_bw;
	cur_bw = (chunks * r->mon_scale) >> 20;

	if (m->delta_comp)
		m->delta_bw = abs(cur_bw - m->prev_bw);
	m->delta_comp = false;
	m->prev_bw = cur_bw;
	m->prev_bw_msr = tval;
}

/*
 * This is called via IPI to read the CQM/MBM counters
 * on a domain.
 */
void mon_event_count(void *info)
{
	struct rdtgroup *rdtgrp, *entry;
	struct rmid_read *rr = info;
	struct list_head *head;

	rdtgrp = rr->rgrp;

	if (__mon_event_count(rdtgrp->mon.rmid, rr))
		return;

	/*
	 * For Ctrl groups read data from child monitor groups.
	 */
	head = &rdtgrp->mon.crdtgrp_list;

	if (rdtgrp->type == RDTCTRL_GROUP) {
		list_for_each_entry(entry, head, mon.crdtgrp_list) {
			if (__mon_event_count(entry->mon.rmid, rr))
				return;
		}
	}
}

/*
 * Feedback loop for MBA software controller (mba_sc)
 *
 * mba_sc is a feedback loop where we periodically read MBM counters and
 * adjust the bandwidth percentage values via the IA32_MBA_THRTL_MSRs so
 * that:
 *
 *	current bandwidth (cur_bw) < user specified bandwidth (user_bw)
 *
 * This uses the MBM counters to measure the bandwidth and MBA throttle
 * MSRs to control the bandwidth for a particular rdtgrp. It builds on the
 * fact that resctrl rdtgroups have both monitoring and control.
 *
 * The frequency of the checks is 1s and we just tag along the MBM overflow
 * timer. Having a 1s interval makes the calculation of bandwidth simpler.
 *
 * Although MBA's goal is to restrict the bandwidth to a maximum, there may
 * be a need to increase the bandwidth to avoid unnecessarily restricting
 * the L2 <-> L3 traffic.
 *
 * Since MBA controls the L2 external bandwidth whereas MBM measures the
 * L3 external bandwidth, the following sequence could lead to such a
 * situation.
 *
 * Consider an rdtgroup which had high L3 <-> memory traffic in initial
 * phases -> mba_sc kicks in and reduces bandwidth percentage values -> but
 * after some time the rdtgroup has mostly L2 <-> L3 traffic.
 *
 * In this case we may restrict the rdtgroup's L2 <-> L3 traffic as its
 * throttle MSRs already have low percentage values. To avoid
 * unnecessarily restricting such rdtgroups, we also increase the bandwidth.
 */
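/*
 * The resulting control step, applied once per second per domain, is
 * roughly:
 *
 *	if (cur_bw > user_bw)
 *		lower the throttle MSR by one bw_gran step;
 *	else if (cur_bw + delta_bw < user_bw)
 *		raise the throttle MSR by one bw_gran step;
 *	else
 *		leave it alone;
 *
 * with the MSR value kept within [membw.min_bw, MAX_MBA_BW].
 */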
static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm)
{
	u32 closid, rmid, cur_msr, cur_msr_val, new_msr_val;
	struct mbm_state *pmbm_data, *cmbm_data;
	u32 cur_bw, delta_bw, user_bw;
	struct rdt_resource *r_mba;
	struct rdt_domain *dom_mba;
	struct list_head *head;
	struct rdtgroup *entry;

	if (!is_mbm_local_enabled())
		return;

	r_mba = &rdt_resources_all[RDT_RESOURCE_MBA];
	closid = rgrp->closid;
	rmid = rgrp->mon.rmid;
	pmbm_data = &dom_mbm->mbm_local[rmid];

	dom_mba = get_domain_from_cpu(smp_processor_id(), r_mba);
	if (!dom_mba) {
		pr_warn_once("Failure to get domain for MBA update\n");
		return;
	}

	cur_bw = pmbm_data->prev_bw;
	user_bw = dom_mba->mbps_val[closid];
	delta_bw = pmbm_data->delta_bw;
	cur_msr_val = dom_mba->ctrl_val[closid];

	/*
	 * For Ctrl groups read data from child monitor groups.
	 */
	head = &rgrp->mon.crdtgrp_list;
	list_for_each_entry(entry, head, mon.crdtgrp_list) {
		cmbm_data = &dom_mbm->mbm_local[entry->mon.rmid];
		cur_bw += cmbm_data->prev_bw;
		delta_bw += cmbm_data->delta_bw;
	}

	/*
	 * Scale up/down the bandwidth linearly for the ctrl group. The
	 * bandwidth step is the bandwidth granularity specified by the
	 * hardware.
	 *
	 * The delta_bw is used when increasing the bandwidth so that we
	 * don't alternately increase and decrease the control values
	 * continuously.
	 *
	 * For example, consider cur_bw = 90MBps, user_bw = 100MBps and a
	 * bandwidth step of 20MBps (> user_bw - cur_bw): we would keep
	 * switching between 90 and 110 continuously if we only checked
	 * cur_bw < user_bw.
	 */
	if (cur_msr_val > r_mba->membw.min_bw && user_bw < cur_bw) {
		new_msr_val = cur_msr_val - r_mba->membw.bw_gran;
	} else if (cur_msr_val < MAX_MBA_BW &&
		   (user_bw > (cur_bw + delta_bw))) {
		new_msr_val = cur_msr_val + r_mba->membw.bw_gran;
	} else {
		return;
	}

	cur_msr = r_mba->msr_base + closid;
	wrmsrl(cur_msr, delay_bw_map(new_msr_val, r_mba));
	dom_mba->ctrl_val[closid] = new_msr_val;

	/*
	 * Delta values are updated dynamically, package wise, for each
	 * rdtgrp every time the throttle MSR changes value.
	 *
	 * This is because (1) the increase in bandwidth is not perfectly
	 * linear and only "approximately" linear even when the hardware
	 * says it is linear, and (2) since MBA is a core-specific
	 * mechanism, the delta values vary based on the number of cores
	 * used by the rdtgrp.
	 */
	pmbm_data->delta_comp = true;
	list_for_each_entry(entry, head, mon.crdtgrp_list) {
		cmbm_data = &dom_mbm->mbm_local[entry->mon.rmid];
		cmbm_data->delta_comp = true;
	}
}

static void mbm_update(struct rdt_domain *d, int rmid)
{
	struct rmid_read rr;

	rr.first = false;
	rr.d = d;

	/*
	 * This is protected from concurrent reads from user space
	 * as both the user and we hold the global mutex.
	 */
	if (is_mbm_total_enabled()) {
		rr.evtid = QOS_L3_MBM_TOTAL_EVENT_ID;
		__mon_event_count(rmid, &rr);
	}
	if (is_mbm_local_enabled()) {
		rr.evtid = QOS_L3_MBM_LOCAL_EVENT_ID;

		/*
		 * Call the MBA software controller only for the
		 * control groups and when the user has enabled
		 * the software controller explicitly.
		 */
		if (!is_mba_sc(NULL))
			__mon_event_count(rmid, &rr);
		else
			mbm_bw_count(rmid, &rr);
	}
}

/*
 * Handler to scan the limbo list and move RMIDs whose occupancy is
 * below threshold_occupancy to the free list.
 */
void cqm_handle_limbo(struct work_struct *work)
{
	unsigned long delay = msecs_to_jiffies(CQM_LIMBOCHECK_INTERVAL);
	int cpu = smp_processor_id();
	struct rdt_resource *r;
	struct rdt_domain *d;

	mutex_lock(&rdtgroup_mutex);

	r = &rdt_resources_all[RDT_RESOURCE_L3];
	d = get_domain_from_cpu(cpu, r);

	if (!d) {
		pr_warn_once("Failure to get domain for limbo worker\n");
		goto out_unlock;
	}

	__check_limbo(d, false);

	if (has_busy_rmid(r, d))
		schedule_delayed_work_on(cpu, &d->cqm_limbo, delay);

out_unlock:
	mutex_unlock(&rdtgroup_mutex);
}

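/*
 * Kick off the limbo worker on any CPU in the domain; the worker
 * re-arms itself from cqm_handle_limbo() for as long as the domain
 * still has busy RMIDs.
 */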
void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms)
{
	unsigned long delay = msecs_to_jiffies(delay_ms);
	int cpu;

	cpu = cpumask_any(&dom->cpu_mask);
	dom->cqm_work_cpu = cpu;

	schedule_delayed_work_on(cpu, &dom->cqm_limbo, delay);
}

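/*
 * Handler to read the MBM counters for every rdtgroup (and its child
 * monitor groups) on this domain once per second, so that the
 * MBM_CNTR_WIDTH-bit hardware counters never wrap undetected, and to
 * drive the MBA software controller when it is enabled.
 */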
void mbm_handle_overflow(struct work_struct *work)
{
	unsigned long delay = msecs_to_jiffies(MBM_OVERFLOW_INTERVAL);
	struct rdtgroup *prgrp, *crgrp;
	int cpu = smp_processor_id();
	struct list_head *head;
	struct rdt_domain *d;

	mutex_lock(&rdtgroup_mutex);

	if (!static_branch_likely(&rdt_enable_key))
		goto out_unlock;

	d = get_domain_from_cpu(cpu, &rdt_resources_all[RDT_RESOURCE_L3]);
	if (!d)
		goto out_unlock;

	list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) {
		mbm_update(d, prgrp->mon.rmid);

		head = &prgrp->mon.crdtgrp_list;
		list_for_each_entry(crgrp, head, mon.crdtgrp_list)
			mbm_update(d, crgrp->mon.rmid);

		if (is_mba_sc(NULL))
			update_mba_bw(prgrp, d);
	}

	schedule_delayed_work_on(cpu, &d->mbm_over, delay);

out_unlock:
	mutex_unlock(&rdtgroup_mutex);
}

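/*
 * Arm the overflow worker on any CPU in the domain, unless monitoring
 * is currently disabled (rdt_enable_key off).
 */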
void mbm_setup_overflow_handler(struct rdt_domain *dom, unsigned long delay_ms)
{
	unsigned long delay = msecs_to_jiffies(delay_ms);
	int cpu;

	if (!static_branch_likely(&rdt_enable_key))
		return;
	cpu = cpumask_any(&dom->cpu_mask);
	dom->mbm_work_cpu = cpu;
	schedule_delayed_work_on(cpu, &dom->mbm_over, delay);
}

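/*
 * Allocate the rmid_ptrs[] array, one entry per RMID, and put every
 * RMID except the always-allocated RMID 0 on the free list.
 */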
static int dom_data_init(struct rdt_resource *r)
{
	struct rmid_entry *entry = NULL;
	int i, nr_rmids;

	nr_rmids = r->num_rmid;
	rmid_ptrs = kcalloc(nr_rmids, sizeof(struct rmid_entry), GFP_KERNEL);
	if (!rmid_ptrs)
		return -ENOMEM;

	for (i = 0; i < nr_rmids; i++) {
		entry = &rmid_ptrs[i];
		INIT_LIST_HEAD(&entry->list);

		entry->rmid = i;
		list_add_tail(&entry->list, &rmid_free_lru);
	}

	/*
	 * RMID 0 is special and is always allocated. It's used for all
	 * tasks that are not monitored.
	 */
	entry = __rmid_entry(0);
	list_del(&entry->list);

	return 0;
}

static struct mon_evt llc_occupancy_event = {
	.name	= "llc_occupancy",
	.evtid	= QOS_L3_OCCUP_EVENT_ID,
};

static struct mon_evt mbm_total_event = {
	.name	= "mbm_total_bytes",
	.evtid	= QOS_L3_MBM_TOTAL_EVENT_ID,
};

static struct mon_evt mbm_local_event = {
	.name	= "mbm_local_bytes",
	.evtid	= QOS_L3_MBM_LOCAL_EVENT_ID,
};

/*
 * Initialize the event list for the resource.
 *
 * Note that MBM events are also part of RDT_RESOURCE_L3 resource
 * because as per the SDM the total and local memory bandwidth
 * are enumerated as part of L3 monitoring.
 */
static void l3_mon_evt_init(struct rdt_resource *r)
{
	INIT_LIST_HEAD(&r->evt_list);

	if (is_llc_occupancy_enabled())
		list_add_tail(&llc_occupancy_event.list, &r->evt_list);
	if (is_mbm_total_enabled())
		list_add_tail(&mbm_total_event.list, &r->evt_list);
	if (is_mbm_local_enabled())
		list_add_tail(&mbm_local_event.list, &r->evt_list);
}

int rdt_get_mon_l3_config(struct rdt_resource *r)
{
	unsigned int cl_size = boot_cpu_data.x86_cache_size;
	int ret;

	r->mon_scale = boot_cpu_data.x86_cache_occ_scale;
	r->num_rmid = boot_cpu_data.x86_cache_max_rmid + 1;

	/*
	 * A reasonable upper limit on the max threshold is the number
	 * of lines tagged per RMID if all RMIDs have the same number of
	 * lines tagged in the LLC.
	 *
	 * For a 35MB LLC and 56 RMIDs, this is ~1.8% of the LLC.
	 */
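	/*
	 * Worked example with the numbers above: x86_cache_size is in KB,
	 * so 35MB -> 35840KB, and 35840 * 1024 / 56 = 655360 bytes (640KB)
	 * per RMID, which is 1/56th (~1.8%) of the cache.
	 */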
	resctrl_cqm_threshold = cl_size * 1024 / r->num_rmid;

	/* h/w works in units of "boot_cpu_data.x86_cache_occ_scale" */
	resctrl_cqm_threshold /= r->mon_scale;

	ret = dom_data_init(r);
	if (ret)
		return ret;

	l3_mon_evt_init(r);

	r->mon_capable = true;
	r->mon_enabled = true;

	return 0;
}