/*
 * Resource Director Technology (RDT)
 * - Monitoring code
 *
 * Copyright (C) 2017 Intel Corporation
 *
 * Author:
 *    Vikas Shivappa <vikas.shivappa@intel.com>
 *
 * This replaces the perf-based cqm.c, but reuses a lot of code and
 * data structures originally written by Peter Zijlstra and Matt Fleming.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * More information about RDT can be found in the Intel (R) x86 Architecture
 * Software Developer Manual June 2016, volume 3, section 17.17.
 */

#include <linux/module.h>
#include <linux/slab.h>
#include <asm/cpu_device_id.h>
#include "intel_rdt.h"

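/*
 * MSRs used to read RMID event counts: software programs the RMID and
 * event ID into IA32_QM_EVTSEL, then reads the count (or the error bits)
 * back from IA32_QM_CTR. See __rmid_read() below.
 */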
#define MSR_IA32_QM_CTR		0x0c8e
#define MSR_IA32_QM_EVTSEL	0x0c8d

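/**
 * struct rmid_entry - Per-RMID bookkeeping for the free/limbo lists.
 * @rmid:	the hardware RMID this entry describes
 * @busy:	number of domains in which this RMID is still over the
 *		occupancy threshold (decremented as domains drop below it)
 * @list:	link into rmid_free_lru or rmid_limbo_lru
 */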
struct rmid_entry {
	u32			rmid;
	atomic_t		busy;
	struct list_head	list;
};

/**
 * @rmid_free_lru - A least recently used list of free RMIDs.
 *     These RMIDs are guaranteed to have an occupancy less than the
 *     threshold occupancy.
 */
static LIST_HEAD(rmid_free_lru);

/**
 * @rmid_limbo_lru - A list of currently unused but (potentially)
 *     dirty RMIDs.
 *     This list contains RMIDs that no one is currently using but that
 *     may have an occupancy value > intel_cqm_threshold. The user can
 *     change the threshold occupancy value.
 */
static LIST_HEAD(rmid_limbo_lru);

/**
 * @rmid_ptrs - Array of struct rmid_entry, indexed by RMID. The entries
 *     are linked into the limbo and free lists above.
 */
static struct rmid_entry *rmid_ptrs;

/*
 * Global boolean for rdt_monitor which is true if any
 * resource monitoring is enabled.
 */
bool rdt_mon_capable;

/*
 * Global to indicate which monitoring events are enabled.
 */
unsigned int rdt_mon_features;

/*
 * This is the threshold cache occupancy at which we will consider an
 * RMID available for re-allocation.
 */
unsigned int intel_cqm_threshold;

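/*
 * Map an RMID to its rmid_ptrs[] entry; warn if the stored RMID does
 * not match the index (the array is set up in dom_data_init()).
 */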
static inline struct rmid_entry *__rmid_entry(u32 rmid)
{
	struct rmid_entry *entry;

	entry = &rmid_ptrs[rmid];
	WARN_ON(entry->rmid != rmid);

	return entry;
}

static u64 __rmid_read(u32 rmid, u32 eventid)
{
	u64 val;

	/*
	 * As per the SDM, when IA32_QM_EVTSEL.EvtID (bits 7:0) is configured
	 * with a valid event code for a supported resource type and the bits
	 * IA32_QM_EVTSEL.RMID (bits 41:32) are configured with a valid RMID,
	 * IA32_QM_CTR.data (bits 61:0) reports the monitored data.
	 * IA32_QM_CTR.Error (bit 63) and IA32_QM_CTR.Unavailable (bit 62)
	 * are error bits.
	 */
	wrmsr(MSR_IA32_QM_EVTSEL, eventid, rmid);
	rdmsrl(MSR_IA32_QM_CTR, val);

	return val;
}

/*
 * Walk the limbo list looking at any RMIDs that are flagged in the
 * domain rmid_busy_llc bitmap as busy. If the reported LLC occupancy
 * is below the threshold, clear the busy bit and decrement the count.
 * If the busy count reaches zero on an RMID, we stop looking.
 * This can be called from an IPI.
 * We need an atomic for the busy count because multiple CPUs may check
 * the same RMID at the same time.
 */
static bool __check_limbo(struct rdt_domain *d)
{
	struct rmid_entry *entry;
	u64 val;

	list_for_each_entry(entry, &rmid_limbo_lru, list) {
		if (!test_bit(entry->rmid, d->rmid_busy_llc))
			continue;
		val = __rmid_read(entry->rmid, QOS_L3_OCCUP_EVENT_ID);
		if (val <= intel_cqm_threshold) {
			clear_bit(entry->rmid, d->rmid_busy_llc);
			if (atomic_dec_and_test(&entry->busy))
				return true;
		}
	}
	return false;
}

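/*
 * IPI callback: run __check_limbo() against the L3 domain of the CPU
 * this is executing on.
 */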
static void check_limbo(void *arg)
{
	struct rdt_domain *d;

	d = get_domain_from_cpu(smp_processor_id(),
				&rdt_resources_all[RDT_RESOURCE_L3]);

	if (d)
		__check_limbo(d);
}

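/*
 * Return true if any RMID in this domain's rmid_busy_llc bitmap is
 * still flagged busy.
 */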
static bool has_busy_rmid(struct rdt_resource *r, struct rdt_domain *d)
{
	return find_first_bit(d->rmid_busy_llc, r->num_rmid) != r->num_rmid;
}

/*
 * Scan the limbo list and move all entries that are below the
 * intel_cqm_threshold to the free list.
 * Return "true" if we moved an RMID to the free list or the limbo
 * list is now empty, "false" if busy RMIDs remain in limbo.
 */
static bool try_freeing_limbo_rmid(void)
{
	struct rmid_entry *entry, *tmp;
	struct rdt_resource *r;
	cpumask_var_t cpu_mask;
	struct rdt_domain *d;
	bool ret = true;
	int cpu;

	if (list_empty(&rmid_limbo_lru))
		return ret;

	r = &rdt_resources_all[RDT_RESOURCE_L3];

	cpu = get_cpu();

	/*
	 * First see if we can free up an RMID by checking busy values
	 * on the local package.
	 */
	d = get_domain_from_cpu(cpu, r);
	if (d && has_busy_rmid(r, d) && __check_limbo(d)) {
		list_for_each_entry_safe(entry, tmp, &rmid_limbo_lru, list) {
			if (atomic_read(&entry->busy) == 0) {
				list_del(&entry->list);
				list_add_tail(&entry->list, &rmid_free_lru);
				goto done;
			}
		}
	}

	if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL)) {
		ret = false;
		goto done;
	}

	/*
	 * Build a mask of other domains that have busy RMIDs.
	 */
	list_for_each_entry(d, &r->domains, list) {
		if (!cpumask_test_cpu(cpu, &d->cpu_mask) &&
		    has_busy_rmid(r, d))
			cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask);
	}
	if (cpumask_empty(cpu_mask)) {
		ret = false;
		goto free_mask;
	}

	/*
	 * Scan domains with busy RMIDs to check if they still are busy.
	 */
	on_each_cpu_mask(cpu_mask, check_limbo, NULL, true);

	/* Walk limbo list moving all free RMIDs to the &rmid_free_lru list */
	list_for_each_entry_safe(entry, tmp, &rmid_limbo_lru, list) {
		if (atomic_read(&entry->busy) != 0) {
			ret = false;
			continue;
		}
		list_del(&entry->list);
		list_add_tail(&entry->list, &rmid_free_lru);
	}

free_mask:
	free_cpumask_var(cpu_mask);
done:
	put_cpu();
	return ret;
}

/*
 * As of now, RMID allocation is global. However, we keep track of
 * which packages the RMIDs are used on so we can optimize the limbo
 * list management.
 */
int alloc_rmid(void)
{
	struct rmid_entry *entry;
	bool ret;

	lockdep_assert_held(&rdtgroup_mutex);

	if (list_empty(&rmid_free_lru)) {
		ret = try_freeing_limbo_rmid();
		if (list_empty(&rmid_free_lru))
			return ret ? -ENOSPC : -EBUSY;
	}

	entry = list_first_entry(&rmid_free_lru,
				 struct rmid_entry, list);
	list_del(&entry->list);

	return entry->rmid;
}

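/*
 * A freed RMID may still have cache lines tagged to it. Read its
 * occupancy on the local domain; on all other domains we cannot read
 * here, so conservatively assume it is busy. If any domain is over the
 * threshold, park the RMID on the limbo list, otherwise free it.
 */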
static void add_rmid_to_limbo(struct rmid_entry *entry)
{
	struct rdt_resource *r;
	struct rdt_domain *d;
	int cpu, nbusy = 0;
	u64 val;

	r = &rdt_resources_all[RDT_RESOURCE_L3];

	cpu = get_cpu();
	list_for_each_entry(d, &r->domains, list) {
		if (cpumask_test_cpu(cpu, &d->cpu_mask)) {
			val = __rmid_read(entry->rmid, QOS_L3_OCCUP_EVENT_ID);
			if (val <= intel_cqm_threshold)
				continue;
		}
		set_bit(entry->rmid, d->rmid_busy_llc);
		nbusy++;
	}
	put_cpu();

	if (nbusy) {
		atomic_set(&entry->busy, nbusy);
		list_add_tail(&entry->list, &rmid_limbo_lru);
	} else {
		list_add_tail(&entry->list, &rmid_free_lru);
	}
}

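/*
 * Return an RMID to the allocator. RMID 0 is never freed. With LLC
 * occupancy monitoring enabled, the RMID must drain through the limbo
 * list before it can be reused.
 */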
void free_rmid(u32 rmid)
{
	struct rmid_entry *entry;

	if (!rmid)
		return;

	lockdep_assert_held(&rdtgroup_mutex);

	entry = __rmid_entry(rmid);

	if (is_llc_occupancy_enabled())
		add_rmid_to_limbo(entry);
	else
		list_add_tail(&entry->list, &rmid_free_lru);
}

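/*
 * Read one event counter for @rmid and accumulate it into rr->val.
 * Returns -EINVAL if the hardware reported an error or the count was
 * unavailable.
 */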
static int __mon_event_count(u32 rmid, struct rmid_read *rr)
{
	u64 tval;

	tval = __rmid_read(rmid, rr->evtid);
	if (tval & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL)) {
		rr->val = tval;
		return -EINVAL;
	}
	switch (rr->evtid) {
	case QOS_L3_OCCUP_EVENT_ID:
		rr->val += tval;
		return 0;
	default:
		/*
		 * Code would never reach here because an invalid
		 * event id would fail the __rmid_read.
		 */
		return -EINVAL;
	}
}

/*
 * This is called via IPI to read the CQM/MBM counters
 * on a domain.
 */
void mon_event_count(void *info)
{
	struct rdtgroup *rdtgrp, *entry;
	struct rmid_read *rr = info;
	struct list_head *head;

	rdtgrp = rr->rgrp;

	if (__mon_event_count(rdtgrp->mon.rmid, rr))
		return;

	/*
	 * For control groups, also read data from the child monitor groups.
	 */
	head = &rdtgrp->mon.crdtgrp_list;

	if (rdtgrp->type == RDTCTRL_GROUP) {
		list_for_each_entry(entry, head, mon.crdtgrp_list) {
			if (__mon_event_count(entry->mon.rmid, rr))
				return;
		}
	}
}
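
/*
 * Allocate the rmid_ptrs[] array and seed the free list with every
 * RMID the hardware supports, then pull out RMID 0 which is reserved.
 */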
static int dom_data_init(struct rdt_resource *r)
{
	struct rmid_entry *entry = NULL;
	int i, nr_rmids;

	nr_rmids = r->num_rmid;
	rmid_ptrs = kcalloc(nr_rmids, sizeof(struct rmid_entry), GFP_KERNEL);
	if (!rmid_ptrs)
		return -ENOMEM;

	for (i = 0; i < nr_rmids; i++) {
		entry = &rmid_ptrs[i];
		INIT_LIST_HEAD(&entry->list);

		entry->rmid = i;
		list_add_tail(&entry->list, &rmid_free_lru);
	}

	/*
	 * RMID 0 is special and is always allocated. It's used for all
	 * tasks that are not monitored.
	 */
	entry = __rmid_entry(0);
	list_del(&entry->list);

	return 0;
}

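/* Event descriptor that l3_mon_evt_init() adds to the L3 event list. */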
static struct mon_evt llc_occupancy_event = {
	.name	= "llc_occupancy",
	.evtid	= QOS_L3_OCCUP_EVENT_ID,
};

/*
 * Initialize the event list for the resource.
 *
 * Note that MBM events are also part of the RDT_RESOURCE_L3 resource
 * because, as per the SDM, the total and local memory bandwidth
 * events are enumerated as part of L3 monitoring.
 */
static void l3_mon_evt_init(struct rdt_resource *r)
{
	INIT_LIST_HEAD(&r->evt_list);

	if (is_llc_occupancy_enabled())
		list_add_tail(&llc_occupancy_event.list, &r->evt_list);
}

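/*
 * Probe the L3 monitoring configuration from the CPUID-derived fields in
 * boot_cpu_data, compute the occupancy threshold, and set up the RMID
 * free list and the event list.
 */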
int rdt_get_mon_l3_config(struct rdt_resource *r)
{
	int ret;

	r->mon_scale = boot_cpu_data.x86_cache_occ_scale;
	r->num_rmid = boot_cpu_data.x86_cache_max_rmid + 1;

	/*
	 * A reasonable upper limit on the max threshold is the number
	 * of lines tagged per RMID if all RMIDs have the same number of
	 * lines tagged in the LLC.
	 *
	 * For a 35MB LLC and 56 RMIDs, this is ~1.8% of the LLC.
	 */
	intel_cqm_threshold = boot_cpu_data.x86_cache_size * 1024 / r->num_rmid;

	/* h/w works in units of "boot_cpu_data.x86_cache_occ_scale" */
	intel_cqm_threshold /= r->mon_scale;
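	/*
	 * Worked example with illustrative numbers: a 35MB LLC reports
	 * x86_cache_size = 35840 (KB), so with 56 RMIDs the threshold is
	 * 35840 * 1024 / 56 = 655360 bytes. Assuming an occupancy scale
	 * of 64 bytes per counter unit, that is 655360 / 64 = 10240 units.
	 */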

	ret = dom_data_init(r);
	if (ret)
		return ret;

	l3_mon_evt_init(r);

	r->mon_capable = true;
	r->mon_enabled = true;

	return 0;
}