thirdparty/kernel/stable.git: drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * IOMMU API for ARM architected SMMUv3 implementations.
4 *
5 * Copyright (C) 2015 ARM Limited
6 *
7 * Author: Will Deacon <will.deacon@arm.com>
8 *
9 * This driver is powered by bad coffee and bombay mix.
10 */
11
12 #include <linux/acpi.h>
13 #include <linux/acpi_iort.h>
14 #include <linux/bitops.h>
15 #include <linux/crash_dump.h>
16 #include <linux/delay.h>
17 #include <linux/err.h>
18 #include <linux/interrupt.h>
19 #include <linux/io-pgtable.h>
20 #include <linux/iopoll.h>
21 #include <linux/module.h>
22 #include <linux/msi.h>
23 #include <linux/of.h>
24 #include <linux/of_address.h>
25 #include <linux/of_platform.h>
26 #include <linux/pci.h>
27 #include <linux/pci-ats.h>
28 #include <linux/platform_device.h>
29
30 #include "arm-smmu-v3.h"
31 #include "../../dma-iommu.h"
32 #include "../../iommu-sva.h"
33
34 static bool disable_bypass = true;
35 module_param(disable_bypass, bool, 0444);
36 MODULE_PARM_DESC(disable_bypass,
37 "Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
38
39 static bool disable_msipolling;
40 module_param(disable_msipolling, bool, 0444);
41 MODULE_PARM_DESC(disable_msipolling,
42 "Disable MSI-based polling for CMD_SYNC completion.");
43
44 enum arm_smmu_msi_index {
45 EVTQ_MSI_INDEX,
46 GERROR_MSI_INDEX,
47 PRIQ_MSI_INDEX,
48 ARM_SMMU_MAX_MSIS,
49 };
50
51 static phys_addr_t arm_smmu_msi_cfg[ARM_SMMU_MAX_MSIS][3] = {
52 [EVTQ_MSI_INDEX] = {
53 ARM_SMMU_EVTQ_IRQ_CFG0,
54 ARM_SMMU_EVTQ_IRQ_CFG1,
55 ARM_SMMU_EVTQ_IRQ_CFG2,
56 },
57 [GERROR_MSI_INDEX] = {
58 ARM_SMMU_GERROR_IRQ_CFG0,
59 ARM_SMMU_GERROR_IRQ_CFG1,
60 ARM_SMMU_GERROR_IRQ_CFG2,
61 },
62 [PRIQ_MSI_INDEX] = {
63 ARM_SMMU_PRIQ_IRQ_CFG0,
64 ARM_SMMU_PRIQ_IRQ_CFG1,
65 ARM_SMMU_PRIQ_IRQ_CFG2,
66 },
67 };
68
69 struct arm_smmu_option_prop {
70 u32 opt;
71 const char *prop;
72 };
73
74 DEFINE_XARRAY_ALLOC1(arm_smmu_asid_xa);
75 DEFINE_MUTEX(arm_smmu_asid_lock);
76
77 /*
78 * Special value used by SVA when a process dies, to quiesce a CD without
79 * disabling it.
80 */
81 struct arm_smmu_ctx_desc quiet_cd = { 0 };
82
83 static struct arm_smmu_option_prop arm_smmu_options[] = {
84 { ARM_SMMU_OPT_SKIP_PREFETCH, "hisilicon,broken-prefetch-cmd" },
85 { ARM_SMMU_OPT_PAGE0_REGS_ONLY, "cavium,cn9900-broken-page1-regspace"},
86 { 0, NULL},
87 };
88
89 static void parse_driver_options(struct arm_smmu_device *smmu)
90 {
91 int i = 0;
92
93 do {
94 if (of_property_read_bool(smmu->dev->of_node,
95 arm_smmu_options[i].prop)) {
96 smmu->options |= arm_smmu_options[i].opt;
97 dev_notice(smmu->dev, "option %s\n",
98 arm_smmu_options[i].prop);
99 }
100 } while (arm_smmu_options[++i].opt);
101 }
102
103 /* Low-level queue manipulation functions */
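/*
 * The shadow prod and cons values pack three fields, extracted with the
 * Q_IDX(), Q_WRP() and Q_OVF() helpers: the queue index in the low
 * max_n_shift bits, a wrap bit immediately above it and an overflow
 * flag. Comparing the wrap bits disambiguates an empty queue from a
 * full one when the indices are equal: same index and same wrap means
 * empty, same index and differing wrap means the producer has lapped
 * the consumer and the queue is full.
 */
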
104 static bool queue_has_space(struct arm_smmu_ll_queue *q, u32 n)
105 {
106 u32 space, prod, cons;
107
108 prod = Q_IDX(q, q->prod);
109 cons = Q_IDX(q, q->cons);
110
111 if (Q_WRP(q, q->prod) == Q_WRP(q, q->cons))
112 space = (1 << q->max_n_shift) - (prod - cons);
113 else
114 space = cons - prod;
115
116 return space >= n;
117 }
118
119 static bool queue_full(struct arm_smmu_ll_queue *q)
120 {
121 return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
122 Q_WRP(q, q->prod) != Q_WRP(q, q->cons);
123 }
124
125 static bool queue_empty(struct arm_smmu_ll_queue *q)
126 {
127 return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
128 Q_WRP(q, q->prod) == Q_WRP(q, q->cons);
129 }
130
131 static bool queue_consumed(struct arm_smmu_ll_queue *q, u32 prod)
132 {
133 return ((Q_WRP(q, q->cons) == Q_WRP(q, prod)) &&
134 (Q_IDX(q, q->cons) > Q_IDX(q, prod))) ||
135 ((Q_WRP(q, q->cons) != Q_WRP(q, prod)) &&
136 (Q_IDX(q, q->cons) <= Q_IDX(q, prod)));
137 }
138
139 static void queue_sync_cons_out(struct arm_smmu_queue *q)
140 {
141 /*
142 * Ensure that all CPU accesses (reads and writes) to the queue
143 * are complete before we update the cons pointer.
144 */
145 __iomb();
146 writel_relaxed(q->llq.cons, q->cons_reg);
147 }
148
149 static void queue_inc_cons(struct arm_smmu_ll_queue *q)
150 {
151 u32 cons = (Q_WRP(q, q->cons) | Q_IDX(q, q->cons)) + 1;
152 q->cons = Q_OVF(q->cons) | Q_WRP(q, cons) | Q_IDX(q, cons);
153 }
154
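/*
 * Acknowledge a producer-side overflow by copying the overflow flag
 * from our shadow prod value into cons and publishing the result, so
 * the reported overflow is acknowledged once we believe we have
 * caught up.
 */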
155 static void queue_sync_cons_ovf(struct arm_smmu_queue *q)
156 {
157 struct arm_smmu_ll_queue *llq = &q->llq;
158
159 if (likely(Q_OVF(llq->prod) == Q_OVF(llq->cons)))
160 return;
161
162 llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
163 Q_IDX(llq, llq->cons);
164 queue_sync_cons_out(q);
165 }
166
167 static int queue_sync_prod_in(struct arm_smmu_queue *q)
168 {
169 u32 prod;
170 int ret = 0;
171
172 /*
173 * We can't use the _relaxed() variant here, as we must prevent
174 * speculative reads of the queue before we have determined that
175 * prod has indeed moved.
176 */
177 prod = readl(q->prod_reg);
178
179 if (Q_OVF(prod) != Q_OVF(q->llq.prod))
180 ret = -EOVERFLOW;
181
182 q->llq.prod = prod;
183 return ret;
184 }
185
186 static u32 queue_inc_prod_n(struct arm_smmu_ll_queue *q, int n)
187 {
188 u32 prod = (Q_WRP(q, q->prod) | Q_IDX(q, q->prod)) + n;
189 return Q_OVF(q->prod) | Q_WRP(q, prod) | Q_IDX(q, prod);
190 }
191
192 static void queue_poll_init(struct arm_smmu_device *smmu,
193 struct arm_smmu_queue_poll *qp)
194 {
195 qp->delay = 1;
196 qp->spin_cnt = 0;
197 qp->wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
198 qp->timeout = ktime_add_us(ktime_get(), ARM_SMMU_POLL_TIMEOUT_US);
199 }
200
201 static int queue_poll(struct arm_smmu_queue_poll *qp)
202 {
203 if (ktime_compare(ktime_get(), qp->timeout) > 0)
204 return -ETIMEDOUT;
205
206 if (qp->wfe) {
207 wfe();
208 } else if (++qp->spin_cnt < ARM_SMMU_POLL_SPIN_COUNT) {
209 cpu_relax();
210 } else {
211 udelay(qp->delay);
212 qp->delay *= 2;
213 qp->spin_cnt = 0;
214 }
215
216 return 0;
217 }
218
219 static void queue_write(__le64 *dst, u64 *src, size_t n_dwords)
220 {
221 int i;
222
223 for (i = 0; i < n_dwords; ++i)
224 *dst++ = cpu_to_le64(*src++);
225 }
226
227 static void queue_read(u64 *dst, __le64 *src, size_t n_dwords)
228 {
229 int i;
230
231 for (i = 0; i < n_dwords; ++i)
232 *dst++ = le64_to_cpu(*src++);
233 }
234
235 static int queue_remove_raw(struct arm_smmu_queue *q, u64 *ent)
236 {
237 if (queue_empty(&q->llq))
238 return -EAGAIN;
239
240 queue_read(ent, Q_ENT(q, q->llq.cons), q->ent_dwords);
241 queue_inc_cons(&q->llq);
242 queue_sync_cons_out(q);
243 return 0;
244 }
245
246 /* High-level queue accessors */
247 static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
248 {
249 memset(cmd, 0, 1 << CMDQ_ENT_SZ_SHIFT);
250 cmd[0] |= FIELD_PREP(CMDQ_0_OP, ent->opcode);
251
252 switch (ent->opcode) {
253 case CMDQ_OP_TLBI_EL2_ALL:
254 case CMDQ_OP_TLBI_NSNH_ALL:
255 break;
256 case CMDQ_OP_PREFETCH_CFG:
257 cmd[0] |= FIELD_PREP(CMDQ_PREFETCH_0_SID, ent->prefetch.sid);
258 break;
259 case CMDQ_OP_CFGI_CD:
260 cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SSID, ent->cfgi.ssid);
261 fallthrough;
262 case CMDQ_OP_CFGI_STE:
263 cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
264 cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_LEAF, ent->cfgi.leaf);
265 break;
266 case CMDQ_OP_CFGI_CD_ALL:
267 cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
268 break;
269 case CMDQ_OP_CFGI_ALL:
270 /* Cover the entire SID range */
271 cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_RANGE, 31);
272 break;
273 case CMDQ_OP_TLBI_NH_VA:
274 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
275 fallthrough;
276 case CMDQ_OP_TLBI_EL2_VA:
277 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
278 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
279 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
280 cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
281 cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
282 cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
283 cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_VA_MASK;
284 break;
285 case CMDQ_OP_TLBI_S2_IPA:
286 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
287 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
288 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
289 cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
290 cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
291 cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
292 cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_IPA_MASK;
293 break;
294 case CMDQ_OP_TLBI_NH_ASID:
295 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
296 fallthrough;
297 case CMDQ_OP_TLBI_S12_VMALL:
298 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
299 break;
300 case CMDQ_OP_TLBI_EL2_ASID:
301 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
302 break;
303 case CMDQ_OP_ATC_INV:
304 cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
305 cmd[0] |= FIELD_PREP(CMDQ_ATC_0_GLOBAL, ent->atc.global);
306 cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SSID, ent->atc.ssid);
307 cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SID, ent->atc.sid);
308 cmd[1] |= FIELD_PREP(CMDQ_ATC_1_SIZE, ent->atc.size);
309 cmd[1] |= ent->atc.addr & CMDQ_ATC_1_ADDR_MASK;
310 break;
311 case CMDQ_OP_PRI_RESP:
312 cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
313 cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SSID, ent->pri.ssid);
314 cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SID, ent->pri.sid);
315 cmd[1] |= FIELD_PREP(CMDQ_PRI_1_GRPID, ent->pri.grpid);
316 switch (ent->pri.resp) {
317 case PRI_RESP_DENY:
318 case PRI_RESP_FAIL:
319 case PRI_RESP_SUCC:
320 break;
321 default:
322 return -EINVAL;
323 }
324 cmd[1] |= FIELD_PREP(CMDQ_PRI_1_RESP, ent->pri.resp);
325 break;
326 case CMDQ_OP_RESUME:
327 cmd[0] |= FIELD_PREP(CMDQ_RESUME_0_SID, ent->resume.sid);
328 cmd[0] |= FIELD_PREP(CMDQ_RESUME_0_RESP, ent->resume.resp);
329 cmd[1] |= FIELD_PREP(CMDQ_RESUME_1_STAG, ent->resume.stag);
330 break;
331 case CMDQ_OP_CMD_SYNC:
332 if (ent->sync.msiaddr) {
333 cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_IRQ);
334 cmd[1] |= ent->sync.msiaddr & CMDQ_SYNC_1_MSIADDR_MASK;
335 } else {
336 cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV);
337 }
338 cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH);
339 cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
340 break;
341 default:
342 return -ENOENT;
343 }
344
345 return 0;
346 }
347
348 static struct arm_smmu_cmdq *arm_smmu_get_cmdq(struct arm_smmu_device *smmu)
349 {
350 return &smmu->cmdq;
351 }
352
353 static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
354 struct arm_smmu_queue *q, u32 prod)
355 {
356 struct arm_smmu_cmdq_ent ent = {
357 .opcode = CMDQ_OP_CMD_SYNC,
358 };
359
360 /*
361 * Beware that Hi16xx adds an extra 32 bits of goodness to its MSI
362 * payload, so the write will zero the entire command on that platform.
363 */
364 if (smmu->options & ARM_SMMU_OPT_MSIPOLL) {
365 ent.sync.msiaddr = q->base_dma + Q_IDX(&q->llq, prod) *
366 q->ent_dwords * 8;
367 }
368
369 arm_smmu_cmdq_build_cmd(cmd, &ent);
370 }
371
372 static void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu,
373 struct arm_smmu_queue *q)
374 {
375 static const char * const cerror_str[] = {
376 [CMDQ_ERR_CERROR_NONE_IDX] = "No error",
377 [CMDQ_ERR_CERROR_ILL_IDX] = "Illegal command",
378 [CMDQ_ERR_CERROR_ABT_IDX] = "Abort on command fetch",
379 [CMDQ_ERR_CERROR_ATC_INV_IDX] = "ATC invalidate timeout",
380 };
381
382 int i;
383 u64 cmd[CMDQ_ENT_DWORDS];
384 u32 cons = readl_relaxed(q->cons_reg);
385 u32 idx = FIELD_GET(CMDQ_CONS_ERR, cons);
386 struct arm_smmu_cmdq_ent cmd_sync = {
387 .opcode = CMDQ_OP_CMD_SYNC,
388 };
389
390 dev_err(smmu->dev, "CMDQ error (cons 0x%08x): %s\n", cons,
391 idx < ARRAY_SIZE(cerror_str) ? cerror_str[idx] : "Unknown");
392
393 switch (idx) {
394 case CMDQ_ERR_CERROR_ABT_IDX:
395 dev_err(smmu->dev, "retrying command fetch\n");
396 return;
397 case CMDQ_ERR_CERROR_NONE_IDX:
398 return;
399 case CMDQ_ERR_CERROR_ATC_INV_IDX:
400 /*
401 * ATC Invalidation Completion timeout. CONS is still pointing
402 * at the CMD_SYNC. Attempt to complete other pending commands
403 * by repeating the CMD_SYNC, though we might well end up back
404 * here since the ATC invalidation may still be pending.
405 */
406 return;
407 case CMDQ_ERR_CERROR_ILL_IDX:
408 default:
409 break;
410 }
411
412 /*
413 * We may have concurrent producers, so we need to be careful
414 * not to touch any of the shadow cmdq state.
415 */
416 queue_read(cmd, Q_ENT(q, cons), q->ent_dwords);
417 dev_err(smmu->dev, "skipping command in error state:\n");
418 for (i = 0; i < ARRAY_SIZE(cmd); ++i)
419 dev_err(smmu->dev, "\t0x%016llx\n", (unsigned long long)cmd[i]);
420
421 /* Convert the erroneous command into a CMD_SYNC */
422 arm_smmu_cmdq_build_cmd(cmd, &cmd_sync);
423
424 queue_write(Q_ENT(q, cons), cmd, q->ent_dwords);
425 }
426
427 static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
428 {
429 __arm_smmu_cmdq_skip_err(smmu, &smmu->cmdq.q);
430 }
431
432 /*
433 * Command queue locking.
434 * This is a form of bastardised rwlock with the following major changes:
435 *
436 * - The only LOCK routines are exclusive_trylock() and shared_lock().
437  *   Neither has barrier semantics, and instead they provide only a control
438 * dependency.
439 *
440 * - The UNLOCK routines are supplemented with shared_tryunlock(), which
441 * fails if the caller appears to be the last lock holder (yes, this is
442 * racy). All successful UNLOCK routines have RELEASE semantics.
443 */
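/*
 * The lock word is a plain atomic_t: a non-negative value counts the
 * shared holders, while INT_MIN marks the exclusive state. A shared
 * locker simply increments the counter; if the result was negative,
 * an exclusive holder owns the lock and the increment is harmless, so
 * the locker falls back to waiting for a non-negative value. The
 * exclusive locker cmpxchg()s the counter from 0 to INT_MIN and
 * releases it by writing 0 back.
 */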
444 static void arm_smmu_cmdq_shared_lock(struct arm_smmu_cmdq *cmdq)
445 {
446 int val;
447
448 /*
449 * We can try to avoid the cmpxchg() loop by simply incrementing the
450 * lock counter. When held in exclusive state, the lock counter is set
451 * to INT_MIN so these increments won't hurt as the value will remain
452 * negative.
453 */
454 if (atomic_fetch_inc_relaxed(&cmdq->lock) >= 0)
455 return;
456
457 do {
458 val = atomic_cond_read_relaxed(&cmdq->lock, VAL >= 0);
459 } while (atomic_cmpxchg_relaxed(&cmdq->lock, val, val + 1) != val);
460 }
461
462 static void arm_smmu_cmdq_shared_unlock(struct arm_smmu_cmdq *cmdq)
463 {
464 (void)atomic_dec_return_release(&cmdq->lock);
465 }
466
467 static bool arm_smmu_cmdq_shared_tryunlock(struct arm_smmu_cmdq *cmdq)
468 {
469 if (atomic_read(&cmdq->lock) == 1)
470 return false;
471
472 arm_smmu_cmdq_shared_unlock(cmdq);
473 return true;
474 }
475
476 #define arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags) \
477 ({ \
478 bool __ret; \
479 local_irq_save(flags); \
480 __ret = !atomic_cmpxchg_relaxed(&cmdq->lock, 0, INT_MIN); \
481 if (!__ret) \
482 local_irq_restore(flags); \
483 __ret; \
484 })
485
486 #define arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags) \
487 ({ \
488 atomic_set_release(&cmdq->lock, 0); \
489 local_irq_restore(flags); \
490 })
491
492
493 /*
494 * Command queue insertion.
495 * This is made fiddly by our attempts to achieve some sort of scalability
496 * since there is one queue shared amongst all of the CPUs in the system. If
497 * you like mixed-size concurrency, dependency ordering and relaxed atomics,
498 * then you'll *love* this monstrosity.
499 *
500 * The basic idea is to split the queue up into ranges of commands that are
501 * owned by a given CPU; the owner may not have written all of the commands
502 * itself, but is responsible for advancing the hardware prod pointer when
503 * the time comes. The algorithm is roughly:
504 *
505 * 1. Allocate some space in the queue. At this point we also discover
506 * whether the head of the queue is currently owned by another CPU,
507 * or whether we are the owner.
508 *
509 * 2. Write our commands into our allocated slots in the queue.
510 *
511 * 3. Mark our slots as valid in arm_smmu_cmdq.valid_map.
512 *
513 * 4. If we are an owner:
514 * a. Wait for the previous owner to finish.
515 * b. Mark the queue head as unowned, which tells us the range
516 * that we are responsible for publishing.
517 * c. Wait for all commands in our owned range to become valid.
518 * d. Advance the hardware prod pointer.
519 * e. Tell the next owner we've finished.
520 *
521 * 5. If we are inserting a CMD_SYNC (we may or may not have been an
522 * owner), then we need to stick around until it has completed:
523 * a. If we have MSIs, the SMMU can write back into the CMD_SYNC
524 * to clear the first 4 bytes.
525 * b. Otherwise, we spin waiting for the hardware cons pointer to
526 * advance past our command.
527 *
528 * The devil is in the details, particularly the use of locking for handling
529 * SYNC completion and freeing up space in the queue before we think that it is
530 * full.
531 */
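/*
 * Note that the space allocation in step (1) is a single cmpxchg() on
 * cmdq->q.llq.val, which carries both the shadow prod and cons values.
 * Ownership is tracked with CMDQ_PROD_OWNED_FLAG in prod: the CPU whose
 * cmpxchg() observes the flag clear becomes the owner and sets it, so
 * later claimants see the flag and know that somebody else will publish
 * their commands to the hardware.
 */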
532 static void __arm_smmu_cmdq_poll_set_valid_map(struct arm_smmu_cmdq *cmdq,
533 u32 sprod, u32 eprod, bool set)
534 {
535 u32 swidx, sbidx, ewidx, ebidx;
536 struct arm_smmu_ll_queue llq = {
537 .max_n_shift = cmdq->q.llq.max_n_shift,
538 .prod = sprod,
539 };
540
541 ewidx = BIT_WORD(Q_IDX(&llq, eprod));
542 ebidx = Q_IDX(&llq, eprod) % BITS_PER_LONG;
543
544 while (llq.prod != eprod) {
545 unsigned long mask;
546 atomic_long_t *ptr;
547 u32 limit = BITS_PER_LONG;
548
549 swidx = BIT_WORD(Q_IDX(&llq, llq.prod));
550 sbidx = Q_IDX(&llq, llq.prod) % BITS_PER_LONG;
551
552 ptr = &cmdq->valid_map[swidx];
553
554 if ((swidx == ewidx) && (sbidx < ebidx))
555 limit = ebidx;
556
557 mask = GENMASK(limit - 1, sbidx);
558
559 /*
560 * The valid bit is the inverse of the wrap bit. This means
561 * that a zero-initialised queue is invalid and, after marking
562 * all entries as valid, they become invalid again when we
563 * wrap.
564 */
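		/*
		 * For example, entries produced while the wrap bit is clear
		 * are published by XORing their bits from 0 to 1, so the
		 * polling case below waits for (VAL & mask) == mask. Once the
		 * producer wraps, the same XOR flips the bits back to 0 and
		 * the expected pattern becomes all-zeroes, which is what the
		 * ULONG_MAX + !!Q_WRP() expression computes.
		 */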
565 if (set) {
566 atomic_long_xor(mask, ptr);
567 } else { /* Poll */
568 unsigned long valid;
569
570 valid = (ULONG_MAX + !!Q_WRP(&llq, llq.prod)) & mask;
571 atomic_long_cond_read_relaxed(ptr, (VAL & mask) == valid);
572 }
573
574 llq.prod = queue_inc_prod_n(&llq, limit - sbidx);
575 }
576 }
577
578 /* Mark all entries in the range [sprod, eprod) as valid */
579 static void arm_smmu_cmdq_set_valid_map(struct arm_smmu_cmdq *cmdq,
580 u32 sprod, u32 eprod)
581 {
582 __arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, true);
583 }
584
585 /* Wait for all entries in the range [sprod, eprod) to become valid */
586 static void arm_smmu_cmdq_poll_valid_map(struct arm_smmu_cmdq *cmdq,
587 u32 sprod, u32 eprod)
588 {
589 __arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, false);
590 }
591
592 /* Wait for the command queue to become non-full */
593 static int arm_smmu_cmdq_poll_until_not_full(struct arm_smmu_device *smmu,
594 struct arm_smmu_ll_queue *llq)
595 {
596 unsigned long flags;
597 struct arm_smmu_queue_poll qp;
598 struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
599 int ret = 0;
600
601 /*
602 * Try to update our copy of cons by grabbing exclusive cmdq access. If
603 * that fails, spin until somebody else updates it for us.
604 */
605 if (arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)) {
606 WRITE_ONCE(cmdq->q.llq.cons, readl_relaxed(cmdq->q.cons_reg));
607 arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags);
608 llq->val = READ_ONCE(cmdq->q.llq.val);
609 return 0;
610 }
611
612 queue_poll_init(smmu, &qp);
613 do {
614 llq->val = READ_ONCE(cmdq->q.llq.val);
615 if (!queue_full(llq))
616 break;
617
618 ret = queue_poll(&qp);
619 } while (!ret);
620
621 return ret;
622 }
623
624 /*
625 * Wait until the SMMU signals a CMD_SYNC completion MSI.
626 * Must be called with the cmdq lock held in some capacity.
627 */
628 static int __arm_smmu_cmdq_poll_until_msi(struct arm_smmu_device *smmu,
629 struct arm_smmu_ll_queue *llq)
630 {
631 int ret = 0;
632 struct arm_smmu_queue_poll qp;
633 struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
634 u32 *cmd = (u32 *)(Q_ENT(&cmdq->q, llq->prod));
635
636 queue_poll_init(smmu, &qp);
637
638 /*
639 * The MSI won't generate an event, since it's being written back
640 * into the command queue.
641 */
642 qp.wfe = false;
643 smp_cond_load_relaxed(cmd, !VAL || (ret = queue_poll(&qp)));
644 llq->cons = ret ? llq->prod : queue_inc_prod_n(llq, 1);
645 return ret;
646 }
647
648 /*
649 * Wait until the SMMU cons index passes llq->prod.
650 * Must be called with the cmdq lock held in some capacity.
651 */
652 static int __arm_smmu_cmdq_poll_until_consumed(struct arm_smmu_device *smmu,
653 struct arm_smmu_ll_queue *llq)
654 {
655 struct arm_smmu_queue_poll qp;
656 struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
657 u32 prod = llq->prod;
658 int ret = 0;
659
660 queue_poll_init(smmu, &qp);
661 llq->val = READ_ONCE(cmdq->q.llq.val);
662 do {
663 if (queue_consumed(llq, prod))
664 break;
665
666 ret = queue_poll(&qp);
667
668 /*
669 * This needs to be a readl() so that our subsequent call
670 * to arm_smmu_cmdq_shared_tryunlock() can fail accurately.
671 *
672 * Specifically, we need to ensure that we observe all
673 * shared_lock()s by other CMD_SYNCs that share our owner,
674 * so that a failing call to tryunlock() means that we're
675 * the last one out and therefore we can safely advance
676 * cmdq->q.llq.cons. Roughly speaking:
677 *
678 * CPU 0 CPU1 CPU2 (us)
679 *
680 * if (sync)
681 * shared_lock();
682 *
683 * dma_wmb();
684 * set_valid_map();
685 *
686 * if (owner) {
687 * poll_valid_map();
688 * <control dependency>
689 * writel(prod_reg);
690 *
691 * readl(cons_reg);
692 * tryunlock();
693 *
694 * Requires us to see CPU 0's shared_lock() acquisition.
695 */
696 llq->cons = readl(cmdq->q.cons_reg);
697 } while (!ret);
698
699 return ret;
700 }
701
702 static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu,
703 struct arm_smmu_ll_queue *llq)
704 {
705 if (smmu->options & ARM_SMMU_OPT_MSIPOLL)
706 return __arm_smmu_cmdq_poll_until_msi(smmu, llq);
707
708 return __arm_smmu_cmdq_poll_until_consumed(smmu, llq);
709 }
710
711 static void arm_smmu_cmdq_write_entries(struct arm_smmu_cmdq *cmdq, u64 *cmds,
712 u32 prod, int n)
713 {
714 int i;
715 struct arm_smmu_ll_queue llq = {
716 .max_n_shift = cmdq->q.llq.max_n_shift,
717 .prod = prod,
718 };
719
720 for (i = 0; i < n; ++i) {
721 u64 *cmd = &cmds[i * CMDQ_ENT_DWORDS];
722
723 prod = queue_inc_prod_n(&llq, i);
724 queue_write(Q_ENT(&cmdq->q, prod), cmd, CMDQ_ENT_DWORDS);
725 }
726 }
727
728 /*
729 * This is the actual insertion function, and provides the following
730 * ordering guarantees to callers:
731 *
732 * - There is a dma_wmb() before publishing any commands to the queue.
733 * This can be relied upon to order prior writes to data structures
734 * in memory (such as a CD or an STE) before the command.
735 *
736 * - On completion of a CMD_SYNC, there is a control dependency.
737 * This can be relied upon to order subsequent writes to memory (e.g.
738 * freeing an IOVA) after completion of the CMD_SYNC.
739 *
740 * - Command insertion is totally ordered, so if two CPUs each race to
741 * insert their own list of commands then all of the commands from one
742 * CPU will appear before any of the commands from the other CPU.
743 */
744 static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
745 u64 *cmds, int n, bool sync)
746 {
747 u64 cmd_sync[CMDQ_ENT_DWORDS];
748 u32 prod;
749 unsigned long flags;
750 bool owner;
751 struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
752 struct arm_smmu_ll_queue llq, head;
753 int ret = 0;
754
755 llq.max_n_shift = cmdq->q.llq.max_n_shift;
756
757 /* 1. Allocate some space in the queue */
758 local_irq_save(flags);
759 llq.val = READ_ONCE(cmdq->q.llq.val);
760 do {
761 u64 old;
762
763 while (!queue_has_space(&llq, n + sync)) {
764 local_irq_restore(flags);
765 if (arm_smmu_cmdq_poll_until_not_full(smmu, &llq))
766 dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
767 local_irq_save(flags);
768 }
769
770 head.cons = llq.cons;
771 head.prod = queue_inc_prod_n(&llq, n + sync) |
772 CMDQ_PROD_OWNED_FLAG;
773
774 old = cmpxchg_relaxed(&cmdq->q.llq.val, llq.val, head.val);
775 if (old == llq.val)
776 break;
777
778 llq.val = old;
779 } while (1);
780 owner = !(llq.prod & CMDQ_PROD_OWNED_FLAG);
781 head.prod &= ~CMDQ_PROD_OWNED_FLAG;
782 llq.prod &= ~CMDQ_PROD_OWNED_FLAG;
783
784 /*
785 * 2. Write our commands into the queue
786 * Dependency ordering from the cmpxchg() loop above.
787 */
788 arm_smmu_cmdq_write_entries(cmdq, cmds, llq.prod, n);
789 if (sync) {
790 prod = queue_inc_prod_n(&llq, n);
791 arm_smmu_cmdq_build_sync_cmd(cmd_sync, smmu, &cmdq->q, prod);
792 queue_write(Q_ENT(&cmdq->q, prod), cmd_sync, CMDQ_ENT_DWORDS);
793
794 /*
795 * In order to determine completion of our CMD_SYNC, we must
796 * ensure that the queue can't wrap twice without us noticing.
797 * We achieve that by taking the cmdq lock as shared before
798 * marking our slot as valid.
799 */
800 arm_smmu_cmdq_shared_lock(cmdq);
801 }
802
803 /* 3. Mark our slots as valid, ensuring commands are visible first */
804 dma_wmb();
805 arm_smmu_cmdq_set_valid_map(cmdq, llq.prod, head.prod);
806
807 /* 4. If we are the owner, take control of the SMMU hardware */
808 if (owner) {
809 /* a. Wait for previous owner to finish */
810 atomic_cond_read_relaxed(&cmdq->owner_prod, VAL == llq.prod);
811
812 /* b. Stop gathering work by clearing the owned flag */
813 prod = atomic_fetch_andnot_relaxed(CMDQ_PROD_OWNED_FLAG,
814 &cmdq->q.llq.atomic.prod);
815 prod &= ~CMDQ_PROD_OWNED_FLAG;
816
817 /*
818 * c. Wait for any gathered work to be written to the queue.
819 * Note that we read our own entries so that we have the control
820 * dependency required by (d).
821 */
822 arm_smmu_cmdq_poll_valid_map(cmdq, llq.prod, prod);
823
824 /*
825 * d. Advance the hardware prod pointer
826 * Control dependency ordering from the entries becoming valid.
827 */
828 writel_relaxed(prod, cmdq->q.prod_reg);
829
830 /*
831 * e. Tell the next owner we're done
832 * Make sure we've updated the hardware first, so that we don't
833 * race to update prod and potentially move it backwards.
834 */
835 atomic_set_release(&cmdq->owner_prod, prod);
836 }
837
838 /* 5. If we are inserting a CMD_SYNC, we must wait for it to complete */
839 if (sync) {
840 llq.prod = queue_inc_prod_n(&llq, n);
841 ret = arm_smmu_cmdq_poll_until_sync(smmu, &llq);
842 if (ret) {
843 dev_err_ratelimited(smmu->dev,
844 "CMD_SYNC timeout at 0x%08x [hwprod 0x%08x, hwcons 0x%08x]\n",
845 llq.prod,
846 readl_relaxed(cmdq->q.prod_reg),
847 readl_relaxed(cmdq->q.cons_reg));
848 }
849
850 /*
851 * Try to unlock the cmdq lock. This will fail if we're the last
852 * reader, in which case we can safely update cmdq->q.llq.cons
853 */
854 if (!arm_smmu_cmdq_shared_tryunlock(cmdq)) {
855 WRITE_ONCE(cmdq->q.llq.cons, llq.cons);
856 arm_smmu_cmdq_shared_unlock(cmdq);
857 }
858 }
859
860 local_irq_restore(flags);
861 return ret;
862 }
863
864 static int __arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
865 struct arm_smmu_cmdq_ent *ent,
866 bool sync)
867 {
868 u64 cmd[CMDQ_ENT_DWORDS];
869
870 if (unlikely(arm_smmu_cmdq_build_cmd(cmd, ent))) {
871 dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
872 ent->opcode);
873 return -EINVAL;
874 }
875
876 return arm_smmu_cmdq_issue_cmdlist(smmu, cmd, 1, sync);
877 }
878
879 static int arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
880 struct arm_smmu_cmdq_ent *ent)
881 {
882 return __arm_smmu_cmdq_issue_cmd(smmu, ent, false);
883 }
884
885 static int arm_smmu_cmdq_issue_cmd_with_sync(struct arm_smmu_device *smmu,
886 struct arm_smmu_cmdq_ent *ent)
887 {
888 return __arm_smmu_cmdq_issue_cmd(smmu, ent, true);
889 }
890
891 static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu,
892 struct arm_smmu_cmdq_batch *cmds,
893 struct arm_smmu_cmdq_ent *cmd)
894 {
895 int index;
896
897 if (cmds->num == CMDQ_BATCH_ENTRIES - 1 &&
898 (smmu->options & ARM_SMMU_OPT_CMDQ_FORCE_SYNC)) {
899 arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, true);
900 cmds->num = 0;
901 }
902
903 if (cmds->num == CMDQ_BATCH_ENTRIES) {
904 arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, false);
905 cmds->num = 0;
906 }
907
908 index = cmds->num * CMDQ_ENT_DWORDS;
909 if (unlikely(arm_smmu_cmdq_build_cmd(&cmds->cmds[index], cmd))) {
910 dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
911 cmd->opcode);
912 return;
913 }
914
915 cmds->num++;
916 }
917
918 static int arm_smmu_cmdq_batch_submit(struct arm_smmu_device *smmu,
919 struct arm_smmu_cmdq_batch *cmds)
920 {
921 return arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, true);
922 }
923
924 static int arm_smmu_page_response(struct device *dev,
925 struct iommu_fault_event *unused,
926 struct iommu_page_response *resp)
927 {
928 struct arm_smmu_cmdq_ent cmd = {0};
929 struct arm_smmu_master *master = dev_iommu_priv_get(dev);
930 int sid = master->streams[0].id;
931
932 if (master->stall_enabled) {
933 cmd.opcode = CMDQ_OP_RESUME;
934 cmd.resume.sid = sid;
935 cmd.resume.stag = resp->grpid;
936 switch (resp->code) {
937 case IOMMU_PAGE_RESP_INVALID:
938 case IOMMU_PAGE_RESP_FAILURE:
939 cmd.resume.resp = CMDQ_RESUME_0_RESP_ABORT;
940 break;
941 case IOMMU_PAGE_RESP_SUCCESS:
942 cmd.resume.resp = CMDQ_RESUME_0_RESP_RETRY;
943 break;
944 default:
945 return -EINVAL;
946 }
947 } else {
948 return -ENODEV;
949 }
950
951 arm_smmu_cmdq_issue_cmd(master->smmu, &cmd);
952 /*
953 * Don't send a SYNC, it doesn't do anything for RESUME or PRI_RESP.
954 * RESUME consumption guarantees that the stalled transaction will be
955 * terminated... at some point in the future. PRI_RESP is fire and
956 * forget.
957 */
958
959 return 0;
960 }
961
962 /* Context descriptor manipulation functions */
963 void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid)
964 {
965 struct arm_smmu_cmdq_ent cmd = {
966 .opcode = smmu->features & ARM_SMMU_FEAT_E2H ?
967 CMDQ_OP_TLBI_EL2_ASID : CMDQ_OP_TLBI_NH_ASID,
968 .tlbi.asid = asid,
969 };
970
971 arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
972 }
973
974 static void arm_smmu_sync_cd(struct arm_smmu_domain *smmu_domain,
975 int ssid, bool leaf)
976 {
977 size_t i;
978 unsigned long flags;
979 struct arm_smmu_master *master;
980 struct arm_smmu_cmdq_batch cmds;
981 struct arm_smmu_device *smmu = smmu_domain->smmu;
982 struct arm_smmu_cmdq_ent cmd = {
983 .opcode = CMDQ_OP_CFGI_CD,
984 .cfgi = {
985 .ssid = ssid,
986 .leaf = leaf,
987 },
988 };
989
990 cmds.num = 0;
991
992 spin_lock_irqsave(&smmu_domain->devices_lock, flags);
993 list_for_each_entry(master, &smmu_domain->devices, domain_head) {
994 for (i = 0; i < master->num_streams; i++) {
995 cmd.cfgi.sid = master->streams[i].id;
996 arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
997 }
998 }
999 spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
1000
1001 arm_smmu_cmdq_batch_submit(smmu, &cmds);
1002 }
1003
1004 static int arm_smmu_alloc_cd_leaf_table(struct arm_smmu_device *smmu,
1005 struct arm_smmu_l1_ctx_desc *l1_desc)
1006 {
1007 size_t size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
1008
1009 l1_desc->l2ptr = dmam_alloc_coherent(smmu->dev, size,
1010 &l1_desc->l2ptr_dma, GFP_KERNEL);
1011 if (!l1_desc->l2ptr) {
1012 dev_warn(smmu->dev,
1013 "failed to allocate context descriptor table\n");
1014 return -ENOMEM;
1015 }
1016 return 0;
1017 }
1018
1019 static void arm_smmu_write_cd_l1_desc(__le64 *dst,
1020 struct arm_smmu_l1_ctx_desc *l1_desc)
1021 {
1022 u64 val = (l1_desc->l2ptr_dma & CTXDESC_L1_DESC_L2PTR_MASK) |
1023 CTXDESC_L1_DESC_V;
1024
1025 /* See comment in arm_smmu_write_ctx_desc() */
1026 WRITE_ONCE(*dst, cpu_to_le64(val));
1027 }
1028
1029 static __le64 *arm_smmu_get_cd_ptr(struct arm_smmu_domain *smmu_domain,
1030 u32 ssid)
1031 {
1032 __le64 *l1ptr;
1033 unsigned int idx;
1034 struct arm_smmu_l1_ctx_desc *l1_desc;
1035 struct arm_smmu_device *smmu = smmu_domain->smmu;
1036 struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg;
1037
1038 if (smmu_domain->s1_cfg.s1fmt == STRTAB_STE_0_S1FMT_LINEAR)
1039 return cdcfg->cdtab + ssid * CTXDESC_CD_DWORDS;
1040
1041 idx = ssid >> CTXDESC_SPLIT;
1042 l1_desc = &cdcfg->l1_desc[idx];
1043 if (!l1_desc->l2ptr) {
1044 if (arm_smmu_alloc_cd_leaf_table(smmu, l1_desc))
1045 return NULL;
1046
1047 l1ptr = cdcfg->cdtab + idx * CTXDESC_L1_DESC_DWORDS;
1048 arm_smmu_write_cd_l1_desc(l1ptr, l1_desc);
1049 /* An invalid L1CD can be cached */
1050 arm_smmu_sync_cd(smmu_domain, ssid, false);
1051 }
1052 idx = ssid & (CTXDESC_L2_ENTRIES - 1);
1053 return l1_desc->l2ptr + idx * CTXDESC_CD_DWORDS;
1054 }
1055
1056 int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid,
1057 struct arm_smmu_ctx_desc *cd)
1058 {
1059 /*
1060 * This function handles the following cases:
1061 *
1062 * (1) Install primary CD, for normal DMA traffic (SSID = IOMMU_NO_PASID = 0).
1063 * (2) Install a secondary CD, for SID+SSID traffic.
1064 * (3) Update ASID of a CD. Atomically write the first 64 bits of the
1065 * CD, then invalidate the old entry and mappings.
1066 * (4) Quiesce the context without clearing the valid bit. Disable
1067 * translation, and ignore any translation fault.
1068 * (5) Remove a secondary CD.
1069 */
1070 u64 val;
1071 bool cd_live;
1072 __le64 *cdptr;
1073
1074 if (WARN_ON(ssid >= (1 << smmu_domain->s1_cfg.s1cdmax)))
1075 return -E2BIG;
1076
1077 cdptr = arm_smmu_get_cd_ptr(smmu_domain, ssid);
1078 if (!cdptr)
1079 return -ENOMEM;
1080
1081 val = le64_to_cpu(cdptr[0]);
1082 cd_live = !!(val & CTXDESC_CD_0_V);
1083
1084 if (!cd) { /* (5) */
1085 val = 0;
1086 } else if (cd == &quiet_cd) { /* (4) */
1087 val |= CTXDESC_CD_0_TCR_EPD0;
1088 } else if (cd_live) { /* (3) */
1089 val &= ~CTXDESC_CD_0_ASID;
1090 val |= FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid);
1091 /*
1092 * Until CD+TLB invalidation, both ASIDs may be used for tagging
1093 * this substream's traffic
1094 */
1095 } else { /* (1) and (2) */
1096 cdptr[1] = cpu_to_le64(cd->ttbr & CTXDESC_CD_1_TTB0_MASK);
1097 cdptr[2] = 0;
1098 cdptr[3] = cpu_to_le64(cd->mair);
1099
1100 /*
1101 * STE is live, and the SMMU might read dwords of this CD in any
1102 * order. Ensure that it observes valid values before reading
1103 * V=1.
1104 */
1105 arm_smmu_sync_cd(smmu_domain, ssid, true);
1106
1107 val = cd->tcr |
1108 #ifdef __BIG_ENDIAN
1109 CTXDESC_CD_0_ENDI |
1110 #endif
1111 CTXDESC_CD_0_R | CTXDESC_CD_0_A |
1112 (cd->mm ? 0 : CTXDESC_CD_0_ASET) |
1113 CTXDESC_CD_0_AA64 |
1114 FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid) |
1115 CTXDESC_CD_0_V;
1116
1117 if (smmu_domain->stall_enabled)
1118 val |= CTXDESC_CD_0_S;
1119 }
1120
1121 /*
1122 * The SMMU accesses 64-bit values atomically. See IHI0070Ca 3.21.3
1123 * "Configuration structures and configuration invalidation completion"
1124 *
1125 * The size of single-copy atomic reads made by the SMMU is
1126 * IMPLEMENTATION DEFINED but must be at least 64 bits. Any single
1127 * field within an aligned 64-bit span of a structure can be altered
1128 * without first making the structure invalid.
1129 */
1130 WRITE_ONCE(cdptr[0], cpu_to_le64(val));
1131 arm_smmu_sync_cd(smmu_domain, ssid, true);
1132 return 0;
1133 }
1134
1135 static int arm_smmu_alloc_cd_tables(struct arm_smmu_domain *smmu_domain)
1136 {
1137 int ret;
1138 size_t l1size;
1139 size_t max_contexts;
1140 struct arm_smmu_device *smmu = smmu_domain->smmu;
1141 struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
1142 struct arm_smmu_ctx_desc_cfg *cdcfg = &cfg->cdcfg;
1143
1144 max_contexts = 1 << cfg->s1cdmax;
1145
1146 if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB) ||
1147 max_contexts <= CTXDESC_L2_ENTRIES) {
1148 cfg->s1fmt = STRTAB_STE_0_S1FMT_LINEAR;
1149 cdcfg->num_l1_ents = max_contexts;
1150
1151 l1size = max_contexts * (CTXDESC_CD_DWORDS << 3);
1152 } else {
1153 cfg->s1fmt = STRTAB_STE_0_S1FMT_64K_L2;
1154 cdcfg->num_l1_ents = DIV_ROUND_UP(max_contexts,
1155 CTXDESC_L2_ENTRIES);
1156
1157 cdcfg->l1_desc = devm_kcalloc(smmu->dev, cdcfg->num_l1_ents,
1158 sizeof(*cdcfg->l1_desc),
1159 GFP_KERNEL);
1160 if (!cdcfg->l1_desc)
1161 return -ENOMEM;
1162
1163 l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
1164 }
1165
1166 cdcfg->cdtab = dmam_alloc_coherent(smmu->dev, l1size, &cdcfg->cdtab_dma,
1167 GFP_KERNEL);
1168 if (!cdcfg->cdtab) {
1169 dev_warn(smmu->dev, "failed to allocate context descriptor\n");
1170 ret = -ENOMEM;
1171 goto err_free_l1;
1172 }
1173
1174 return 0;
1175
1176 err_free_l1:
1177 if (cdcfg->l1_desc) {
1178 devm_kfree(smmu->dev, cdcfg->l1_desc);
1179 cdcfg->l1_desc = NULL;
1180 }
1181 return ret;
1182 }
1183
1184 static void arm_smmu_free_cd_tables(struct arm_smmu_domain *smmu_domain)
1185 {
1186 int i;
1187 size_t size, l1size;
1188 struct arm_smmu_device *smmu = smmu_domain->smmu;
1189 struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg;
1190
1191 if (cdcfg->l1_desc) {
1192 size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
1193
1194 for (i = 0; i < cdcfg->num_l1_ents; i++) {
1195 if (!cdcfg->l1_desc[i].l2ptr)
1196 continue;
1197
1198 dmam_free_coherent(smmu->dev, size,
1199 cdcfg->l1_desc[i].l2ptr,
1200 cdcfg->l1_desc[i].l2ptr_dma);
1201 }
1202 devm_kfree(smmu->dev, cdcfg->l1_desc);
1203 cdcfg->l1_desc = NULL;
1204
1205 l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
1206 } else {
1207 l1size = cdcfg->num_l1_ents * (CTXDESC_CD_DWORDS << 3);
1208 }
1209
1210 dmam_free_coherent(smmu->dev, l1size, cdcfg->cdtab, cdcfg->cdtab_dma);
1211 cdcfg->cdtab_dma = 0;
1212 cdcfg->cdtab = NULL;
1213 }
1214
1215 bool arm_smmu_free_asid(struct arm_smmu_ctx_desc *cd)
1216 {
1217 bool free;
1218 struct arm_smmu_ctx_desc *old_cd;
1219
1220 if (!cd->asid)
1221 return false;
1222
1223 free = refcount_dec_and_test(&cd->refs);
1224 if (free) {
1225 old_cd = xa_erase(&arm_smmu_asid_xa, cd->asid);
1226 WARN_ON(old_cd != cd);
1227 }
1228 return free;
1229 }
1230
1231 /* Stream table manipulation functions */
1232 static void
1233 arm_smmu_write_strtab_l1_desc(__le64 *dst, struct arm_smmu_strtab_l1_desc *desc)
1234 {
1235 u64 val = 0;
1236
1237 val |= FIELD_PREP(STRTAB_L1_DESC_SPAN, desc->span);
1238 val |= desc->l2ptr_dma & STRTAB_L1_DESC_L2PTR_MASK;
1239
1240 /* See comment in arm_smmu_write_ctx_desc() */
1241 WRITE_ONCE(*dst, cpu_to_le64(val));
1242 }
1243
1244 static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu, u32 sid)
1245 {
1246 struct arm_smmu_cmdq_ent cmd = {
1247 .opcode = CMDQ_OP_CFGI_STE,
1248 .cfgi = {
1249 .sid = sid,
1250 .leaf = true,
1251 },
1252 };
1253
1254 arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
1255 }
1256
1257 static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
1258 __le64 *dst)
1259 {
1260 /*
1261 * This is hideously complicated, but we only really care about
1262 * three cases at the moment:
1263 *
1264 * 1. Invalid (all zero) -> bypass/fault (init)
1265 * 2. Bypass/fault -> translation/bypass (attach)
1266 * 3. Translation/bypass -> bypass/fault (detach)
1267 *
1268 * Given that we can't update the STE atomically and the SMMU
1269 * doesn't read the thing in a defined order, that leaves us
1270 * with the following maintenance requirements:
1271 *
1272 * 1. Update Config, return (init time STEs aren't live)
1273 * 2. Write everything apart from dword 0, sync, write dword 0, sync
1274 * 3. Update Config, sync
1275 */
1276 u64 val = le64_to_cpu(dst[0]);
1277 bool ste_live = false;
1278 struct arm_smmu_device *smmu = NULL;
1279 struct arm_smmu_s1_cfg *s1_cfg = NULL;
1280 struct arm_smmu_s2_cfg *s2_cfg = NULL;
1281 struct arm_smmu_domain *smmu_domain = NULL;
1282 struct arm_smmu_cmdq_ent prefetch_cmd = {
1283 .opcode = CMDQ_OP_PREFETCH_CFG,
1284 .prefetch = {
1285 .sid = sid,
1286 },
1287 };
1288
1289 if (master) {
1290 smmu_domain = master->domain;
1291 smmu = master->smmu;
1292 }
1293
1294 if (smmu_domain) {
1295 switch (smmu_domain->stage) {
1296 case ARM_SMMU_DOMAIN_S1:
1297 s1_cfg = &smmu_domain->s1_cfg;
1298 break;
1299 case ARM_SMMU_DOMAIN_S2:
1300 case ARM_SMMU_DOMAIN_NESTED:
1301 s2_cfg = &smmu_domain->s2_cfg;
1302 break;
1303 default:
1304 break;
1305 }
1306 }
1307
1308 if (val & STRTAB_STE_0_V) {
1309 switch (FIELD_GET(STRTAB_STE_0_CFG, val)) {
1310 case STRTAB_STE_0_CFG_BYPASS:
1311 break;
1312 case STRTAB_STE_0_CFG_S1_TRANS:
1313 case STRTAB_STE_0_CFG_S2_TRANS:
1314 ste_live = true;
1315 break;
1316 case STRTAB_STE_0_CFG_ABORT:
1317 BUG_ON(!disable_bypass);
1318 break;
1319 default:
1320 BUG(); /* STE corruption */
1321 }
1322 }
1323
1324 /* Nuke the existing STE_0 value, as we're going to rewrite it */
1325 val = STRTAB_STE_0_V;
1326
1327 /* Bypass/fault */
1328 if (!smmu_domain || !(s1_cfg || s2_cfg)) {
1329 if (!smmu_domain && disable_bypass)
1330 val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT);
1331 else
1332 val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS);
1333
1334 dst[0] = cpu_to_le64(val);
1335 dst[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
1336 STRTAB_STE_1_SHCFG_INCOMING));
1337 dst[2] = 0; /* Nuke the VMID */
1338 /*
1339 * The SMMU can perform negative caching, so we must sync
1340 * the STE regardless of whether the old value was live.
1341 */
1342 if (smmu)
1343 arm_smmu_sync_ste_for_sid(smmu, sid);
1344 return;
1345 }
1346
1347 if (s1_cfg) {
1348 u64 strw = smmu->features & ARM_SMMU_FEAT_E2H ?
1349 STRTAB_STE_1_STRW_EL2 : STRTAB_STE_1_STRW_NSEL1;
1350
1351 BUG_ON(ste_live);
1352 dst[1] = cpu_to_le64(
1353 FIELD_PREP(STRTAB_STE_1_S1DSS, STRTAB_STE_1_S1DSS_SSID0) |
1354 FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1355 FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1356 FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH) |
1357 FIELD_PREP(STRTAB_STE_1_STRW, strw));
1358
1359 if (smmu->features & ARM_SMMU_FEAT_STALLS &&
1360 !master->stall_enabled)
1361 dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);
1362
1363 val |= (s1_cfg->cdcfg.cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
1364 FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) |
1365 FIELD_PREP(STRTAB_STE_0_S1CDMAX, s1_cfg->s1cdmax) |
1366 FIELD_PREP(STRTAB_STE_0_S1FMT, s1_cfg->s1fmt);
1367 }
1368
1369 if (s2_cfg) {
1370 BUG_ON(ste_live);
1371 dst[2] = cpu_to_le64(
1372 FIELD_PREP(STRTAB_STE_2_S2VMID, s2_cfg->vmid) |
1373 FIELD_PREP(STRTAB_STE_2_VTCR, s2_cfg->vtcr) |
1374 #ifdef __BIG_ENDIAN
1375 STRTAB_STE_2_S2ENDI |
1376 #endif
1377 STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2AA64 |
1378 STRTAB_STE_2_S2R);
1379
1380 dst[3] = cpu_to_le64(s2_cfg->vttbr & STRTAB_STE_3_S2TTB_MASK);
1381
1382 val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S2_TRANS);
1383 }
1384
1385 if (master->ats_enabled)
1386 dst[1] |= cpu_to_le64(FIELD_PREP(STRTAB_STE_1_EATS,
1387 STRTAB_STE_1_EATS_TRANS));
1388
1389 arm_smmu_sync_ste_for_sid(smmu, sid);
1390 /* See comment in arm_smmu_write_ctx_desc() */
1391 WRITE_ONCE(dst[0], cpu_to_le64(val));
1392 arm_smmu_sync_ste_for_sid(smmu, sid);
1393
1394 /* It's likely that we'll want to use the new STE soon */
1395 if (!(smmu->options & ARM_SMMU_OPT_SKIP_PREFETCH))
1396 arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd);
1397 }
1398
1399 static void arm_smmu_init_bypass_stes(__le64 *strtab, unsigned int nent, bool force)
1400 {
1401 unsigned int i;
1402 u64 val = STRTAB_STE_0_V;
1403
1404 if (disable_bypass && !force)
1405 val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT);
1406 else
1407 val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS);
1408
1409 for (i = 0; i < nent; ++i) {
1410 strtab[0] = cpu_to_le64(val);
1411 strtab[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
1412 STRTAB_STE_1_SHCFG_INCOMING));
1413 strtab[2] = 0;
1414 strtab += STRTAB_STE_DWORDS;
1415 }
1416 }
1417
1418 static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
1419 {
1420 size_t size;
1421 void *strtab;
1422 struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
1423 struct arm_smmu_strtab_l1_desc *desc = &cfg->l1_desc[sid >> STRTAB_SPLIT];
1424
1425 if (desc->l2ptr)
1426 return 0;
1427
1428 size = 1 << (STRTAB_SPLIT + ilog2(STRTAB_STE_DWORDS) + 3);
1429 strtab = &cfg->strtab[(sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS];
1430
1431 desc->span = STRTAB_SPLIT + 1;
1432 desc->l2ptr = dmam_alloc_coherent(smmu->dev, size, &desc->l2ptr_dma,
1433 GFP_KERNEL);
1434 if (!desc->l2ptr) {
1435 dev_err(smmu->dev,
1436 "failed to allocate l2 stream table for SID %u\n",
1437 sid);
1438 return -ENOMEM;
1439 }
1440
1441 arm_smmu_init_bypass_stes(desc->l2ptr, 1 << STRTAB_SPLIT, false);
1442 arm_smmu_write_strtab_l1_desc(strtab, desc);
1443 return 0;
1444 }
1445
1446 static struct arm_smmu_master *
1447 arm_smmu_find_master(struct arm_smmu_device *smmu, u32 sid)
1448 {
1449 struct rb_node *node;
1450 struct arm_smmu_stream *stream;
1451
1452 lockdep_assert_held(&smmu->streams_mutex);
1453
1454 node = smmu->streams.rb_node;
1455 while (node) {
1456 stream = rb_entry(node, struct arm_smmu_stream, node);
1457 if (stream->id < sid)
1458 node = node->rb_right;
1459 else if (stream->id > sid)
1460 node = node->rb_left;
1461 else
1462 return stream->master;
1463 }
1464
1465 return NULL;
1466 }
1467
1468 /* IRQ and event handlers */
1469 static int arm_smmu_handle_evt(struct arm_smmu_device *smmu, u64 *evt)
1470 {
1471 int ret;
1472 u32 reason;
1473 u32 perm = 0;
1474 struct arm_smmu_master *master;
1475 bool ssid_valid = evt[0] & EVTQ_0_SSV;
1476 u32 sid = FIELD_GET(EVTQ_0_SID, evt[0]);
1477 struct iommu_fault_event fault_evt = { };
1478 struct iommu_fault *flt = &fault_evt.fault;
1479
1480 switch (FIELD_GET(EVTQ_0_ID, evt[0])) {
1481 case EVT_ID_TRANSLATION_FAULT:
1482 reason = IOMMU_FAULT_REASON_PTE_FETCH;
1483 break;
1484 case EVT_ID_ADDR_SIZE_FAULT:
1485 reason = IOMMU_FAULT_REASON_OOR_ADDRESS;
1486 break;
1487 case EVT_ID_ACCESS_FAULT:
1488 reason = IOMMU_FAULT_REASON_ACCESS;
1489 break;
1490 case EVT_ID_PERMISSION_FAULT:
1491 reason = IOMMU_FAULT_REASON_PERMISSION;
1492 break;
1493 default:
1494 return -EOPNOTSUPP;
1495 }
1496
1497 /* Stage-2 is always pinned at the moment */
1498 if (evt[1] & EVTQ_1_S2)
1499 return -EFAULT;
1500
1501 if (evt[1] & EVTQ_1_RnW)
1502 perm |= IOMMU_FAULT_PERM_READ;
1503 else
1504 perm |= IOMMU_FAULT_PERM_WRITE;
1505
1506 if (evt[1] & EVTQ_1_InD)
1507 perm |= IOMMU_FAULT_PERM_EXEC;
1508
1509 if (evt[1] & EVTQ_1_PnU)
1510 perm |= IOMMU_FAULT_PERM_PRIV;
1511
1512 if (evt[1] & EVTQ_1_STALL) {
1513 flt->type = IOMMU_FAULT_PAGE_REQ;
1514 flt->prm = (struct iommu_fault_page_request) {
1515 .flags = IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE,
1516 .grpid = FIELD_GET(EVTQ_1_STAG, evt[1]),
1517 .perm = perm,
1518 .addr = FIELD_GET(EVTQ_2_ADDR, evt[2]),
1519 };
1520
1521 if (ssid_valid) {
1522 flt->prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
1523 flt->prm.pasid = FIELD_GET(EVTQ_0_SSID, evt[0]);
1524 }
1525 } else {
1526 flt->type = IOMMU_FAULT_DMA_UNRECOV;
1527 flt->event = (struct iommu_fault_unrecoverable) {
1528 .reason = reason,
1529 .flags = IOMMU_FAULT_UNRECOV_ADDR_VALID,
1530 .perm = perm,
1531 .addr = FIELD_GET(EVTQ_2_ADDR, evt[2]),
1532 };
1533
1534 if (ssid_valid) {
1535 flt->event.flags |= IOMMU_FAULT_UNRECOV_PASID_VALID;
1536 flt->event.pasid = FIELD_GET(EVTQ_0_SSID, evt[0]);
1537 }
1538 }
1539
1540 mutex_lock(&smmu->streams_mutex);
1541 master = arm_smmu_find_master(smmu, sid);
1542 if (!master) {
1543 ret = -EINVAL;
1544 goto out_unlock;
1545 }
1546
1547 ret = iommu_report_device_fault(master->dev, &fault_evt);
1548 if (ret && flt->type == IOMMU_FAULT_PAGE_REQ) {
1549 /* Nobody cared, abort the access */
1550 struct iommu_page_response resp = {
1551 .pasid = flt->prm.pasid,
1552 .grpid = flt->prm.grpid,
1553 .code = IOMMU_PAGE_RESP_FAILURE,
1554 };
1555 arm_smmu_page_response(master->dev, &fault_evt, &resp);
1556 }
1557
1558 out_unlock:
1559 mutex_unlock(&smmu->streams_mutex);
1560 return ret;
1561 }
1562
1563 static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
1564 {
1565 int i, ret;
1566 struct arm_smmu_device *smmu = dev;
1567 struct arm_smmu_queue *q = &smmu->evtq.q;
1568 struct arm_smmu_ll_queue *llq = &q->llq;
1569 static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
1570 DEFAULT_RATELIMIT_BURST);
1571 u64 evt[EVTQ_ENT_DWORDS];
1572
1573 do {
1574 while (!queue_remove_raw(q, evt)) {
1575 u8 id = FIELD_GET(EVTQ_0_ID, evt[0]);
1576
1577 ret = arm_smmu_handle_evt(smmu, evt);
1578 if (!ret || !__ratelimit(&rs))
1579 continue;
1580
1581 dev_info(smmu->dev, "event 0x%02x received:\n", id);
1582 for (i = 0; i < ARRAY_SIZE(evt); ++i)
1583 dev_info(smmu->dev, "\t0x%016llx\n",
1584 (unsigned long long)evt[i]);
1585
1586 cond_resched();
1587 }
1588
1589 /*
1590 * Not much we can do on overflow, so scream and pretend we're
1591 * trying harder.
1592 */
1593 if (queue_sync_prod_in(q) == -EOVERFLOW)
1594 dev_err(smmu->dev, "EVTQ overflow detected -- events lost\n");
1595 } while (!queue_empty(llq));
1596
1597 /* Sync our overflow flag, as we believe we're up to speed */
1598 queue_sync_cons_ovf(q);
1599 return IRQ_HANDLED;
1600 }
1601
1602 static void arm_smmu_handle_ppr(struct arm_smmu_device *smmu, u64 *evt)
1603 {
1604 u32 sid, ssid;
1605 u16 grpid;
1606 bool ssv, last;
1607
1608 sid = FIELD_GET(PRIQ_0_SID, evt[0]);
1609 ssv = FIELD_GET(PRIQ_0_SSID_V, evt[0]);
1610 ssid = ssv ? FIELD_GET(PRIQ_0_SSID, evt[0]) : IOMMU_NO_PASID;
1611 last = FIELD_GET(PRIQ_0_PRG_LAST, evt[0]);
1612 grpid = FIELD_GET(PRIQ_1_PRG_IDX, evt[1]);
1613
1614 dev_info(smmu->dev, "unexpected PRI request received:\n");
1615 dev_info(smmu->dev,
1616 "\tsid 0x%08x.0x%05x: [%u%s] %sprivileged %s%s%s access at iova 0x%016llx\n",
1617 sid, ssid, grpid, last ? "L" : "",
1618 evt[0] & PRIQ_0_PERM_PRIV ? "" : "un",
1619 evt[0] & PRIQ_0_PERM_READ ? "R" : "",
1620 evt[0] & PRIQ_0_PERM_WRITE ? "W" : "",
1621 evt[0] & PRIQ_0_PERM_EXEC ? "X" : "",
1622 evt[1] & PRIQ_1_ADDR_MASK);
1623
1624 if (last) {
1625 struct arm_smmu_cmdq_ent cmd = {
1626 .opcode = CMDQ_OP_PRI_RESP,
1627 .substream_valid = ssv,
1628 .pri = {
1629 .sid = sid,
1630 .ssid = ssid,
1631 .grpid = grpid,
1632 .resp = PRI_RESP_DENY,
1633 },
1634 };
1635
1636 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1637 }
1638 }
1639
1640 static irqreturn_t arm_smmu_priq_thread(int irq, void *dev)
1641 {
1642 struct arm_smmu_device *smmu = dev;
1643 struct arm_smmu_queue *q = &smmu->priq.q;
1644 struct arm_smmu_ll_queue *llq = &q->llq;
1645 u64 evt[PRIQ_ENT_DWORDS];
1646
1647 do {
1648 while (!queue_remove_raw(q, evt))
1649 arm_smmu_handle_ppr(smmu, evt);
1650
1651 if (queue_sync_prod_in(q) == -EOVERFLOW)
1652 dev_err(smmu->dev, "PRIQ overflow detected -- requests lost\n");
1653 } while (!queue_empty(llq));
1654
1655 /* Sync our overflow flag, as we believe we're up to speed */
1656 queue_sync_cons_ovf(q);
1657 return IRQ_HANDLED;
1658 }
1659
1660 static int arm_smmu_device_disable(struct arm_smmu_device *smmu);
1661
1662 static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
1663 {
1664 u32 gerror, gerrorn, active;
1665 struct arm_smmu_device *smmu = dev;
1666
1667 gerror = readl_relaxed(smmu->base + ARM_SMMU_GERROR);
1668 gerrorn = readl_relaxed(smmu->base + ARM_SMMU_GERRORN);
1669
1670 active = gerror ^ gerrorn;
1671 if (!(active & GERROR_ERR_MASK))
1672 return IRQ_NONE; /* No errors pending */
1673
1674 dev_warn(smmu->dev,
1675 "unexpected global error reported (0x%08x), this could be serious\n",
1676 active);
1677
1678 if (active & GERROR_SFM_ERR) {
1679 dev_err(smmu->dev, "device has entered Service Failure Mode!\n");
1680 arm_smmu_device_disable(smmu);
1681 }
1682
1683 if (active & GERROR_MSI_GERROR_ABT_ERR)
1684 dev_warn(smmu->dev, "GERROR MSI write aborted\n");
1685
1686 if (active & GERROR_MSI_PRIQ_ABT_ERR)
1687 dev_warn(smmu->dev, "PRIQ MSI write aborted\n");
1688
1689 if (active & GERROR_MSI_EVTQ_ABT_ERR)
1690 dev_warn(smmu->dev, "EVTQ MSI write aborted\n");
1691
1692 if (active & GERROR_MSI_CMDQ_ABT_ERR)
1693 dev_warn(smmu->dev, "CMDQ MSI write aborted\n");
1694
1695 if (active & GERROR_PRIQ_ABT_ERR)
1696 dev_err(smmu->dev, "PRIQ write aborted -- events may have been lost\n");
1697
1698 if (active & GERROR_EVTQ_ABT_ERR)
1699 dev_err(smmu->dev, "EVTQ write aborted -- events may have been lost\n");
1700
1701 if (active & GERROR_CMDQ_ERR)
1702 arm_smmu_cmdq_skip_err(smmu);
1703
1704 writel(gerror, smmu->base + ARM_SMMU_GERRORN);
1705 return IRQ_HANDLED;
1706 }
1707
1708 static irqreturn_t arm_smmu_combined_irq_thread(int irq, void *dev)
1709 {
1710 struct arm_smmu_device *smmu = dev;
1711
1712 arm_smmu_evtq_thread(irq, dev);
1713 if (smmu->features & ARM_SMMU_FEAT_PRI)
1714 arm_smmu_priq_thread(irq, dev);
1715
1716 return IRQ_HANDLED;
1717 }
1718
1719 static irqreturn_t arm_smmu_combined_irq_handler(int irq, void *dev)
1720 {
1721 arm_smmu_gerror_handler(irq, dev);
1722 return IRQ_WAKE_THREAD;
1723 }
1724
1725 static void
1726 arm_smmu_atc_inv_to_cmd(int ssid, unsigned long iova, size_t size,
1727 struct arm_smmu_cmdq_ent *cmd)
1728 {
1729 size_t log2_span;
1730 size_t span_mask;
1731 /* ATC invalidates are always on 4096-bytes pages */
1732 size_t inval_grain_shift = 12;
1733 unsigned long page_start, page_end;
1734
1735 /*
1736 * ATS and PASID:
1737 *
1738 * If substream_valid is clear, the PCIe TLP is sent without a PASID
1739 * prefix. In that case all ATC entries within the address range are
1740 * invalidated, including those that were requested with a PASID! There
1741 * is no way to invalidate only entries without PASID.
1742 *
1743 * When using STRTAB_STE_1_S1DSS_SSID0 (reserving CD 0 for non-PASID
1744 * traffic), translation requests without PASID create ATC entries
1745 * without PASID, which must be invalidated with substream_valid clear.
1746 * This has the unpleasant side-effect of invalidating all PASID-tagged
1747 * ATC entries within the address range.
1748 */
1749 *cmd = (struct arm_smmu_cmdq_ent) {
1750 .opcode = CMDQ_OP_ATC_INV,
1751 .substream_valid = (ssid != IOMMU_NO_PASID),
1752 .atc.ssid = ssid,
1753 };
1754
1755 if (!size) {
1756 cmd->atc.size = ATC_INV_SIZE_ALL;
1757 return;
1758 }
1759
1760 page_start = iova >> inval_grain_shift;
1761 page_end = (iova + size - 1) >> inval_grain_shift;
1762
1763 /*
1764 * In an ATS Invalidate Request, the address must be aligned on the
1765 * range size, which must be a power of two number of page sizes. We
1766 * thus have to choose between grossly over-invalidating the region, or
1767 * splitting the invalidation into multiple commands. For simplicity
1768 * we'll go with the first solution, but should refine it in the future
1769 * if multiple commands are shown to be more efficient.
1770 *
1771 * Find the smallest power of two that covers the range. The most
1772 * significant differing bit between the start and end addresses,
1773 * fls(start ^ end), indicates the required span. For example:
1774 *
1775 * We want to invalidate pages [8; 11]. This is already the ideal range:
1776 * x = 0b1000 ^ 0b1011 = 0b11
1777 * span = 1 << fls(x) = 4
1778 *
1779 * To invalidate pages [7; 10], we need to invalidate [0; 15]:
1780 * x = 0b0111 ^ 0b1010 = 0b1101
1781 * span = 1 << fls(x) = 16
1782 */
1783 log2_span = fls_long(page_start ^ page_end);
1784 span_mask = (1ULL << log2_span) - 1;
1785
1786 page_start &= ~span_mask;
1787
1788 cmd->atc.addr = page_start << inval_grain_shift;
1789 cmd->atc.size = log2_span;
1790 }
1791
1792 static int arm_smmu_atc_inv_master(struct arm_smmu_master *master)
1793 {
1794 int i;
1795 struct arm_smmu_cmdq_ent cmd;
1796 struct arm_smmu_cmdq_batch cmds;
1797
1798 arm_smmu_atc_inv_to_cmd(IOMMU_NO_PASID, 0, 0, &cmd);
1799
1800 cmds.num = 0;
1801 for (i = 0; i < master->num_streams; i++) {
1802 cmd.atc.sid = master->streams[i].id;
1803 arm_smmu_cmdq_batch_add(master->smmu, &cmds, &cmd);
1804 }
1805
1806 return arm_smmu_cmdq_batch_submit(master->smmu, &cmds);
1807 }
1808
1809 int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, int ssid,
1810 unsigned long iova, size_t size)
1811 {
1812 int i;
1813 unsigned long flags;
1814 struct arm_smmu_cmdq_ent cmd;
1815 struct arm_smmu_master *master;
1816 struct arm_smmu_cmdq_batch cmds;
1817
1818 if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_ATS))
1819 return 0;
1820
1821 /*
1822 * Ensure that we've completed prior invalidation of the main TLBs
1823 * before we read 'nr_ats_masters' in case of a concurrent call to
1824 * arm_smmu_enable_ats():
1825 *
1826 * // unmap() // arm_smmu_enable_ats()
1827 * TLBI+SYNC atomic_inc(&nr_ats_masters);
1828 * smp_mb(); [...]
1829 * atomic_read(&nr_ats_masters); pci_enable_ats() // writel()
1830 *
1831 * Ensures that we always see the incremented 'nr_ats_masters' count if
1832 * ATS was enabled at the PCI device before completion of the TLBI.
1833 */
1834 smp_mb();
1835 if (!atomic_read(&smmu_domain->nr_ats_masters))
1836 return 0;
1837
1838 arm_smmu_atc_inv_to_cmd(ssid, iova, size, &cmd);
1839
1840 cmds.num = 0;
1841
1842 spin_lock_irqsave(&smmu_domain->devices_lock, flags);
1843 list_for_each_entry(master, &smmu_domain->devices, domain_head) {
1844 if (!master->ats_enabled)
1845 continue;
1846
1847 for (i = 0; i < master->num_streams; i++) {
1848 cmd.atc.sid = master->streams[i].id;
1849 arm_smmu_cmdq_batch_add(smmu_domain->smmu, &cmds, &cmd);
1850 }
1851 }
1852 spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
1853
1854 return arm_smmu_cmdq_batch_submit(smmu_domain->smmu, &cmds);
1855 }
1856
1857 /* IO_PGTABLE API */
1858 static void arm_smmu_tlb_inv_context(void *cookie)
1859 {
1860 struct arm_smmu_domain *smmu_domain = cookie;
1861 struct arm_smmu_device *smmu = smmu_domain->smmu;
1862 struct arm_smmu_cmdq_ent cmd;
1863
1864 /*
1865 * NOTE: when io-pgtable is in non-strict mode, we may get here with
1866 * PTEs previously cleared by unmaps on the current CPU not yet visible
1867 * to the SMMU. We are relying on the dma_wmb() implicit during cmd
1868 * insertion to guarantee those are observed before the TLBI. Do be
1869 * careful, 007.
1870 */
1871 if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1872 arm_smmu_tlb_inv_asid(smmu, smmu_domain->s1_cfg.cd.asid);
1873 } else {
1874 cmd.opcode = CMDQ_OP_TLBI_S12_VMALL;
1875 cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid;
1876 arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
1877 }
1878 arm_smmu_atc_inv_domain(smmu_domain, IOMMU_NO_PASID, 0, 0);
1879 }
1880
1881 static void __arm_smmu_tlb_inv_range(struct arm_smmu_cmdq_ent *cmd,
1882 unsigned long iova, size_t size,
1883 size_t granule,
1884 struct arm_smmu_domain *smmu_domain)
1885 {
1886 struct arm_smmu_device *smmu = smmu_domain->smmu;
1887 unsigned long end = iova + size, num_pages = 0, tg = 0;
1888 size_t inv_range = granule;
1889 struct arm_smmu_cmdq_batch cmds;
1890
1891 if (!size)
1892 return;
1893
1894 if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
1895 /* Get the leaf page size */
1896 tg = __ffs(smmu_domain->domain.pgsize_bitmap);
1897
1898 /* Convert page size of 12,14,16 (log2) to 1,2,3 */
1899 cmd->tlbi.tg = (tg - 10) / 2;
1900
1901 /*
1902 * Determine what level the granule is at. For non-leaf, io-pgtable
1903 * assumes .tlb_flush_walk can invalidate multiple levels at once,
1904 * so ignore the nominal last-level granule and leave TTL=0.
1905 */
1906 if (cmd->tlbi.leaf)
1907 cmd->tlbi.ttl = 4 - ((ilog2(granule) - 3) / (tg - 3));
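/*
 * Examples (editor's addition): with a 4KiB leaf page size (tg == 12),
 * a 4KiB granule gives ttl = 4 - (12 - 3) / (12 - 3) = 3 (last-level
 * entries), while a 2MiB block gives ttl = 4 - (21 - 3) / (12 - 3) = 2.
 */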
1908
1909 num_pages = size >> tg;
1910 }
1911
1912 cmds.num = 0;
1913
1914 while (iova < end) {
1915 if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
1916 /*
1917 * On each iteration of the loop, the range is 5 bits
1918 * worth of the aligned size remaining.
1919 * The range in pages is:
1920 *
1921 * range = (num_pages & (0x1f << __ffs(num_pages)))
1922 */
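/*
 * Worked example (editor's addition): for num_pages == 35 (0x23), the
 * first iteration picks scale = 0 and num = 0x23 & 0x1f = 3, so
 * tlbi.num = 2 and 3 pages are invalidated; the second iteration picks
 * scale = 5 and num = 1, covering the remaining 32 pages in one range
 * command.
 */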
1923 unsigned long scale, num;
1924
1925 /* The lowest set bit gives the largest power-of-2 factor of the remaining page count */
1926 scale = __ffs(num_pages);
1927 cmd->tlbi.scale = scale;
1928
1929 /* Determine how many chunks of 2^scale size we have */
1930 num = (num_pages >> scale) & CMDQ_TLBI_RANGE_NUM_MAX;
1931 cmd->tlbi.num = num - 1;
1932
1933 /* range is num * 2^scale * pgsize */
1934 inv_range = num << (scale + tg);
1935
1936 /* Clear out the lower order bits for the next iteration */
1937 num_pages -= num << scale;
1938 }
1939
1940 cmd->tlbi.addr = iova;
1941 arm_smmu_cmdq_batch_add(smmu, &cmds, cmd);
1942 iova += inv_range;
1943 }
1944 arm_smmu_cmdq_batch_submit(smmu, &cmds);
1945 }
1946
1947 static void arm_smmu_tlb_inv_range_domain(unsigned long iova, size_t size,
1948 size_t granule, bool leaf,
1949 struct arm_smmu_domain *smmu_domain)
1950 {
1951 struct arm_smmu_cmdq_ent cmd = {
1952 .tlbi = {
1953 .leaf = leaf,
1954 },
1955 };
1956
1957 if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1958 cmd.opcode = smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ?
1959 CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA;
1960 cmd.tlbi.asid = smmu_domain->s1_cfg.cd.asid;
1961 } else {
1962 cmd.opcode = CMDQ_OP_TLBI_S2_IPA;
1963 cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid;
1964 }
1965 __arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain);
1966
1967 /*
1968 * Unfortunately, this can't be leaf-only since we may have
1969 * zapped an entire table.
1970 */
1971 arm_smmu_atc_inv_domain(smmu_domain, IOMMU_NO_PASID, iova, size);
1972 }
1973
1974 void arm_smmu_tlb_inv_range_asid(unsigned long iova, size_t size, int asid,
1975 size_t granule, bool leaf,
1976 struct arm_smmu_domain *smmu_domain)
1977 {
1978 struct arm_smmu_cmdq_ent cmd = {
1979 .opcode = smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ?
1980 CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA,
1981 .tlbi = {
1982 .asid = asid,
1983 .leaf = leaf,
1984 },
1985 };
1986
1987 __arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain);
1988 }
1989
1990 static void arm_smmu_tlb_inv_page_nosync(struct iommu_iotlb_gather *gather,
1991 unsigned long iova, size_t granule,
1992 void *cookie)
1993 {
1994 struct arm_smmu_domain *smmu_domain = cookie;
1995 struct iommu_domain *domain = &smmu_domain->domain;
1996
1997 iommu_iotlb_gather_add_page(domain, gather, iova, granule);
1998 }
1999
2000 static void arm_smmu_tlb_inv_walk(unsigned long iova, size_t size,
2001 size_t granule, void *cookie)
2002 {
2003 arm_smmu_tlb_inv_range_domain(iova, size, granule, false, cookie);
2004 }
2005
2006 static const struct iommu_flush_ops arm_smmu_flush_ops = {
2007 .tlb_flush_all = arm_smmu_tlb_inv_context,
2008 .tlb_flush_walk = arm_smmu_tlb_inv_walk,
2009 .tlb_add_page = arm_smmu_tlb_inv_page_nosync,
2010 };
2011
2012 /* IOMMU API */
2013 static bool arm_smmu_capable(struct device *dev, enum iommu_cap cap)
2014 {
2015 struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2016
2017 switch (cap) {
2018 case IOMMU_CAP_CACHE_COHERENCY:
2019 /* Assume that a coherent TCU implies coherent TBUs */
2020 return master->smmu->features & ARM_SMMU_FEAT_COHERENCY;
2021 case IOMMU_CAP_NOEXEC:
2022 case IOMMU_CAP_DEFERRED_FLUSH:
2023 return true;
2024 default:
2025 return false;
2026 }
2027 }
2028
2029 static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
2030 {
2031 struct arm_smmu_domain *smmu_domain;
2032
2033 if (type == IOMMU_DOMAIN_SVA)
2034 return arm_smmu_sva_domain_alloc();
2035
2036 if (type != IOMMU_DOMAIN_UNMANAGED &&
2037 type != IOMMU_DOMAIN_DMA &&
2038 type != IOMMU_DOMAIN_IDENTITY)
2039 return NULL;
2040
2041 /*
2042 * Allocate the domain and initialise some of its data structures.
2043 * We can't really do anything meaningful until we've added a
2044 * master.
2045 */
2046 smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
2047 if (!smmu_domain)
2048 return NULL;
2049
2050 mutex_init(&smmu_domain->init_mutex);
2051 INIT_LIST_HEAD(&smmu_domain->devices);
2052 spin_lock_init(&smmu_domain->devices_lock);
2053 INIT_LIST_HEAD(&smmu_domain->mmu_notifiers);
2054
2055 return &smmu_domain->domain;
2056 }
2057
2058 static void arm_smmu_domain_free(struct iommu_domain *domain)
2059 {
2060 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2061 struct arm_smmu_device *smmu = smmu_domain->smmu;
2062
2063 free_io_pgtable_ops(smmu_domain->pgtbl_ops);
2064
2065 /* Free the CD and ASID, if we allocated them */
2066 if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
2067 struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
2068
2069 /* Prevent SVA from touching the CD while we're freeing it */
2070 mutex_lock(&arm_smmu_asid_lock);
2071 if (cfg->cdcfg.cdtab)
2072 arm_smmu_free_cd_tables(smmu_domain);
2073 arm_smmu_free_asid(&cfg->cd);
2074 mutex_unlock(&arm_smmu_asid_lock);
2075 } else {
2076 struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
2077 if (cfg->vmid)
2078 ida_free(&smmu->vmid_map, cfg->vmid);
2079 }
2080
2081 kfree(smmu_domain);
2082 }
2083
2084 static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
2085 struct arm_smmu_master *master,
2086 struct io_pgtable_cfg *pgtbl_cfg)
2087 {
2088 int ret;
2089 u32 asid;
2090 struct arm_smmu_device *smmu = smmu_domain->smmu;
2091 struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
2092 typeof(&pgtbl_cfg->arm_lpae_s1_cfg.tcr) tcr = &pgtbl_cfg->arm_lpae_s1_cfg.tcr;
2093
2094 refcount_set(&cfg->cd.refs, 1);
2095
2096 /* Prevent SVA from modifying the ASID until it is written to the CD */
2097 mutex_lock(&arm_smmu_asid_lock);
2098 ret = xa_alloc(&arm_smmu_asid_xa, &asid, &cfg->cd,
2099 XA_LIMIT(1, (1 << smmu->asid_bits) - 1), GFP_KERNEL);
2100 if (ret)
2101 goto out_unlock;
2102
2103 cfg->s1cdmax = master->ssid_bits;
2104
2105 smmu_domain->stall_enabled = master->stall_enabled;
2106
2107 ret = arm_smmu_alloc_cd_tables(smmu_domain);
2108 if (ret)
2109 goto out_free_asid;
2110
2111 cfg->cd.asid = (u16)asid;
2112 cfg->cd.ttbr = pgtbl_cfg->arm_lpae_s1_cfg.ttbr;
2113 cfg->cd.tcr = FIELD_PREP(CTXDESC_CD_0_TCR_T0SZ, tcr->tsz) |
2114 FIELD_PREP(CTXDESC_CD_0_TCR_TG0, tcr->tg) |
2115 FIELD_PREP(CTXDESC_CD_0_TCR_IRGN0, tcr->irgn) |
2116 FIELD_PREP(CTXDESC_CD_0_TCR_ORGN0, tcr->orgn) |
2117 FIELD_PREP(CTXDESC_CD_0_TCR_SH0, tcr->sh) |
2118 FIELD_PREP(CTXDESC_CD_0_TCR_IPS, tcr->ips) |
2119 CTXDESC_CD_0_TCR_EPD1 | CTXDESC_CD_0_AA64;
2120 cfg->cd.mair = pgtbl_cfg->arm_lpae_s1_cfg.mair;
2121
2122 /*
2123 * Note that this will end up calling arm_smmu_sync_cd() before
2124 * the master has been added to the devices list for this domain.
2125 * This isn't an issue because the STE hasn't been installed yet.
2126 */
2127 ret = arm_smmu_write_ctx_desc(smmu_domain, IOMMU_NO_PASID, &cfg->cd);
2128 if (ret)
2129 goto out_free_cd_tables;
2130
2131 mutex_unlock(&arm_smmu_asid_lock);
2132 return 0;
2133
2134 out_free_cd_tables:
2135 arm_smmu_free_cd_tables(smmu_domain);
2136 out_free_asid:
2137 arm_smmu_free_asid(&cfg->cd);
2138 out_unlock:
2139 mutex_unlock(&arm_smmu_asid_lock);
2140 return ret;
2141 }
2142
2143 static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain,
2144 struct arm_smmu_master *master,
2145 struct io_pgtable_cfg *pgtbl_cfg)
2146 {
2147 int vmid;
2148 struct arm_smmu_device *smmu = smmu_domain->smmu;
2149 struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
2150 typeof(&pgtbl_cfg->arm_lpae_s2_cfg.vtcr) vtcr;
2151
2152 /* Reserve VMID 0 for stage-2 bypass STEs */
2153 vmid = ida_alloc_range(&smmu->vmid_map, 1, (1 << smmu->vmid_bits) - 1,
2154 GFP_KERNEL);
2155 if (vmid < 0)
2156 return vmid;
2157
2158 vtcr = &pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
2159 cfg->vmid = (u16)vmid;
2160 cfg->vttbr = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
2161 cfg->vtcr = FIELD_PREP(STRTAB_STE_2_VTCR_S2T0SZ, vtcr->tsz) |
2162 FIELD_PREP(STRTAB_STE_2_VTCR_S2SL0, vtcr->sl) |
2163 FIELD_PREP(STRTAB_STE_2_VTCR_S2IR0, vtcr->irgn) |
2164 FIELD_PREP(STRTAB_STE_2_VTCR_S2OR0, vtcr->orgn) |
2165 FIELD_PREP(STRTAB_STE_2_VTCR_S2SH0, vtcr->sh) |
2166 FIELD_PREP(STRTAB_STE_2_VTCR_S2TG, vtcr->tg) |
2167 FIELD_PREP(STRTAB_STE_2_VTCR_S2PS, vtcr->ps);
2168 return 0;
2169 }
2170
2171 static int arm_smmu_domain_finalise(struct iommu_domain *domain,
2172 struct arm_smmu_master *master)
2173 {
2174 int ret;
2175 unsigned long ias, oas;
2176 enum io_pgtable_fmt fmt;
2177 struct io_pgtable_cfg pgtbl_cfg;
2178 struct io_pgtable_ops *pgtbl_ops;
2179 int (*finalise_stage_fn)(struct arm_smmu_domain *,
2180 struct arm_smmu_master *,
2181 struct io_pgtable_cfg *);
2182 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2183 struct arm_smmu_device *smmu = smmu_domain->smmu;
2184
2185 if (domain->type == IOMMU_DOMAIN_IDENTITY) {
2186 smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
2187 return 0;
2188 }
2189
2190 /* Restrict the stage to what we can actually support */
2191 if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
2192 smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
2193 if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
2194 smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
2195
2196 switch (smmu_domain->stage) {
2197 case ARM_SMMU_DOMAIN_S1:
2198 ias = (smmu->features & ARM_SMMU_FEAT_VAX) ? 52 : 48;
2199 ias = min_t(unsigned long, ias, VA_BITS);
2200 oas = smmu->ias;
2201 fmt = ARM_64_LPAE_S1;
2202 finalise_stage_fn = arm_smmu_domain_finalise_s1;
2203 break;
2204 case ARM_SMMU_DOMAIN_NESTED:
2205 case ARM_SMMU_DOMAIN_S2:
2206 ias = smmu->ias;
2207 oas = smmu->oas;
2208 fmt = ARM_64_LPAE_S2;
2209 finalise_stage_fn = arm_smmu_domain_finalise_s2;
2210 break;
2211 default:
2212 return -EINVAL;
2213 }
2214
2215 pgtbl_cfg = (struct io_pgtable_cfg) {
2216 .pgsize_bitmap = smmu->pgsize_bitmap,
2217 .ias = ias,
2218 .oas = oas,
2219 .coherent_walk = smmu->features & ARM_SMMU_FEAT_COHERENCY,
2220 .tlb = &arm_smmu_flush_ops,
2221 .iommu_dev = smmu->dev,
2222 };
2223
2224 pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
2225 if (!pgtbl_ops)
2226 return -ENOMEM;
2227
2228 domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
2229 domain->geometry.aperture_end = (1UL << pgtbl_cfg.ias) - 1;
2230 domain->geometry.force_aperture = true;
2231
2232 ret = finalise_stage_fn(smmu_domain, master, &pgtbl_cfg);
2233 if (ret < 0) {
2234 free_io_pgtable_ops(pgtbl_ops);
2235 return ret;
2236 }
2237
2238 smmu_domain->pgtbl_ops = pgtbl_ops;
2239 return 0;
2240 }
2241
2242 static __le64 *arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid)
2243 {
2244 __le64 *step;
2245 struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2246
2247 if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
2248 struct arm_smmu_strtab_l1_desc *l1_desc;
2249 int idx;
2250
2251 /* Two-level walk */
2252 idx = (sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS;
2253 l1_desc = &cfg->l1_desc[idx];
2254 idx = (sid & ((1 << STRTAB_SPLIT) - 1)) * STRTAB_STE_DWORDS;
2255 step = &l1_desc->l2ptr[idx];
2256 } else {
2257 /* Simple linear lookup */
2258 step = &cfg->strtab[sid * STRTAB_STE_DWORDS];
2259 }
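/*
 * Example (editor's addition, taking STRTAB_SPLIT == 8 for
 * illustration): sid 0x1234 walks to the L1 descriptor for SID block
 * 0x12 and then to entry 0x34 of that block's L2 table of STEs, whereas
 * a linear table indexes STE 0x1234 directly.
 */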
2260
2261 return step;
2262 }
2263
2264 static void arm_smmu_install_ste_for_dev(struct arm_smmu_master *master)
2265 {
2266 int i, j;
2267 struct arm_smmu_device *smmu = master->smmu;
2268
2269 for (i = 0; i < master->num_streams; ++i) {
2270 u32 sid = master->streams[i].id;
2271 __le64 *step = arm_smmu_get_step_for_sid(smmu, sid);
2272
2273 /* Bridged PCI devices may end up with duplicated IDs */
2274 for (j = 0; j < i; j++)
2275 if (master->streams[j].id == sid)
2276 break;
2277 if (j < i)
2278 continue;
2279
2280 arm_smmu_write_strtab_ent(master, sid, step);
2281 }
2282 }
2283
2284 static bool arm_smmu_ats_supported(struct arm_smmu_master *master)
2285 {
2286 struct device *dev = master->dev;
2287 struct arm_smmu_device *smmu = master->smmu;
2288 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2289
2290 if (!(smmu->features & ARM_SMMU_FEAT_ATS))
2291 return false;
2292
2293 if (!(fwspec->flags & IOMMU_FWSPEC_PCI_RC_ATS))
2294 return false;
2295
2296 return dev_is_pci(dev) && pci_ats_supported(to_pci_dev(dev));
2297 }
2298
2299 static void arm_smmu_enable_ats(struct arm_smmu_master *master)
2300 {
2301 size_t stu;
2302 struct pci_dev *pdev;
2303 struct arm_smmu_device *smmu = master->smmu;
2304 struct arm_smmu_domain *smmu_domain = master->domain;
2305
2306 /* Don't enable ATS at the endpoint if it's not enabled in the STE */
2307 if (!master->ats_enabled)
2308 return;
2309
2310 /* Smallest Translation Unit: log2 of the smallest supported granule */
2311 stu = __ffs(smmu->pgsize_bitmap);
2312 pdev = to_pci_dev(master->dev);
2313
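/*
 * This increment pairs with the smp_mb() and atomic_read() in
 * arm_smmu_atc_inv_domain() (see the ordering diagram there): if ATS is
 * enabled at the device before a concurrent unmap's TLBI completes, the
 * unmap path is guaranteed to observe the updated count and send this
 * master an ATC invalidation.
 */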
2314 atomic_inc(&smmu_domain->nr_ats_masters);
2315 arm_smmu_atc_inv_domain(smmu_domain, IOMMU_NO_PASID, 0, 0);
2316 if (pci_enable_ats(pdev, stu))
2317 dev_err(master->dev, "Failed to enable ATS (STU %zu)\n", stu);
2318 }
2319
2320 static void arm_smmu_disable_ats(struct arm_smmu_master *master)
2321 {
2322 struct arm_smmu_domain *smmu_domain = master->domain;
2323
2324 if (!master->ats_enabled)
2325 return;
2326
2327 pci_disable_ats(to_pci_dev(master->dev));
2328 /*
2329 * Ensure ATS is disabled at the endpoint before we issue the
2330 * ATC invalidation via the SMMU.
2331 */
2332 wmb();
2333 arm_smmu_atc_inv_master(master);
2334 atomic_dec(&smmu_domain->nr_ats_masters);
2335 }
2336
2337 static int arm_smmu_enable_pasid(struct arm_smmu_master *master)
2338 {
2339 int ret;
2340 int features;
2341 int num_pasids;
2342 struct pci_dev *pdev;
2343
2344 if (!dev_is_pci(master->dev))
2345 return -ENODEV;
2346
2347 pdev = to_pci_dev(master->dev);
2348
2349 features = pci_pasid_features(pdev);
2350 if (features < 0)
2351 return features;
2352
2353 num_pasids = pci_max_pasids(pdev);
2354 if (num_pasids <= 0)
2355 return num_pasids;
2356
2357 ret = pci_enable_pasid(pdev, features);
2358 if (ret) {
2359 dev_err(&pdev->dev, "Failed to enable PASID\n");
2360 return ret;
2361 }
2362
2363 master->ssid_bits = min_t(u8, ilog2(num_pasids),
2364 master->smmu->ssid_bits);
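/*
 * e.g. a device advertising 256 PASIDs gets ilog2(256) = 8 substream ID
 * bits, further capped by the SMMU's SSIDSIZE (smmu->ssid_bits).
 */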
2365 return 0;
2366 }
2367
2368 static void arm_smmu_disable_pasid(struct arm_smmu_master *master)
2369 {
2370 struct pci_dev *pdev;
2371
2372 if (!dev_is_pci(master->dev))
2373 return;
2374
2375 pdev = to_pci_dev(master->dev);
2376
2377 if (!pdev->pasid_enabled)
2378 return;
2379
2380 master->ssid_bits = 0;
2381 pci_disable_pasid(pdev);
2382 }
2383
2384 static void arm_smmu_detach_dev(struct arm_smmu_master *master)
2385 {
2386 unsigned long flags;
2387 struct arm_smmu_domain *smmu_domain = master->domain;
2388
2389 if (!smmu_domain)
2390 return;
2391
2392 arm_smmu_disable_ats(master);
2393
2394 spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2395 list_del(&master->domain_head);
2396 spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2397
2398 master->domain = NULL;
2399 master->ats_enabled = false;
2400 arm_smmu_install_ste_for_dev(master);
2401 }
2402
2403 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
2404 {
2405 int ret = 0;
2406 unsigned long flags;
2407 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2408 struct arm_smmu_device *smmu;
2409 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2410 struct arm_smmu_master *master;
2411
2412 if (!fwspec)
2413 return -ENOENT;
2414
2415 master = dev_iommu_priv_get(dev);
2416 smmu = master->smmu;
2417
2418 /*
2419 * Checking that SVA is disabled ensures that this device isn't bound to
2420 * any mm, and can be safely detached from its old domain. Bonds cannot
2421 * be removed concurrently since we're holding the group mutex.
2422 */
2423 if (arm_smmu_master_sva_enabled(master)) {
2424 dev_err(dev, "cannot attach - SVA enabled\n");
2425 return -EBUSY;
2426 }
2427
2428 arm_smmu_detach_dev(master);
2429
2430 mutex_lock(&smmu_domain->init_mutex);
2431
2432 if (!smmu_domain->smmu) {
2433 smmu_domain->smmu = smmu;
2434 ret = arm_smmu_domain_finalise(domain, master);
2435 if (ret) {
2436 smmu_domain->smmu = NULL;
2437 goto out_unlock;
2438 }
2439 } else if (smmu_domain->smmu != smmu) {
2440 ret = -EINVAL;
2441 goto out_unlock;
2442 } else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 &&
2443 master->ssid_bits != smmu_domain->s1_cfg.s1cdmax) {
2444 ret = -EINVAL;
2445 goto out_unlock;
2446 } else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 &&
2447 smmu_domain->stall_enabled != master->stall_enabled) {
2448 ret = -EINVAL;
2449 goto out_unlock;
2450 }
2451
2452 master->domain = smmu_domain;
2453
2454 /*
2455 * The SMMU does not support enabling ATS with bypass. When the STE is
2456 * in bypass (STE.Config[2:0] == 0b100), ATS Translation Requests and
2457 * Translated transactions are denied as though ATS is disabled for the
2458 * stream (STE.EATS == 0b00), causing F_BAD_ATS_TREQ and
2459 * F_TRANSL_FORBIDDEN events (IHI0070Ea 5.2 Stream Table Entry).
2460 */
2461 if (smmu_domain->stage != ARM_SMMU_DOMAIN_BYPASS)
2462 master->ats_enabled = arm_smmu_ats_supported(master);
2463
2464 arm_smmu_install_ste_for_dev(master);
2465
2466 spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2467 list_add(&master->domain_head, &smmu_domain->devices);
2468 spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2469
2470 arm_smmu_enable_ats(master);
2471
2472 out_unlock:
2473 mutex_unlock(&smmu_domain->init_mutex);
2474 return ret;
2475 }
2476
2477 static int arm_smmu_map_pages(struct iommu_domain *domain, unsigned long iova,
2478 phys_addr_t paddr, size_t pgsize, size_t pgcount,
2479 int prot, gfp_t gfp, size_t *mapped)
2480 {
2481 struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
2482
2483 if (!ops)
2484 return -ENODEV;
2485
2486 return ops->map_pages(ops, iova, paddr, pgsize, pgcount, prot, gfp, mapped);
2487 }
2488
2489 static size_t arm_smmu_unmap_pages(struct iommu_domain *domain, unsigned long iova,
2490 size_t pgsize, size_t pgcount,
2491 struct iommu_iotlb_gather *gather)
2492 {
2493 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2494 struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
2495
2496 if (!ops)
2497 return 0;
2498
2499 return ops->unmap_pages(ops, iova, pgsize, pgcount, gather);
2500 }
2501
2502 static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
2503 {
2504 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2505
2506 if (smmu_domain->smmu)
2507 arm_smmu_tlb_inv_context(smmu_domain);
2508 }
2509
2510 static void arm_smmu_iotlb_sync(struct iommu_domain *domain,
2511 struct iommu_iotlb_gather *gather)
2512 {
2513 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2514
2515 if (!gather->pgsize)
2516 return;
2517
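/*
 * gather->end is the last byte of the range (inclusive), hence the
 * "+ 1"; the gathered pages are last-level entries, so the invalidation
 * is issued with leaf == true.
 */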
2518 arm_smmu_tlb_inv_range_domain(gather->start,
2519 gather->end - gather->start + 1,
2520 gather->pgsize, true, smmu_domain);
2521 }
2522
2523 static phys_addr_t
2524 arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
2525 {
2526 struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
2527
2528 if (!ops)
2529 return 0;
2530
2531 return ops->iova_to_phys(ops, iova);
2532 }
2533
2534 static struct platform_driver arm_smmu_driver;
2535
2536 static
2537 struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
2538 {
2539 struct device *dev = driver_find_device_by_fwnode(&arm_smmu_driver.driver,
2540 fwnode);
2541 put_device(dev);
2542 return dev ? dev_get_drvdata(dev) : NULL;
2543 }
2544
2545 static bool arm_smmu_sid_in_range(struct arm_smmu_device *smmu, u32 sid)
2546 {
2547 unsigned long limit = smmu->strtab_cfg.num_l1_ents;
2548
2549 if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
2550 limit *= 1UL << STRTAB_SPLIT;
2551
2552 return sid < limit;
2553 }
2554
2555 static int arm_smmu_init_sid_strtab(struct arm_smmu_device *smmu, u32 sid)
2556 {
2557 /* Check the SIDs are in range of the SMMU and our stream table */
2558 if (!arm_smmu_sid_in_range(smmu, sid))
2559 return -ERANGE;
2560
2561 /* Ensure l2 strtab is initialised */
2562 if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
2563 return arm_smmu_init_l2_strtab(smmu, sid);
2564
2565 return 0;
2566 }
2567
2568 static int arm_smmu_insert_master(struct arm_smmu_device *smmu,
2569 struct arm_smmu_master *master)
2570 {
2571 int i;
2572 int ret = 0;
2573 struct arm_smmu_stream *new_stream, *cur_stream;
2574 struct rb_node **new_node, *parent_node = NULL;
2575 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev);
2576
2577 master->streams = kcalloc(fwspec->num_ids, sizeof(*master->streams),
2578 GFP_KERNEL);
2579 if (!master->streams)
2580 return -ENOMEM;
2581 master->num_streams = fwspec->num_ids;
2582
2583 mutex_lock(&smmu->streams_mutex);
2584 for (i = 0; i < fwspec->num_ids; i++) {
2585 u32 sid = fwspec->ids[i];
2586
2587 new_stream = &master->streams[i];
2588 new_stream->id = sid;
2589 new_stream->master = master;
2590
2591 ret = arm_smmu_init_sid_strtab(smmu, sid);
2592 if (ret)
2593 break;
2594
2595 /* Insert into SID tree */
2596 new_node = &(smmu->streams.rb_node);
2597 while (*new_node) {
2598 cur_stream = rb_entry(*new_node, struct arm_smmu_stream,
2599 node);
2600 parent_node = *new_node;
2601 if (cur_stream->id > new_stream->id) {
2602 new_node = &((*new_node)->rb_left);
2603 } else if (cur_stream->id < new_stream->id) {
2604 new_node = &((*new_node)->rb_right);
2605 } else {
2606 dev_warn(master->dev,
2607 "stream %u already in tree\n",
2608 cur_stream->id);
2609 ret = -EINVAL;
2610 break;
2611 }
2612 }
2613 if (ret)
2614 break;
2615
2616 rb_link_node(&new_stream->node, parent_node, new_node);
2617 rb_insert_color(&new_stream->node, &smmu->streams);
2618 }
2619
2620 if (ret) {
2621 for (i--; i >= 0; i--)
2622 rb_erase(&master->streams[i].node, &smmu->streams);
2623 kfree(master->streams);
2624 }
2625 mutex_unlock(&smmu->streams_mutex);
2626
2627 return ret;
2628 }
2629
2630 static void arm_smmu_remove_master(struct arm_smmu_master *master)
2631 {
2632 int i;
2633 struct arm_smmu_device *smmu = master->smmu;
2634 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev);
2635
2636 if (!smmu || !master->streams)
2637 return;
2638
2639 mutex_lock(&smmu->streams_mutex);
2640 for (i = 0; i < fwspec->num_ids; i++)
2641 rb_erase(&master->streams[i].node, &smmu->streams);
2642 mutex_unlock(&smmu->streams_mutex);
2643
2644 kfree(master->streams);
2645 }
2646
2647 static struct iommu_ops arm_smmu_ops;
2648
2649 static struct iommu_device *arm_smmu_probe_device(struct device *dev)
2650 {
2651 int ret;
2652 struct arm_smmu_device *smmu;
2653 struct arm_smmu_master *master;
2654 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2655
2656 if (!fwspec || fwspec->ops != &arm_smmu_ops)
2657 return ERR_PTR(-ENODEV);
2658
2659 if (WARN_ON_ONCE(dev_iommu_priv_get(dev)))
2660 return ERR_PTR(-EBUSY);
2661
2662 smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
2663 if (!smmu)
2664 return ERR_PTR(-ENODEV);
2665
2666 master = kzalloc(sizeof(*master), GFP_KERNEL);
2667 if (!master)
2668 return ERR_PTR(-ENOMEM);
2669
2670 master->dev = dev;
2671 master->smmu = smmu;
2672 INIT_LIST_HEAD(&master->bonds);
2673 dev_iommu_priv_set(dev, master);
2674
2675 ret = arm_smmu_insert_master(smmu, master);
2676 if (ret)
2677 goto err_free_master;
2678
2679 device_property_read_u32(dev, "pasid-num-bits", &master->ssid_bits);
2680 master->ssid_bits = min(smmu->ssid_bits, master->ssid_bits);
2681
2682 /*
2683 * Note that PASID must be enabled before, and disabled after ATS:
2684 * PCI Express Base 4.0r1.0 - 10.5.1.3 ATS Control Register
2685 *
2686 * Behavior is undefined if this bit is Set and the value of the PASID
2687 * Enable, Execute Requested Enable, or Privileged Mode Requested bits
2688 * are changed.
2689 */
2690 arm_smmu_enable_pasid(master);
2691
2692 if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB))
2693 master->ssid_bits = min_t(u8, master->ssid_bits,
2694 CTXDESC_LINEAR_CDMAX);
2695
2696 if ((smmu->features & ARM_SMMU_FEAT_STALLS &&
2697 device_property_read_bool(dev, "dma-can-stall")) ||
2698 smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
2699 master->stall_enabled = true;
2700
2701 return &smmu->iommu;
2702
2703 err_free_master:
2704 kfree(master);
2705 dev_iommu_priv_set(dev, NULL);
2706 return ERR_PTR(ret);
2707 }
2708
2709 static void arm_smmu_release_device(struct device *dev)
2710 {
2711 struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2712
2713 if (WARN_ON(arm_smmu_master_sva_enabled(master)))
2714 iopf_queue_remove_device(master->smmu->evtq.iopf, dev);
2715 arm_smmu_detach_dev(master);
2716 arm_smmu_disable_pasid(master);
2717 arm_smmu_remove_master(master);
2718 kfree(master);
2719 }
2720
2721 static struct iommu_group *arm_smmu_device_group(struct device *dev)
2722 {
2723 struct iommu_group *group;
2724
2725 /*
2726 * We don't support devices sharing stream IDs other than PCI RID
2727 * aliases, since the necessary ID-to-device lookup becomes rather
2728 * impractical given a potential sparse 32-bit stream ID space.
2729 */
2730 if (dev_is_pci(dev))
2731 group = pci_device_group(dev);
2732 else
2733 group = generic_device_group(dev);
2734
2735 return group;
2736 }
2737
2738 static int arm_smmu_enable_nesting(struct iommu_domain *domain)
2739 {
2740 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2741 int ret = 0;
2742
2743 mutex_lock(&smmu_domain->init_mutex);
2744 if (smmu_domain->smmu)
2745 ret = -EPERM;
2746 else
2747 smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
2748 mutex_unlock(&smmu_domain->init_mutex);
2749
2750 return ret;
2751 }
2752
2753 static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
2754 {
2755 return iommu_fwspec_add_ids(dev, args->args, 1);
2756 }
2757
2758 static void arm_smmu_get_resv_regions(struct device *dev,
2759 struct list_head *head)
2760 {
2761 struct iommu_resv_region *region;
2762 int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
2763
2764 region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
2765 prot, IOMMU_RESV_SW_MSI, GFP_KERNEL);
2766 if (!region)
2767 return;
2768
2769 list_add_tail(&region->list, head);
2770
2771 iommu_dma_get_resv_regions(dev, head);
2772 }
2773
2774 static int arm_smmu_dev_enable_feature(struct device *dev,
2775 enum iommu_dev_features feat)
2776 {
2777 struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2778
2779 if (!master)
2780 return -ENODEV;
2781
2782 switch (feat) {
2783 case IOMMU_DEV_FEAT_IOPF:
2784 if (!arm_smmu_master_iopf_supported(master))
2785 return -EINVAL;
2786 if (master->iopf_enabled)
2787 return -EBUSY;
2788 master->iopf_enabled = true;
2789 return 0;
2790 case IOMMU_DEV_FEAT_SVA:
2791 if (!arm_smmu_master_sva_supported(master))
2792 return -EINVAL;
2793 if (arm_smmu_master_sva_enabled(master))
2794 return -EBUSY;
2795 return arm_smmu_master_enable_sva(master);
2796 default:
2797 return -EINVAL;
2798 }
2799 }
2800
2801 static int arm_smmu_dev_disable_feature(struct device *dev,
2802 enum iommu_dev_features feat)
2803 {
2804 struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2805
2806 if (!master)
2807 return -EINVAL;
2808
2809 switch (feat) {
2810 case IOMMU_DEV_FEAT_IOPF:
2811 if (!master->iopf_enabled)
2812 return -EINVAL;
2813 if (master->sva_enabled)
2814 return -EBUSY;
2815 master->iopf_enabled = false;
2816 return 0;
2817 case IOMMU_DEV_FEAT_SVA:
2818 if (!arm_smmu_master_sva_enabled(master))
2819 return -EINVAL;
2820 return arm_smmu_master_disable_sva(master);
2821 default:
2822 return -EINVAL;
2823 }
2824 }
2825
2826 /*
2827 * HiSilicon PCIe tune and trace device can be used to trace TLP headers on the
2828 * PCIe link and save the data to memory by DMA. The hardware is restricted to
2829 * use identity mapping only.
2830 */
2831 #define IS_HISI_PTT_DEVICE(pdev) ((pdev)->vendor == PCI_VENDOR_ID_HUAWEI && \
2832 (pdev)->device == 0xa12e)
2833
2834 static int arm_smmu_def_domain_type(struct device *dev)
2835 {
2836 if (dev_is_pci(dev)) {
2837 struct pci_dev *pdev = to_pci_dev(dev);
2838
2839 if (IS_HISI_PTT_DEVICE(pdev))
2840 return IOMMU_DOMAIN_IDENTITY;
2841 }
2842
2843 return 0;
2844 }
2845
2846 static void arm_smmu_remove_dev_pasid(struct device *dev, ioasid_t pasid)
2847 {
2848 struct iommu_domain *domain;
2849
2850 domain = iommu_get_domain_for_dev_pasid(dev, pasid, IOMMU_DOMAIN_SVA);
2851 if (WARN_ON(IS_ERR(domain)) || !domain)
2852 return;
2853
2854 arm_smmu_sva_remove_dev_pasid(domain, dev, pasid);
2855 }
2856
2857 static struct iommu_ops arm_smmu_ops = {
2858 .capable = arm_smmu_capable,
2859 .domain_alloc = arm_smmu_domain_alloc,
2860 .probe_device = arm_smmu_probe_device,
2861 .release_device = arm_smmu_release_device,
2862 .device_group = arm_smmu_device_group,
2863 .of_xlate = arm_smmu_of_xlate,
2864 .get_resv_regions = arm_smmu_get_resv_regions,
2865 .remove_dev_pasid = arm_smmu_remove_dev_pasid,
2866 .dev_enable_feat = arm_smmu_dev_enable_feature,
2867 .dev_disable_feat = arm_smmu_dev_disable_feature,
2868 .page_response = arm_smmu_page_response,
2869 .def_domain_type = arm_smmu_def_domain_type,
2870 .pgsize_bitmap = -1UL, /* Restricted during device attach */
2871 .owner = THIS_MODULE,
2872 .default_domain_ops = &(const struct iommu_domain_ops) {
2873 .attach_dev = arm_smmu_attach_dev,
2874 .map_pages = arm_smmu_map_pages,
2875 .unmap_pages = arm_smmu_unmap_pages,
2876 .flush_iotlb_all = arm_smmu_flush_iotlb_all,
2877 .iotlb_sync = arm_smmu_iotlb_sync,
2878 .iova_to_phys = arm_smmu_iova_to_phys,
2879 .enable_nesting = arm_smmu_enable_nesting,
2880 .free = arm_smmu_domain_free,
2881 }
2882 };
2883
2884 /* Probing and initialisation functions */
2885 static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
2886 struct arm_smmu_queue *q,
2887 void __iomem *page,
2888 unsigned long prod_off,
2889 unsigned long cons_off,
2890 size_t dwords, const char *name)
2891 {
2892 size_t qsz;
2893
2894 do {
2895 qsz = ((1 << q->llq.max_n_shift) * dwords) << 3;
2896 q->base = dmam_alloc_coherent(smmu->dev, qsz, &q->base_dma,
2897 GFP_KERNEL);
2898 if (q->base || qsz < PAGE_SIZE)
2899 break;
2900
2901 q->llq.max_n_shift--;
2902 } while (1);
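/*
 * Editor's note: qsz is (1 << max_n_shift) entries of 'dwords' 64-bit
 * words, e.g. a 256-entry queue of 2-dword commands is 256 * 2 * 8 =
 * 4KiB. On allocation failure the loop above retries with half the
 * entries, and stops shrinking once the size is below PAGE_SIZE.
 */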
2903
2904 if (!q->base) {
2905 dev_err(smmu->dev,
2906 "failed to allocate queue (0x%zx bytes) for %s\n",
2907 qsz, name);
2908 return -ENOMEM;
2909 }
2910
2911 if (!WARN_ON(q->base_dma & (qsz - 1))) {
2912 dev_info(smmu->dev, "allocated %u entries for %s\n",
2913 1 << q->llq.max_n_shift, name);
2914 }
2915
2916 q->prod_reg = page + prod_off;
2917 q->cons_reg = page + cons_off;
2918 q->ent_dwords = dwords;
2919
2920 q->q_base = Q_BASE_RWA;
2921 q->q_base |= q->base_dma & Q_BASE_ADDR_MASK;
2922 q->q_base |= FIELD_PREP(Q_BASE_LOG2SIZE, q->llq.max_n_shift);
2923
2924 q->llq.prod = q->llq.cons = 0;
2925 return 0;
2926 }
2927
2928 static int arm_smmu_cmdq_init(struct arm_smmu_device *smmu)
2929 {
2930 struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
2931 unsigned int nents = 1 << cmdq->q.llq.max_n_shift;
2932
2933 atomic_set(&cmdq->owner_prod, 0);
2934 atomic_set(&cmdq->lock, 0);
2935
2936 cmdq->valid_map = (atomic_long_t *)devm_bitmap_zalloc(smmu->dev, nents,
2937 GFP_KERNEL);
2938 if (!cmdq->valid_map)
2939 return -ENOMEM;
2940
2941 return 0;
2942 }
2943
2944 static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
2945 {
2946 int ret;
2947
2948 /* cmdq */
2949 ret = arm_smmu_init_one_queue(smmu, &smmu->cmdq.q, smmu->base,
2950 ARM_SMMU_CMDQ_PROD, ARM_SMMU_CMDQ_CONS,
2951 CMDQ_ENT_DWORDS, "cmdq");
2952 if (ret)
2953 return ret;
2954
2955 ret = arm_smmu_cmdq_init(smmu);
2956 if (ret)
2957 return ret;
2958
2959 /* evtq */
2960 ret = arm_smmu_init_one_queue(smmu, &smmu->evtq.q, smmu->page1,
2961 ARM_SMMU_EVTQ_PROD, ARM_SMMU_EVTQ_CONS,
2962 EVTQ_ENT_DWORDS, "evtq");
2963 if (ret)
2964 return ret;
2965
2966 if ((smmu->features & ARM_SMMU_FEAT_SVA) &&
2967 (smmu->features & ARM_SMMU_FEAT_STALLS)) {
2968 smmu->evtq.iopf = iopf_queue_alloc(dev_name(smmu->dev));
2969 if (!smmu->evtq.iopf)
2970 return -ENOMEM;
2971 }
2972
2973 /* priq */
2974 if (!(smmu->features & ARM_SMMU_FEAT_PRI))
2975 return 0;
2976
2977 return arm_smmu_init_one_queue(smmu, &smmu->priq.q, smmu->page1,
2978 ARM_SMMU_PRIQ_PROD, ARM_SMMU_PRIQ_CONS,
2979 PRIQ_ENT_DWORDS, "priq");
2980 }
2981
2982 static int arm_smmu_init_l1_strtab(struct arm_smmu_device *smmu)
2983 {
2984 unsigned int i;
2985 struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2986 void *strtab = smmu->strtab_cfg.strtab;
2987
2988 cfg->l1_desc = devm_kcalloc(smmu->dev, cfg->num_l1_ents,
2989 sizeof(*cfg->l1_desc), GFP_KERNEL);
2990 if (!cfg->l1_desc)
2991 return -ENOMEM;
2992
2993 for (i = 0; i < cfg->num_l1_ents; ++i) {
2994 arm_smmu_write_strtab_l1_desc(strtab, &cfg->l1_desc[i]);
2995 strtab += STRTAB_L1_DESC_DWORDS << 3;
2996 }
2997
2998 return 0;
2999 }
3000
3001 static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu)
3002 {
3003 void *strtab;
3004 u64 reg;
3005 u32 size, l1size;
3006 struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
3007
3008 /* Calculate the L1 size, capped to the SIDSIZE. */
3009 size = STRTAB_L1_SZ_SHIFT - (ilog2(STRTAB_L1_DESC_DWORDS) + 3);
3010 size = min(size, smmu->sid_bits - STRTAB_SPLIT);
3011 cfg->num_l1_ents = 1 << size;
3012
3013 size += STRTAB_SPLIT;
3014 if (size < smmu->sid_bits)
3015 dev_warn(smmu->dev,
3016 "2-level strtab only covers %u/%u bits of SID\n",
3017 size, smmu->sid_bits);
3018
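/*
 * Worked example (editor's addition, taking STRTAB_SPLIT == 8 and
 * 8-byte L1 descriptors for illustration): sid_bits == 16 caps size at
 * 16 - 8 = 8, i.e. 256 L1 descriptors each fronting an L2 table of 256
 * STEs, so l1size below is 256 * 8 = 2KiB.
 */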
3019 l1size = cfg->num_l1_ents * (STRTAB_L1_DESC_DWORDS << 3);
3020 strtab = dmam_alloc_coherent(smmu->dev, l1size, &cfg->strtab_dma,
3021 GFP_KERNEL);
3022 if (!strtab) {
3023 dev_err(smmu->dev,
3024 "failed to allocate l1 stream table (%u bytes)\n",
3025 l1size);
3026 return -ENOMEM;
3027 }
3028 cfg->strtab = strtab;
3029
3030 /* Configure strtab_base_cfg for 2 levels */
3031 reg = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_2LVL);
3032 reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, size);
3033 reg |= FIELD_PREP(STRTAB_BASE_CFG_SPLIT, STRTAB_SPLIT);
3034 cfg->strtab_base_cfg = reg;
3035
3036 return arm_smmu_init_l1_strtab(smmu);
3037 }
3038
3039 static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu)
3040 {
3041 void *strtab;
3042 u64 reg;
3043 u32 size;
3044 struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
3045
3046 size = (1 << smmu->sid_bits) * (STRTAB_STE_DWORDS << 3);
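/*
 * e.g. taking 64-byte STEs (STRTAB_STE_DWORDS == 8) for illustration,
 * sid_bits == 16 means a 65536 * 64 = 4MiB contiguous allocation.
 */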
3047 strtab = dmam_alloc_coherent(smmu->dev, size, &cfg->strtab_dma,
3048 GFP_KERNEL);
3049 if (!strtab) {
3050 dev_err(smmu->dev,
3051 "failed to allocate linear stream table (%u bytes)\n",
3052 size);
3053 return -ENOMEM;
3054 }
3055 cfg->strtab = strtab;
3056 cfg->num_l1_ents = 1 << smmu->sid_bits;
3057
3058 /* Configure strtab_base_cfg for a linear table covering all SIDs */
3059 reg = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_LINEAR);
3060 reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, smmu->sid_bits);
3061 cfg->strtab_base_cfg = reg;
3062
3063 arm_smmu_init_bypass_stes(strtab, cfg->num_l1_ents, false);
3064 return 0;
3065 }
3066
3067 static int arm_smmu_init_strtab(struct arm_smmu_device *smmu)
3068 {
3069 u64 reg;
3070 int ret;
3071
3072 if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
3073 ret = arm_smmu_init_strtab_2lvl(smmu);
3074 else
3075 ret = arm_smmu_init_strtab_linear(smmu);
3076
3077 if (ret)
3078 return ret;
3079
3080 /* Set the strtab base address */
3081 reg = smmu->strtab_cfg.strtab_dma & STRTAB_BASE_ADDR_MASK;
3082 reg |= STRTAB_BASE_RA;
3083 smmu->strtab_cfg.strtab_base = reg;
3084
3085 ida_init(&smmu->vmid_map);
3086
3087 return 0;
3088 }
3089
3090 static int arm_smmu_init_structures(struct arm_smmu_device *smmu)
3091 {
3092 int ret;
3093
3094 mutex_init(&smmu->streams_mutex);
3095 smmu->streams = RB_ROOT;
3096
3097 ret = arm_smmu_init_queues(smmu);
3098 if (ret)
3099 return ret;
3100
3101 return arm_smmu_init_strtab(smmu);
3102 }
3103
3104 static int arm_smmu_write_reg_sync(struct arm_smmu_device *smmu, u32 val,
3105 unsigned int reg_off, unsigned int ack_off)
3106 {
3107 u32 reg;
3108
3109 writel_relaxed(val, smmu->base + reg_off);
3110 return readl_relaxed_poll_timeout(smmu->base + ack_off, reg, reg == val,
3111 1, ARM_SMMU_POLL_TIMEOUT_US);
3112 }
3113
3114 /* GBPA is "special": updates are handshaked via the GBPA.UPDATE bit */
3115 static int arm_smmu_update_gbpa(struct arm_smmu_device *smmu, u32 set, u32 clr)
3116 {
3117 int ret;
3118 u32 reg, __iomem *gbpa = smmu->base + ARM_SMMU_GBPA;
3119
3120 ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
3121 1, ARM_SMMU_POLL_TIMEOUT_US);
3122 if (ret)
3123 return ret;
3124
3125 reg &= ~clr;
3126 reg |= set;
3127 writel_relaxed(reg | GBPA_UPDATE, gbpa);
3128 ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
3129 1, ARM_SMMU_POLL_TIMEOUT_US);
3130
3131 if (ret)
3132 dev_err(smmu->dev, "GBPA not responding to update\n");
3133 return ret;
3134 }
3135
3136 static void arm_smmu_free_msis(void *data)
3137 {
3138 struct device *dev = data;
3139 platform_msi_domain_free_irqs(dev);
3140 }
3141
3142 static void arm_smmu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
3143 {
3144 phys_addr_t doorbell;
3145 struct device *dev = msi_desc_to_dev(desc);
3146 struct arm_smmu_device *smmu = dev_get_drvdata(dev);
3147 phys_addr_t *cfg = arm_smmu_msi_cfg[desc->msi_index];
3148
3149 doorbell = (((u64)msg->address_hi) << 32) | msg->address_lo;
3150 doorbell &= MSI_CFG0_ADDR_MASK;
3151
3152 writeq_relaxed(doorbell, smmu->base + cfg[0]);
3153 writel_relaxed(msg->data, smmu->base + cfg[1]);
3154 writel_relaxed(ARM_SMMU_MEMATTR_DEVICE_nGnRE, smmu->base + cfg[2]);
3155 }
3156
3157 static void arm_smmu_setup_msis(struct arm_smmu_device *smmu)
3158 {
3159 int ret, nvec = ARM_SMMU_MAX_MSIS;
3160 struct device *dev = smmu->dev;
3161
3162 /* Clear the MSI address regs */
3163 writeq_relaxed(0, smmu->base + ARM_SMMU_GERROR_IRQ_CFG0);
3164 writeq_relaxed(0, smmu->base + ARM_SMMU_EVTQ_IRQ_CFG0);
3165
3166 if (smmu->features & ARM_SMMU_FEAT_PRI)
3167 writeq_relaxed(0, smmu->base + ARM_SMMU_PRIQ_IRQ_CFG0);
3168 else
3169 nvec--;
3170
3171 if (!(smmu->features & ARM_SMMU_FEAT_MSI))
3172 return;
3173
3174 if (!dev->msi.domain) {
3175 dev_info(smmu->dev, "msi_domain absent - falling back to wired irqs\n");
3176 return;
3177 }
3178
3179 /* Allocate MSIs for evtq, gerror and priq. Ignore cmdq */
3180 ret = platform_msi_domain_alloc_irqs(dev, nvec, arm_smmu_write_msi_msg);
3181 if (ret) {
3182 dev_warn(dev, "failed to allocate MSIs - falling back to wired irqs\n");
3183 return;
3184 }
3185
3186 smmu->evtq.q.irq = msi_get_virq(dev, EVTQ_MSI_INDEX);
3187 smmu->gerr_irq = msi_get_virq(dev, GERROR_MSI_INDEX);
3188 smmu->priq.q.irq = msi_get_virq(dev, PRIQ_MSI_INDEX);
3189
3190 /* Add callback to free MSIs on teardown */
3191 devm_add_action(dev, arm_smmu_free_msis, dev);
3192 }
3193
3194 static void arm_smmu_setup_unique_irqs(struct arm_smmu_device *smmu)
3195 {
3196 int irq, ret;
3197
3198 arm_smmu_setup_msis(smmu);
3199
3200 /* Request interrupt lines */
3201 irq = smmu->evtq.q.irq;
3202 if (irq) {
3203 ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
3204 arm_smmu_evtq_thread,
3205 IRQF_ONESHOT,
3206 "arm-smmu-v3-evtq", smmu);
3207 if (ret < 0)
3208 dev_warn(smmu->dev, "failed to enable evtq irq\n");
3209 } else {
3210 dev_warn(smmu->dev, "no evtq irq - events will not be reported!\n");
3211 }
3212
3213 irq = smmu->gerr_irq;
3214 if (irq) {
3215 ret = devm_request_irq(smmu->dev, irq, arm_smmu_gerror_handler,
3216 0, "arm-smmu-v3-gerror", smmu);
3217 if (ret < 0)
3218 dev_warn(smmu->dev, "failed to enable gerror irq\n");
3219 } else {
3220 dev_warn(smmu->dev, "no gerr irq - errors will not be reported!\n");
3221 }
3222
3223 if (smmu->features & ARM_SMMU_FEAT_PRI) {
3224 irq = smmu->priq.q.irq;
3225 if (irq) {
3226 ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
3227 arm_smmu_priq_thread,
3228 IRQF_ONESHOT,
3229 "arm-smmu-v3-priq",
3230 smmu);
3231 if (ret < 0)
3232 dev_warn(smmu->dev,
3233 "failed to enable priq irq\n");
3234 } else {
3235 dev_warn(smmu->dev, "no priq irq - PRI will be broken\n");
3236 }
3237 }
3238 }
3239
3240 static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
3241 {
3242 int ret, irq;
3243 u32 irqen_flags = IRQ_CTRL_EVTQ_IRQEN | IRQ_CTRL_GERROR_IRQEN;
3244
3245 /* Disable IRQs first */
3246 ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_IRQ_CTRL,
3247 ARM_SMMU_IRQ_CTRLACK);
3248 if (ret) {
3249 dev_err(smmu->dev, "failed to disable irqs\n");
3250 return ret;
3251 }
3252
3253 irq = smmu->combined_irq;
3254 if (irq) {
3255 /*
3256 * Cavium ThunderX2 implementation doesn't support unique irq
3257 * lines. Use a single irq line for all the SMMUv3 interrupts.
3258 */
3259 ret = devm_request_threaded_irq(smmu->dev, irq,
3260 arm_smmu_combined_irq_handler,
3261 arm_smmu_combined_irq_thread,
3262 IRQF_ONESHOT,
3263 "arm-smmu-v3-combined-irq", smmu);
3264 if (ret < 0)
3265 dev_warn(smmu->dev, "failed to enable combined irq\n");
3266 } else
3267 arm_smmu_setup_unique_irqs(smmu);
3268
3269 if (smmu->features & ARM_SMMU_FEAT_PRI)
3270 irqen_flags |= IRQ_CTRL_PRIQ_IRQEN;
3271
3272 /* Enable interrupt generation on the SMMU */
3273 ret = arm_smmu_write_reg_sync(smmu, irqen_flags,
3274 ARM_SMMU_IRQ_CTRL, ARM_SMMU_IRQ_CTRLACK);
3275 if (ret)
3276 dev_warn(smmu->dev, "failed to enable irqs\n");
3277
3278 return 0;
3279 }
3280
3281 static int arm_smmu_device_disable(struct arm_smmu_device *smmu)
3282 {
3283 int ret;
3284
3285 ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_CR0, ARM_SMMU_CR0ACK);
3286 if (ret)
3287 dev_err(smmu->dev, "failed to clear cr0\n");
3288
3289 return ret;
3290 }
3291
3292 static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
3293 {
3294 int ret;
3295 u32 reg, enables;
3296 struct arm_smmu_cmdq_ent cmd;
3297
3298 /* Clear CR0 and sync (disables SMMU and queue processing) */
3299 reg = readl_relaxed(smmu->base + ARM_SMMU_CR0);
3300 if (reg & CR0_SMMUEN) {
3301 dev_warn(smmu->dev, "SMMU currently enabled! Resetting...\n");
3302 WARN_ON(is_kdump_kernel() && !disable_bypass);
3303 arm_smmu_update_gbpa(smmu, GBPA_ABORT, 0);
3304 }
3305
3306 ret = arm_smmu_device_disable(smmu);
3307 if (ret)
3308 return ret;
3309
3310 /* CR1 (table and queue memory attributes) */
3311 reg = FIELD_PREP(CR1_TABLE_SH, ARM_SMMU_SH_ISH) |
3312 FIELD_PREP(CR1_TABLE_OC, CR1_CACHE_WB) |
3313 FIELD_PREP(CR1_TABLE_IC, CR1_CACHE_WB) |
3314 FIELD_PREP(CR1_QUEUE_SH, ARM_SMMU_SH_ISH) |
3315 FIELD_PREP(CR1_QUEUE_OC, CR1_CACHE_WB) |
3316 FIELD_PREP(CR1_QUEUE_IC, CR1_CACHE_WB);
3317 writel_relaxed(reg, smmu->base + ARM_SMMU_CR1);
3318
3319 /* CR2 (private TLB maintenance, invalid StreamID recording, E2H) */
3320 reg = CR2_PTM | CR2_RECINVSID;
3321
3322 if (smmu->features & ARM_SMMU_FEAT_E2H)
3323 reg |= CR2_E2H;
3324
3325 writel_relaxed(reg, smmu->base + ARM_SMMU_CR2);
3326
3327 /* Stream table */
3328 writeq_relaxed(smmu->strtab_cfg.strtab_base,
3329 smmu->base + ARM_SMMU_STRTAB_BASE);
3330 writel_relaxed(smmu->strtab_cfg.strtab_base_cfg,
3331 smmu->base + ARM_SMMU_STRTAB_BASE_CFG);
3332
3333 /* Command queue */
3334 writeq_relaxed(smmu->cmdq.q.q_base, smmu->base + ARM_SMMU_CMDQ_BASE);
3335 writel_relaxed(smmu->cmdq.q.llq.prod, smmu->base + ARM_SMMU_CMDQ_PROD);
3336 writel_relaxed(smmu->cmdq.q.llq.cons, smmu->base + ARM_SMMU_CMDQ_CONS);
3337
3338 enables = CR0_CMDQEN;
3339 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3340 ARM_SMMU_CR0ACK);
3341 if (ret) {
3342 dev_err(smmu->dev, "failed to enable command queue\n");
3343 return ret;
3344 }
3345
3346 /* Invalidate any cached configuration */
3347 cmd.opcode = CMDQ_OP_CFGI_ALL;
3348 arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
3349
3350 /* Invalidate any stale TLB entries */
3351 if (smmu->features & ARM_SMMU_FEAT_HYP) {
3352 cmd.opcode = CMDQ_OP_TLBI_EL2_ALL;
3353 arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
3354 }
3355
3356 cmd.opcode = CMDQ_OP_TLBI_NSNH_ALL;
3357 arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
3358
3359 /* Event queue */
3360 writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE);
3361 writel_relaxed(smmu->evtq.q.llq.prod, smmu->page1 + ARM_SMMU_EVTQ_PROD);
3362 writel_relaxed(smmu->evtq.q.llq.cons, smmu->page1 + ARM_SMMU_EVTQ_CONS);
3363
3364 enables |= CR0_EVTQEN;
3365 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3366 ARM_SMMU_CR0ACK);
3367 if (ret) {
3368 dev_err(smmu->dev, "failed to enable event queue\n");
3369 return ret;
3370 }
3371
3372 /* PRI queue */
3373 if (smmu->features & ARM_SMMU_FEAT_PRI) {
3374 writeq_relaxed(smmu->priq.q.q_base,
3375 smmu->base + ARM_SMMU_PRIQ_BASE);
3376 writel_relaxed(smmu->priq.q.llq.prod,
3377 smmu->page1 + ARM_SMMU_PRIQ_PROD);
3378 writel_relaxed(smmu->priq.q.llq.cons,
3379 smmu->page1 + ARM_SMMU_PRIQ_CONS);
3380
3381 enables |= CR0_PRIQEN;
3382 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3383 ARM_SMMU_CR0ACK);
3384 if (ret) {
3385 dev_err(smmu->dev, "failed to enable PRI queue\n");
3386 return ret;
3387 }
3388 }
3389
3390 if (smmu->features & ARM_SMMU_FEAT_ATS) {
3391 enables |= CR0_ATSCHK;
3392 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3393 ARM_SMMU_CR0ACK);
3394 if (ret) {
3395 dev_err(smmu->dev, "failed to enable ATS check\n");
3396 return ret;
3397 }
3398 }
3399
3400 ret = arm_smmu_setup_irqs(smmu);
3401 if (ret) {
3402 dev_err(smmu->dev, "failed to setup irqs\n");
3403 return ret;
3404 }
3405
3406 if (is_kdump_kernel())
3407 enables &= ~(CR0_EVTQEN | CR0_PRIQEN);
3408
3409 /* Enable the SMMU interface, or ensure bypass */
3410 if (!bypass || disable_bypass) {
3411 enables |= CR0_SMMUEN;
3412 } else {
3413 ret = arm_smmu_update_gbpa(smmu, 0, GBPA_ABORT);
3414 if (ret)
3415 return ret;
3416 }
3417 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3418 ARM_SMMU_CR0ACK);
3419 if (ret) {
3420 dev_err(smmu->dev, "failed to enable SMMU interface\n");
3421 return ret;
3422 }
3423
3424 return 0;
3425 }
3426
3427 #define IIDR_IMPLEMENTER_ARM 0x43b
3428 #define IIDR_PRODUCTID_ARM_MMU_600 0x483
3429 #define IIDR_PRODUCTID_ARM_MMU_700 0x487
3430
3431 static void arm_smmu_device_iidr_probe(struct arm_smmu_device *smmu)
3432 {
3433 u32 reg;
3434 unsigned int implementer, productid, variant, revision;
3435
3436 reg = readl_relaxed(smmu->base + ARM_SMMU_IIDR);
3437 implementer = FIELD_GET(IIDR_IMPLEMENTER, reg);
3438 productid = FIELD_GET(IIDR_PRODUCTID, reg);
3439 variant = FIELD_GET(IIDR_VARIANT, reg);
3440 revision = FIELD_GET(IIDR_REVISION, reg);
3441
3442 switch (implementer) {
3443 case IIDR_IMPLEMENTER_ARM:
3444 switch (productid) {
3445 case IIDR_PRODUCTID_ARM_MMU_600:
3446 /* Arm erratum 1076982 */
3447 if (variant == 0 && revision <= 2)
3448 smmu->features &= ~ARM_SMMU_FEAT_SEV;
3449 /* Arm erratum 1209401 */
3450 if (variant < 2)
3451 smmu->features &= ~ARM_SMMU_FEAT_NESTING;
3452 break;
3453 case IIDR_PRODUCTID_ARM_MMU_700:
3454 /* Arm erratum 2812531 */
3455 smmu->features &= ~ARM_SMMU_FEAT_BTM;
3456 smmu->options |= ARM_SMMU_OPT_CMDQ_FORCE_SYNC;
3457 /* Arm errata 2268618, 2812531 */
3458 smmu->features &= ~ARM_SMMU_FEAT_NESTING;
3459 break;
3460 }
3461 break;
3462 }
3463 }
3464
3465 static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
3466 {
3467 u32 reg;
3468 bool coherent = smmu->features & ARM_SMMU_FEAT_COHERENCY;
3469
3470 /* IDR0 */
3471 reg = readl_relaxed(smmu->base + ARM_SMMU_IDR0);
3472
3473 /* 2-level structures */
3474 if (FIELD_GET(IDR0_ST_LVL, reg) == IDR0_ST_LVL_2LVL)
3475 smmu->features |= ARM_SMMU_FEAT_2_LVL_STRTAB;
3476
3477 if (reg & IDR0_CD2L)
3478 smmu->features |= ARM_SMMU_FEAT_2_LVL_CDTAB;
3479
3480 /*
3481 * Translation table endianness.
3482 * We currently require the same endianness as the CPU, but this
3483 * could be changed later by adding a new IO_PGTABLE_QUIRK.
3484 */
3485 switch (FIELD_GET(IDR0_TTENDIAN, reg)) {
3486 case IDR0_TTENDIAN_MIXED:
3487 smmu->features |= ARM_SMMU_FEAT_TT_LE | ARM_SMMU_FEAT_TT_BE;
3488 break;
3489 #ifdef __BIG_ENDIAN
3490 case IDR0_TTENDIAN_BE:
3491 smmu->features |= ARM_SMMU_FEAT_TT_BE;
3492 break;
3493 #else
3494 case IDR0_TTENDIAN_LE:
3495 smmu->features |= ARM_SMMU_FEAT_TT_LE;
3496 break;
3497 #endif
3498 default:
3499 dev_err(smmu->dev, "unknown/unsupported TT endianness!\n");
3500 return -ENXIO;
3501 }
3502
3503 /* Boolean feature flags */
3504 if (IS_ENABLED(CONFIG_PCI_PRI) && reg & IDR0_PRI)
3505 smmu->features |= ARM_SMMU_FEAT_PRI;
3506
3507 if (IS_ENABLED(CONFIG_PCI_ATS) && reg & IDR0_ATS)
3508 smmu->features |= ARM_SMMU_FEAT_ATS;
3509
3510 if (reg & IDR0_SEV)
3511 smmu->features |= ARM_SMMU_FEAT_SEV;
3512
3513 if (reg & IDR0_MSI) {
3514 smmu->features |= ARM_SMMU_FEAT_MSI;
3515 if (coherent && !disable_msipolling)
3516 smmu->options |= ARM_SMMU_OPT_MSIPOLL;
3517 }
3518
3519 if (reg & IDR0_HYP) {
3520 smmu->features |= ARM_SMMU_FEAT_HYP;
3521 if (cpus_have_cap(ARM64_HAS_VIRT_HOST_EXTN))
3522 smmu->features |= ARM_SMMU_FEAT_E2H;
3523 }
3524
3525 /*
3526 * The coherency feature as set by FW is used in preference to the ID
3527 * register, but warn on mismatch.
3528 */
3529 if (!!(reg & IDR0_COHACC) != coherent)
3530 dev_warn(smmu->dev, "IDR0.COHACC overridden by FW configuration (%s)\n",
3531 coherent ? "true" : "false");
3532
3533 switch (FIELD_GET(IDR0_STALL_MODEL, reg)) {
3534 case IDR0_STALL_MODEL_FORCE:
3535 smmu->features |= ARM_SMMU_FEAT_STALL_FORCE;
3536 fallthrough;
3537 case IDR0_STALL_MODEL_STALL:
3538 smmu->features |= ARM_SMMU_FEAT_STALLS;
3539 }
3540
3541 if (reg & IDR0_S1P)
3542 smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
3543
3544 if (reg & IDR0_S2P)
3545 smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
3546
3547 if (!(reg & (IDR0_S1P | IDR0_S2P))) {
3548 dev_err(smmu->dev, "no translation support!\n");
3549 return -ENXIO;
3550 }
3551
3552 /* We only support the AArch64 table format at present */
3553 switch (FIELD_GET(IDR0_TTF, reg)) {
3554 case IDR0_TTF_AARCH32_64:
3555 smmu->ias = 40;
3556 fallthrough;
3557 case IDR0_TTF_AARCH64:
3558 break;
3559 default:
3560 dev_err(smmu->dev, "AArch64 table format not supported!\n");
3561 return -ENXIO;
3562 }
3563
3564 /* ASID/VMID sizes */
3565 smmu->asid_bits = reg & IDR0_ASID16 ? 16 : 8;
3566 smmu->vmid_bits = reg & IDR0_VMID16 ? 16 : 8;
3567
3568 /* IDR1 */
3569 reg = readl_relaxed(smmu->base + ARM_SMMU_IDR1);
3570 if (reg & (IDR1_TABLES_PRESET | IDR1_QUEUES_PRESET | IDR1_REL)) {
3571 dev_err(smmu->dev, "embedded implementation not supported\n");
3572 return -ENXIO;
3573 }
3574
3575 /* Queue sizes, capped to ensure natural alignment */
3576 smmu->cmdq.q.llq.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT,
3577 FIELD_GET(IDR1_CMDQS, reg));
3578 if (smmu->cmdq.q.llq.max_n_shift <= ilog2(CMDQ_BATCH_ENTRIES)) {
3579 /*
3580 * We don't support splitting up batches, so one batch of
3581 * commands plus an extra sync needs to fit inside the command
3582 * queue. There's also no way we can handle the weird alignment
3583 * restrictions on the base pointer for a unit-length queue.
3584 */
3585 dev_err(smmu->dev, "command queue size <= %d entries not supported\n",
3586 CMDQ_BATCH_ENTRIES);
3587 return -ENXIO;
3588 }
3589
3590 smmu->evtq.q.llq.max_n_shift = min_t(u32, EVTQ_MAX_SZ_SHIFT,
3591 FIELD_GET(IDR1_EVTQS, reg));
3592 smmu->priq.q.llq.max_n_shift = min_t(u32, PRIQ_MAX_SZ_SHIFT,
3593 FIELD_GET(IDR1_PRIQS, reg));
3594
3595 /* SID/SSID sizes */
3596 smmu->ssid_bits = FIELD_GET(IDR1_SSIDSIZE, reg);
3597 smmu->sid_bits = FIELD_GET(IDR1_SIDSIZE, reg);
3598 smmu->iommu.max_pasids = 1UL << smmu->ssid_bits;
3599
3600 /*
3601 * If the SMMU supports fewer bits than would fill a single L2 stream
3602 * table, use a linear table instead.
3603 */
3604 if (smmu->sid_bits <= STRTAB_SPLIT)
3605 smmu->features &= ~ARM_SMMU_FEAT_2_LVL_STRTAB;
3606
3607 /* IDR3 */
3608 reg = readl_relaxed(smmu->base + ARM_SMMU_IDR3);
3609 if (FIELD_GET(IDR3_RIL, reg))
3610 smmu->features |= ARM_SMMU_FEAT_RANGE_INV;
3611
3612 /* IDR5 */
3613 reg = readl_relaxed(smmu->base + ARM_SMMU_IDR5);
3614
3615 /* Maximum number of outstanding stalls */
3616 smmu->evtq.max_stalls = FIELD_GET(IDR5_STALL_MAX, reg);
3617
3618 /* Page sizes */
3619 if (reg & IDR5_GRAN64K)
3620 smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
3621 if (reg & IDR5_GRAN16K)
3622 smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
3623 if (reg & IDR5_GRAN4K)
3624 smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
3625
3626 /* Input address size */
3627 if (FIELD_GET(IDR5_VAX, reg) == IDR5_VAX_52_BIT)
3628 smmu->features |= ARM_SMMU_FEAT_VAX;
3629
3630 /* Output address size */
3631 switch (FIELD_GET(IDR5_OAS, reg)) {
3632 case IDR5_OAS_32_BIT:
3633 smmu->oas = 32;
3634 break;
3635 case IDR5_OAS_36_BIT:
3636 smmu->oas = 36;
3637 break;
3638 case IDR5_OAS_40_BIT:
3639 smmu->oas = 40;
3640 break;
3641 case IDR5_OAS_42_BIT:
3642 smmu->oas = 42;
3643 break;
3644 case IDR5_OAS_44_BIT:
3645 smmu->oas = 44;
3646 break;
3647 case IDR5_OAS_52_BIT:
3648 smmu->oas = 52;
3649 smmu->pgsize_bitmap |= 1ULL << 42; /* 4TB */
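/*
 * Editor's note: 52-bit output addressing goes with the 64KiB granule,
 * whose level-1 block maps 2^42 bytes, hence the extra 4TB "page" size.
 */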
3650 break;
3651 default:
3652 dev_info(smmu->dev,
3653 "unknown output address size. Truncating to 48-bit\n");
3654 fallthrough;
3655 case IDR5_OAS_48_BIT:
3656 smmu->oas = 48;
3657 }
3658
3659 if (arm_smmu_ops.pgsize_bitmap == -1UL)
3660 arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
3661 else
3662 arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
3663
3664 /* Set the DMA mask for our table walker */
3665 if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(smmu->oas)))
3666 dev_warn(smmu->dev,
3667 "failed to set DMA mask for table walker\n");
3668
3669 smmu->ias = max(smmu->ias, smmu->oas);
3670
3671 if ((smmu->features & ARM_SMMU_FEAT_TRANS_S1) &&
3672 (smmu->features & ARM_SMMU_FEAT_TRANS_S2))
3673 smmu->features |= ARM_SMMU_FEAT_NESTING;
3674
3675 arm_smmu_device_iidr_probe(smmu);
3676
3677 if (arm_smmu_sva_supported(smmu))
3678 smmu->features |= ARM_SMMU_FEAT_SVA;
3679
3680 dev_info(smmu->dev, "ias %lu-bit, oas %lu-bit (features 0x%08x)\n",
3681 smmu->ias, smmu->oas, smmu->features);
3682 return 0;
3683 }
3684
3685 #ifdef CONFIG_ACPI
3686 static void acpi_smmu_get_options(u32 model, struct arm_smmu_device *smmu)
3687 {
3688 switch (model) {
3689 case ACPI_IORT_SMMU_V3_CAVIUM_CN99XX:
3690 smmu->options |= ARM_SMMU_OPT_PAGE0_REGS_ONLY;
3691 break;
3692 case ACPI_IORT_SMMU_V3_HISILICON_HI161X:
3693 smmu->options |= ARM_SMMU_OPT_SKIP_PREFETCH;
3694 break;
3695 }
3696
3697 dev_notice(smmu->dev, "option mask 0x%x\n", smmu->options);
3698 }
3699
3700 static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
3701 struct arm_smmu_device *smmu)
3702 {
3703 struct acpi_iort_smmu_v3 *iort_smmu;
3704 struct device *dev = smmu->dev;
3705 struct acpi_iort_node *node;
3706
3707 node = *(struct acpi_iort_node **)dev_get_platdata(dev);
3708
3709 /* Retrieve SMMUv3 specific data */
3710 iort_smmu = (struct acpi_iort_smmu_v3 *)node->node_data;
3711
3712 acpi_smmu_get_options(iort_smmu->model, smmu);
3713
3714 if (iort_smmu->flags & ACPI_IORT_SMMU_V3_COHACC_OVERRIDE)
3715 smmu->features |= ARM_SMMU_FEAT_COHERENCY;
3716
3717 return 0;
3718 }
3719 #else
3720 static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
3721 struct arm_smmu_device *smmu)
3722 {
3723 return -ENODEV;
3724 }
3725 #endif
3726
3727 static int arm_smmu_device_dt_probe(struct platform_device *pdev,
3728 struct arm_smmu_device *smmu)
3729 {
3730 struct device *dev = &pdev->dev;
3731 u32 cells;
3732 int ret = -EINVAL;
3733
3734 if (of_property_read_u32(dev->of_node, "#iommu-cells", &cells))
3735 dev_err(dev, "missing #iommu-cells property\n");
3736 else if (cells != 1)
3737 dev_err(dev, "invalid #iommu-cells value (%d)\n", cells);
3738 else
3739 ret = 0;
3740
3741 parse_driver_options(smmu);
3742
3743 if (of_dma_is_coherent(dev->of_node))
3744 smmu->features |= ARM_SMMU_FEAT_COHERENCY;
3745
3746 return ret;
3747 }
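/*
* Added commentary: an illustrative device-tree node that satisfies this
* probe path (addresses and labels are made up; the arm,smmu-v3 binding is
* the authoritative schema):
*
*	iommu@2b400000 {
*		compatible = "arm,smmu-v3";
*		reg = <0x0 0x2b400000 0x0 0x20000>;
*		#iommu-cells = <1>;
*		dma-coherent;
*		... interrupts/interrupt-names per the binding, e.g.
*		    "eventq", "gerror", "priq" or a single "combined" ...
*	};
*/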
3748
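/*
* Added commentary: a standard SMMUv3 exposes two 64K register pages;
* implementations with the Cavium CN99xx page-1 erratum only provide usable
* registers in page 0, so their MMIO resource is half the usual size.
*/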
3749 static unsigned long arm_smmu_resource_size(struct arm_smmu_device *smmu)
3750 {
3751 if (smmu->options & ARM_SMMU_OPT_PAGE0_REGS_ONLY)
3752 return SZ_64K;
3753 else
3754 return SZ_128K;
3755 }
3756
3757 static void __iomem *arm_smmu_ioremap(struct device *dev, resource_size_t start,
3758 resource_size_t size)
3759 {
3760 struct resource res = DEFINE_RES_MEM(start, size);
3761
3762 return devm_ioremap_resource(dev, &res);
3763 }
3764
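/*
* Added commentary: stream IDs named by IORT RMR nodes may already have DMA
* in flight (set up by firmware) when this driver takes over, so install
* bypass STEs for them up front instead of letting the default abort
* behaviour kill those transactions.
*/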
3765 static void arm_smmu_rmr_install_bypass_ste(struct arm_smmu_device *smmu)
3766 {
3767 struct list_head rmr_list;
3768 struct iommu_resv_region *e;
3769
3770 INIT_LIST_HEAD(&rmr_list);
3771 iort_get_rmr_sids(dev_fwnode(smmu->dev), &rmr_list);
3772
3773 list_for_each_entry(e, &rmr_list, list) {
3774 __le64 *step;
3775 struct iommu_iort_rmr_data *rmr;
3776 int ret, i;
3777
3778 rmr = container_of(e, struct iommu_iort_rmr_data, rr);
3779 for (i = 0; i < rmr->num_sids; i++) {
3780 ret = arm_smmu_init_sid_strtab(smmu, rmr->sids[i]);
3781 if (ret) {
3782 dev_err(smmu->dev, "RMR SID(0x%x) bypass failed\n",
3783 rmr->sids[i]);
3784 continue;
3785 }
3786
3787 step = arm_smmu_get_step_for_sid(smmu, rmr->sids[i]);
3788 arm_smmu_init_bypass_stes(step, 1, true);
3789 }
3790 }
3791
3792 iort_put_rmr_sids(dev_fwnode(smmu->dev), &rmr_list);
3793 }
3794
3795 static int arm_smmu_device_probe(struct platform_device *pdev)
3796 {
3797 int irq, ret;
3798 struct resource *res;
3799 resource_size_t ioaddr;
3800 struct arm_smmu_device *smmu;
3801 struct device *dev = &pdev->dev;
3802 bool bypass;
3803
3804 smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
3805 if (!smmu)
3806 return -ENOMEM;
3807 smmu->dev = dev;
3808
3809 if (dev->of_node) {
3810 ret = arm_smmu_device_dt_probe(pdev, smmu);
3811 } else {
3812 ret = arm_smmu_device_acpi_probe(pdev, smmu);
3813 if (ret == -ENODEV)
3814 return ret;
3815 }
3816
3817 /* Set bypass mode according to the firmware probing result */
3818 bypass = !!ret;
3819
3820 /* Base address */
3821 res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
3822 if (!res)
3823 return -EINVAL;
3824 if (resource_size(res) < arm_smmu_resource_size(smmu)) {
3825 dev_err(dev, "MMIO region too small (%pr)\n", res);
3826 return -EINVAL;
3827 }
3828 ioaddr = res->start;
3829
3830 /*
3831 * Don't map the IMPLEMENTATION DEFINED regions, since they may contain
3832 * the PMCG registers which are reserved by the PMU driver.
3833 */
3834 smmu->base = arm_smmu_ioremap(dev, ioaddr, ARM_SMMU_REG_SZ);
3835 if (IS_ERR(smmu->base))
3836 return PTR_ERR(smmu->base);
3837
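/*
* Added commentary: when only register page 0 is present (PAGE0_REGS_ONLY),
* alias page1 to the base mapping so the queue-register accessors can use a
* single pointer either way; on the affected Cavium parts those registers
* live in page 0.
*/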
3838 if (arm_smmu_resource_size(smmu) > SZ_64K) {
3839 smmu->page1 = arm_smmu_ioremap(dev, ioaddr + SZ_64K,
3840 ARM_SMMU_REG_SZ);
3841 if (IS_ERR(smmu->page1))
3842 return PTR_ERR(smmu->page1);
3843 } else {
3844 smmu->page1 = smmu->base;
3845 }
3846
3847 /* Interrupt lines */
3848
3849 irq = platform_get_irq_byname_optional(pdev, "combined");
3850 if (irq > 0) {
3851 smmu->combined_irq = irq;
3852 } else {
3853 irq = platform_get_irq_byname_optional(pdev, "eventq");
3854 if (irq > 0)
3855 smmu->evtq.q.irq = irq;
3856
3857 irq = platform_get_irq_byname_optional(pdev, "priq");
3858 if (irq > 0)
3859 smmu->priq.q.irq = irq;
3860
3861 irq = platform_get_irq_byname_optional(pdev, "gerror");
3862 if (irq > 0)
3863 smmu->gerr_irq = irq;
3864 }
3865 /* Probe the hardware */
3866 ret = arm_smmu_device_hw_probe(smmu);
3867 if (ret)
3868 return ret;
3869
3870 /* Initialise in-memory data structures */
3871 ret = arm_smmu_init_structures(smmu);
3872 if (ret)
3873 return ret;
3874
3875 /* Record our private device structure */
3876 platform_set_drvdata(pdev, smmu);
3877
3878 /* Check for RMRs and install bypass STEs if any */
3879 arm_smmu_rmr_install_bypass_ste(smmu);
3880
3881 /* Reset the device */
3882 ret = arm_smmu_device_reset(smmu, bypass);
3883 if (ret)
3884 return ret;
3885
3886 /* And we're up. Go go go! */
3887 ret = iommu_device_sysfs_add(&smmu->iommu, dev, NULL,
3888 "smmu3.%pa", &ioaddr);
3889 if (ret)
3890 return ret;
3891
3892 ret = iommu_device_register(&smmu->iommu, &arm_smmu_ops, dev);
3893 if (ret) {
3894 dev_err(dev, "Failed to register iommu\n");
3895 iommu_device_sysfs_remove(&smmu->iommu);
3896 return ret;
3897 }
3898
3899 return 0;
3900 }
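/*
* Added commentary: the smmu structure and its MMIO mappings above are
* devm-managed, which is why the error paths in probe can simply return
* without unwinding those allocations by hand.
*/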
3901
3902 static void arm_smmu_device_remove(struct platform_device *pdev)
3903 {
3904 struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
3905
3906 iommu_device_unregister(&smmu->iommu);
3907 iommu_device_sysfs_remove(&smmu->iommu);
3908 arm_smmu_device_disable(smmu);
3909 iopf_queue_free(smmu->evtq.iopf);
3910 ida_destroy(&smmu->vmid_map);
3911 }
3912
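/*
* Added commentary: quiesce the SMMU on shutdown (e.g. ahead of kexec or
* reboot) so the next kernel does not start with translation state it knows
* nothing about.
*/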
3913 static void arm_smmu_device_shutdown(struct platform_device *pdev)
3914 {
3915 struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
3916
3917 arm_smmu_device_disable(smmu);
3918 }
3919
3920 static const struct of_device_id arm_smmu_of_match[] = {
3921 { .compatible = "arm,smmu-v3", },
3922 { },
3923 };
3924 MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
3925
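/*
* Added commentary: wrap platform_driver_unregister() so that any
* outstanding SVA mmu_notifier callbacks are flushed before the module text
* can go away.
*/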
3926 static void arm_smmu_driver_unregister(struct platform_driver *drv)
3927 {
3928 arm_smmu_sva_notifier_synchronize();
3929 platform_driver_unregister(drv);
3930 }
3931
3932 static struct platform_driver arm_smmu_driver = {
3933 .driver = {
3934 .name = "arm-smmu-v3",
3935 .of_match_table = arm_smmu_of_match,
3936 .suppress_bind_attrs = true,
3937 },
3938 .probe = arm_smmu_device_probe,
3939 .remove_new = arm_smmu_device_remove,
3940 .shutdown = arm_smmu_device_shutdown,
3941 };
3942 module_driver(arm_smmu_driver, platform_driver_register,
3943 arm_smmu_driver_unregister);
3944
3945 MODULE_DESCRIPTION("IOMMU API for ARM architected SMMUv3 implementations");
3946 MODULE_AUTHOR("Will Deacon <will@kernel.org>");
3947 MODULE_ALIAS("platform:arm-smmu-v3");
3948 MODULE_LICENSE("GPL v2");