1 From: Suresh Siddha <suresh.b.siddha@intel.com>
2 Subject: x64, x2apic/intr-remap: Intel vt-d, IOMMU code reorganization
3 References: fate #303948 and fate #303984
4 Patch-Mainline: queued for .28
5 Commit-ID: e61d98d8dad0048619bb138b0ff996422ffae53b
7 Signed-off-by: Thomas Renninger <trenn@suse.de>
9 code reorganization of the generic Intel vt-d parsing related routines and linux
10 iommu routines specific to Intel vt-d.
12 drivers/pci/dmar.c now contains the generic vt-d parsing related routines
13 drivers/pci/intel_iommu.c contains the iommu routines specific to vt-d
15 Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
16 Cc: akpm@linux-foundation.org
17 Cc: arjan@linux.intel.com
18 Cc: andi@firstfloor.org
19 Cc: ebiederm@xmission.com
20 Cc: jbarnes@virtuousgeek.org
22 Signed-off-by: Ingo Molnar <mingo@elte.hu>
25 drivers/pci/dma_remapping.h | 155 +++++++++++++++++++++++++++++++++++++++++
26 drivers/pci/dmar.c | 90 ++++++++++++++++++++++++
27 drivers/pci/intel-iommu.c | 92 ++----------------------
28 drivers/pci/intel-iommu.h | 163 +++-----------------------------------------
29 4 files changed, 264 insertions(+), 236 deletions(-)
31 --- a/drivers/pci/dmar.c
32 +++ b/drivers/pci/dmar.c
34 * Author: Shaohua Li <shaohua.li@intel.com>
35 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
37 - * This file implements early detection/parsing of DMA Remapping Devices
38 + * This file implements early detection/parsing of Remapping Devices
39 * reported to OS through BIOS via DMA remapping reporting (DMAR) ACPI
42 + * These routines are used by both DMA-remapping and Interrupt-remapping
45 #include <linux/pci.h>
46 @@ -300,6 +302,37 @@ parse_dmar_table(void)
50 +int dmar_pci_device_match(struct pci_dev *devices[], int cnt,
51 + struct pci_dev *dev)
56 + for (index = 0; index < cnt; index++)
57 + if (dev == devices[index])
60 + /* Check our parent */
61 + dev = dev->bus->self;
67 +struct dmar_drhd_unit *
68 +dmar_find_matched_drhd_unit(struct pci_dev *dev)
70 + struct dmar_drhd_unit *drhd = NULL;
72 + list_for_each_entry(drhd, &dmar_drhd_units, list) {
73 + if (drhd->include_all || dmar_pci_device_match(drhd->devices,
74 + drhd->devices_cnt, dev))
82 int __init dmar_table_init(void)
84 @@ -341,3 +374,58 @@ int __init early_dmar_detect(void)
86 return (ACPI_SUCCESS(status) ? 1 : 0);
89 +struct intel_iommu *alloc_iommu(struct intel_iommu *iommu,
90 + struct dmar_drhd_unit *drhd)
95 + iommu->reg = ioremap(drhd->reg_base_addr, PAGE_SIZE_4K);
97 + printk(KERN_ERR "IOMMU: can't map the region\n");
100 + iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
101 + iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);
103 + /* the registers might be more than one page */
104 + map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
105 + cap_max_fault_reg_offset(iommu->cap));
106 + map_size = PAGE_ALIGN_4K(map_size);
107 + if (map_size > PAGE_SIZE_4K) {
108 + iounmap(iommu->reg);
109 + iommu->reg = ioremap(drhd->reg_base_addr, map_size);
111 + printk(KERN_ERR "IOMMU: can't map the region\n");
116 + ver = readl(iommu->reg + DMAR_VER_REG);
117 + pr_debug("IOMMU %llx: ver %d:%d cap %llx ecap %llx\n",
118 + drhd->reg_base_addr, DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver),
119 + iommu->cap, iommu->ecap);
121 + spin_lock_init(&iommu->register_lock);
123 + drhd->iommu = iommu;
130 +void free_iommu(struct intel_iommu *iommu)
136 + free_dmar_iommu(iommu);
140 + iounmap(iommu->reg);
144 +++ b/drivers/pci/dma_remapping.h
146 +#ifndef _DMA_REMAPPING_H
147 +#define _DMA_REMAPPING_H
150 + * We need a fixed PAGE_SIZE of 4K irrespective of
151 + * arch PAGE_SIZE for IOMMU page tables.
153 +#define PAGE_SHIFT_4K (12)
154 +#define PAGE_SIZE_4K (1UL << PAGE_SHIFT_4K)
155 +#define PAGE_MASK_4K (((u64)-1) << PAGE_SHIFT_4K)
156 +#define PAGE_ALIGN_4K(addr) (((addr) + PAGE_SIZE_4K - 1) & PAGE_MASK_4K)
158 +#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT_4K)
159 +#define DMA_32BIT_PFN IOVA_PFN(DMA_32BIT_MASK)
160 +#define DMA_64BIT_PFN IOVA_PFN(DMA_64BIT_MASK)
166 + * 12-63: Context Ptr (12 - (haw-1))
173 +#define ROOT_ENTRY_NR (PAGE_SIZE_4K/sizeof(struct root_entry))
174 +static inline bool root_present(struct root_entry *root)
176 + return (root->val & 1);
178 +static inline void set_root_present(struct root_entry *root)
182 +static inline void set_root_value(struct root_entry *root, unsigned long value)
184 + root->val |= value & PAGE_MASK_4K;
187 +struct context_entry;
188 +static inline struct context_entry *
189 +get_context_addr_from_root(struct root_entry *root)
191 + return (struct context_entry *)
192 + (root_present(root)?phys_to_virt(
193 + root->val & PAGE_MASK_4K):
200 + * 1: fault processing disable
201 + * 2-3: translation type
202 + * 12-63: address space root
204 + * 0-2: address width
208 +struct context_entry {
212 +#define context_present(c) ((c).lo & 1)
213 +#define context_fault_disable(c) (((c).lo >> 1) & 1)
214 +#define context_translation_type(c) (((c).lo >> 2) & 3)
215 +#define context_address_root(c) ((c).lo & PAGE_MASK_4K)
216 +#define context_address_width(c) ((c).hi & 7)
217 +#define context_domain_id(c) (((c).hi >> 8) & ((1 << 16) - 1))
219 +#define context_set_present(c) do {(c).lo |= 1;} while (0)
220 +#define context_set_fault_enable(c) \
221 + do {(c).lo &= (((u64)-1) << 2) | 1;} while (0)
222 +#define context_set_translation_type(c, val) \
224 + (c).lo &= (((u64)-1) << 4) | 3; \
225 + (c).lo |= ((val) & 3) << 2; \
227 +#define CONTEXT_TT_MULTI_LEVEL 0
228 +#define context_set_address_root(c, val) \
229 + do {(c).lo |= (val) & PAGE_MASK_4K;} while (0)
230 +#define context_set_address_width(c, val) do {(c).hi |= (val) & 7;} while (0)
231 +#define context_set_domain_id(c, val) \
232 + do {(c).hi |= ((val) & ((1 << 16) - 1)) << 8;} while (0)
233 +#define context_clear_entry(c) do {(c).lo = 0; (c).hi = 0;} while (0)
241 + * 12-63: Host physical address
246 +#define dma_clear_pte(p) do {(p).val = 0;} while (0)
248 +#define DMA_PTE_READ (1)
249 +#define DMA_PTE_WRITE (2)
251 +#define dma_set_pte_readable(p) do {(p).val |= DMA_PTE_READ;} while (0)
252 +#define dma_set_pte_writable(p) do {(p).val |= DMA_PTE_WRITE;} while (0)
253 +#define dma_set_pte_prot(p, prot) \
254 + do {(p).val = ((p).val & ~3) | ((prot) & 3); } while (0)
255 +#define dma_pte_addr(p) ((p).val & PAGE_MASK_4K)
256 +#define dma_set_pte_addr(p, addr) do {\
257 + (p).val |= ((addr) & PAGE_MASK_4K); } while (0)
258 +#define dma_pte_present(p) (((p).val & 3) != 0)
262 +struct dmar_domain {
263 + int id; /* domain id */
264 + struct intel_iommu *iommu; /* back pointer to owning iommu */
266 + struct list_head devices; /* all devices' list */
267 + struct iova_domain iovad; /* iova's that belong to this domain */
269 + struct dma_pte *pgd; /* virtual address */
270 + spinlock_t mapping_lock; /* page table lock */
271 + int gaw; /* max guest address width */
273 + /* adjusted guest address width, 0 is level 2 30-bit */
276 +#define DOMAIN_FLAG_MULTIPLE_DEVICES 1
280 +/* PCI domain-device relationship */
281 +struct device_domain_info {
282 + struct list_head link; /* link to domain siblings */
283 + struct list_head global; /* link to global list */
284 + u8 bus; /* PCI bus number */
285 + u8 devfn; /* PCI devfn number */
286 + struct pci_dev *dev; /* it's NULL for PCIE-to-PCI bridge */
287 + struct dmar_domain *domain; /* pointer to domain */
290 +extern int init_dmars(void);
291 +extern void free_dmar_iommu(struct intel_iommu *iommu);
293 +#ifndef CONFIG_DMAR_GFX_WA
294 +static inline void iommu_prepare_gfx_mapping(void)
298 +#endif /* !CONFIG_DMAR_GFX_WA */
301 --- a/drivers/pci/intel-iommu.c
302 +++ b/drivers/pci/intel-iommu.c
303 @@ -992,6 +992,8 @@ static int iommu_init_domains(struct int
307 + spin_lock_init(&iommu->lock);
310 * if Caching mode is set, then invalid translations are tagged
311 * with domainid 0. Hence we need to pre-allocate it.
312 @@ -1000,62 +1002,15 @@ static int iommu_init_domains(struct int
313 set_bit(0, iommu->domain_ids);
316 -static struct intel_iommu *alloc_iommu(struct intel_iommu *iommu,
317 - struct dmar_drhd_unit *drhd)
323 - iommu->reg = ioremap(drhd->reg_base_addr, PAGE_SIZE_4K);
325 - printk(KERN_ERR "IOMMU: can't map the region\n");
328 - iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
329 - iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);
331 - /* the registers might be more than one page */
332 - map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
333 - cap_max_fault_reg_offset(iommu->cap));
334 - map_size = PAGE_ALIGN_4K(map_size);
335 - if (map_size > PAGE_SIZE_4K) {
336 - iounmap(iommu->reg);
337 - iommu->reg = ioremap(drhd->reg_base_addr, map_size);
339 - printk(KERN_ERR "IOMMU: can't map the region\n");
344 - ver = readl(iommu->reg + DMAR_VER_REG);
345 - pr_debug("IOMMU %llx: ver %d:%d cap %llx ecap %llx\n",
346 - drhd->reg_base_addr, DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver),
347 - iommu->cap, iommu->ecap);
348 - ret = iommu_init_domains(iommu);
351 - spin_lock_init(&iommu->lock);
352 - spin_lock_init(&iommu->register_lock);
354 - drhd->iommu = iommu;
357 - iounmap(iommu->reg);
363 static void domain_exit(struct dmar_domain *domain);
364 -static void free_iommu(struct intel_iommu *iommu)
366 +void free_dmar_iommu(struct intel_iommu *iommu)
368 struct dmar_domain *domain;
374 i = find_first_bit(iommu->domain_ids, cap_ndoms(iommu->cap));
375 for (; i < cap_ndoms(iommu->cap); ) {
376 domain = iommu->domains[i];
377 @@ -1080,10 +1035,6 @@ static void free_iommu(struct intel_iomm
379 /* free context mapping */
380 free_context_table(iommu);
383 - iounmap(iommu->reg);
387 static struct dmar_domain * iommu_alloc_domain(struct intel_iommu *iommu)
388 @@ -1428,37 +1379,6 @@ find_domain(struct pci_dev *pdev)
392 -static int dmar_pci_device_match(struct pci_dev *devices[], int cnt,
393 - struct pci_dev *dev)
398 - for (index = 0; index < cnt; index++)
399 - if (dev == devices[index])
402 - /* Check our parent */
403 - dev = dev->bus->self;
409 -static struct dmar_drhd_unit *
410 -dmar_find_matched_drhd_unit(struct pci_dev *dev)
412 - struct dmar_drhd_unit *drhd = NULL;
414 - list_for_each_entry(drhd, &dmar_drhd_units, list) {
415 - if (drhd->include_all || dmar_pci_device_match(drhd->devices,
416 - drhd->devices_cnt, dev))
423 /* domain is initialized */
424 static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
426 @@ -1765,6 +1685,10 @@ int __init init_dmars(void)
430 + ret = iommu_init_domains(iommu);
436 * we could share the same root & context tables
437 --- a/drivers/pci/intel-iommu.h
438 +++ b/drivers/pci/intel-iommu.h
440 #include <linux/sysdev.h>
442 #include <linux/io.h>
445 - * We need a fixed PAGE_SIZE of 4K irrespective of
446 - * arch PAGE_SIZE for IOMMU page tables.
448 -#define PAGE_SHIFT_4K (12)
449 -#define PAGE_SIZE_4K (1UL << PAGE_SHIFT_4K)
450 -#define PAGE_MASK_4K (((u64)-1) << PAGE_SHIFT_4K)
451 -#define PAGE_ALIGN_4K(addr) (((addr) + PAGE_SIZE_4K - 1) & PAGE_MASK_4K)
453 -#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT_4K)
454 -#define DMA_32BIT_PFN IOVA_PFN(DMA_32BIT_MASK)
455 -#define DMA_64BIT_PFN IOVA_PFN(DMA_64BIT_MASK)
456 +#include "dma_remapping.h"
459 * Intel IOMMU register specification per version 1.0 public spec.
460 @@ -187,158 +175,31 @@ static inline void dmar_writeq(void __io
461 #define dma_frcd_source_id(c) (c & 0xffff)
462 #define dma_frcd_page_addr(d) (d & (((u64)-1) << 12)) /* low 64 bit */
467 - * 12-63: Context Ptr (12 - (haw-1))
474 -#define ROOT_ENTRY_NR (PAGE_SIZE_4K/sizeof(struct root_entry))
475 -static inline bool root_present(struct root_entry *root)
477 - return (root->val & 1);
479 -static inline void set_root_present(struct root_entry *root)
483 -static inline void set_root_value(struct root_entry *root, unsigned long value)
485 - root->val |= value & PAGE_MASK_4K;
488 -struct context_entry;
489 -static inline struct context_entry *
490 -get_context_addr_from_root(struct root_entry *root)
492 - return (struct context_entry *)
493 - (root_present(root)?phys_to_virt(
494 - root->val & PAGE_MASK_4K):
501 - * 1: fault processing disable
502 - * 2-3: translation type
503 - * 12-63: address space root
505 - * 0-2: address width
509 -struct context_entry {
513 -#define context_present(c) ((c).lo & 1)
514 -#define context_fault_disable(c) (((c).lo >> 1) & 1)
515 -#define context_translation_type(c) (((c).lo >> 2) & 3)
516 -#define context_address_root(c) ((c).lo & PAGE_MASK_4K)
517 -#define context_address_width(c) ((c).hi & 7)
518 -#define context_domain_id(c) (((c).hi >> 8) & ((1 << 16) - 1))
520 -#define context_set_present(c) do {(c).lo |= 1;} while (0)
521 -#define context_set_fault_enable(c) \
522 - do {(c).lo &= (((u64)-1) << 2) | 1;} while (0)
523 -#define context_set_translation_type(c, val) \
525 - (c).lo &= (((u64)-1) << 4) | 3; \
526 - (c).lo |= ((val) & 3) << 2; \
528 -#define CONTEXT_TT_MULTI_LEVEL 0
529 -#define context_set_address_root(c, val) \
530 - do {(c).lo |= (val) & PAGE_MASK_4K;} while (0)
531 -#define context_set_address_width(c, val) do {(c).hi |= (val) & 7;} while (0)
532 -#define context_set_domain_id(c, val) \
533 - do {(c).hi |= ((val) & ((1 << 16) - 1)) << 8;} while (0)
534 -#define context_clear_entry(c) do {(c).lo = 0; (c).hi = 0;} while (0)
542 - * 12-63: Host physcial address
547 -#define dma_clear_pte(p) do {(p).val = 0;} while (0)
549 -#define DMA_PTE_READ (1)
550 -#define DMA_PTE_WRITE (2)
552 -#define dma_set_pte_readable(p) do {(p).val |= DMA_PTE_READ;} while (0)
553 -#define dma_set_pte_writable(p) do {(p).val |= DMA_PTE_WRITE;} while (0)
554 -#define dma_set_pte_prot(p, prot) \
555 - do {(p).val = ((p).val & ~3) | ((prot) & 3); } while (0)
556 -#define dma_pte_addr(p) ((p).val & PAGE_MASK_4K)
557 -#define dma_set_pte_addr(p, addr) do {\
558 - (p).val |= ((addr) & PAGE_MASK_4K); } while (0)
559 -#define dma_pte_present(p) (((p).val & 3) != 0)
563 -struct dmar_domain {
564 - int id; /* domain id */
565 - struct intel_iommu *iommu; /* back pointer to owning iommu */
567 - struct list_head devices; /* all devices' list */
568 - struct iova_domain iovad; /* iova's that belong to this domain */
570 - struct dma_pte *pgd; /* virtual address */
571 - spinlock_t mapping_lock; /* page table lock */
572 - int gaw; /* max guest address width */
574 - /* adjusted guest address width, 0 is level 2 30-bit */
577 -#define DOMAIN_FLAG_MULTIPLE_DEVICES 1
581 -/* PCI domain-device relationship */
582 -struct device_domain_info {
583 - struct list_head link; /* link to domain siblings */
584 - struct list_head global; /* link to global list */
585 - u8 bus; /* PCI bus numer */
586 - u8 devfn; /* PCI devfn number */
587 - struct pci_dev *dev; /* it's NULL for PCIE-to-PCI bridge */
588 - struct dmar_domain *domain; /* pointer to domain */
591 -extern int init_dmars(void);
594 void __iomem *reg; /* Pointer to hardware regs, virtual addr */
597 - unsigned long *domain_ids; /* bitmap of domains */
598 - struct dmar_domain **domains; /* ptr to domains */
600 u32 gcmd; /* Holds TE, EAFL. Don't need SRTP, SFL, WBF */
601 - spinlock_t lock; /* protect context, domain ids */
602 spinlock_t register_lock; /* protect register handling */
605 + unsigned long *domain_ids; /* bitmap of domains */
606 + struct dmar_domain **domains; /* ptr to domains */
607 + spinlock_t lock; /* protect context, domain ids */
608 struct root_entry *root_entry; /* virtual address */
611 unsigned char name[7]; /* Device Name */
612 struct msi_msg saved_msg;
613 struct sys_device sysdev;
617 -#ifndef CONFIG_DMAR_GFX_WA
618 -static inline void iommu_prepare_gfx_mapping(void)
622 -#endif /* !CONFIG_DMAR_GFX_WA */
623 +extern struct dmar_drhd_unit * dmar_find_matched_drhd_unit(struct pci_dev *dev);
625 +extern struct intel_iommu *alloc_iommu(struct intel_iommu *iommu,
626 + struct dmar_drhd_unit *drhd);
627 +extern void free_iommu(struct intel_iommu *iommu);