git.ipfire.org - people/arne_f/kernel.git - blame: drivers/iommu/intel-iommu.c
iommu/vt-d: Use correct domain id to flush virtual machine domains
ba395927 1/*
ea8ea460 2 * Copyright © 2006-2014 Intel Corporation.
ba395927
KA
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
ea8ea460
DW
13 * Authors: David Woodhouse <dwmw2@infradead.org>,
14 * Ashok Raj <ashok.raj@intel.com>,
15 * Shaohua Li <shaohua.li@intel.com>,
16 * Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>,
17 * Fenghua Yu <fenghua.yu@intel.com>
ba395927
KA
18 */
19
20#include <linux/init.h>
21#include <linux/bitmap.h>
5e0d2a6f 22#include <linux/debugfs.h>
54485c30 23#include <linux/export.h>
ba395927
KA
24#include <linux/slab.h>
25#include <linux/irq.h>
26#include <linux/interrupt.h>
ba395927
KA
27#include <linux/spinlock.h>
28#include <linux/pci.h>
29#include <linux/dmar.h>
30#include <linux/dma-mapping.h>
31#include <linux/mempool.h>
75f05569 32#include <linux/memory.h>
5e0d2a6f 33#include <linux/timer.h>
38717946 34#include <linux/iova.h>
5d450806 35#include <linux/iommu.h>
38717946 36#include <linux/intel-iommu.h>
134fac3f 37#include <linux/syscore_ops.h>
69575d38 38#include <linux/tboot.h>
adb2fe02 39#include <linux/dmi.h>
5cdede24 40#include <linux/pci-ats.h>
0ee332c1 41#include <linux/memblock.h>
36746436 42#include <linux/dma-contiguous.h>
8a8f422d 43#include <asm/irq_remapping.h>
ba395927 44#include <asm/cacheflush.h>
46a7fa27 45#include <asm/iommu.h>
ba395927 46
078e1ee2
JR
47#include "irq_remapping.h"
48
5b6985ce
FY
49#define ROOT_SIZE VTD_PAGE_SIZE
50#define CONTEXT_SIZE VTD_PAGE_SIZE
51
ba395927
KA
52#define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
53#define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
e0fc7e0b 54#define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)
ba395927
KA
55
56#define IOAPIC_RANGE_START (0xfee00000)
57#define IOAPIC_RANGE_END (0xfeefffff)
58#define IOVA_START_ADDR (0x1000)
59
60#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
61
4ed0d3e6 62#define MAX_AGAW_WIDTH 64
5c645b35 63#define MAX_AGAW_PFN_WIDTH (MAX_AGAW_WIDTH - VTD_PAGE_SHIFT)
4ed0d3e6 64
2ebe3151
DW
65#define __DOMAIN_MAX_PFN(gaw) ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
66#define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1)
67
68/* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
69 to match. That way, we can use 'unsigned long' for PFNs with impunity. */
70#define DOMAIN_MAX_PFN(gaw) ((unsigned long) min_t(uint64_t, \
71 __DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
72#define DOMAIN_MAX_ADDR(gaw) (((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)
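/*
 * Illustrative sketch only (hypothetical helper, not part of the driver):
 * what the limits above work out to for the default 48-bit guest address
 * width on a 64-bit build.  DOMAIN_MAX_PFN(48) is 2^36 - 1 (the index of
 * the last 4KiB page) and DOMAIN_MAX_ADDR(48) is 2^48 - VTD_PAGE_SIZE.
 */
static inline void domain_limit_example(void)
{
	unsigned long max_pfn = DOMAIN_MAX_PFN(DEFAULT_DOMAIN_ADDRESS_WIDTH);
	uint64_t max_addr = DOMAIN_MAX_ADDR(DEFAULT_DOMAIN_ADDRESS_WIDTH);

	/* max_pfn == (1UL << 36) - 1, max_addr == (1ULL << 48) - VTD_PAGE_SIZE */
	(void)max_pfn;
	(void)max_addr;
}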
ba395927 73
f27be03b 74#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
284901a9 75#define DMA_32BIT_PFN IOVA_PFN(DMA_BIT_MASK(32))
6a35528a 76#define DMA_64BIT_PFN IOVA_PFN(DMA_BIT_MASK(64))
5e0d2a6f 77
df08cdc7
AM
78/* page table handling */
79#define LEVEL_STRIDE (9)
80#define LEVEL_MASK (((u64)1 << LEVEL_STRIDE) - 1)
81
6d1c56a9
OBC
82/*
 83 * This bitmap is used to advertise the page sizes our hardware supports
84 * to the IOMMU core, which will then use this information to split
85 * physically contiguous memory regions it is mapping into page sizes
86 * that we support.
87 *
88 * Traditionally the IOMMU core just handed us the mappings directly,
89 * after making sure the size is an order of a 4KiB page and that the
90 * mapping has natural alignment.
91 *
92 * To retain this behavior, we currently advertise that we support
93 * all page sizes that are an order of 4KiB.
94 *
95 * If at some point we'd like to utilize the IOMMU core's new behavior,
96 * we could change this to advertise the real page sizes we support.
97 */
98#define INTEL_IOMMU_PGSIZES (~0xFFFUL)
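/*
 * Illustrative sketch only (hypothetical helper): bit k of the bitmap
 * above advertises a page size of 1UL << k bytes, so ~0xFFFUL sets every
 * bit from 12 upwards and claims every power-of-two size >= 4KiB.
 */
static inline bool intel_iommu_pgsize_advertised(unsigned long size)
{
	/* true for 4KiB, 8KiB, 2MiB, 1GiB, ...; false for anything < 4KiB */
	return size && !(size & (size - 1)) && (size & INTEL_IOMMU_PGSIZES);
}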
99
df08cdc7
AM
100static inline int agaw_to_level(int agaw)
101{
102 return agaw + 2;
103}
104
105static inline int agaw_to_width(int agaw)
106{
5c645b35 107 return min_t(int, 30 + agaw * LEVEL_STRIDE, MAX_AGAW_WIDTH);
df08cdc7
AM
108}
109
110static inline int width_to_agaw(int width)
111{
5c645b35 112 return DIV_ROUND_UP(width - 30, LEVEL_STRIDE);
df08cdc7
AM
113}
114
115static inline unsigned int level_to_offset_bits(int level)
116{
117 return (level - 1) * LEVEL_STRIDE;
118}
119
120static inline int pfn_level_offset(unsigned long pfn, int level)
121{
122 return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
123}
124
125static inline unsigned long level_mask(int level)
126{
127 return -1UL << level_to_offset_bits(level);
128}
129
130static inline unsigned long level_size(int level)
131{
132 return 1UL << level_to_offset_bits(level);
133}
134
135static inline unsigned long align_to_level(unsigned long pfn, int level)
136{
137 return (pfn + level_size(level) - 1) & level_mask(level);
138}
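/*
 * Illustrative sketch only (hypothetical helper): each page-table level
 * consumes LEVEL_STRIDE (9) bits of the DMA pfn.  For a 4-level table
 * (agaw 2, 48-bit address width) the walk below extracts the same
 * per-level indices that the page-table walkers further down use.
 */
static inline void pfn_index_example(unsigned long pfn)
{
	int level;

	for (level = 4; level >= 1; level--) {
		int index = pfn_level_offset(pfn, level);

		/* index == (pfn >> ((level - 1) * 9)) & 0x1ff */
		(void)index;
	}
}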
fd18de50 139
6dd9a7c7
YS
140static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
141{
5c645b35 142 return 1 << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH);
6dd9a7c7
YS
143}
144
dd4e8319
DW
145/* VT-d pages must always be _smaller_ than MM pages. Otherwise things
146 are never going to work. */
147static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
148{
149 return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
150}
151
152static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
153{
154 return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
155}
156static inline unsigned long page_to_dma_pfn(struct page *pg)
157{
158 return mm_to_dma_pfn(page_to_pfn(pg));
159}
160static inline unsigned long virt_to_dma_pfn(void *p)
161{
162 return page_to_dma_pfn(virt_to_page(p));
163}
164
d9630fe9
WH
165/* global iommu list, set NULL for ignored DMAR units */
166static struct intel_iommu **g_iommus;
167
e0fc7e0b 168static void __init check_tylersburg_isoch(void);
9af88143
DW
169static int rwbf_quirk;
170
b779260b
JC
171/*
 172 * set to 1 to panic the kernel if VT-d can't be enabled successfully
173 * (used when kernel is launched w/ TXT)
174 */
175static int force_on = 0;
176
46b08e1a
MM
177/*
178 * 0: Present
179 * 1-11: Reserved
180 * 12-63: Context Ptr (12 - (haw-1))
181 * 64-127: Reserved
182 */
183struct root_entry {
184 u64 val;
185 u64 rsvd1;
186};
187#define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
188static inline bool root_present(struct root_entry *root)
189{
190 return (root->val & 1);
191}
192static inline void set_root_present(struct root_entry *root)
193{
194 root->val |= 1;
195}
196static inline void set_root_value(struct root_entry *root, unsigned long value)
197{
198 root->val |= value & VTD_PAGE_MASK;
199}
200
201static inline struct context_entry *
202get_context_addr_from_root(struct root_entry *root)
203{
204 return (struct context_entry *)
205 (root_present(root)?phys_to_virt(
206 root->val & VTD_PAGE_MASK) :
207 NULL);
208}
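/*
 * Illustrative sketch only (hypothetical helper): a root entry is just
 * the 4KiB-aligned physical address of that bus's context table in the
 * low u64, with bit 0 as the present bit.
 */
static inline void root_entry_pack_example(struct root_entry *root,
					   void *context_table)
{
	root->val = 0;
	root->rsvd1 = 0;
	set_root_value(root, virt_to_phys(context_table));	/* bits 12-63 */
	set_root_present(root);					/* bit 0 */
	/* get_context_addr_from_root(root) now returns context_table */
}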
209
7a8fc25e
MM
210/*
211 * low 64 bits:
212 * 0: present
213 * 1: fault processing disable
214 * 2-3: translation type
215 * 12-63: address space root
216 * high 64 bits:
217 * 0-2: address width
218 * 3-6: aval
219 * 8-23: domain id
220 */
221struct context_entry {
222 u64 lo;
223 u64 hi;
224};
c07e7d21
MM
225
226static inline bool context_present(struct context_entry *context)
227{
228 return (context->lo & 1);
229}
230static inline void context_set_present(struct context_entry *context)
231{
232 context->lo |= 1;
233}
234
235static inline void context_set_fault_enable(struct context_entry *context)
236{
237 context->lo &= (((u64)-1) << 2) | 1;
238}
239
c07e7d21
MM
240static inline void context_set_translation_type(struct context_entry *context,
241 unsigned long value)
242{
243 context->lo &= (((u64)-1) << 4) | 3;
244 context->lo |= (value & 3) << 2;
245}
246
247static inline void context_set_address_root(struct context_entry *context,
248 unsigned long value)
249{
250 context->lo |= value & VTD_PAGE_MASK;
251}
252
253static inline void context_set_address_width(struct context_entry *context,
254 unsigned long value)
255{
256 context->hi |= value & 7;
257}
258
259static inline void context_set_domain_id(struct context_entry *context,
260 unsigned long value)
261{
262 context->hi |= (value & ((1 << 16) - 1)) << 8;
263}
264
265static inline void context_clear_entry(struct context_entry *context)
266{
267 context->lo = 0;
268 context->hi = 0;
269}
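/*
 * Illustrative sketch only (hypothetical helper): how the setters above
 * combine into a complete context entry.  CONTEXT_TT_MULTI_LEVEL with an
 * address width of 2 (agaw 2) describes a 4-level, 48-bit page table
 * rooted at pgd_phys.
 */
static inline void context_entry_pack_example(struct context_entry *ce,
					      unsigned long pgd_phys,
					      unsigned long domain_id)
{
	context_clear_entry(ce);
	context_set_translation_type(ce, CONTEXT_TT_MULTI_LEVEL); /* lo bits 2-3 */
	context_set_address_root(ce, pgd_phys);			   /* lo bits 12-63 */
	context_set_address_width(ce, 2);			   /* hi bits 0-2 */
	context_set_domain_id(ce, domain_id);			   /* hi bits 8-23 */
	context_set_fault_enable(ce);				   /* clear lo bit 1 */
	context_set_present(ce);				   /* lo bit 0 */
}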
7a8fc25e 270
622ba12a
MM
271/*
272 * 0: readable
273 * 1: writable
274 * 2-6: reserved
275 * 7: super page
9cf06697
SY
276 * 8-10: available
277 * 11: snoop behavior
622ba12a
MM
 278 * 12-63: Host physical address
279 */
280struct dma_pte {
281 u64 val;
282};
622ba12a 283
19c239ce
MM
284static inline void dma_clear_pte(struct dma_pte *pte)
285{
286 pte->val = 0;
287}
288
19c239ce
MM
289static inline u64 dma_pte_addr(struct dma_pte *pte)
290{
c85994e4
DW
291#ifdef CONFIG_64BIT
292 return pte->val & VTD_PAGE_MASK;
293#else
294 /* Must have a full atomic 64-bit read */
1a8bd481 295 return __cmpxchg64(&pte->val, 0ULL, 0ULL) & VTD_PAGE_MASK;
c85994e4 296#endif
19c239ce
MM
297}
298
19c239ce
MM
299static inline bool dma_pte_present(struct dma_pte *pte)
300{
301 return (pte->val & 3) != 0;
302}
622ba12a 303
4399c8bf
AK
304static inline bool dma_pte_superpage(struct dma_pte *pte)
305{
c3c75eb7 306 return (pte->val & DMA_PTE_LARGE_PAGE);
4399c8bf
AK
307}
308
75e6bf96
DW
309static inline int first_pte_in_page(struct dma_pte *pte)
310{
311 return !((unsigned long)pte & ~VTD_PAGE_MASK);
312}
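/*
 * Illustrative sketch only (hypothetical helper): a leaf PTE is the
 * 4KiB-aligned host physical address plus permission bits, so setting
 * one up and reading it back with the helpers above looks like this.
 */
static inline u64 dma_pte_pack_example(struct dma_pte *pte, u64 host_phys)
{
	pte->val = (host_phys & VTD_PAGE_MASK) | DMA_PTE_READ | DMA_PTE_WRITE;

	/* dma_pte_present(pte) is now true, dma_pte_superpage(pte) false */
	return dma_pte_addr(pte);	/* == host_phys & VTD_PAGE_MASK */
}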
313
2c2e2c38
FY
314/*
 315 * This domain is a static identity mapping domain.
 316 * 1. This domain creates a static 1:1 mapping to all usable memory.
 317 * 2. It maps to each iommu if successful.
 318 * 3. Each iommu maps to this domain if successful.
319 */
19943b0e
DW
320static struct dmar_domain *si_domain;
321static int hw_pass_through = 1;
2c2e2c38 322
3b5410e7 323/* devices under the same p2p bridge are owned in one domain */
cdc7b837 324#define DOMAIN_FLAG_P2P_MULTIPLE_DEVICES (1 << 0)
3b5410e7 325
1ce28feb
WH
 326/* domain represents a virtual machine; more than one device
 327 * across iommus may be owned by one domain, e.g. a kvm guest.
328 */
329#define DOMAIN_FLAG_VIRTUAL_MACHINE (1 << 1)
330
2c2e2c38
FY
 331/* si_domain contains multiple devices */
332#define DOMAIN_FLAG_STATIC_IDENTITY (1 << 2)
333
1b198bb0
MT
334/* define the limit of IOMMUs supported in each domain */
335#ifdef CONFIG_X86
336# define IOMMU_UNITS_SUPPORTED MAX_IO_APICS
337#else
338# define IOMMU_UNITS_SUPPORTED 64
339#endif
340
99126f7c
MM
341struct dmar_domain {
342 int id; /* domain id */
4c923d47 343 int nid; /* node id */
1b198bb0
MT
344 DECLARE_BITMAP(iommu_bmp, IOMMU_UNITS_SUPPORTED);
 345 /* bitmap of iommus this domain uses */
99126f7c
MM
346
347 struct list_head devices; /* all devices' list */
348 struct iova_domain iovad; /* iova's that belong to this domain */
349
350 struct dma_pte *pgd; /* virtual address */
99126f7c
MM
351 int gaw; /* max guest address width */
352
353 /* adjusted guest address width, 0 is level 2 30-bit */
354 int agaw;
355
3b5410e7 356 int flags; /* flags to find out type of domain */
8e604097
WH
357
358 int iommu_coherency;/* indicate coherency of iommu access */
58c610bd 359 int iommu_snooping; /* indicate snooping control feature*/
c7151a8d 360 int iommu_count; /* reference count of iommu */
6dd9a7c7
YS
361 int iommu_superpage;/* Level of superpages supported:
362 0 == 4KiB (no superpages), 1 == 2MiB,
363 2 == 1GiB, 3 == 512GiB, 4 == 1TiB */
c7151a8d 364 spinlock_t iommu_lock; /* protect iommu set in domain */
fe40f1e0 365 u64 max_addr; /* maximum mapped address */
99126f7c
MM
366};
367
a647dacb
MM
368/* PCI domain-device relationship */
369struct device_domain_info {
370 struct list_head link; /* link to domain siblings */
371 struct list_head global; /* link to global list */
276dbf99 372 u8 bus; /* PCI bus number */
a647dacb 373 u8 devfn; /* PCI devfn number */
0bcb3e28 374 struct device *dev; /* it's NULL for PCIe-to-PCI bridge */
93a23a72 375 struct intel_iommu *iommu; /* IOMMU used by this device */
a647dacb
MM
376 struct dmar_domain *domain; /* pointer to domain */
377};
378
b94e4117
JL
379struct dmar_rmrr_unit {
380 struct list_head list; /* list of rmrr units */
381 struct acpi_dmar_header *hdr; /* ACPI header */
382 u64 base_address; /* reserved base address*/
383 u64 end_address; /* reserved end address */
832bd858 384 struct dmar_dev_scope *devices; /* target devices */
b94e4117
JL
385 int devices_cnt; /* target device count */
386};
387
388struct dmar_atsr_unit {
389 struct list_head list; /* list of ATSR units */
390 struct acpi_dmar_header *hdr; /* ACPI header */
832bd858 391 struct dmar_dev_scope *devices; /* target devices */
b94e4117
JL
392 int devices_cnt; /* target device count */
393 u8 include_all:1; /* include all ports */
394};
395
396static LIST_HEAD(dmar_atsr_units);
397static LIST_HEAD(dmar_rmrr_units);
398
399#define for_each_rmrr_units(rmrr) \
400 list_for_each_entry(rmrr, &dmar_rmrr_units, list)
401
5e0d2a6f 402static void flush_unmaps_timeout(unsigned long data);
403
b707cb02 404static DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0);
5e0d2a6f 405
80b20dd8 406#define HIGH_WATER_MARK 250
407struct deferred_flush_tables {
408 int next;
409 struct iova *iova[HIGH_WATER_MARK];
410 struct dmar_domain *domain[HIGH_WATER_MARK];
ea8ea460 411 struct page *freelist[HIGH_WATER_MARK];
80b20dd8 412};
413
414static struct deferred_flush_tables *deferred_flush;
415
5e0d2a6f 416/* number of IOMMUs in the system; used to size g_iommus and the flush tables */
5e0d2a6f 417static int g_num_of_iommus;
418
419static DEFINE_SPINLOCK(async_umap_flush_lock);
420static LIST_HEAD(unmaps_to_do);
421
422static int timer_on;
423static long list_size;
5e0d2a6f 424
92d03cc8 425static void domain_exit(struct dmar_domain *domain);
ba395927 426static void domain_remove_dev_info(struct dmar_domain *domain);
b94e4117 427static void domain_remove_one_dev_info(struct dmar_domain *domain,
bf9c9eda 428 struct device *dev);
92d03cc8 429static void iommu_detach_dependent_devices(struct intel_iommu *iommu,
0bcb3e28 430 struct device *dev);
ba395927 431
d3f13810 432#ifdef CONFIG_INTEL_IOMMU_DEFAULT_ON
0cd5c3c8
KM
433int dmar_disabled = 0;
434#else
435int dmar_disabled = 1;
d3f13810 436#endif /*CONFIG_INTEL_IOMMU_DEFAULT_ON*/
0cd5c3c8 437
8bc1f85c
ED
438int intel_iommu_enabled = 0;
439EXPORT_SYMBOL_GPL(intel_iommu_enabled);
440
2d9e667e 441static int dmar_map_gfx = 1;
7d3b03ce 442static int dmar_forcedac;
5e0d2a6f 443static int intel_iommu_strict;
6dd9a7c7 444static int intel_iommu_superpage = 1;
ba395927 445
c0771df8
DW
446int intel_iommu_gfx_mapped;
447EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);
448
ba395927
KA
449#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
450static DEFINE_SPINLOCK(device_domain_lock);
451static LIST_HEAD(device_domain_list);
452
b22f6434 453static const struct iommu_ops intel_iommu_ops;
a8bcbb0d 454
ba395927
KA
455static int __init intel_iommu_setup(char *str)
456{
457 if (!str)
458 return -EINVAL;
459 while (*str) {
0cd5c3c8
KM
460 if (!strncmp(str, "on", 2)) {
461 dmar_disabled = 0;
462 printk(KERN_INFO "Intel-IOMMU: enabled\n");
463 } else if (!strncmp(str, "off", 3)) {
ba395927 464 dmar_disabled = 1;
0cd5c3c8 465 printk(KERN_INFO "Intel-IOMMU: disabled\n");
ba395927
KA
466 } else if (!strncmp(str, "igfx_off", 8)) {
467 dmar_map_gfx = 0;
468 printk(KERN_INFO
469 "Intel-IOMMU: disable GFX device mapping\n");
7d3b03ce 470 } else if (!strncmp(str, "forcedac", 8)) {
5e0d2a6f 471 printk(KERN_INFO
7d3b03ce
KA
472 "Intel-IOMMU: Forcing DAC for PCI devices\n");
473 dmar_forcedac = 1;
5e0d2a6f 474 } else if (!strncmp(str, "strict", 6)) {
475 printk(KERN_INFO
476 "Intel-IOMMU: disable batched IOTLB flush\n");
477 intel_iommu_strict = 1;
6dd9a7c7
YS
478 } else if (!strncmp(str, "sp_off", 6)) {
479 printk(KERN_INFO
480 "Intel-IOMMU: disable supported super page\n");
481 intel_iommu_superpage = 0;
ba395927
KA
482 }
483
484 str += strcspn(str, ",");
485 while (*str == ',')
486 str++;
487 }
488 return 0;
489}
490__setup("intel_iommu=", intel_iommu_setup);
491
492static struct kmem_cache *iommu_domain_cache;
493static struct kmem_cache *iommu_devinfo_cache;
494static struct kmem_cache *iommu_iova_cache;
495
4c923d47 496static inline void *alloc_pgtable_page(int node)
eb3fa7cb 497{
4c923d47
SS
498 struct page *page;
499 void *vaddr = NULL;
eb3fa7cb 500
4c923d47
SS
501 page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
502 if (page)
503 vaddr = page_address(page);
eb3fa7cb 504 return vaddr;
ba395927
KA
505}
506
507static inline void free_pgtable_page(void *vaddr)
508{
509 free_page((unsigned long)vaddr);
510}
511
512static inline void *alloc_domain_mem(void)
513{
354bb65e 514 return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC);
ba395927
KA
515}
516
38717946 517static void free_domain_mem(void *vaddr)
ba395927
KA
518{
519 kmem_cache_free(iommu_domain_cache, vaddr);
520}
521
522static inline void * alloc_devinfo_mem(void)
523{
354bb65e 524 return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC);
ba395927
KA
525}
526
527static inline void free_devinfo_mem(void *vaddr)
528{
529 kmem_cache_free(iommu_devinfo_cache, vaddr);
530}
531
532struct iova *alloc_iova_mem(void)
533{
354bb65e 534 return kmem_cache_alloc(iommu_iova_cache, GFP_ATOMIC);
ba395927
KA
535}
536
537void free_iova_mem(struct iova *iova)
538{
539 kmem_cache_free(iommu_iova_cache, iova);
540}
541
1b573683 542
4ed0d3e6 543static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
1b573683
WH
544{
545 unsigned long sagaw;
546 int agaw = -1;
547
548 sagaw = cap_sagaw(iommu->cap);
4ed0d3e6 549 for (agaw = width_to_agaw(max_gaw);
1b573683
WH
550 agaw >= 0; agaw--) {
551 if (test_bit(agaw, &sagaw))
552 break;
553 }
554
555 return agaw;
556}
557
4ed0d3e6
FY
558/*
559 * Calculate max SAGAW for each iommu.
560 */
561int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
562{
563 return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
564}
565
566/*
 567 * Calculate agaw for each iommu.
 568 * "SAGAW" may differ across iommus, so use a default agaw and fall
 569 * back to a smaller supported agaw for iommus that don't support the default.
570 */
571int iommu_calculate_agaw(struct intel_iommu *iommu)
572{
573 return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
574}
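/*
 * Illustrative sketch only (hypothetical helper): the loop above probes
 * the SAGAW capability bitmap downwards from the requested width.  With
 * sagaw == 0x4 (only 4-level, 48-bit tables supported), a 48-bit request
 * yields agaw 2 while a 39-bit request returns -1.
 */
static inline int sagaw_probe_example(unsigned long sagaw, int max_gaw)
{
	int agaw;

	for (agaw = width_to_agaw(max_gaw); agaw >= 0; agaw--)
		if (test_bit(agaw, &sagaw))
			break;

	/* sagaw_probe_example(0x4, 48) == 2; sagaw_probe_example(0x4, 39) == -1 */
	return agaw;
}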
575
2c2e2c38 576/* This function only returns a single iommu in a domain */
8c11e798
WH
577static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
578{
579 int iommu_id;
580
2c2e2c38 581 /* si_domain and vm domain should not get here. */
1ce28feb 582 BUG_ON(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE);
2c2e2c38 583 BUG_ON(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY);
1ce28feb 584
1b198bb0 585 iommu_id = find_first_bit(domain->iommu_bmp, g_num_of_iommus);
8c11e798
WH
586 if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
587 return NULL;
588
589 return g_iommus[iommu_id];
590}
591
8e604097
WH
592static void domain_update_iommu_coherency(struct dmar_domain *domain)
593{
d0501960
DW
594 struct dmar_drhd_unit *drhd;
595 struct intel_iommu *iommu;
596 int i, found = 0;
2e12bc29 597
d0501960 598 domain->iommu_coherency = 1;
8e604097 599
1b198bb0 600 for_each_set_bit(i, domain->iommu_bmp, g_num_of_iommus) {
d0501960 601 found = 1;
8e604097
WH
602 if (!ecap_coherent(g_iommus[i]->ecap)) {
603 domain->iommu_coherency = 0;
604 break;
605 }
8e604097 606 }
d0501960
DW
607 if (found)
608 return;
609
610 /* No hardware attached; use lowest common denominator */
611 rcu_read_lock();
612 for_each_active_iommu(iommu, drhd) {
613 if (!ecap_coherent(iommu->ecap)) {
614 domain->iommu_coherency = 0;
615 break;
616 }
617 }
618 rcu_read_unlock();
8e604097
WH
619}
620
58c610bd
SY
621static void domain_update_iommu_snooping(struct dmar_domain *domain)
622{
623 int i;
624
625 domain->iommu_snooping = 1;
626
1b198bb0 627 for_each_set_bit(i, domain->iommu_bmp, g_num_of_iommus) {
58c610bd
SY
628 if (!ecap_sc_support(g_iommus[i]->ecap)) {
629 domain->iommu_snooping = 0;
630 break;
631 }
58c610bd
SY
632 }
633}
634
6dd9a7c7
YS
635static void domain_update_iommu_superpage(struct dmar_domain *domain)
636{
8140a95d
AK
637 struct dmar_drhd_unit *drhd;
638 struct intel_iommu *iommu = NULL;
639 int mask = 0xf;
6dd9a7c7
YS
640
641 if (!intel_iommu_superpage) {
642 domain->iommu_superpage = 0;
643 return;
644 }
645
8140a95d 646 /* set iommu_superpage to the smallest common denominator */
0e242612 647 rcu_read_lock();
8140a95d
AK
648 for_each_active_iommu(iommu, drhd) {
649 mask &= cap_super_page_val(iommu->cap);
6dd9a7c7
YS
650 if (!mask) {
651 break;
652 }
653 }
0e242612
JL
654 rcu_read_unlock();
655
6dd9a7c7
YS
656 domain->iommu_superpage = fls(mask);
657}
658
58c610bd
SY
659/* Some capabilities may be different across iommus */
660static void domain_update_iommu_cap(struct dmar_domain *domain)
661{
662 domain_update_iommu_coherency(domain);
663 domain_update_iommu_snooping(domain);
6dd9a7c7 664 domain_update_iommu_superpage(domain);
58c610bd
SY
665}
666
156baca8 667static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn)
c7151a8d
WH
668{
669 struct dmar_drhd_unit *drhd = NULL;
b683b230 670 struct intel_iommu *iommu;
156baca8
DW
671 struct device *tmp;
672 struct pci_dev *ptmp, *pdev = NULL;
aa4d066a 673 u16 segment = 0;
c7151a8d
WH
674 int i;
675
156baca8
DW
676 if (dev_is_pci(dev)) {
677 pdev = to_pci_dev(dev);
678 segment = pci_domain_nr(pdev->bus);
679 } else if (ACPI_COMPANION(dev))
680 dev = &ACPI_COMPANION(dev)->dev;
681
0e242612 682 rcu_read_lock();
b683b230 683 for_each_active_iommu(iommu, drhd) {
156baca8 684 if (pdev && segment != drhd->segment)
276dbf99 685 continue;
c7151a8d 686
b683b230 687 for_each_active_dev_scope(drhd->devices,
156baca8
DW
688 drhd->devices_cnt, i, tmp) {
689 if (tmp == dev) {
690 *bus = drhd->devices[i].bus;
691 *devfn = drhd->devices[i].devfn;
b683b230 692 goto out;
156baca8
DW
693 }
694
695 if (!pdev || !dev_is_pci(tmp))
696 continue;
697
698 ptmp = to_pci_dev(tmp);
699 if (ptmp->subordinate &&
700 ptmp->subordinate->number <= pdev->bus->number &&
701 ptmp->subordinate->busn_res.end >= pdev->bus->number)
702 goto got_pdev;
924b6231 703 }
c7151a8d 704
156baca8
DW
705 if (pdev && drhd->include_all) {
706 got_pdev:
707 *bus = pdev->bus->number;
708 *devfn = pdev->devfn;
b683b230 709 goto out;
156baca8 710 }
c7151a8d 711 }
b683b230 712 iommu = NULL;
156baca8 713 out:
0e242612 714 rcu_read_unlock();
c7151a8d 715
b683b230 716 return iommu;
c7151a8d
WH
717}
718
5331fe6f
WH
719static void domain_flush_cache(struct dmar_domain *domain,
720 void *addr, int size)
721{
722 if (!domain->iommu_coherency)
723 clflush_cache_range(addr, size);
724}
725
ba395927
KA
726/* Gets context entry for a given bus and devfn */
727static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
728 u8 bus, u8 devfn)
729{
730 struct root_entry *root;
731 struct context_entry *context;
732 unsigned long phy_addr;
733 unsigned long flags;
734
735 spin_lock_irqsave(&iommu->lock, flags);
736 root = &iommu->root_entry[bus];
737 context = get_context_addr_from_root(root);
738 if (!context) {
4c923d47
SS
739 context = (struct context_entry *)
740 alloc_pgtable_page(iommu->node);
ba395927
KA
741 if (!context) {
742 spin_unlock_irqrestore(&iommu->lock, flags);
743 return NULL;
744 }
5b6985ce 745 __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
ba395927
KA
746 phy_addr = virt_to_phys((void *)context);
747 set_root_value(root, phy_addr);
748 set_root_present(root);
749 __iommu_flush_cache(iommu, root, sizeof(*root));
750 }
751 spin_unlock_irqrestore(&iommu->lock, flags);
752 return &context[devfn];
753}
754
755static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
756{
757 struct root_entry *root;
758 struct context_entry *context;
759 int ret;
760 unsigned long flags;
761
762 spin_lock_irqsave(&iommu->lock, flags);
763 root = &iommu->root_entry[bus];
764 context = get_context_addr_from_root(root);
765 if (!context) {
766 ret = 0;
767 goto out;
768 }
c07e7d21 769 ret = context_present(&context[devfn]);
ba395927
KA
770out:
771 spin_unlock_irqrestore(&iommu->lock, flags);
772 return ret;
773}
774
775static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
776{
777 struct root_entry *root;
778 struct context_entry *context;
779 unsigned long flags;
780
781 spin_lock_irqsave(&iommu->lock, flags);
782 root = &iommu->root_entry[bus];
783 context = get_context_addr_from_root(root);
784 if (context) {
c07e7d21 785 context_clear_entry(&context[devfn]);
ba395927
KA
786 __iommu_flush_cache(iommu, &context[devfn], \
787 sizeof(*context));
788 }
789 spin_unlock_irqrestore(&iommu->lock, flags);
790}
791
792static void free_context_table(struct intel_iommu *iommu)
793{
794 struct root_entry *root;
795 int i;
796 unsigned long flags;
797 struct context_entry *context;
798
799 spin_lock_irqsave(&iommu->lock, flags);
800 if (!iommu->root_entry) {
801 goto out;
802 }
803 for (i = 0; i < ROOT_ENTRY_NR; i++) {
804 root = &iommu->root_entry[i];
805 context = get_context_addr_from_root(root);
806 if (context)
807 free_pgtable_page(context);
808 }
809 free_pgtable_page(iommu->root_entry);
810 iommu->root_entry = NULL;
811out:
812 spin_unlock_irqrestore(&iommu->lock, flags);
813}
814
b026fd28 815static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
5cf0a76f 816 unsigned long pfn, int *target_level)
ba395927 817{
b026fd28 818 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
ba395927
KA
819 struct dma_pte *parent, *pte = NULL;
820 int level = agaw_to_level(domain->agaw);
4399c8bf 821 int offset;
ba395927
KA
822
823 BUG_ON(!domain->pgd);
f9423606
JS
824
825 if (addr_width < BITS_PER_LONG && pfn >> addr_width)
826 /* Address beyond IOMMU's addressing capabilities. */
827 return NULL;
828
ba395927
KA
829 parent = domain->pgd;
830
5cf0a76f 831 while (1) {
ba395927
KA
832 void *tmp_page;
833
b026fd28 834 offset = pfn_level_offset(pfn, level);
ba395927 835 pte = &parent[offset];
5cf0a76f 836 if (!*target_level && (dma_pte_superpage(pte) || !dma_pte_present(pte)))
6dd9a7c7 837 break;
5cf0a76f 838 if (level == *target_level)
ba395927
KA
839 break;
840
19c239ce 841 if (!dma_pte_present(pte)) {
c85994e4
DW
842 uint64_t pteval;
843
4c923d47 844 tmp_page = alloc_pgtable_page(domain->nid);
ba395927 845
206a73c1 846 if (!tmp_page)
ba395927 847 return NULL;
206a73c1 848
c85994e4 849 domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
64de5af0 850 pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
effad4b5 851 if (cmpxchg64(&pte->val, 0ULL, pteval))
c85994e4
DW
852 /* Someone else set it while we were thinking; use theirs. */
853 free_pgtable_page(tmp_page);
effad4b5 854 else
c85994e4 855 domain_flush_cache(domain, pte, sizeof(*pte));
ba395927 856 }
5cf0a76f
DW
857 if (level == 1)
858 break;
859
19c239ce 860 parent = phys_to_virt(dma_pte_addr(pte));
ba395927
KA
861 level--;
862 }
863
5cf0a76f
DW
864 if (!*target_level)
865 *target_level = level;
866
ba395927
KA
867 return pte;
868}
869
6dd9a7c7 870
ba395927 871/* return address's pte at specific level */
90dcfb5e
DW
872static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
873 unsigned long pfn,
6dd9a7c7 874 int level, int *large_page)
ba395927
KA
875{
876 struct dma_pte *parent, *pte = NULL;
877 int total = agaw_to_level(domain->agaw);
878 int offset;
879
880 parent = domain->pgd;
881 while (level <= total) {
90dcfb5e 882 offset = pfn_level_offset(pfn, total);
ba395927
KA
883 pte = &parent[offset];
884 if (level == total)
885 return pte;
886
6dd9a7c7
YS
887 if (!dma_pte_present(pte)) {
888 *large_page = total;
ba395927 889 break;
6dd9a7c7
YS
890 }
891
e16922af 892 if (dma_pte_superpage(pte)) {
6dd9a7c7
YS
893 *large_page = total;
894 return pte;
895 }
896
19c239ce 897 parent = phys_to_virt(dma_pte_addr(pte));
ba395927
KA
898 total--;
899 }
900 return NULL;
901}
902
ba395927 903/* clear last-level ptes; a tlb flush should follow */
5cf0a76f 904static void dma_pte_clear_range(struct dmar_domain *domain,
595badf5
DW
905 unsigned long start_pfn,
906 unsigned long last_pfn)
ba395927 907{
04b18e65 908 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
6dd9a7c7 909 unsigned int large_page = 1;
310a5ab9 910 struct dma_pte *first_pte, *pte;
66eae846 911
04b18e65 912 BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
595badf5 913 BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
59c36286 914 BUG_ON(start_pfn > last_pfn);
ba395927 915
04b18e65 916 /* we don't need lock here; nobody else touches the iova range */
59c36286 917 do {
6dd9a7c7
YS
918 large_page = 1;
919 first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
310a5ab9 920 if (!pte) {
6dd9a7c7 921 start_pfn = align_to_level(start_pfn + 1, large_page + 1);
310a5ab9
DW
922 continue;
923 }
6dd9a7c7 924 do {
310a5ab9 925 dma_clear_pte(pte);
6dd9a7c7 926 start_pfn += lvl_to_nr_pages(large_page);
310a5ab9 927 pte++;
75e6bf96
DW
928 } while (start_pfn <= last_pfn && !first_pte_in_page(pte));
929
310a5ab9
DW
930 domain_flush_cache(domain, first_pte,
931 (void *)pte - (void *)first_pte);
59c36286
DW
932
933 } while (start_pfn && start_pfn <= last_pfn);
ba395927
KA
934}
935
3269ee0b
AW
936static void dma_pte_free_level(struct dmar_domain *domain, int level,
937 struct dma_pte *pte, unsigned long pfn,
938 unsigned long start_pfn, unsigned long last_pfn)
939{
940 pfn = max(start_pfn, pfn);
941 pte = &pte[pfn_level_offset(pfn, level)];
942
943 do {
944 unsigned long level_pfn;
945 struct dma_pte *level_pte;
946
947 if (!dma_pte_present(pte) || dma_pte_superpage(pte))
948 goto next;
949
950 level_pfn = pfn & level_mask(level - 1);
951 level_pte = phys_to_virt(dma_pte_addr(pte));
952
953 if (level > 2)
954 dma_pte_free_level(domain, level - 1, level_pte,
955 level_pfn, start_pfn, last_pfn);
956
957 /* If range covers entire pagetable, free it */
958 if (!(start_pfn > level_pfn ||
08336fd2 959 last_pfn < level_pfn + level_size(level) - 1)) {
3269ee0b
AW
960 dma_clear_pte(pte);
961 domain_flush_cache(domain, pte, sizeof(*pte));
962 free_pgtable_page(level_pte);
963 }
964next:
965 pfn += level_size(level);
966 } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
967}
968
ba395927
KA
969/* free page table pages. last level pte should already be cleared */
970static void dma_pte_free_pagetable(struct dmar_domain *domain,
d794dc9b
DW
971 unsigned long start_pfn,
972 unsigned long last_pfn)
ba395927 973{
6660c63a 974 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
ba395927 975
6660c63a
DW
976 BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
977 BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
59c36286 978 BUG_ON(start_pfn > last_pfn);
ba395927 979
f3a0a52f 980 /* We don't need lock here; nobody else touches the iova range */
3269ee0b
AW
981 dma_pte_free_level(domain, agaw_to_level(domain->agaw),
982 domain->pgd, 0, start_pfn, last_pfn);
6660c63a 983
ba395927 984 /* free pgd */
d794dc9b 985 if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
ba395927
KA
986 free_pgtable_page(domain->pgd);
987 domain->pgd = NULL;
988 }
989}
990
ea8ea460
DW
991/* When a page at a given level is being unlinked from its parent, we don't
992 need to *modify* it at all. All we need to do is make a list of all the
993 pages which can be freed just as soon as we've flushed the IOTLB and we
994 know the hardware page-walk will no longer touch them.
995 The 'pte' argument is the *parent* PTE, pointing to the page that is to
996 be freed. */
997static struct page *dma_pte_list_pagetables(struct dmar_domain *domain,
998 int level, struct dma_pte *pte,
999 struct page *freelist)
1000{
1001 struct page *pg;
1002
1003 pg = pfn_to_page(dma_pte_addr(pte) >> PAGE_SHIFT);
1004 pg->freelist = freelist;
1005 freelist = pg;
1006
1007 if (level == 1)
1008 return freelist;
1009
adeb2590
JL
1010 pte = page_address(pg);
1011 do {
ea8ea460
DW
1012 if (dma_pte_present(pte) && !dma_pte_superpage(pte))
1013 freelist = dma_pte_list_pagetables(domain, level - 1,
1014 pte, freelist);
adeb2590
JL
1015 pte++;
1016 } while (!first_pte_in_page(pte));
ea8ea460
DW
1017
1018 return freelist;
1019}
1020
1021static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level,
1022 struct dma_pte *pte, unsigned long pfn,
1023 unsigned long start_pfn,
1024 unsigned long last_pfn,
1025 struct page *freelist)
1026{
1027 struct dma_pte *first_pte = NULL, *last_pte = NULL;
1028
1029 pfn = max(start_pfn, pfn);
1030 pte = &pte[pfn_level_offset(pfn, level)];
1031
1032 do {
1033 unsigned long level_pfn;
1034
1035 if (!dma_pte_present(pte))
1036 goto next;
1037
1038 level_pfn = pfn & level_mask(level);
1039
1040 /* If range covers entire pagetable, free it */
1041 if (start_pfn <= level_pfn &&
1042 last_pfn >= level_pfn + level_size(level) - 1) {
 1043 /* These subordinate page tables are going away entirely. Don't
1044 bother to clear them; we're just going to *free* them. */
1045 if (level > 1 && !dma_pte_superpage(pte))
1046 freelist = dma_pte_list_pagetables(domain, level - 1, pte, freelist);
1047
1048 dma_clear_pte(pte);
1049 if (!first_pte)
1050 first_pte = pte;
1051 last_pte = pte;
1052 } else if (level > 1) {
1053 /* Recurse down into a level that isn't *entirely* obsolete */
1054 freelist = dma_pte_clear_level(domain, level - 1,
1055 phys_to_virt(dma_pte_addr(pte)),
1056 level_pfn, start_pfn, last_pfn,
1057 freelist);
1058 }
1059next:
1060 pfn += level_size(level);
1061 } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
1062
1063 if (first_pte)
1064 domain_flush_cache(domain, first_pte,
1065 (void *)++last_pte - (void *)first_pte);
1066
1067 return freelist;
1068}
1069
1070/* We can't just free the pages because the IOMMU may still be walking
1071 the page tables, and may have cached the intermediate levels. The
1072 pages can only be freed after the IOTLB flush has been done. */
1073struct page *domain_unmap(struct dmar_domain *domain,
1074 unsigned long start_pfn,
1075 unsigned long last_pfn)
1076{
1077 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
1078 struct page *freelist = NULL;
1079
1080 BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
1081 BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
1082 BUG_ON(start_pfn > last_pfn);
1083
1084 /* we don't need lock here; nobody else touches the iova range */
1085 freelist = dma_pte_clear_level(domain, agaw_to_level(domain->agaw),
1086 domain->pgd, 0, start_pfn, last_pfn, NULL);
1087
1088 /* free pgd */
1089 if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
1090 struct page *pgd_page = virt_to_page(domain->pgd);
1091 pgd_page->freelist = freelist;
1092 freelist = pgd_page;
1093
1094 domain->pgd = NULL;
1095 }
1096
1097 return freelist;
1098}
1099
1100void dma_free_pagelist(struct page *freelist)
1101{
1102 struct page *pg;
1103
1104 while ((pg = freelist)) {
1105 freelist = pg->freelist;
1106 free_pgtable_page(page_address(pg));
1107 }
1108}
1109
ba395927
KA
1110/* iommu handling */
1111static int iommu_alloc_root_entry(struct intel_iommu *iommu)
1112{
1113 struct root_entry *root;
1114 unsigned long flags;
1115
4c923d47 1116 root = (struct root_entry *)alloc_pgtable_page(iommu->node);
ba395927
KA
1117 if (!root)
1118 return -ENOMEM;
1119
5b6985ce 1120 __iommu_flush_cache(iommu, root, ROOT_SIZE);
ba395927
KA
1121
1122 spin_lock_irqsave(&iommu->lock, flags);
1123 iommu->root_entry = root;
1124 spin_unlock_irqrestore(&iommu->lock, flags);
1125
1126 return 0;
1127}
1128
ba395927
KA
1129static void iommu_set_root_entry(struct intel_iommu *iommu)
1130{
1131 void *addr;
c416daa9 1132 u32 sts;
ba395927
KA
1133 unsigned long flag;
1134
1135 addr = iommu->root_entry;
1136
1f5b3c3f 1137 raw_spin_lock_irqsave(&iommu->register_lock, flag);
ba395927
KA
1138 dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));
1139
c416daa9 1140 writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);
ba395927
KA
1141
1142 /* Make sure hardware complete it */
1143 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1144 readl, (sts & DMA_GSTS_RTPS), sts);
ba395927 1145
1f5b3c3f 1146 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1147}
1148
1149static void iommu_flush_write_buffer(struct intel_iommu *iommu)
1150{
1151 u32 val;
1152 unsigned long flag;
1153
9af88143 1154 if (!rwbf_quirk && !cap_rwbf(iommu->cap))
ba395927 1155 return;
ba395927 1156
1f5b3c3f 1157 raw_spin_lock_irqsave(&iommu->register_lock, flag);
462b60f6 1158 writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);
ba395927
KA
1159
1160 /* Make sure hardware complete it */
1161 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1162 readl, (!(val & DMA_GSTS_WBFS)), val);
ba395927 1163
1f5b3c3f 1164 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1165}
1166
 1167/* return value determines if we need a write buffer flush */
4c25a2c1
DW
1168static void __iommu_flush_context(struct intel_iommu *iommu,
1169 u16 did, u16 source_id, u8 function_mask,
1170 u64 type)
ba395927
KA
1171{
1172 u64 val = 0;
1173 unsigned long flag;
1174
ba395927
KA
1175 switch (type) {
1176 case DMA_CCMD_GLOBAL_INVL:
1177 val = DMA_CCMD_GLOBAL_INVL;
1178 break;
1179 case DMA_CCMD_DOMAIN_INVL:
1180 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
1181 break;
1182 case DMA_CCMD_DEVICE_INVL:
1183 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
1184 | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
1185 break;
1186 default:
1187 BUG();
1188 }
1189 val |= DMA_CCMD_ICC;
1190
1f5b3c3f 1191 raw_spin_lock_irqsave(&iommu->register_lock, flag);
ba395927
KA
1192 dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
1193
1194 /* Make sure hardware complete it */
1195 IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
1196 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
1197
1f5b3c3f 1198 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1199}
1200
ba395927 1201/* return value determines if we need a write buffer flush */
1f0ef2aa
DW
1202static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
1203 u64 addr, unsigned int size_order, u64 type)
ba395927
KA
1204{
1205 int tlb_offset = ecap_iotlb_offset(iommu->ecap);
1206 u64 val = 0, val_iva = 0;
1207 unsigned long flag;
1208
ba395927
KA
1209 switch (type) {
1210 case DMA_TLB_GLOBAL_FLUSH:
 1211 /* global flush doesn't need to set IVA_REG */
1212 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
1213 break;
1214 case DMA_TLB_DSI_FLUSH:
1215 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1216 break;
1217 case DMA_TLB_PSI_FLUSH:
1218 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
ea8ea460 1219 /* IH bit is passed in as part of address */
ba395927
KA
1220 val_iva = size_order | addr;
1221 break;
1222 default:
1223 BUG();
1224 }
1225 /* Note: set drain read/write */
1226#if 0
1227 /*
 1228 * This is probably meant to be extra safe. Looks like we can
1229 * ignore it without any impact.
1230 */
1231 if (cap_read_drain(iommu->cap))
1232 val |= DMA_TLB_READ_DRAIN;
1233#endif
1234 if (cap_write_drain(iommu->cap))
1235 val |= DMA_TLB_WRITE_DRAIN;
1236
1f5b3c3f 1237 raw_spin_lock_irqsave(&iommu->register_lock, flag);
ba395927
KA
1238 /* Note: Only uses first TLB reg currently */
1239 if (val_iva)
1240 dmar_writeq(iommu->reg + tlb_offset, val_iva);
1241 dmar_writeq(iommu->reg + tlb_offset + 8, val);
1242
1243 /* Make sure hardware complete it */
1244 IOMMU_WAIT_OP(iommu, tlb_offset + 8,
1245 dmar_readq, (!(val & DMA_TLB_IVT)), val);
1246
1f5b3c3f 1247 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1248
1249 /* check IOTLB invalidation granularity */
1250 if (DMA_TLB_IAIG(val) == 0)
1251 printk(KERN_ERR"IOMMU: flush IOTLB failed\n");
1252 if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
1253 pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
5b6985ce
FY
1254 (unsigned long long)DMA_TLB_IIRG(type),
1255 (unsigned long long)DMA_TLB_IAIG(val));
ba395927
KA
1256}
1257
64ae892b
DW
1258static struct device_domain_info *
1259iommu_support_dev_iotlb (struct dmar_domain *domain, struct intel_iommu *iommu,
1260 u8 bus, u8 devfn)
93a23a72
YZ
1261{
1262 int found = 0;
1263 unsigned long flags;
1264 struct device_domain_info *info;
0bcb3e28 1265 struct pci_dev *pdev;
93a23a72
YZ
1266
1267 if (!ecap_dev_iotlb_support(iommu->ecap))
1268 return NULL;
1269
1270 if (!iommu->qi)
1271 return NULL;
1272
1273 spin_lock_irqsave(&device_domain_lock, flags);
1274 list_for_each_entry(info, &domain->devices, link)
c3b497c6
JL
1275 if (info->iommu == iommu && info->bus == bus &&
1276 info->devfn == devfn) {
93a23a72
YZ
1277 found = 1;
1278 break;
1279 }
1280 spin_unlock_irqrestore(&device_domain_lock, flags);
1281
0bcb3e28 1282 if (!found || !info->dev || !dev_is_pci(info->dev))
93a23a72
YZ
1283 return NULL;
1284
0bcb3e28
DW
1285 pdev = to_pci_dev(info->dev);
1286
1287 if (!pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ATS))
93a23a72
YZ
1288 return NULL;
1289
0bcb3e28 1290 if (!dmar_find_matched_atsr_unit(pdev))
93a23a72
YZ
1291 return NULL;
1292
93a23a72
YZ
1293 return info;
1294}
1295
1296static void iommu_enable_dev_iotlb(struct device_domain_info *info)
ba395927 1297{
0bcb3e28 1298 if (!info || !dev_is_pci(info->dev))
93a23a72
YZ
1299 return;
1300
0bcb3e28 1301 pci_enable_ats(to_pci_dev(info->dev), VTD_PAGE_SHIFT);
93a23a72
YZ
1302}
1303
1304static void iommu_disable_dev_iotlb(struct device_domain_info *info)
1305{
0bcb3e28
DW
1306 if (!info->dev || !dev_is_pci(info->dev) ||
1307 !pci_ats_enabled(to_pci_dev(info->dev)))
93a23a72
YZ
1308 return;
1309
0bcb3e28 1310 pci_disable_ats(to_pci_dev(info->dev));
93a23a72
YZ
1311}
1312
1313static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
1314 u64 addr, unsigned mask)
1315{
1316 u16 sid, qdep;
1317 unsigned long flags;
1318 struct device_domain_info *info;
1319
1320 spin_lock_irqsave(&device_domain_lock, flags);
1321 list_for_each_entry(info, &domain->devices, link) {
0bcb3e28
DW
1322 struct pci_dev *pdev;
1323 if (!info->dev || !dev_is_pci(info->dev))
1324 continue;
1325
1326 pdev = to_pci_dev(info->dev);
1327 if (!pci_ats_enabled(pdev))
93a23a72
YZ
1328 continue;
1329
1330 sid = info->bus << 8 | info->devfn;
0bcb3e28 1331 qdep = pci_ats_queue_depth(pdev);
93a23a72
YZ
1332 qi_flush_dev_iotlb(info->iommu, sid, qdep, addr, mask);
1333 }
1334 spin_unlock_irqrestore(&device_domain_lock, flags);
1335}
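/*
 * Illustrative sketch only (hypothetical helper): the source-id used for
 * the device-IOTLB invalidation above is simply the PCI requester ID,
 * bus number in the high byte and devfn in the low byte, e.g. device
 * 02:1f.3 becomes sid 0x02fb.
 */
static inline u16 pci_source_id_example(u8 bus, u8 devfn)
{
	return (u16)bus << 8 | devfn;
}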
1336
1f0ef2aa 1337static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
ea8ea460 1338 unsigned long pfn, unsigned int pages, int ih, int map)
ba395927 1339{
9dd2fe89 1340 unsigned int mask = ilog2(__roundup_pow_of_two(pages));
03d6a246 1341 uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
ba395927 1342
ba395927
KA
1343 BUG_ON(pages == 0);
1344
ea8ea460
DW
1345 if (ih)
1346 ih = 1 << 6;
ba395927 1347 /*
9dd2fe89
YZ
 1348 * Fall back to a domain-selective flush if there is no PSI support or the size is
1349 * too big.
ba395927
KA
1350 * PSI requires page size to be 2 ^ x, and the base address is naturally
1351 * aligned to the size
1352 */
9dd2fe89
YZ
1353 if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap))
1354 iommu->flush.flush_iotlb(iommu, did, 0, 0,
1f0ef2aa 1355 DMA_TLB_DSI_FLUSH);
9dd2fe89 1356 else
ea8ea460 1357 iommu->flush.flush_iotlb(iommu, did, addr | ih, mask,
9dd2fe89 1358 DMA_TLB_PSI_FLUSH);
bf92df30
YZ
1359
1360 /*
82653633
NA
1361 * In caching mode, changes of pages from non-present to present require
 1362 * a flush. However, the device IOTLB doesn't need to be flushed in this case.
bf92df30 1363 */
82653633 1364 if (!cap_caching_mode(iommu->cap) || !map)
93a23a72 1365 iommu_flush_dev_iotlb(iommu->domains[did], addr, mask);
ba395927
KA
1366}
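/*
 * Illustrative sketch only (hypothetical helper): page-selective
 * invalidation expresses the range as an address-mask order, so the
 * page count is rounded up to the next power of two; a 3-page request
 * becomes a 4-page (mask 2) flush of a naturally aligned region.
 */
static inline unsigned int psi_mask_example(unsigned long pages)
{
	/* psi_mask_example(1) == 0, (3) == 2, (4) == 2, (5) == 3 */
	return ilog2(__roundup_pow_of_two(pages));
}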
1367
f8bab735 1368static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
1369{
1370 u32 pmen;
1371 unsigned long flags;
1372
1f5b3c3f 1373 raw_spin_lock_irqsave(&iommu->register_lock, flags);
f8bab735 1374 pmen = readl(iommu->reg + DMAR_PMEN_REG);
1375 pmen &= ~DMA_PMEN_EPM;
1376 writel(pmen, iommu->reg + DMAR_PMEN_REG);
1377
1378 /* wait for the protected region status bit to clear */
1379 IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
1380 readl, !(pmen & DMA_PMEN_PRS), pmen);
1381
1f5b3c3f 1382 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
f8bab735 1383}
1384
ba395927
KA
1385static int iommu_enable_translation(struct intel_iommu *iommu)
1386{
1387 u32 sts;
1388 unsigned long flags;
1389
1f5b3c3f 1390 raw_spin_lock_irqsave(&iommu->register_lock, flags);
c416daa9
DW
1391 iommu->gcmd |= DMA_GCMD_TE;
1392 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
ba395927
KA
1393
1394 /* Make sure hardware complete it */
1395 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1396 readl, (sts & DMA_GSTS_TES), sts);
ba395927 1397
1f5b3c3f 1398 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
ba395927
KA
1399 return 0;
1400}
1401
1402static int iommu_disable_translation(struct intel_iommu *iommu)
1403{
1404 u32 sts;
1405 unsigned long flag;
1406
1f5b3c3f 1407 raw_spin_lock_irqsave(&iommu->register_lock, flag);
ba395927
KA
1408 iommu->gcmd &= ~DMA_GCMD_TE;
1409 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1410
1411 /* Make sure hardware complete it */
1412 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1413 readl, (!(sts & DMA_GSTS_TES)), sts);
ba395927 1414
1f5b3c3f 1415 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1416 return 0;
1417}
1418
3460a6d9 1419
ba395927
KA
1420static int iommu_init_domains(struct intel_iommu *iommu)
1421{
1422 unsigned long ndomains;
1423 unsigned long nlongs;
1424
1425 ndomains = cap_ndoms(iommu->cap);
852bdb04
JL
1426 pr_debug("IOMMU%d: Number of Domains supported <%ld>\n",
1427 iommu->seq_id, ndomains);
ba395927
KA
1428 nlongs = BITS_TO_LONGS(ndomains);
1429
94a91b50
DD
1430 spin_lock_init(&iommu->lock);
1431
ba395927
KA
 1432 /* TBD: there might be 64K domains;
 1433 * consider a different allocation scheme for future chips
1434 */
1435 iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1436 if (!iommu->domain_ids) {
852bdb04
JL
1437 pr_err("IOMMU%d: allocating domain id array failed\n",
1438 iommu->seq_id);
ba395927
KA
1439 return -ENOMEM;
1440 }
1441 iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *),
1442 GFP_KERNEL);
1443 if (!iommu->domains) {
852bdb04
JL
1444 pr_err("IOMMU%d: allocating domain array failed\n",
1445 iommu->seq_id);
1446 kfree(iommu->domain_ids);
1447 iommu->domain_ids = NULL;
ba395927
KA
1448 return -ENOMEM;
1449 }
1450
1451 /*
 1452 * If caching mode is set, invalid translations are tagged
 1453 * with domain id 0. Hence we need to pre-allocate it.
1454 */
1455 if (cap_caching_mode(iommu->cap))
1456 set_bit(0, iommu->domain_ids);
1457 return 0;
1458}
ba395927 1459
a868e6b7 1460static void free_dmar_iommu(struct intel_iommu *iommu)
ba395927
KA
1461{
1462 struct dmar_domain *domain;
5ced12af 1463 int i, count;
c7151a8d 1464 unsigned long flags;
ba395927 1465
94a91b50 1466 if ((iommu->domains) && (iommu->domain_ids)) {
a45946ab 1467 for_each_set_bit(i, iommu->domain_ids, cap_ndoms(iommu->cap)) {
a4eaa86c
JL
1468 /*
1469 * Domain id 0 is reserved for invalid translation
1470 * if hardware supports caching mode.
1471 */
1472 if (cap_caching_mode(iommu->cap) && i == 0)
1473 continue;
1474
94a91b50
DD
1475 domain = iommu->domains[i];
1476 clear_bit(i, iommu->domain_ids);
1477
1478 spin_lock_irqsave(&domain->iommu_lock, flags);
5ced12af
JL
1479 count = --domain->iommu_count;
1480 spin_unlock_irqrestore(&domain->iommu_lock, flags);
92d03cc8
JL
1481 if (count == 0)
1482 domain_exit(domain);
5e98c4b1 1483 }
ba395927
KA
1484 }
1485
1486 if (iommu->gcmd & DMA_GCMD_TE)
1487 iommu_disable_translation(iommu);
1488
ba395927
KA
1489 kfree(iommu->domains);
1490 kfree(iommu->domain_ids);
a868e6b7
JL
1491 iommu->domains = NULL;
1492 iommu->domain_ids = NULL;
ba395927 1493
d9630fe9
WH
1494 g_iommus[iommu->seq_id] = NULL;
1495
ba395927
KA
1496 /* free context mapping */
1497 free_context_table(iommu);
ba395927
KA
1498}
1499
92d03cc8 1500static struct dmar_domain *alloc_domain(bool vm)
ba395927 1501{
92d03cc8
JL
 1502 /* domain id for a virtual machine; it won't be set in a context entry */
1503 static atomic_t vm_domid = ATOMIC_INIT(0);
ba395927 1504 struct dmar_domain *domain;
ba395927
KA
1505
1506 domain = alloc_domain_mem();
1507 if (!domain)
1508 return NULL;
1509
4c923d47 1510 domain->nid = -1;
92d03cc8 1511 domain->iommu_count = 0;
1b198bb0 1512 memset(domain->iommu_bmp, 0, sizeof(domain->iommu_bmp));
2c2e2c38 1513 domain->flags = 0;
92d03cc8
JL
1514 spin_lock_init(&domain->iommu_lock);
1515 INIT_LIST_HEAD(&domain->devices);
1516 if (vm) {
1517 domain->id = atomic_inc_return(&vm_domid);
1518 domain->flags = DOMAIN_FLAG_VIRTUAL_MACHINE;
1519 }
2c2e2c38
FY
1520
1521 return domain;
1522}
1523
1524static int iommu_attach_domain(struct dmar_domain *domain,
1525 struct intel_iommu *iommu)
1526{
1527 int num;
1528 unsigned long ndomains;
1529 unsigned long flags;
1530
ba395927
KA
1531 ndomains = cap_ndoms(iommu->cap);
1532
1533 spin_lock_irqsave(&iommu->lock, flags);
2c2e2c38 1534
ba395927
KA
1535 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1536 if (num >= ndomains) {
1537 spin_unlock_irqrestore(&iommu->lock, flags);
ba395927 1538 printk(KERN_ERR "IOMMU: no free domain ids\n");
2c2e2c38 1539 return -ENOMEM;
ba395927
KA
1540 }
1541
ba395927 1542 domain->id = num;
9ebd682e 1543 domain->iommu_count++;
2c2e2c38 1544 set_bit(num, iommu->domain_ids);
1b198bb0 1545 set_bit(iommu->seq_id, domain->iommu_bmp);
ba395927
KA
1546 iommu->domains[num] = domain;
1547 spin_unlock_irqrestore(&iommu->lock, flags);
1548
2c2e2c38 1549 return 0;
ba395927
KA
1550}
1551
2c2e2c38
FY
1552static void iommu_detach_domain(struct dmar_domain *domain,
1553 struct intel_iommu *iommu)
ba395927
KA
1554{
1555 unsigned long flags;
2c2e2c38 1556 int num, ndomains;
ba395927 1557
8c11e798 1558 spin_lock_irqsave(&iommu->lock, flags);
2c2e2c38 1559 ndomains = cap_ndoms(iommu->cap);
a45946ab 1560 for_each_set_bit(num, iommu->domain_ids, ndomains) {
2c2e2c38 1561 if (iommu->domains[num] == domain) {
92d03cc8
JL
1562 clear_bit(num, iommu->domain_ids);
1563 iommu->domains[num] = NULL;
2c2e2c38
FY
1564 break;
1565 }
2c2e2c38 1566 }
8c11e798 1567 spin_unlock_irqrestore(&iommu->lock, flags);
ba395927
KA
1568}
1569
1570static struct iova_domain reserved_iova_list;
8a443df4 1571static struct lock_class_key reserved_rbtree_key;
ba395927 1572
51a63e67 1573static int dmar_init_reserved_ranges(void)
ba395927
KA
1574{
1575 struct pci_dev *pdev = NULL;
1576 struct iova *iova;
1577 int i;
ba395927 1578
f661197e 1579 init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN);
ba395927 1580
8a443df4
MG
1581 lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1582 &reserved_rbtree_key);
1583
ba395927
KA
1584 /* IOAPIC ranges shouldn't be accessed by DMA */
1585 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1586 IOVA_PFN(IOAPIC_RANGE_END));
51a63e67 1587 if (!iova) {
ba395927 1588 printk(KERN_ERR "Reserve IOAPIC range failed\n");
51a63e67
JC
1589 return -ENODEV;
1590 }
ba395927
KA
1591
1592 /* Reserve all PCI MMIO to avoid peer-to-peer access */
1593 for_each_pci_dev(pdev) {
1594 struct resource *r;
1595
1596 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1597 r = &pdev->resource[i];
1598 if (!r->flags || !(r->flags & IORESOURCE_MEM))
1599 continue;
1a4a4551
DW
1600 iova = reserve_iova(&reserved_iova_list,
1601 IOVA_PFN(r->start),
1602 IOVA_PFN(r->end));
51a63e67 1603 if (!iova) {
ba395927 1604 printk(KERN_ERR "Reserve iova failed\n");
51a63e67
JC
1605 return -ENODEV;
1606 }
ba395927
KA
1607 }
1608 }
51a63e67 1609 return 0;
ba395927
KA
1610}
1611
1612static void domain_reserve_special_ranges(struct dmar_domain *domain)
1613{
1614 copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1615}
1616
1617static inline int guestwidth_to_adjustwidth(int gaw)
1618{
1619 int agaw;
1620 int r = (gaw - 12) % 9;
1621
1622 if (r == 0)
1623 agaw = gaw;
1624 else
1625 agaw = gaw + 9 - r;
1626 if (agaw > 64)
1627 agaw = 64;
1628 return agaw;
1629}
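/*
 * Worked example (illustrative only): the adjusted width rounds the guest
 * width up so that (agaw - 12) is a whole number of 9-bit levels:
 * 48 stays 48 (4 levels), 40 rounds up to 48, 36 rounds up to 39
 * (3 levels), and anything that would exceed 64 is capped at 64.
 */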
1630
1631static int domain_init(struct dmar_domain *domain, int guest_width)
1632{
1633 struct intel_iommu *iommu;
1634 int adjust_width, agaw;
1635 unsigned long sagaw;
1636
f661197e 1637 init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
ba395927
KA
1638 domain_reserve_special_ranges(domain);
1639
1640 /* calculate AGAW */
8c11e798 1641 iommu = domain_get_iommu(domain);
ba395927
KA
1642 if (guest_width > cap_mgaw(iommu->cap))
1643 guest_width = cap_mgaw(iommu->cap);
1644 domain->gaw = guest_width;
1645 adjust_width = guestwidth_to_adjustwidth(guest_width);
1646 agaw = width_to_agaw(adjust_width);
1647 sagaw = cap_sagaw(iommu->cap);
1648 if (!test_bit(agaw, &sagaw)) {
1649 /* hardware doesn't support it, choose a bigger one */
1650 pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw);
1651 agaw = find_next_bit(&sagaw, 5, agaw);
1652 if (agaw >= 5)
1653 return -ENODEV;
1654 }
1655 domain->agaw = agaw;
ba395927 1656
8e604097
WH
1657 if (ecap_coherent(iommu->ecap))
1658 domain->iommu_coherency = 1;
1659 else
1660 domain->iommu_coherency = 0;
1661
58c610bd
SY
1662 if (ecap_sc_support(iommu->ecap))
1663 domain->iommu_snooping = 1;
1664 else
1665 domain->iommu_snooping = 0;
1666
214e39aa
DW
1667 if (intel_iommu_superpage)
1668 domain->iommu_superpage = fls(cap_super_page_val(iommu->cap));
1669 else
1670 domain->iommu_superpage = 0;
1671
4c923d47 1672 domain->nid = iommu->node;
c7151a8d 1673
ba395927 1674 /* always allocate the top pgd */
4c923d47 1675 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
ba395927
KA
1676 if (!domain->pgd)
1677 return -ENOMEM;
5b6985ce 1678 __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
ba395927
KA
1679 return 0;
1680}
1681
1682static void domain_exit(struct dmar_domain *domain)
1683{
2c2e2c38
FY
1684 struct dmar_drhd_unit *drhd;
1685 struct intel_iommu *iommu;
ea8ea460 1686 struct page *freelist = NULL;
ba395927
KA
1687
 1688 /* Domain 0 is reserved, so don't process it */
1689 if (!domain)
1690 return;
1691
7b668357
AW
1692 /* Flush any lazy unmaps that may reference this domain */
1693 if (!intel_iommu_strict)
1694 flush_unmaps_timeout(0);
1695
92d03cc8 1696 /* remove associated devices */
ba395927 1697 domain_remove_dev_info(domain);
92d03cc8 1698
ba395927
KA
1699 /* destroy iovas */
1700 put_iova_domain(&domain->iovad);
ba395927 1701
ea8ea460 1702 freelist = domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
ba395927 1703
92d03cc8 1704 /* clear attached or cached domains */
0e242612 1705 rcu_read_lock();
2c2e2c38 1706 for_each_active_iommu(iommu, drhd)
92d03cc8
JL
1707 if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE ||
1708 test_bit(iommu->seq_id, domain->iommu_bmp))
2c2e2c38 1709 iommu_detach_domain(domain, iommu);
0e242612 1710 rcu_read_unlock();
2c2e2c38 1711
ea8ea460
DW
1712 dma_free_pagelist(freelist);
1713
ba395927
KA
1714 free_domain_mem(domain);
1715}
1716
64ae892b
DW
1717static int domain_context_mapping_one(struct dmar_domain *domain,
1718 struct intel_iommu *iommu,
1719 u8 bus, u8 devfn, int translation)
ba395927
KA
1720{
1721 struct context_entry *context;
ba395927 1722 unsigned long flags;
ea6606b0
WH
1723 struct dma_pte *pgd;
1724 unsigned long num;
1725 unsigned long ndomains;
1726 int id;
1727 int agaw;
93a23a72 1728 struct device_domain_info *info = NULL;
ba395927
KA
1729
1730 pr_debug("Set context mapping for %02x:%02x.%d\n",
1731 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
4ed0d3e6 1732
ba395927 1733 BUG_ON(!domain->pgd);
4ed0d3e6
FY
1734 BUG_ON(translation != CONTEXT_TT_PASS_THROUGH &&
1735 translation != CONTEXT_TT_MULTI_LEVEL);
5331fe6f 1736
ba395927
KA
1737 context = device_to_context_entry(iommu, bus, devfn);
1738 if (!context)
1739 return -ENOMEM;
1740 spin_lock_irqsave(&iommu->lock, flags);
c07e7d21 1741 if (context_present(context)) {
ba395927
KA
1742 spin_unlock_irqrestore(&iommu->lock, flags);
1743 return 0;
1744 }
1745
ea6606b0
WH
1746 id = domain->id;
1747 pgd = domain->pgd;
1748
2c2e2c38
FY
1749 if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE ||
1750 domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) {
ea6606b0
WH
1751 int found = 0;
1752
1753 /* find an available domain id for this device in iommu */
1754 ndomains = cap_ndoms(iommu->cap);
a45946ab 1755 for_each_set_bit(num, iommu->domain_ids, ndomains) {
ea6606b0
WH
1756 if (iommu->domains[num] == domain) {
1757 id = num;
1758 found = 1;
1759 break;
1760 }
ea6606b0
WH
1761 }
1762
1763 if (found == 0) {
1764 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1765 if (num >= ndomains) {
1766 spin_unlock_irqrestore(&iommu->lock, flags);
1767 printk(KERN_ERR "IOMMU: no free domain ids\n");
1768 return -EFAULT;
1769 }
1770
1771 set_bit(num, iommu->domain_ids);
1772 iommu->domains[num] = domain;
1773 id = num;
1774 }
1775
 1776		/* Skip top levels of page tables for
 1777		 * an iommu which has a smaller agaw than the default.
1672af11 1778 * Unnecessary for PT mode.
ea6606b0 1779 */
1672af11
CW
1780 if (translation != CONTEXT_TT_PASS_THROUGH) {
1781 for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) {
1782 pgd = phys_to_virt(dma_pte_addr(pgd));
1783 if (!dma_pte_present(pgd)) {
1784 spin_unlock_irqrestore(&iommu->lock, flags);
1785 return -ENOMEM;
1786 }
ea6606b0
WH
1787 }
1788 }
1789 }
1790
1791 context_set_domain_id(context, id);
4ed0d3e6 1792
93a23a72 1793 if (translation != CONTEXT_TT_PASS_THROUGH) {
64ae892b 1794 info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
93a23a72
YZ
1795 translation = info ? CONTEXT_TT_DEV_IOTLB :
1796 CONTEXT_TT_MULTI_LEVEL;
1797 }
4ed0d3e6
FY
1798 /*
1799 * In pass through mode, AW must be programmed to indicate the largest
1800 * AGAW value supported by hardware. And ASR is ignored by hardware.
1801 */
93a23a72 1802 if (unlikely(translation == CONTEXT_TT_PASS_THROUGH))
4ed0d3e6 1803 context_set_address_width(context, iommu->msagaw);
93a23a72
YZ
1804 else {
1805 context_set_address_root(context, virt_to_phys(pgd));
1806 context_set_address_width(context, iommu->agaw);
1807 }
4ed0d3e6
FY
1808
1809 context_set_translation_type(context, translation);
c07e7d21
MM
1810 context_set_fault_enable(context);
1811 context_set_present(context);
5331fe6f 1812 domain_flush_cache(domain, context, sizeof(*context));
ba395927 1813
4c25a2c1
DW
1814 /*
1815 * It's a non-present to present mapping. If hardware doesn't cache
 1816	 * non-present entries we only need to flush the write-buffer. If it
1817 * _does_ cache non-present entries, then it does so in the special
1818 * domain #0, which we have to flush:
1819 */
1820 if (cap_caching_mode(iommu->cap)) {
1821 iommu->flush.flush_context(iommu, 0,
1822 (((u16)bus) << 8) | devfn,
1823 DMA_CCMD_MASK_NOBIT,
1824 DMA_CCMD_DEVICE_INVL);
18fd779a 1825 iommu->flush.flush_iotlb(iommu, id, 0, 0, DMA_TLB_DSI_FLUSH);
4c25a2c1 1826 } else {
ba395927 1827 iommu_flush_write_buffer(iommu);
4c25a2c1 1828 }
93a23a72 1829 iommu_enable_dev_iotlb(info);
ba395927 1830 spin_unlock_irqrestore(&iommu->lock, flags);
c7151a8d
WH
1831
1832 spin_lock_irqsave(&domain->iommu_lock, flags);
1b198bb0 1833 if (!test_and_set_bit(iommu->seq_id, domain->iommu_bmp)) {
c7151a8d 1834 domain->iommu_count++;
4c923d47
SS
1835 if (domain->iommu_count == 1)
1836 domain->nid = iommu->node;
58c610bd 1837 domain_update_iommu_cap(domain);
c7151a8d
WH
1838 }
1839 spin_unlock_irqrestore(&domain->iommu_lock, flags);
ba395927
KA
1840 return 0;
1841}
1842
579305f7
AW
1843struct domain_context_mapping_data {
1844 struct dmar_domain *domain;
1845 struct intel_iommu *iommu;
1846 int translation;
1847};
1848
1849static int domain_context_mapping_cb(struct pci_dev *pdev,
1850 u16 alias, void *opaque)
1851{
1852 struct domain_context_mapping_data *data = opaque;
1853
1854 return domain_context_mapping_one(data->domain, data->iommu,
1855 PCI_BUS_NUM(alias), alias & 0xff,
1856 data->translation);
1857}
1858
ba395927 1859static int
e1f167f3
DW
1860domain_context_mapping(struct dmar_domain *domain, struct device *dev,
1861 int translation)
ba395927 1862{
64ae892b 1863 struct intel_iommu *iommu;
156baca8 1864 u8 bus, devfn;
579305f7 1865 struct domain_context_mapping_data data;
64ae892b 1866
e1f167f3 1867 iommu = device_to_iommu(dev, &bus, &devfn);
64ae892b
DW
1868 if (!iommu)
1869 return -ENODEV;
ba395927 1870
579305f7
AW
1871 if (!dev_is_pci(dev))
1872 return domain_context_mapping_one(domain, iommu, bus, devfn,
4ed0d3e6 1873 translation);
579305f7
AW
1874
1875 data.domain = domain;
1876 data.iommu = iommu;
1877 data.translation = translation;
1878
1879 return pci_for_each_dma_alias(to_pci_dev(dev),
1880 &domain_context_mapping_cb, &data);
1881}
1882
1883static int domain_context_mapped_cb(struct pci_dev *pdev,
1884 u16 alias, void *opaque)
1885{
1886 struct intel_iommu *iommu = opaque;
1887
1888 return !device_context_mapped(iommu, PCI_BUS_NUM(alias), alias & 0xff);
ba395927
KA
1889}
1890
e1f167f3 1891static int domain_context_mapped(struct device *dev)
ba395927 1892{
5331fe6f 1893 struct intel_iommu *iommu;
156baca8 1894 u8 bus, devfn;
5331fe6f 1895
e1f167f3 1896 iommu = device_to_iommu(dev, &bus, &devfn);
5331fe6f
WH
1897 if (!iommu)
1898 return -ENODEV;
ba395927 1899
579305f7
AW
1900 if (!dev_is_pci(dev))
1901 return device_context_mapped(iommu, bus, devfn);
e1f167f3 1902
579305f7
AW
1903 return !pci_for_each_dma_alias(to_pci_dev(dev),
1904 domain_context_mapped_cb, iommu);
ba395927
KA
1905}
1906
f532959b
FY
1907/* Returns a number of VTD pages, but aligned to MM page size */
1908static inline unsigned long aligned_nrpages(unsigned long host_addr,
1909 size_t size)
1910{
1911 host_addr &= ~PAGE_MASK;
1912 return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
1913}
1914
6dd9a7c7
YS
1915/* Return largest possible superpage level for a given mapping */
1916static inline int hardware_largepage_caps(struct dmar_domain *domain,
1917 unsigned long iov_pfn,
1918 unsigned long phy_pfn,
1919 unsigned long pages)
1920{
1921 int support, level = 1;
1922 unsigned long pfnmerge;
1923
1924 support = domain->iommu_superpage;
1925
1926 /* To use a large page, the virtual *and* physical addresses
1927 must be aligned to 2MiB/1GiB/etc. Lower bits set in either
1928 of them will mean we have to use smaller pages. So just
1929 merge them and check both at once. */
1930 pfnmerge = iov_pfn | phy_pfn;
1931
1932 while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
1933 pages >>= VTD_STRIDE_SHIFT;
1934 if (!pages)
1935 break;
1936 pfnmerge >>= VTD_STRIDE_SHIFT;
1937 level++;
1938 support--;
1939 }
1940 return level;
1941}
1942
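/*
 * Install PTEs for @nr_pages pages starting at @iov_pfn.  The physical
 * addresses come either from @sg (scatterlist mapping) or from @phys_pfn
 * (physically contiguous mapping).  Superpages are used whenever the
 * hardware supports them and the alignment and remaining length allow it.
 */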
9051aa02
DW
1943static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1944 struct scatterlist *sg, unsigned long phys_pfn,
1945 unsigned long nr_pages, int prot)
e1605495
DW
1946{
1947 struct dma_pte *first_pte = NULL, *pte = NULL;
9051aa02 1948 phys_addr_t uninitialized_var(pteval);
e1605495 1949 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
9051aa02 1950 unsigned long sg_res;
6dd9a7c7
YS
1951 unsigned int largepage_lvl = 0;
1952 unsigned long lvl_pages = 0;
e1605495
DW
1953
1954 BUG_ON(addr_width < BITS_PER_LONG && (iov_pfn + nr_pages - 1) >> addr_width);
1955
1956 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
1957 return -EINVAL;
1958
1959 prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
1960
9051aa02
DW
1961 if (sg)
1962 sg_res = 0;
1963 else {
1964 sg_res = nr_pages + 1;
1965 pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
1966 }
1967
6dd9a7c7 1968 while (nr_pages > 0) {
c85994e4
DW
1969 uint64_t tmp;
1970
e1605495 1971 if (!sg_res) {
f532959b 1972 sg_res = aligned_nrpages(sg->offset, sg->length);
e1605495
DW
1973 sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset;
1974 sg->dma_length = sg->length;
1975 pteval = page_to_phys(sg_page(sg)) | prot;
6dd9a7c7 1976 phys_pfn = pteval >> VTD_PAGE_SHIFT;
e1605495 1977 }
6dd9a7c7 1978
e1605495 1979 if (!pte) {
6dd9a7c7
YS
1980 largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res);
1981
5cf0a76f 1982 first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl);
e1605495
DW
1983 if (!pte)
1984 return -ENOMEM;
6dd9a7c7 1986			/* It is a large page */
6491d4d0 1986 if (largepage_lvl > 1) {
6dd9a7c7 1987 pteval |= DMA_PTE_LARGE_PAGE;
6491d4d0
WD
1988 /* Ensure that old small page tables are removed to make room
1989 for superpage, if they exist. */
1990 dma_pte_clear_range(domain, iov_pfn,
1991 iov_pfn + lvl_to_nr_pages(largepage_lvl) - 1);
1992 dma_pte_free_pagetable(domain, iov_pfn,
1993 iov_pfn + lvl_to_nr_pages(largepage_lvl) - 1);
1994 } else {
6dd9a7c7 1995 pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
6491d4d0 1996 }
6dd9a7c7 1997
e1605495
DW
1998 }
 1999		/* We don't need a lock here; nobody else
2000 * touches the iova range
2001 */
7766a3fb 2002 tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
c85994e4 2003 if (tmp) {
1bf20f0d 2004 static int dumps = 5;
c85994e4
DW
2005 printk(KERN_CRIT "ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
2006 iov_pfn, tmp, (unsigned long long)pteval);
1bf20f0d
DW
2007 if (dumps) {
2008 dumps--;
2009 debug_dma_dump_mappings(NULL);
2010 }
2011 WARN_ON(1);
2012 }
6dd9a7c7
YS
2013
2014 lvl_pages = lvl_to_nr_pages(largepage_lvl);
2015
2016 BUG_ON(nr_pages < lvl_pages);
2017 BUG_ON(sg_res < lvl_pages);
2018
2019 nr_pages -= lvl_pages;
2020 iov_pfn += lvl_pages;
2021 phys_pfn += lvl_pages;
2022 pteval += lvl_pages * VTD_PAGE_SIZE;
2023 sg_res -= lvl_pages;
2024
2025 /* If the next PTE would be the first in a new page, then we
2026 need to flush the cache on the entries we've just written.
2027 And then we'll need to recalculate 'pte', so clear it and
2028 let it get set again in the if (!pte) block above.
2029
2030 If we're done (!nr_pages) we need to flush the cache too.
2031
2032 Also if we've been setting superpages, we may need to
2033 recalculate 'pte' and switch back to smaller pages for the
2034 end of the mapping, if the trailing size is not enough to
2035 use another superpage (i.e. sg_res < lvl_pages). */
e1605495 2036 pte++;
6dd9a7c7
YS
2037 if (!nr_pages || first_pte_in_page(pte) ||
2038 (largepage_lvl > 1 && sg_res < lvl_pages)) {
e1605495
DW
2039 domain_flush_cache(domain, first_pte,
2040 (void *)pte - (void *)first_pte);
2041 pte = NULL;
2042 }
6dd9a7c7
YS
2043
2044 if (!sg_res && nr_pages)
e1605495
DW
2045 sg = sg_next(sg);
2046 }
2047 return 0;
2048}
2049
9051aa02
DW
2050static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2051 struct scatterlist *sg, unsigned long nr_pages,
2052 int prot)
ba395927 2053{
9051aa02
DW
2054 return __domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot);
2055}
6f6a00e4 2056
9051aa02
DW
2057static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2058 unsigned long phys_pfn, unsigned long nr_pages,
2059 int prot)
2060{
2061 return __domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot);
ba395927
KA
2062}
2063
c7151a8d 2064static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn)
ba395927 2065{
c7151a8d
WH
2066 if (!iommu)
2067 return;
8c11e798
WH
2068
2069 clear_context_table(iommu, bus, devfn);
2070 iommu->flush.flush_context(iommu, 0, 0, 0,
4c25a2c1 2071 DMA_CCMD_GLOBAL_INVL);
1f0ef2aa 2072 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
ba395927
KA
2073}
2074
109b9b04
DW
2075static inline void unlink_domain_info(struct device_domain_info *info)
2076{
2077 assert_spin_locked(&device_domain_lock);
2078 list_del(&info->link);
2079 list_del(&info->global);
2080 if (info->dev)
0bcb3e28 2081 info->dev->archdata.iommu = NULL;
109b9b04
DW
2082}
2083
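/*
 * Detach every device from @domain: unlink each device_domain_info, disable
 * the device IOTLB, clear the context entry and, for virtual-machine
 * domains, drop the per-IOMMU reference and recompute the capabilities.
 */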
ba395927
KA
2084static void domain_remove_dev_info(struct dmar_domain *domain)
2085{
3a74ca01 2086 struct device_domain_info *info, *tmp;
92d03cc8 2087 unsigned long flags, flags2;
ba395927
KA
2088
2089 spin_lock_irqsave(&device_domain_lock, flags);
3a74ca01 2090 list_for_each_entry_safe(info, tmp, &domain->devices, link) {
109b9b04 2091 unlink_domain_info(info);
ba395927
KA
2092 spin_unlock_irqrestore(&device_domain_lock, flags);
2093
93a23a72 2094 iommu_disable_dev_iotlb(info);
7c7faa11 2095 iommu_detach_dev(info->iommu, info->bus, info->devfn);
ba395927 2096
92d03cc8 2097 if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) {
7c7faa11 2098 iommu_detach_dependent_devices(info->iommu, info->dev);
92d03cc8
JL
2099 /* clear this iommu in iommu_bmp, update iommu count
2100 * and capabilities
2101 */
2102 spin_lock_irqsave(&domain->iommu_lock, flags2);
7c7faa11 2103 if (test_and_clear_bit(info->iommu->seq_id,
92d03cc8
JL
2104 domain->iommu_bmp)) {
2105 domain->iommu_count--;
2106 domain_update_iommu_cap(domain);
2107 }
2108 spin_unlock_irqrestore(&domain->iommu_lock, flags2);
2109 }
2110
2111 free_devinfo_mem(info);
ba395927
KA
2112 spin_lock_irqsave(&device_domain_lock, flags);
2113 }
2114 spin_unlock_irqrestore(&device_domain_lock, flags);
2115}
2116
2117/*
2118 * find_domain
1525a29a 2119 * Note: we use struct device->archdata.iommu to store the info
ba395927 2120 */
1525a29a 2121static struct dmar_domain *find_domain(struct device *dev)
ba395927
KA
2122{
2123 struct device_domain_info *info;
2124
2125 /* No lock here, assumes no domain exit in normal case */
1525a29a 2126 info = dev->archdata.iommu;
ba395927
KA
2127 if (info)
2128 return info->domain;
2129 return NULL;
2130}
2131
5a8f40e8 2132static inline struct device_domain_info *
745f2586
JL
2133dmar_search_domain_by_dev_info(int segment, int bus, int devfn)
2134{
2135 struct device_domain_info *info;
2136
2137 list_for_each_entry(info, &device_domain_list, global)
41e80dca 2138 if (info->iommu->segment == segment && info->bus == bus &&
745f2586 2139 info->devfn == devfn)
5a8f40e8 2140 return info;
745f2586
JL
2141
2142 return NULL;
2143}
2144
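/*
 * Allocate and link a device_domain_info for (bus, devfn) on @iommu.  If the
 * device (or its DMA alias) has already been attached to a domain, that
 * existing domain is returned instead and the caller must free @domain.
 */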
5a8f40e8 2145static struct dmar_domain *dmar_insert_dev_info(struct intel_iommu *iommu,
41e80dca 2146 int bus, int devfn,
b718cd3d
DW
2147 struct device *dev,
2148 struct dmar_domain *domain)
745f2586 2149{
5a8f40e8 2150 struct dmar_domain *found = NULL;
745f2586
JL
2151 struct device_domain_info *info;
2152 unsigned long flags;
2153
2154 info = alloc_devinfo_mem();
2155 if (!info)
b718cd3d 2156 return NULL;
745f2586 2157
745f2586
JL
2158 info->bus = bus;
2159 info->devfn = devfn;
2160 info->dev = dev;
2161 info->domain = domain;
5a8f40e8 2162 info->iommu = iommu;
745f2586
JL
2163 if (!dev)
2164 domain->flags |= DOMAIN_FLAG_P2P_MULTIPLE_DEVICES;
2165
2166 spin_lock_irqsave(&device_domain_lock, flags);
2167 if (dev)
0bcb3e28 2168 found = find_domain(dev);
5a8f40e8
DW
2169 else {
2170 struct device_domain_info *info2;
41e80dca 2171 info2 = dmar_search_domain_by_dev_info(iommu->segment, bus, devfn);
5a8f40e8
DW
2172 if (info2)
2173 found = info2->domain;
2174 }
745f2586
JL
2175 if (found) {
2176 spin_unlock_irqrestore(&device_domain_lock, flags);
2177 free_devinfo_mem(info);
b718cd3d
DW
2178 /* Caller must free the original domain */
2179 return found;
745f2586
JL
2180 }
2181
b718cd3d
DW
2182 list_add(&info->link, &domain->devices);
2183 list_add(&info->global, &device_domain_list);
2184 if (dev)
2185 dev->archdata.iommu = info;
2186 spin_unlock_irqrestore(&device_domain_lock, flags);
2187
2188 return domain;
745f2586
JL
2189}
2190
579305f7
AW
2191static int get_last_alias(struct pci_dev *pdev, u16 alias, void *opaque)
2192{
2193 *(u16 *)opaque = alias;
2194 return 0;
2195}
2196
ba395927 2197/* domain is initialized */
146922ec 2198static struct dmar_domain *get_domain_for_dev(struct device *dev, int gaw)
ba395927 2199{
579305f7
AW
2200 struct dmar_domain *domain, *tmp;
2201 struct intel_iommu *iommu;
5a8f40e8 2202 struct device_domain_info *info;
579305f7 2203 u16 dma_alias;
ba395927 2204 unsigned long flags;
aa4d066a 2205 u8 bus, devfn;
ba395927 2206
146922ec 2207 domain = find_domain(dev);
ba395927
KA
2208 if (domain)
2209 return domain;
2210
579305f7
AW
2211 iommu = device_to_iommu(dev, &bus, &devfn);
2212 if (!iommu)
2213 return NULL;
2214
146922ec
DW
2215 if (dev_is_pci(dev)) {
2216 struct pci_dev *pdev = to_pci_dev(dev);
276dbf99 2217
579305f7
AW
2218 pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias);
2219
2220 spin_lock_irqsave(&device_domain_lock, flags);
2221 info = dmar_search_domain_by_dev_info(pci_domain_nr(pdev->bus),
2222 PCI_BUS_NUM(dma_alias),
2223 dma_alias & 0xff);
2224 if (info) {
2225 iommu = info->iommu;
2226 domain = info->domain;
5a8f40e8 2227 }
579305f7 2228 spin_unlock_irqrestore(&device_domain_lock, flags);
ba395927 2229
579305f7
AW
 2230		/* DMA alias already has a domain, use it */
2231 if (info)
2232 goto found_domain;
2233 }
ba395927 2234
146922ec 2235 /* Allocate and initialize new domain for the device */
92d03cc8 2236 domain = alloc_domain(false);
745f2586 2237 if (!domain)
579305f7
AW
2238 return NULL;
2239
745f2586 2240 if (iommu_attach_domain(domain, iommu)) {
2fe9723d 2241 free_domain_mem(domain);
579305f7 2242 return NULL;
2c2e2c38 2243 }
ba395927 2244
579305f7
AW
2245 if (domain_init(domain, gaw)) {
2246 domain_exit(domain);
2247 return NULL;
2c2e2c38 2248 }
ba395927 2249
579305f7
AW
2250 /* register PCI DMA alias device */
2251 if (dev_is_pci(dev)) {
2252 tmp = dmar_insert_dev_info(iommu, PCI_BUS_NUM(dma_alias),
2253 dma_alias & 0xff, NULL, domain);
2254
2255 if (!tmp || tmp != domain) {
2256 domain_exit(domain);
2257 domain = tmp;
2258 }
2259
b718cd3d 2260 if (!domain)
579305f7 2261 return NULL;
ba395927
KA
2262 }
2263
2264found_domain:
579305f7
AW
2265 tmp = dmar_insert_dev_info(iommu, bus, devfn, dev, domain);
2266
2267 if (!tmp || tmp != domain) {
2268 domain_exit(domain);
2269 domain = tmp;
2270 }
b718cd3d
DW
2271
2272 return domain;
ba395927
KA
2273}
2274
2c2e2c38 2275static int iommu_identity_mapping;
e0fc7e0b
DW
2276#define IDENTMAP_ALL 1
2277#define IDENTMAP_GFX 2
2278#define IDENTMAP_AZALIA 4
2c2e2c38 2279
b213203e
DW
2280static int iommu_domain_identity_map(struct dmar_domain *domain,
2281 unsigned long long start,
2282 unsigned long long end)
ba395927 2283{
c5395d5c
DW
2284 unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
2285 unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;
2286
2287 if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
2288 dma_to_mm_pfn(last_vpfn))) {
ba395927 2289 printk(KERN_ERR "IOMMU: reserve iova failed\n");
b213203e 2290 return -ENOMEM;
ba395927
KA
2291 }
2292
c5395d5c
DW
2293 pr_debug("Mapping reserved region %llx-%llx for domain %d\n",
2294 start, end, domain->id);
ba395927
KA
2295 /*
2296 * RMRR range might have overlap with physical memory range,
2297 * clear it first
2298 */
c5395d5c 2299 dma_pte_clear_range(domain, first_vpfn, last_vpfn);
ba395927 2300
c5395d5c
DW
2301 return domain_pfn_mapping(domain, first_vpfn, first_vpfn,
2302 last_vpfn - first_vpfn + 1,
61df7443 2303 DMA_PTE_READ|DMA_PTE_WRITE);
b213203e
DW
2304}
2305
0b9d9753 2306static int iommu_prepare_identity_map(struct device *dev,
b213203e
DW
2307 unsigned long long start,
2308 unsigned long long end)
2309{
2310 struct dmar_domain *domain;
2311 int ret;
2312
0b9d9753 2313 domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
b213203e
DW
2314 if (!domain)
2315 return -ENOMEM;
2316
19943b0e
DW
2317 /* For _hardware_ passthrough, don't bother. But for software
2318 passthrough, we do it anyway -- it may indicate a memory
 2319	   range which is reserved in E820 and so didn't get set
 2320	   up in si_domain to start with */
2321 if (domain == si_domain && hw_pass_through) {
2322 printk("Ignoring identity map for HW passthrough device %s [0x%Lx - 0x%Lx]\n",
0b9d9753 2323 dev_name(dev), start, end);
19943b0e
DW
2324 return 0;
2325 }
2326
2327 printk(KERN_INFO
2328 "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
0b9d9753 2329 dev_name(dev), start, end);
2ff729f5 2330
5595b528
DW
2331 if (end < start) {
2332 WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n"
2333 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2334 dmi_get_system_info(DMI_BIOS_VENDOR),
2335 dmi_get_system_info(DMI_BIOS_VERSION),
2336 dmi_get_system_info(DMI_PRODUCT_VERSION));
2337 ret = -EIO;
2338 goto error;
2339 }
2340
2ff729f5
DW
2341 if (end >> agaw_to_width(domain->agaw)) {
2342 WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n"
2343 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2344 agaw_to_width(domain->agaw),
2345 dmi_get_system_info(DMI_BIOS_VENDOR),
2346 dmi_get_system_info(DMI_BIOS_VERSION),
2347 dmi_get_system_info(DMI_PRODUCT_VERSION));
2348 ret = -EIO;
2349 goto error;
2350 }
19943b0e 2351
b213203e 2352 ret = iommu_domain_identity_map(domain, start, end);
ba395927
KA
2353 if (ret)
2354 goto error;
2355
2356 /* context entry init */
0b9d9753 2357 ret = domain_context_mapping(domain, dev, CONTEXT_TT_MULTI_LEVEL);
b213203e
DW
2358 if (ret)
2359 goto error;
2360
2361 return 0;
2362
2363 error:
ba395927
KA
2364 domain_exit(domain);
2365 return ret;
ba395927
KA
2366}
2367
2368static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
0b9d9753 2369 struct device *dev)
ba395927 2370{
0b9d9753 2371 if (dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
ba395927 2372 return 0;
0b9d9753
DW
2373 return iommu_prepare_identity_map(dev, rmrr->base_address,
2374 rmrr->end_address);
ba395927
KA
2375}
2376
d3f13810 2377#ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA
49a0429e
KA
2378static inline void iommu_prepare_isa(void)
2379{
2380 struct pci_dev *pdev;
2381 int ret;
2382
2383 pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
2384 if (!pdev)
2385 return;
2386
c7ab48d2 2387 printk(KERN_INFO "IOMMU: Prepare 0-16MiB unity mapping for LPC\n");
0b9d9753 2388 ret = iommu_prepare_identity_map(&pdev->dev, 0, 16*1024*1024 - 1);
49a0429e
KA
2389
2390 if (ret)
c7ab48d2
DW
2391 printk(KERN_ERR "IOMMU: Failed to create 0-16MiB identity map; "
2392 "floppy might not work\n");
49a0429e 2393
9b27e82d 2394 pci_dev_put(pdev);
49a0429e
KA
2395}
2396#else
2397static inline void iommu_prepare_isa(void)
2398{
2399 return;
2400}
d3f13810 2401#endif /* !CONFIG_INTEL_IOMMU_FLOPPY_WA */
49a0429e 2402
2c2e2c38 2403static int md_domain_init(struct dmar_domain *domain, int guest_width);
c7ab48d2 2404
071e1374 2405static int __init si_domain_init(int hw)
2c2e2c38
FY
2406{
2407 struct dmar_drhd_unit *drhd;
2408 struct intel_iommu *iommu;
c7ab48d2 2409 int nid, ret = 0;
2c2e2c38 2410
92d03cc8 2411 si_domain = alloc_domain(false);
2c2e2c38
FY
2412 if (!si_domain)
2413 return -EFAULT;
2414
92d03cc8
JL
2415 si_domain->flags = DOMAIN_FLAG_STATIC_IDENTITY;
2416
2c2e2c38
FY
2417 for_each_active_iommu(iommu, drhd) {
2418 ret = iommu_attach_domain(si_domain, iommu);
2419 if (ret) {
2420 domain_exit(si_domain);
2421 return -EFAULT;
2422 }
2423 }
2424
2425 if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2426 domain_exit(si_domain);
2427 return -EFAULT;
2428 }
2429
9544c003
JL
2430 pr_debug("IOMMU: identity mapping domain is domain %d\n",
2431 si_domain->id);
2c2e2c38 2432
19943b0e
DW
2433 if (hw)
2434 return 0;
2435
c7ab48d2 2436 for_each_online_node(nid) {
5dfe8660
TH
2437 unsigned long start_pfn, end_pfn;
2438 int i;
2439
2440 for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
2441 ret = iommu_domain_identity_map(si_domain,
2442 PFN_PHYS(start_pfn), PFN_PHYS(end_pfn));
2443 if (ret)
2444 return ret;
2445 }
c7ab48d2
DW
2446 }
2447
2c2e2c38
FY
2448 return 0;
2449}
2450
9b226624 2451static int identity_mapping(struct device *dev)
2c2e2c38
FY
2452{
2453 struct device_domain_info *info;
2454
2455 if (likely(!iommu_identity_mapping))
2456 return 0;
2457
9b226624 2458 info = dev->archdata.iommu;
cb452a40
MT
2459 if (info && info != DUMMY_DEVICE_DOMAIN_INFO)
2460 return (info->domain == si_domain);
2c2e2c38 2461
2c2e2c38
FY
2462 return 0;
2463}
2464
2465static int domain_add_dev_info(struct dmar_domain *domain,
5913c9bf 2466 struct device *dev, int translation)
2c2e2c38 2467{
0ac72664 2468 struct dmar_domain *ndomain;
5a8f40e8 2469 struct intel_iommu *iommu;
156baca8 2470 u8 bus, devfn;
5fe60f4e 2471 int ret;
2c2e2c38 2472
5913c9bf 2473 iommu = device_to_iommu(dev, &bus, &devfn);
5a8f40e8
DW
2474 if (!iommu)
2475 return -ENODEV;
2476
5913c9bf 2477 ndomain = dmar_insert_dev_info(iommu, bus, devfn, dev, domain);
0ac72664
DW
2478 if (ndomain != domain)
2479 return -EBUSY;
2c2e2c38 2480
5913c9bf 2481 ret = domain_context_mapping(domain, dev, translation);
e2ad23d0 2482 if (ret) {
5913c9bf 2483 domain_remove_one_dev_info(domain, dev);
e2ad23d0
DW
2484 return ret;
2485 }
2486
2c2e2c38
FY
2487 return 0;
2488}
2489
0b9d9753 2490static bool device_has_rmrr(struct device *dev)
ea2447f7
TM
2491{
2492 struct dmar_rmrr_unit *rmrr;
832bd858 2493 struct device *tmp;
ea2447f7
TM
2494 int i;
2495
0e242612 2496 rcu_read_lock();
ea2447f7 2497 for_each_rmrr_units(rmrr) {
b683b230
JL
2498 /*
2499 * Return TRUE if this RMRR contains the device that
2500 * is passed in.
2501 */
2502 for_each_active_dev_scope(rmrr->devices,
2503 rmrr->devices_cnt, i, tmp)
0b9d9753 2504 if (tmp == dev) {
0e242612 2505 rcu_read_unlock();
ea2447f7 2506 return true;
b683b230 2507 }
ea2447f7 2508 }
0e242612 2509 rcu_read_unlock();
ea2447f7
TM
2510 return false;
2511}
2512
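/*
 * Decide whether @dev should be placed in the static 1:1 (identity) domain.
 * Devices with RMRRs (other than USB) are excluded, gfx and azalia devices
 * follow the IDENTMAP_* flags, legacy PCI devices behind bridges are
 * excluded, and once the system is up the device's DMA mask must also
 * cover all of memory.
 */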
3bdb2591 2513static int iommu_should_identity_map(struct device *dev, int startup)
6941af28 2514{
ea2447f7 2515
3bdb2591
DW
2516 if (dev_is_pci(dev)) {
2517 struct pci_dev *pdev = to_pci_dev(dev);
ea2447f7 2518
3bdb2591
DW
2519 /*
2520 * We want to prevent any device associated with an RMRR from
2521 * getting placed into the SI Domain. This is done because
2522 * problems exist when devices are moved in and out of domains
2523 * and their respective RMRR info is lost. We exempt USB devices
2524 * from this process due to their usage of RMRRs that are known
2525 * to not be needed after BIOS hand-off to OS.
2526 */
2527 if (device_has_rmrr(dev) &&
2528 (pdev->class >> 8) != PCI_CLASS_SERIAL_USB)
2529 return 0;
e0fc7e0b 2530
3bdb2591
DW
2531 if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
2532 return 1;
e0fc7e0b 2533
3bdb2591
DW
2534 if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
2535 return 1;
6941af28 2536
3bdb2591 2537 if (!(iommu_identity_mapping & IDENTMAP_ALL))
3dfc813d 2538 return 0;
3bdb2591
DW
2539
2540 /*
2541 * We want to start off with all devices in the 1:1 domain, and
2542 * take them out later if we find they can't access all of memory.
2543 *
2544 * However, we can't do this for PCI devices behind bridges,
2545 * because all PCI devices behind the same bridge will end up
2546 * with the same source-id on their transactions.
2547 *
2548 * Practically speaking, we can't change things around for these
2549 * devices at run-time, because we can't be sure there'll be no
2550 * DMA transactions in flight for any of their siblings.
2551 *
2552 * So PCI devices (unless they're on the root bus) as well as
2553 * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of
2554 * the 1:1 domain, just in _case_ one of their siblings turns out
2555 * not to be able to map all of memory.
2556 */
2557 if (!pci_is_pcie(pdev)) {
2558 if (!pci_is_root_bus(pdev->bus))
2559 return 0;
2560 if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI)
2561 return 0;
2562 } else if (pci_pcie_type(pdev) == PCI_EXP_TYPE_PCI_BRIDGE)
3dfc813d 2563 return 0;
3bdb2591
DW
2564 } else {
2565 if (device_has_rmrr(dev))
2566 return 0;
2567 }
3dfc813d 2568
3bdb2591 2569 /*
3dfc813d 2570 * At boot time, we don't yet know if devices will be 64-bit capable.
3bdb2591 2571 * Assume that they will — if they turn out not to be, then we can
3dfc813d
DW
2572 * take them out of the 1:1 domain later.
2573 */
8fcc5372
CW
2574 if (!startup) {
2575 /*
2576 * If the device's dma_mask is less than the system's memory
2577 * size then this is not a candidate for identity mapping.
2578 */
3bdb2591 2579 u64 dma_mask = *dev->dma_mask;
8fcc5372 2580
3bdb2591
DW
2581 if (dev->coherent_dma_mask &&
2582 dev->coherent_dma_mask < dma_mask)
2583 dma_mask = dev->coherent_dma_mask;
8fcc5372 2584
3bdb2591 2585 return dma_mask >= dma_get_required_mask(dev);
8fcc5372 2586 }
6941af28
DW
2587
2588 return 1;
2589}
2590
cf04eee8
DW
2591static int __init dev_prepare_static_identity_mapping(struct device *dev, int hw)
2592{
2593 int ret;
2594
2595 if (!iommu_should_identity_map(dev, 1))
2596 return 0;
2597
2598 ret = domain_add_dev_info(si_domain, dev,
2599 hw ? CONTEXT_TT_PASS_THROUGH :
2600 CONTEXT_TT_MULTI_LEVEL);
2601 if (!ret)
2602 pr_info("IOMMU: %s identity mapping for device %s\n",
2603 hw ? "hardware" : "software", dev_name(dev));
2604 else if (ret == -ENODEV)
2605 /* device not associated with an iommu */
2606 ret = 0;
2607
2608 return ret;
2609}
2610
2611
071e1374 2612static int __init iommu_prepare_static_identity_mapping(int hw)
2c2e2c38 2613{
2c2e2c38 2614 struct pci_dev *pdev = NULL;
cf04eee8
DW
2615 struct dmar_drhd_unit *drhd;
2616 struct intel_iommu *iommu;
2617 struct device *dev;
2618 int i;
2619 int ret = 0;
2c2e2c38 2620
19943b0e 2621 ret = si_domain_init(hw);
2c2e2c38
FY
2622 if (ret)
2623 return -EFAULT;
2624
2c2e2c38 2625 for_each_pci_dev(pdev) {
cf04eee8
DW
2626 ret = dev_prepare_static_identity_mapping(&pdev->dev, hw);
2627 if (ret)
2628 return ret;
2629 }
2630
2631 for_each_active_iommu(iommu, drhd)
2632 for_each_active_dev_scope(drhd->devices, drhd->devices_cnt, i, dev) {
2633 struct acpi_device_physical_node *pn;
2634 struct acpi_device *adev;
2635
2636 if (dev->bus != &acpi_bus_type)
2637 continue;
2638
 2639			adev = to_acpi_device(dev);
2640 mutex_lock(&adev->physical_node_lock);
2641 list_for_each_entry(pn, &adev->physical_node_list, node) {
2642 ret = dev_prepare_static_identity_mapping(pn->dev, hw);
2643 if (ret)
2644 break;
eae460b6 2645 }
cf04eee8
DW
2646 mutex_unlock(&adev->physical_node_lock);
2647 if (ret)
2648 return ret;
62edf5dc 2649 }
2c2e2c38
FY
2650
2651 return 0;
2652}
2653
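/*
 * init_dmars - one-time setup of all DMA remapping units: allocate the
 * per-IOMMU arrays, root entries and domain-id state, select queued or
 * register-based invalidation, create the static identity / RMRR / ISA
 * mappings, then enable fault reporting and translation on each active
 * IOMMU.
 */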
b779260b 2654static int __init init_dmars(void)
ba395927
KA
2655{
2656 struct dmar_drhd_unit *drhd;
2657 struct dmar_rmrr_unit *rmrr;
832bd858 2658 struct device *dev;
ba395927 2659 struct intel_iommu *iommu;
9d783ba0 2660 int i, ret;
2c2e2c38 2661
ba395927
KA
2662 /*
2663 * for each drhd
2664 * allocate root
2665 * initialize and program root entry to not present
2666 * endfor
2667 */
2668 for_each_drhd_unit(drhd) {
5e0d2a6f 2669 /*
2670 * lock not needed as this is only incremented in the single
2671 * threaded kernel __init code path all other access are read
2672 * only
2673 */
1b198bb0
MT
2674 if (g_num_of_iommus < IOMMU_UNITS_SUPPORTED) {
2675 g_num_of_iommus++;
2676 continue;
2677 }
2678 printk_once(KERN_ERR "intel-iommu: exceeded %d IOMMUs\n",
2679 IOMMU_UNITS_SUPPORTED);
5e0d2a6f 2680 }
2681
d9630fe9
WH
2682 g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
2683 GFP_KERNEL);
2684 if (!g_iommus) {
2685 printk(KERN_ERR "Allocating global iommu array failed\n");
2686 ret = -ENOMEM;
2687 goto error;
2688 }
2689
80b20dd8 2690 deferred_flush = kzalloc(g_num_of_iommus *
2691 sizeof(struct deferred_flush_tables), GFP_KERNEL);
2692 if (!deferred_flush) {
5e0d2a6f 2693 ret = -ENOMEM;
989d51fc 2694 goto free_g_iommus;
5e0d2a6f 2695 }
2696
7c919779 2697 for_each_active_iommu(iommu, drhd) {
d9630fe9 2698 g_iommus[iommu->seq_id] = iommu;
ba395927 2699
e61d98d8
SS
2700 ret = iommu_init_domains(iommu);
2701 if (ret)
989d51fc 2702 goto free_iommu;
e61d98d8 2703
ba395927
KA
2704 /*
2705 * TBD:
2706 * we could share the same root & context tables
25985edc 2707	 * among all IOMMUs. Need to split it later.
ba395927
KA
2708 */
2709 ret = iommu_alloc_root_entry(iommu);
2710 if (ret) {
2711 printk(KERN_ERR "IOMMU: allocate root entry failed\n");
989d51fc 2712 goto free_iommu;
ba395927 2713 }
4ed0d3e6 2714 if (!ecap_pass_through(iommu->ecap))
19943b0e 2715 hw_pass_through = 0;
ba395927
KA
2716 }
2717
1531a6a6
SS
2718 /*
2719 * Start from the sane iommu hardware state.
2720 */
7c919779 2721 for_each_active_iommu(iommu, drhd) {
1531a6a6
SS
2722 /*
2723 * If the queued invalidation is already initialized by us
2724 * (for example, while enabling interrupt-remapping) then
2725 * we got the things already rolling from a sane state.
2726 */
2727 if (iommu->qi)
2728 continue;
2729
2730 /*
2731 * Clear any previous faults.
2732 */
2733 dmar_fault(-1, iommu);
2734 /*
2735 * Disable queued invalidation if supported and already enabled
2736 * before OS handover.
2737 */
2738 dmar_disable_qi(iommu);
2739 }
2740
7c919779 2741 for_each_active_iommu(iommu, drhd) {
a77b67d4
YS
2742 if (dmar_enable_qi(iommu)) {
2743 /*
2744 * Queued Invalidate not enabled, use Register Based
2745 * Invalidate
2746 */
2747 iommu->flush.flush_context = __iommu_flush_context;
2748 iommu->flush.flush_iotlb = __iommu_flush_iotlb;
680a7524 2749 printk(KERN_INFO "IOMMU %d 0x%Lx: using Register based "
b4e0f9eb 2750 "invalidation\n",
680a7524 2751 iommu->seq_id,
b4e0f9eb 2752 (unsigned long long)drhd->reg_base_addr);
a77b67d4
YS
2753 } else {
2754 iommu->flush.flush_context = qi_flush_context;
2755 iommu->flush.flush_iotlb = qi_flush_iotlb;
680a7524 2756 printk(KERN_INFO "IOMMU %d 0x%Lx: using Queued "
b4e0f9eb 2757 "invalidation\n",
680a7524 2758 iommu->seq_id,
b4e0f9eb 2759 (unsigned long long)drhd->reg_base_addr);
a77b67d4
YS
2760 }
2761 }
2762
19943b0e 2763 if (iommu_pass_through)
e0fc7e0b
DW
2764 iommu_identity_mapping |= IDENTMAP_ALL;
2765
d3f13810 2766#ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA
e0fc7e0b 2767 iommu_identity_mapping |= IDENTMAP_GFX;
19943b0e 2768#endif
e0fc7e0b
DW
2769
2770 check_tylersburg_isoch();
2771
ba395927 2772 /*
19943b0e
DW
2773 * If pass through is not set or not enabled, setup context entries for
2774 * identity mappings for rmrr, gfx, and isa and may fall back to static
2775 * identity mapping if iommu_identity_mapping is set.
ba395927 2776 */
19943b0e
DW
2777 if (iommu_identity_mapping) {
2778 ret = iommu_prepare_static_identity_mapping(hw_pass_through);
4ed0d3e6 2779 if (ret) {
19943b0e 2780 printk(KERN_CRIT "Failed to setup IOMMU pass-through\n");
989d51fc 2781 goto free_iommu;
ba395927
KA
2782 }
2783 }
ba395927 2784 /*
19943b0e
DW
2785 * For each rmrr
2786 * for each dev attached to rmrr
2787 * do
2788 * locate drhd for dev, alloc domain for dev
2789 * allocate free domain
2790 * allocate page table entries for rmrr
2791 * if context not allocated for bus
2792 * allocate and init context
2793 * set present in root table for this bus
2794 * init context with domain, translation etc
2795 * endfor
2796 * endfor
ba395927 2797 */
19943b0e
DW
2798 printk(KERN_INFO "IOMMU: Setting RMRR:\n");
2799 for_each_rmrr_units(rmrr) {
b683b230
JL
 2800		/* some BIOSes list non-existent devices in the DMAR table. */
2801 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
832bd858 2802 i, dev) {
0b9d9753 2803 ret = iommu_prepare_rmrr_dev(rmrr, dev);
19943b0e
DW
2804 if (ret)
2805 printk(KERN_ERR
2806 "IOMMU: mapping reserved region failed\n");
ba395927 2807 }
4ed0d3e6 2808 }
49a0429e 2809
19943b0e
DW
2810 iommu_prepare_isa();
2811
ba395927
KA
2812 /*
2813 * for each drhd
2814 * enable fault log
2815 * global invalidate context cache
2816 * global invalidate iotlb
2817 * enable translation
2818 */
7c919779 2819 for_each_iommu(iommu, drhd) {
51a63e67
JC
2820 if (drhd->ignored) {
2821 /*
2822 * we always have to disable PMRs or DMA may fail on
2823 * this device
2824 */
2825 if (force_on)
7c919779 2826 iommu_disable_protect_mem_regions(iommu);
ba395927 2827 continue;
51a63e67 2828 }
ba395927
KA
2829
2830 iommu_flush_write_buffer(iommu);
2831
3460a6d9
KA
2832 ret = dmar_set_interrupt(iommu);
2833 if (ret)
989d51fc 2834 goto free_iommu;
3460a6d9 2835
ba395927
KA
2836 iommu_set_root_entry(iommu);
2837
4c25a2c1 2838 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
1f0ef2aa 2839 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
f8bab735 2840
ba395927
KA
2841 ret = iommu_enable_translation(iommu);
2842 if (ret)
989d51fc 2843 goto free_iommu;
b94996c9
DW
2844
2845 iommu_disable_protect_mem_regions(iommu);
ba395927
KA
2846 }
2847
2848 return 0;
989d51fc
JL
2849
2850free_iommu:
7c919779 2851 for_each_active_iommu(iommu, drhd)
a868e6b7 2852 free_dmar_iommu(iommu);
9bdc531e 2853 kfree(deferred_flush);
989d51fc 2854free_g_iommus:
d9630fe9 2855 kfree(g_iommus);
989d51fc 2856error:
ba395927
KA
2857 return ret;
2858}
2859
5a5e02a6 2860/* This takes a number of _MM_ pages, not VTD pages */
875764de
DW
2861static struct iova *intel_alloc_iova(struct device *dev,
2862 struct dmar_domain *domain,
2863 unsigned long nrpages, uint64_t dma_mask)
ba395927 2864{
ba395927 2865 struct iova *iova = NULL;
ba395927 2866
875764de
DW
2867 /* Restrict dma_mask to the width that the iommu can handle */
2868 dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask);
2869
2870 if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) {
ba395927
KA
2871 /*
2872 * First try to allocate an io virtual address in
284901a9 2873 * DMA_BIT_MASK(32) and if that fails then try allocating
3609801e 2874 * from higher range
ba395927 2875 */
875764de
DW
2876 iova = alloc_iova(&domain->iovad, nrpages,
2877 IOVA_PFN(DMA_BIT_MASK(32)), 1);
2878 if (iova)
2879 return iova;
2880 }
2881 iova = alloc_iova(&domain->iovad, nrpages, IOVA_PFN(dma_mask), 1);
2882 if (unlikely(!iova)) {
2883 printk(KERN_ERR "Allocating %ld-page iova for %s failed",
207e3592 2884 nrpages, dev_name(dev));
f76aec76
KA
2885 return NULL;
2886 }
2887
2888 return iova;
2889}
2890
d4b709f4 2891static struct dmar_domain *__get_valid_domain_for_dev(struct device *dev)
f76aec76
KA
2892{
2893 struct dmar_domain *domain;
2894 int ret;
2895
d4b709f4 2896 domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
f76aec76 2897 if (!domain) {
d4b709f4
DW
2898 printk(KERN_ERR "Allocating domain for %s failed",
2899 dev_name(dev));
4fe05bbc 2900 return NULL;
ba395927
KA
2901 }
2902
2903 /* make sure context mapping is ok */
d4b709f4
DW
2904 if (unlikely(!domain_context_mapped(dev))) {
2905 ret = domain_context_mapping(domain, dev, CONTEXT_TT_MULTI_LEVEL);
f76aec76 2906 if (ret) {
d4b709f4
DW
2907 printk(KERN_ERR "Domain context map for %s failed",
2908 dev_name(dev));
4fe05bbc 2909 return NULL;
f76aec76 2910 }
ba395927
KA
2911 }
2912
f76aec76
KA
2913 return domain;
2914}
2915
d4b709f4 2916static inline struct dmar_domain *get_valid_domain_for_dev(struct device *dev)
147202aa
DW
2917{
2918 struct device_domain_info *info;
2919
2920 /* No lock here, assumes no domain exit in normal case */
d4b709f4 2921 info = dev->archdata.iommu;
147202aa
DW
2922 if (likely(info))
2923 return info->domain;
2924
2925 return __get_valid_domain_for_dev(dev);
2926}
2927
3d89194a 2928static int iommu_dummy(struct device *dev)
2c2e2c38 2929{
3d89194a 2930 return dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
2c2e2c38
FY
2931}
2932
ecb509ec 2933/* Check if the dev needs to go through the non-identity map/unmap process. */
73676832 2934static int iommu_no_mapping(struct device *dev)
2c2e2c38
FY
2935{
2936 int found;
2937
3d89194a 2938 if (iommu_dummy(dev))
1e4c64c4
DW
2939 return 1;
2940
2c2e2c38 2941 if (!iommu_identity_mapping)
1e4c64c4 2942 return 0;
2c2e2c38 2943
9b226624 2944 found = identity_mapping(dev);
2c2e2c38 2945 if (found) {
ecb509ec 2946 if (iommu_should_identity_map(dev, 0))
2c2e2c38
FY
2947 return 1;
2948 else {
2949 /*
 2950			 * A 32 bit DMA device is removed from si_domain and falls back
2951 * to non-identity mapping.
2952 */
bf9c9eda 2953 domain_remove_one_dev_info(si_domain, dev);
2c2e2c38 2954 printk(KERN_INFO "32bit %s uses non-identity mapping\n",
ecb509ec 2955 dev_name(dev));
2c2e2c38
FY
2956 return 0;
2957 }
2958 } else {
2959 /*
 2960		 * When a 64 bit DMA device is detached from a VM, the device
2961 * is put into si_domain for identity mapping.
2962 */
ecb509ec 2963 if (iommu_should_identity_map(dev, 0)) {
2c2e2c38 2964 int ret;
5913c9bf 2965 ret = domain_add_dev_info(si_domain, dev,
5fe60f4e
DW
2966 hw_pass_through ?
2967 CONTEXT_TT_PASS_THROUGH :
2968 CONTEXT_TT_MULTI_LEVEL);
2c2e2c38
FY
2969 if (!ret) {
2970 printk(KERN_INFO "64bit %s uses identity mapping\n",
ecb509ec 2971 dev_name(dev));
2c2e2c38
FY
2972 return 1;
2973 }
2974 }
2975 }
2976
1e4c64c4 2977 return 0;
2c2e2c38
FY
2978}
2979
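/*
 * Core map path for a single buffer: look up (or create) the device's
 * domain, allocate an IOVA range below @dma_mask, map it to @paddr with the
 * requested protection and flush the IOTLB (caching mode) or the write
 * buffer.
 */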
5040a918 2980static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr,
bb9e6d65 2981 size_t size, int dir, u64 dma_mask)
f76aec76 2982{
f76aec76 2983 struct dmar_domain *domain;
5b6985ce 2984 phys_addr_t start_paddr;
f76aec76
KA
2985 struct iova *iova;
2986 int prot = 0;
6865f0d1 2987 int ret;
8c11e798 2988 struct intel_iommu *iommu;
33041ec0 2989 unsigned long paddr_pfn = paddr >> PAGE_SHIFT;
f76aec76
KA
2990
2991 BUG_ON(dir == DMA_NONE);
2c2e2c38 2992
5040a918 2993 if (iommu_no_mapping(dev))
6865f0d1 2994 return paddr;
f76aec76 2995
5040a918 2996 domain = get_valid_domain_for_dev(dev);
f76aec76
KA
2997 if (!domain)
2998 return 0;
2999
8c11e798 3000 iommu = domain_get_iommu(domain);
88cb6a74 3001 size = aligned_nrpages(paddr, size);
f76aec76 3002
5040a918 3003 iova = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size), dma_mask);
f76aec76
KA
3004 if (!iova)
3005 goto error;
3006
ba395927
KA
3007 /*
3008 * Check if DMAR supports zero-length reads on write only
3009 * mappings..
3010 */
3011 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
8c11e798 3012 !cap_zlr(iommu->cap))
ba395927
KA
3013 prot |= DMA_PTE_READ;
3014 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3015 prot |= DMA_PTE_WRITE;
3016 /*
6865f0d1 3017 * paddr - (paddr + size) might be partial page, we should map the whole
ba395927 3018 * page. Note: if two part of one page are separately mapped, we
6865f0d1 3019 * might have two guest_addr mapping to the same host paddr, but this
ba395927
KA
3020 * is not a big problem
3021 */
0ab36de2 3022 ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova->pfn_lo),
33041ec0 3023 mm_to_dma_pfn(paddr_pfn), size, prot);
ba395927
KA
3024 if (ret)
3025 goto error;
3026
1f0ef2aa
DW
3027 /* it's a non-present to present mapping. Only flush if caching mode */
3028 if (cap_caching_mode(iommu->cap))
ea8ea460 3029 iommu_flush_iotlb_psi(iommu, domain->id, mm_to_dma_pfn(iova->pfn_lo), size, 0, 1);
1f0ef2aa 3030 else
8c11e798 3031 iommu_flush_write_buffer(iommu);
f76aec76 3032
03d6a246
DW
3033 start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
3034 start_paddr += paddr & ~PAGE_MASK;
3035 return start_paddr;
ba395927 3036
ba395927 3037error:
f76aec76
KA
3038 if (iova)
3039 __free_iova(&domain->iovad, iova);
4cf2e75d 3040 printk(KERN_ERR"Device %s request: %zx@%llx dir %d --- failed\n",
5040a918 3041 dev_name(dev), size, (unsigned long long)paddr, dir);
ba395927
KA
3042 return 0;
3043}
3044
ffbbef5c
FT
3045static dma_addr_t intel_map_page(struct device *dev, struct page *page,
3046 unsigned long offset, size_t size,
3047 enum dma_data_direction dir,
3048 struct dma_attrs *attrs)
bb9e6d65 3049{
ffbbef5c 3050 return __intel_map_single(dev, page_to_phys(page) + offset, size,
46333e37 3051 dir, *dev->dma_mask);
bb9e6d65
FT
3052}
3053
5e0d2a6f 3054static void flush_unmaps(void)
3055{
80b20dd8 3056 int i, j;
5e0d2a6f 3057
5e0d2a6f 3058 timer_on = 0;
3059
3060 /* just flush them all */
3061 for (i = 0; i < g_num_of_iommus; i++) {
a2bb8459
WH
3062 struct intel_iommu *iommu = g_iommus[i];
3063 if (!iommu)
3064 continue;
c42d9f32 3065
9dd2fe89
YZ
3066 if (!deferred_flush[i].next)
3067 continue;
3068
78d5f0f5
NA
3069 /* In caching mode, global flushes turn emulation expensive */
3070 if (!cap_caching_mode(iommu->cap))
3071 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
93a23a72 3072 DMA_TLB_GLOBAL_FLUSH);
9dd2fe89 3073 for (j = 0; j < deferred_flush[i].next; j++) {
93a23a72
YZ
3074 unsigned long mask;
3075 struct iova *iova = deferred_flush[i].iova[j];
78d5f0f5
NA
3076 struct dmar_domain *domain = deferred_flush[i].domain[j];
3077
3078 /* On real hardware multiple invalidations are expensive */
3079 if (cap_caching_mode(iommu->cap))
3080 iommu_flush_iotlb_psi(iommu, domain->id,
ea8ea460
DW
3081 iova->pfn_lo, iova->pfn_hi - iova->pfn_lo + 1,
3082 !deferred_flush[i].freelist[j], 0);
78d5f0f5
NA
3083 else {
3084 mask = ilog2(mm_to_dma_pfn(iova->pfn_hi - iova->pfn_lo + 1));
3085 iommu_flush_dev_iotlb(deferred_flush[i].domain[j],
3086 (uint64_t)iova->pfn_lo << PAGE_SHIFT, mask);
3087 }
93a23a72 3088 __free_iova(&deferred_flush[i].domain[j]->iovad, iova);
ea8ea460
DW
3089 if (deferred_flush[i].freelist[j])
3090 dma_free_pagelist(deferred_flush[i].freelist[j]);
80b20dd8 3091 }
9dd2fe89 3092 deferred_flush[i].next = 0;
5e0d2a6f 3093 }
3094
5e0d2a6f 3095 list_size = 0;
5e0d2a6f 3096}
3097
3098static void flush_unmaps_timeout(unsigned long data)
3099{
80b20dd8 3100 unsigned long flags;
3101
3102 spin_lock_irqsave(&async_umap_flush_lock, flags);
5e0d2a6f 3103 flush_unmaps();
80b20dd8 3104 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
5e0d2a6f 3105}
3106
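/*
 * Queue an IOVA range (and the freelist of page-table pages backing it) for
 * deferred invalidation.  flush_unmaps() is triggered either by the
 * unmap_timer or by hitting HIGH_WATER_MARK and batches the IOTLB flushes.
 */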
ea8ea460 3107static void add_unmap(struct dmar_domain *dom, struct iova *iova, struct page *freelist)
5e0d2a6f 3108{
3109 unsigned long flags;
80b20dd8 3110 int next, iommu_id;
8c11e798 3111 struct intel_iommu *iommu;
5e0d2a6f 3112
3113 spin_lock_irqsave(&async_umap_flush_lock, flags);
80b20dd8 3114 if (list_size == HIGH_WATER_MARK)
3115 flush_unmaps();
3116
8c11e798
WH
3117 iommu = domain_get_iommu(dom);
3118 iommu_id = iommu->seq_id;
c42d9f32 3119
80b20dd8 3120 next = deferred_flush[iommu_id].next;
3121 deferred_flush[iommu_id].domain[next] = dom;
3122 deferred_flush[iommu_id].iova[next] = iova;
ea8ea460 3123 deferred_flush[iommu_id].freelist[next] = freelist;
80b20dd8 3124 deferred_flush[iommu_id].next++;
5e0d2a6f 3125
3126 if (!timer_on) {
3127 mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10));
3128 timer_on = 1;
3129 }
3130 list_size++;
3131 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
3132}
3133
ffbbef5c
FT
3134static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
3135 size_t size, enum dma_data_direction dir,
3136 struct dma_attrs *attrs)
ba395927 3137{
f76aec76 3138 struct dmar_domain *domain;
d794dc9b 3139 unsigned long start_pfn, last_pfn;
ba395927 3140 struct iova *iova;
8c11e798 3141 struct intel_iommu *iommu;
ea8ea460 3142 struct page *freelist;
ba395927 3143
73676832 3144 if (iommu_no_mapping(dev))
f76aec76 3145 return;
2c2e2c38 3146
1525a29a 3147 domain = find_domain(dev);
ba395927
KA
3148 BUG_ON(!domain);
3149
8c11e798
WH
3150 iommu = domain_get_iommu(domain);
3151
ba395927 3152 iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
85b98276
DW
3153 if (WARN_ONCE(!iova, "Driver unmaps unmatched page at PFN %llx\n",
3154 (unsigned long long)dev_addr))
ba395927 3155 return;
ba395927 3156
d794dc9b
DW
3157 start_pfn = mm_to_dma_pfn(iova->pfn_lo);
3158 last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
ba395927 3159
d794dc9b 3160 pr_debug("Device %s unmapping: pfn %lx-%lx\n",
207e3592 3161 dev_name(dev), start_pfn, last_pfn);
ba395927 3162
ea8ea460 3163 freelist = domain_unmap(domain, start_pfn, last_pfn);
d794dc9b 3164
5e0d2a6f 3165 if (intel_iommu_strict) {
03d6a246 3166 iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
ea8ea460 3167 last_pfn - start_pfn + 1, !freelist, 0);
5e0d2a6f 3168 /* free iova */
3169 __free_iova(&domain->iovad, iova);
ea8ea460 3170 dma_free_pagelist(freelist);
5e0d2a6f 3171 } else {
ea8ea460 3172 add_unmap(domain, iova, freelist);
5e0d2a6f 3173 /*
 3174		 * queue up the release of the unmap to save roughly 1/6 of the
 3175		 * cpu time used up by the iotlb flush operation...
3176 */
5e0d2a6f 3177 }
ba395927
KA
3178}
3179
5040a918 3180static void *intel_alloc_coherent(struct device *dev, size_t size,
baa676fc
AP
3181 dma_addr_t *dma_handle, gfp_t flags,
3182 struct dma_attrs *attrs)
ba395927 3183{
36746436 3184 struct page *page = NULL;
ba395927
KA
3185 int order;
3186
5b6985ce 3187 size = PAGE_ALIGN(size);
ba395927 3188 order = get_order(size);
e8bb910d 3189
5040a918 3190 if (!iommu_no_mapping(dev))
e8bb910d 3191 flags &= ~(GFP_DMA | GFP_DMA32);
5040a918
DW
3192 else if (dev->coherent_dma_mask < dma_get_required_mask(dev)) {
3193 if (dev->coherent_dma_mask < DMA_BIT_MASK(32))
e8bb910d
AW
3194 flags |= GFP_DMA;
3195 else
3196 flags |= GFP_DMA32;
3197 }
ba395927 3198
36746436
AM
3199 if (flags & __GFP_WAIT) {
3200 unsigned int count = size >> PAGE_SHIFT;
3201
3202 page = dma_alloc_from_contiguous(dev, count, order);
3203 if (page && iommu_no_mapping(dev) &&
3204 page_to_phys(page) + size > dev->coherent_dma_mask) {
3205 dma_release_from_contiguous(dev, page, count);
3206 page = NULL;
3207 }
3208 }
3209
3210 if (!page)
3211 page = alloc_pages(flags, order);
3212 if (!page)
ba395927 3213 return NULL;
36746436 3214 memset(page_address(page), 0, size);
ba395927 3215
36746436 3216 *dma_handle = __intel_map_single(dev, page_to_phys(page), size,
bb9e6d65 3217 DMA_BIDIRECTIONAL,
5040a918 3218 dev->coherent_dma_mask);
ba395927 3219 if (*dma_handle)
36746436
AM
3220 return page_address(page);
3221 if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3222 __free_pages(page, order);
3223
ba395927
KA
3224 return NULL;
3225}
3226
5040a918 3227static void intel_free_coherent(struct device *dev, size_t size, void *vaddr,
baa676fc 3228 dma_addr_t dma_handle, struct dma_attrs *attrs)
ba395927
KA
3229{
3230 int order;
36746436 3231 struct page *page = virt_to_page(vaddr);
ba395927 3232
5b6985ce 3233 size = PAGE_ALIGN(size);
ba395927
KA
3234 order = get_order(size);
3235
5040a918 3236 intel_unmap_page(dev, dma_handle, size, DMA_BIDIRECTIONAL, NULL);
36746436
AM
3237 if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3238 __free_pages(page, order);
ba395927
KA
3239}
3240
5040a918 3241static void intel_unmap_sg(struct device *dev, struct scatterlist *sglist,
d7ab5c46
FT
3242 int nelems, enum dma_data_direction dir,
3243 struct dma_attrs *attrs)
ba395927 3244{
ba395927 3245 struct dmar_domain *domain;
d794dc9b 3246 unsigned long start_pfn, last_pfn;
f76aec76 3247 struct iova *iova;
8c11e798 3248 struct intel_iommu *iommu;
ea8ea460 3249 struct page *freelist;
ba395927 3250
5040a918 3251 if (iommu_no_mapping(dev))
ba395927
KA
3252 return;
3253
5040a918 3254 domain = find_domain(dev);
8c11e798
WH
3255 BUG_ON(!domain);
3256
3257 iommu = domain_get_iommu(domain);
ba395927 3258
c03ab37c 3259 iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address));
85b98276
DW
3260 if (WARN_ONCE(!iova, "Driver unmaps unmatched sglist at PFN %llx\n",
3261 (unsigned long long)sglist[0].dma_address))
f76aec76 3262 return;
f76aec76 3263
d794dc9b
DW
3264 start_pfn = mm_to_dma_pfn(iova->pfn_lo);
3265 last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
f76aec76 3266
ea8ea460 3267 freelist = domain_unmap(domain, start_pfn, last_pfn);
f76aec76 3268
acea0018
DW
3269 if (intel_iommu_strict) {
3270 iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
ea8ea460 3271 last_pfn - start_pfn + 1, !freelist, 0);
acea0018
DW
3272 /* free iova */
3273 __free_iova(&domain->iovad, iova);
ea8ea460 3274 dma_free_pagelist(freelist);
acea0018 3275 } else {
ea8ea460 3276 add_unmap(domain, iova, freelist);
acea0018
DW
3277 /*
 3278		 * queue up the release of the unmap to save roughly 1/6 of the
 3279		 * cpu time used up by the iotlb flush operation...
3280 */
3281 }
ba395927
KA
3282}
3283
ba395927 3284static int intel_nontranslate_map_sg(struct device *hddev,
c03ab37c 3285 struct scatterlist *sglist, int nelems, int dir)
ba395927
KA
3286{
3287 int i;
c03ab37c 3288 struct scatterlist *sg;
ba395927 3289
c03ab37c 3290 for_each_sg(sglist, sg, nelems, i) {
12d4d40e 3291 BUG_ON(!sg_page(sg));
4cf2e75d 3292 sg->dma_address = page_to_phys(sg_page(sg)) + sg->offset;
c03ab37c 3293 sg->dma_length = sg->length;
ba395927
KA
3294 }
3295 return nelems;
3296}
3297
5040a918 3298static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nelems,
d7ab5c46 3299 enum dma_data_direction dir, struct dma_attrs *attrs)
ba395927 3300{
ba395927 3301 int i;
ba395927 3302 struct dmar_domain *domain;
f76aec76
KA
3303 size_t size = 0;
3304 int prot = 0;
f76aec76
KA
3305 struct iova *iova = NULL;
3306 int ret;
c03ab37c 3307 struct scatterlist *sg;
b536d24d 3308 unsigned long start_vpfn;
8c11e798 3309 struct intel_iommu *iommu;
ba395927
KA
3310
3311 BUG_ON(dir == DMA_NONE);
5040a918
DW
3312 if (iommu_no_mapping(dev))
3313 return intel_nontranslate_map_sg(dev, sglist, nelems, dir);
ba395927 3314
5040a918 3315 domain = get_valid_domain_for_dev(dev);
f76aec76
KA
3316 if (!domain)
3317 return 0;
3318
8c11e798
WH
3319 iommu = domain_get_iommu(domain);
3320
b536d24d 3321 for_each_sg(sglist, sg, nelems, i)
88cb6a74 3322 size += aligned_nrpages(sg->offset, sg->length);
f76aec76 3323
5040a918
DW
3324 iova = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size),
3325 *dev->dma_mask);
f76aec76 3326 if (!iova) {
c03ab37c 3327 sglist->dma_length = 0;
f76aec76
KA
3328 return 0;
3329 }
3330
3331 /*
3332 * Check if DMAR supports zero-length reads on write only
3333 * mappings..
3334 */
3335 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
8c11e798 3336 !cap_zlr(iommu->cap))
f76aec76
KA
3337 prot |= DMA_PTE_READ;
3338 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3339 prot |= DMA_PTE_WRITE;
3340
b536d24d 3341 start_vpfn = mm_to_dma_pfn(iova->pfn_lo);
e1605495 3342
f532959b 3343 ret = domain_sg_mapping(domain, start_vpfn, sglist, size, prot);
e1605495
DW
3344 if (unlikely(ret)) {
3345 /* clear the page */
3346 dma_pte_clear_range(domain, start_vpfn,
3347 start_vpfn + size - 1);
3348 /* free page tables */
3349 dma_pte_free_pagetable(domain, start_vpfn,
3350 start_vpfn + size - 1);
3351 /* free iova */
3352 __free_iova(&domain->iovad, iova);
3353 return 0;
ba395927
KA
3354 }
3355
1f0ef2aa
DW
3356 /* it's a non-present to present mapping. Only flush if caching mode */
3357 if (cap_caching_mode(iommu->cap))
ea8ea460 3358 iommu_flush_iotlb_psi(iommu, domain->id, start_vpfn, size, 0, 1);
1f0ef2aa 3359 else
8c11e798 3360 iommu_flush_write_buffer(iommu);
1f0ef2aa 3361
ba395927
KA
3362 return nelems;
3363}
3364
dfb805e8
FT
3365static int intel_mapping_error(struct device *dev, dma_addr_t dma_addr)
3366{
3367 return !dma_addr;
3368}
3369
160c1d8e 3370struct dma_map_ops intel_dma_ops = {
baa676fc
AP
3371 .alloc = intel_alloc_coherent,
3372 .free = intel_free_coherent,
ba395927
KA
3373 .map_sg = intel_map_sg,
3374 .unmap_sg = intel_unmap_sg,
ffbbef5c
FT
3375 .map_page = intel_map_page,
3376 .unmap_page = intel_unmap_page,
dfb805e8 3377 .mapping_error = intel_mapping_error,
ba395927
KA
3378};
3379
3380static inline int iommu_domain_cache_init(void)
3381{
3382 int ret = 0;
3383
3384 iommu_domain_cache = kmem_cache_create("iommu_domain",
3385 sizeof(struct dmar_domain),
3386 0,
3387 SLAB_HWCACHE_ALIGN,
3388
3389 NULL);
3390 if (!iommu_domain_cache) {
3391 printk(KERN_ERR "Couldn't create iommu_domain cache\n");
3392 ret = -ENOMEM;
3393 }
3394
3395 return ret;
3396}
3397
3398static inline int iommu_devinfo_cache_init(void)
3399{
3400 int ret = 0;
3401
3402 iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
3403 sizeof(struct device_domain_info),
3404 0,
3405 SLAB_HWCACHE_ALIGN,
ba395927
KA
3406 NULL);
3407 if (!iommu_devinfo_cache) {
3408 printk(KERN_ERR "Couldn't create devinfo cache\n");
3409 ret = -ENOMEM;
3410 }
3411
3412 return ret;
3413}
3414
3415static inline int iommu_iova_cache_init(void)
3416{
3417 int ret = 0;
3418
3419 iommu_iova_cache = kmem_cache_create("iommu_iova",
3420 sizeof(struct iova),
3421 0,
3422 SLAB_HWCACHE_ALIGN,
ba395927
KA
3423 NULL);
3424 if (!iommu_iova_cache) {
3425 printk(KERN_ERR "Couldn't create iova cache\n");
3426 ret = -ENOMEM;
3427 }
3428
3429 return ret;
3430}
3431
3432static int __init iommu_init_mempool(void)
3433{
3434 int ret;
3435 ret = iommu_iova_cache_init();
3436 if (ret)
3437 return ret;
3438
3439 ret = iommu_domain_cache_init();
3440 if (ret)
3441 goto domain_error;
3442
3443 ret = iommu_devinfo_cache_init();
3444 if (!ret)
3445 return ret;
3446
3447 kmem_cache_destroy(iommu_domain_cache);
3448domain_error:
3449 kmem_cache_destroy(iommu_iova_cache);
3450
3451 return -ENOMEM;
3452}
3453
3454static void __init iommu_exit_mempool(void)
3455{
3456 kmem_cache_destroy(iommu_devinfo_cache);
3457 kmem_cache_destroy(iommu_domain_cache);
3458 kmem_cache_destroy(iommu_iova_cache);
3459
3460}
3461
556ab45f
DW
3462static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
3463{
3464 struct dmar_drhd_unit *drhd;
3465 u32 vtbar;
3466 int rc;
3467
3468 /* We know that this device on this chipset has its own IOMMU.
3469 * If we find it under a different IOMMU, then the BIOS is lying
3470 * to us. Hope that the IOMMU for this device is actually
3471 * disabled, and it needs no translation...
3472 */
3473 rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
3474 if (rc) {
3475 /* "can't" happen */
3476 dev_info(&pdev->dev, "failed to run vt-d quirk\n");
3477 return;
3478 }
3479 vtbar &= 0xffff0000;
3480
 3481	/* we know that this iommu should be at offset 0xa000 from vtbar */
3482 drhd = dmar_find_matched_drhd_unit(pdev);
3483 if (WARN_TAINT_ONCE(!drhd || drhd->reg_base_addr - vtbar != 0xa000,
3484 TAINT_FIRMWARE_WORKAROUND,
3485 "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n"))
3486 pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
3487}
3488DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu);
3489
ba395927
KA
3490static void __init init_no_remapping_devices(void)
3491{
3492 struct dmar_drhd_unit *drhd;
832bd858 3493 struct device *dev;
b683b230 3494 int i;
ba395927
KA
3495
3496 for_each_drhd_unit(drhd) {
3497 if (!drhd->include_all) {
b683b230
JL
3498 for_each_active_dev_scope(drhd->devices,
3499 drhd->devices_cnt, i, dev)
3500 break;
832bd858 3501 /* ignore DMAR unit if no devices exist */
ba395927
KA
3502 if (i == drhd->devices_cnt)
3503 drhd->ignored = 1;
3504 }
3505 }
3506
7c919779 3507 for_each_active_drhd_unit(drhd) {
7c919779 3508 if (drhd->include_all)
ba395927
KA
3509 continue;
3510
b683b230
JL
3511 for_each_active_dev_scope(drhd->devices,
3512 drhd->devices_cnt, i, dev)
832bd858 3513 if (!dev_is_pci(dev) || !IS_GFX_DEVICE(to_pci_dev(dev)))
ba395927 3514 break;
ba395927
KA
3515 if (i < drhd->devices_cnt)
3516 continue;
3517
c0771df8
DW
3518 /* This IOMMU has *only* gfx devices. Either bypass it or
3519 set the gfx_mapped flag, as appropriate */
3520 if (dmar_map_gfx) {
3521 intel_iommu_gfx_mapped = 1;
3522 } else {
3523 drhd->ignored = 1;
b683b230
JL
3524 for_each_active_dev_scope(drhd->devices,
3525 drhd->devices_cnt, i, dev)
832bd858 3526 dev->archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
ba395927
KA
3527 }
3528 }
3529}
3530
f59c7b69
FY
3531#ifdef CONFIG_SUSPEND
3532static int init_iommu_hw(void)
3533{
3534 struct dmar_drhd_unit *drhd;
3535 struct intel_iommu *iommu = NULL;
3536
3537 for_each_active_iommu(iommu, drhd)
3538 if (iommu->qi)
3539 dmar_reenable_qi(iommu);
3540
b779260b
JC
3541 for_each_iommu(iommu, drhd) {
3542 if (drhd->ignored) {
3543 /*
3544 * we always have to disable PMRs or DMA may fail on
3545 * this device
3546 */
3547 if (force_on)
3548 iommu_disable_protect_mem_regions(iommu);
3549 continue;
3550 }
3551
f59c7b69
FY
3552 iommu_flush_write_buffer(iommu);
3553
3554 iommu_set_root_entry(iommu);
3555
3556 iommu->flush.flush_context(iommu, 0, 0, 0,
1f0ef2aa 3557 DMA_CCMD_GLOBAL_INVL);
f59c7b69 3558 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
1f0ef2aa 3559 DMA_TLB_GLOBAL_FLUSH);
b779260b
JC
3560 if (iommu_enable_translation(iommu))
3561 return 1;
b94996c9 3562 iommu_disable_protect_mem_regions(iommu);
f59c7b69
FY
3563 }
3564
3565 return 0;
3566}
3567
3568static void iommu_flush_all(void)
3569{
3570 struct dmar_drhd_unit *drhd;
3571 struct intel_iommu *iommu;
3572
3573 for_each_active_iommu(iommu, drhd) {
3574 iommu->flush.flush_context(iommu, 0, 0, 0,
1f0ef2aa 3575 DMA_CCMD_GLOBAL_INVL);
f59c7b69 3576 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
1f0ef2aa 3577 DMA_TLB_GLOBAL_FLUSH);
f59c7b69
FY
3578 }
3579}
3580
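/*
 * Suspend: flush all context and IOTLB caches, disable translation and
 * save the fault event registers (FECTL/FEDATA/FEADDR/FEUADDR) of every
 * active IOMMU so iommu_resume() can restore them.
 */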
134fac3f 3581static int iommu_suspend(void)
f59c7b69
FY
3582{
3583 struct dmar_drhd_unit *drhd;
3584 struct intel_iommu *iommu = NULL;
3585 unsigned long flag;
3586
3587 for_each_active_iommu(iommu, drhd) {
3588 iommu->iommu_state = kzalloc(sizeof(u32) * MAX_SR_DMAR_REGS,
3589 GFP_ATOMIC);
3590 if (!iommu->iommu_state)
3591 goto nomem;
3592 }
3593
3594 iommu_flush_all();
3595
3596 for_each_active_iommu(iommu, drhd) {
3597 iommu_disable_translation(iommu);
3598
1f5b3c3f 3599 raw_spin_lock_irqsave(&iommu->register_lock, flag);
f59c7b69
FY
3600
3601 iommu->iommu_state[SR_DMAR_FECTL_REG] =
3602 readl(iommu->reg + DMAR_FECTL_REG);
3603 iommu->iommu_state[SR_DMAR_FEDATA_REG] =
3604 readl(iommu->reg + DMAR_FEDATA_REG);
3605 iommu->iommu_state[SR_DMAR_FEADDR_REG] =
3606 readl(iommu->reg + DMAR_FEADDR_REG);
3607 iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
3608 readl(iommu->reg + DMAR_FEUADDR_REG);
3609
1f5b3c3f 3610 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
f59c7b69
FY
3611 }
3612 return 0;
3613
3614nomem:
3615 for_each_active_iommu(iommu, drhd)
3616 kfree(iommu->iommu_state);
3617
3618 return -ENOMEM;
3619}
3620
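/*
 * Resume: re-enable queued invalidation, root entry and translation via
 * init_iommu_hw(), then restore the saved fault event registers and free
 * the save area.
 */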
134fac3f 3621static void iommu_resume(void)
f59c7b69
FY
3622{
3623 struct dmar_drhd_unit *drhd;
3624 struct intel_iommu *iommu = NULL;
3625 unsigned long flag;
3626
3627 if (init_iommu_hw()) {
b779260b
JC
3628 if (force_on)
3629 panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
3630 else
3631 WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
134fac3f 3632 return;
f59c7b69
FY
3633 }
3634
3635 for_each_active_iommu(iommu, drhd) {
3636
1f5b3c3f 3637 raw_spin_lock_irqsave(&iommu->register_lock, flag);
f59c7b69
FY
3638
3639 writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
3640 iommu->reg + DMAR_FECTL_REG);
3641 writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
3642 iommu->reg + DMAR_FEDATA_REG);
3643 writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
3644 iommu->reg + DMAR_FEADDR_REG);
3645 writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
3646 iommu->reg + DMAR_FEUADDR_REG);
3647
1f5b3c3f 3648 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
f59c7b69
FY
3649 }
3650
3651 for_each_active_iommu(iommu, drhd)
3652 kfree(iommu->iommu_state);
f59c7b69
FY
3653}
3654
134fac3f 3655static struct syscore_ops iommu_syscore_ops = {
f59c7b69
FY
3656 .resume = iommu_resume,
3657 .suspend = iommu_suspend,
3658};
3659
134fac3f 3660static void __init init_iommu_pm_ops(void)
f59c7b69 3661{
134fac3f 3662 register_syscore_ops(&iommu_syscore_ops);
f59c7b69
FY
3663}
3664
3665#else
99592ba4 3666static inline void init_iommu_pm_ops(void) {}
f59c7b69
FY
3667#endif /* CONFIG_PM */
3668
318fe7df
SS
3669
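/*
 * RMRR (Reserved Memory Region Reporting) structures describe ranges that
 * particular devices may DMA to independently of any driver (typically
 * USB legacy emulation or integrated graphics).  Record the range and its
 * device scope here; the regions are identity mapped for those devices
 * when the DMA remapping hardware is initialized.
 */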
3670int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header)
3671{
3672 struct acpi_dmar_reserved_memory *rmrr;
3673 struct dmar_rmrr_unit *rmrru;
3674
3675 rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
3676 if (!rmrru)
3677 return -ENOMEM;
3678
3679 rmrru->hdr = header;
3680 rmrr = (struct acpi_dmar_reserved_memory *)header;
3681 rmrru->base_address = rmrr->base_address;
3682 rmrru->end_address = rmrr->end_address;
2e455289
JL
3683 rmrru->devices = dmar_alloc_dev_scope((void *)(rmrr + 1),
3684 ((void *)rmrr) + rmrr->header.length,
3685 &rmrru->devices_cnt);
3686 if (rmrru->devices_cnt && rmrru->devices == NULL) {
3687 kfree(rmrru);
3688 return -ENOMEM;
3689 }
318fe7df 3690
2e455289 3691 list_add(&rmrru->list, &dmar_rmrr_units);
318fe7df 3692
2e455289 3693 return 0;
318fe7df
SS
3694}
3695
318fe7df
SS
3696int __init dmar_parse_one_atsr(struct acpi_dmar_header *hdr)
3697{
3698 struct acpi_dmar_atsr *atsr;
3699 struct dmar_atsr_unit *atsru;
3700
3701 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
3702 atsru = kzalloc(sizeof(*atsru), GFP_KERNEL);
3703 if (!atsru)
3704 return -ENOMEM;
3705
3706 atsru->hdr = hdr;
3707 atsru->include_all = atsr->flags & 0x1;
2e455289
JL
3708 if (!atsru->include_all) {
3709 atsru->devices = dmar_alloc_dev_scope((void *)(atsr + 1),
3710 (void *)atsr + atsr->header.length,
3711 &atsru->devices_cnt);
3712 if (atsru->devices_cnt && atsru->devices == NULL) {
3713 kfree(atsru);
3714 return -ENOMEM;
3715 }
3716 }
318fe7df 3717
0e242612 3718 list_add_rcu(&atsru->list, &dmar_atsr_units);
318fe7df
SS
3719
3720 return 0;
3721}
3722
9bdc531e
JL
3723static void intel_iommu_free_atsr(struct dmar_atsr_unit *atsru)
3724{
3725 dmar_free_dev_scope(&atsru->devices, &atsru->devices_cnt);
3726 kfree(atsru);
3727}
3728
3729static void intel_iommu_free_dmars(void)
3730{
3731 struct dmar_rmrr_unit *rmrru, *rmrr_n;
3732 struct dmar_atsr_unit *atsru, *atsr_n;
3733
3734 list_for_each_entry_safe(rmrru, rmrr_n, &dmar_rmrr_units, list) {
3735 list_del(&rmrru->list);
3736 dmar_free_dev_scope(&rmrru->devices, &rmrru->devices_cnt);
3737 kfree(rmrru);
318fe7df
SS
3738 }
3739
9bdc531e
JL
3740 list_for_each_entry_safe(atsru, atsr_n, &dmar_atsr_units, list) {
3741 list_del(&atsru->list);
3742 intel_iommu_free_atsr(atsru);
3743 }
318fe7df
SS
3744}
3745
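/*
 * Walk up from the device (or its PF) to the PCIe root port and check
 * whether an ATSR unit on the same segment lists that root port in its
 * device scope or is marked include_all.  A non-zero return means ATS
 * may be enabled for the device.
 */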
3746int dmar_find_matched_atsr_unit(struct pci_dev *dev)
3747{
b683b230 3748 int i, ret = 1;
318fe7df 3749 struct pci_bus *bus;
832bd858
DW
3750 struct pci_dev *bridge = NULL;
3751 struct device *tmp;
318fe7df
SS
3752 struct acpi_dmar_atsr *atsr;
3753 struct dmar_atsr_unit *atsru;
3754
3755 dev = pci_physfn(dev);
318fe7df 3756 for (bus = dev->bus; bus; bus = bus->parent) {
b5f82ddf 3757 bridge = bus->self;
318fe7df 3758 if (!bridge || !pci_is_pcie(bridge) ||
62f87c0e 3759 pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE)
318fe7df 3760 return 0;
b5f82ddf 3761 if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT)
318fe7df 3762 break;
318fe7df 3763 }
b5f82ddf
JL
3764 if (!bridge)
3765 return 0;
318fe7df 3766
0e242612 3767 rcu_read_lock();
b5f82ddf
JL
3768 list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
3769 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
3770 if (atsr->segment != pci_domain_nr(dev->bus))
3771 continue;
3772
b683b230 3773 for_each_dev_scope(atsru->devices, atsru->devices_cnt, i, tmp)
832bd858 3774 if (tmp == &bridge->dev)
b683b230 3775 goto out;
b5f82ddf
JL
3776
3777 if (atsru->include_all)
b683b230 3778 goto out;
b5f82ddf 3779 }
b683b230
JL
3780 ret = 0;
3781out:
0e242612 3782 rcu_read_unlock();
318fe7df 3783
b683b230 3784 return ret;
318fe7df
SS
3785}
3786
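/*
 * Called for PCI device add/remove notifications: keep the cached RMRR
 * and ATSR device scope lists in sync with hotplug events.
 */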
59ce0515
JL
3787int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info)
3788{
3789 int ret = 0;
3790 struct dmar_rmrr_unit *rmrru;
3791 struct dmar_atsr_unit *atsru;
3792 struct acpi_dmar_atsr *atsr;
3793 struct acpi_dmar_reserved_memory *rmrr;
3794
3795 if (!intel_iommu_enabled && system_state != SYSTEM_BOOTING)
3796 return 0;
3797
3798 list_for_each_entry(rmrru, &dmar_rmrr_units, list) {
3799 rmrr = container_of(rmrru->hdr,
3800 struct acpi_dmar_reserved_memory, header);
3801 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
3802 ret = dmar_insert_dev_scope(info, (void *)(rmrr + 1),
3803 ((void *)rmrr) + rmrr->header.length,
3804 rmrr->segment, rmrru->devices,
3805 rmrru->devices_cnt);
27e24950 3806			if (ret < 0)
59ce0515
JL
3807 return ret;
3808 } else if (info->event == BUS_NOTIFY_DEL_DEVICE) {
27e24950
JL
3809 dmar_remove_dev_scope(info, rmrr->segment,
3810 rmrru->devices, rmrru->devices_cnt);
59ce0515
JL
3811 }
3812 }
3813
3814 list_for_each_entry(atsru, &dmar_atsr_units, list) {
3815 if (atsru->include_all)
3816 continue;
3817
3818 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
3819 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
3820 ret = dmar_insert_dev_scope(info, (void *)(atsr + 1),
3821 (void *)atsr + atsr->header.length,
3822 atsr->segment, atsru->devices,
3823 atsru->devices_cnt);
3824 if (ret > 0)
3825 break;
 3826			else if (ret < 0)
3827 return ret;
3828 } else if (info->event == BUS_NOTIFY_DEL_DEVICE) {
3829 if (dmar_remove_dev_scope(info, atsr->segment,
3830 atsru->devices, atsru->devices_cnt))
3831 break;
3832 }
3833 }
3834
3835 return 0;
3836}
3837
99dcaded
FY
3838/*
 3839 * Here we only respond to the action of a device being unbound from its driver.
 3840 *
 3841 * An added device is not attached to its DMAR domain here yet. That will
 3842 * happen when the device is first mapped to an iova.
3843 */
3844static int device_notifier(struct notifier_block *nb,
3845 unsigned long action, void *data)
3846{
3847 struct device *dev = data;
99dcaded
FY
3848 struct dmar_domain *domain;
3849
3d89194a 3850 if (iommu_dummy(dev))
44cd613c
DW
3851 return 0;
3852
7e7dfab7
JL
3853 if (action != BUS_NOTIFY_UNBOUND_DRIVER &&
3854 action != BUS_NOTIFY_DEL_DEVICE)
3855 return 0;
3856
1525a29a 3857 domain = find_domain(dev);
99dcaded
FY
3858 if (!domain)
3859 return 0;
3860
3a5670e8 3861 down_read(&dmar_global_lock);
bf9c9eda 3862 domain_remove_one_dev_info(domain, dev);
7e7dfab7
JL
3863 if (!(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) &&
3864 !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) &&
3865 list_empty(&domain->devices))
3866 domain_exit(domain);
3a5670e8 3867 up_read(&dmar_global_lock);
a97590e5 3868
99dcaded
FY
3869 return 0;
3870}
3871
3872static struct notifier_block device_nb = {
3873 .notifier_call = device_notifier,
3874};
3875
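/*
 * Memory hotplug support for the static identity (si) domain, used when
 * hardware passthrough is not available: extend the identity map when
 * memory goes online, and on offline (or cancelled online) unmap the
 * range, flush the IOTLBs of all active IOMMUs and release the IOVA.
 */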
75f05569
JL
3876static int intel_iommu_memory_notifier(struct notifier_block *nb,
3877 unsigned long val, void *v)
3878{
3879 struct memory_notify *mhp = v;
3880 unsigned long long start, end;
3881 unsigned long start_vpfn, last_vpfn;
3882
3883 switch (val) {
3884 case MEM_GOING_ONLINE:
3885 start = mhp->start_pfn << PAGE_SHIFT;
3886 end = ((mhp->start_pfn + mhp->nr_pages) << PAGE_SHIFT) - 1;
3887 if (iommu_domain_identity_map(si_domain, start, end)) {
3888 pr_warn("dmar: failed to build identity map for [%llx-%llx]\n",
3889 start, end);
3890 return NOTIFY_BAD;
3891 }
3892 break;
3893
3894 case MEM_OFFLINE:
3895 case MEM_CANCEL_ONLINE:
3896 start_vpfn = mm_to_dma_pfn(mhp->start_pfn);
3897 last_vpfn = mm_to_dma_pfn(mhp->start_pfn + mhp->nr_pages - 1);
3898 while (start_vpfn <= last_vpfn) {
3899 struct iova *iova;
3900 struct dmar_drhd_unit *drhd;
3901 struct intel_iommu *iommu;
ea8ea460 3902 struct page *freelist;
75f05569
JL
3903
3904 iova = find_iova(&si_domain->iovad, start_vpfn);
3905 if (iova == NULL) {
 3906				pr_debug("dmar: failed to get IOVA for PFN %lx\n",
3907 start_vpfn);
3908 break;
3909 }
3910
3911 iova = split_and_remove_iova(&si_domain->iovad, iova,
3912 start_vpfn, last_vpfn);
3913 if (iova == NULL) {
3914 pr_warn("dmar: failed to split IOVA PFN [%lx-%lx]\n",
3915 start_vpfn, last_vpfn);
3916 return NOTIFY_BAD;
3917 }
3918
ea8ea460
DW
3919 freelist = domain_unmap(si_domain, iova->pfn_lo,
3920 iova->pfn_hi);
3921
75f05569
JL
3922 rcu_read_lock();
3923 for_each_active_iommu(iommu, drhd)
3924 iommu_flush_iotlb_psi(iommu, si_domain->id,
3925 iova->pfn_lo,
ea8ea460
DW
3926 iova->pfn_hi - iova->pfn_lo + 1,
3927 !freelist, 0);
75f05569 3928 rcu_read_unlock();
ea8ea460 3929 dma_free_pagelist(freelist);
75f05569
JL
3930
3931 start_vpfn = iova->pfn_hi + 1;
3932 free_iova_mem(iova);
3933 }
3934 break;
3935 }
3936
3937 return NOTIFY_OK;
3938}
3939
3940static struct notifier_block intel_iommu_memory_nb = {
3941 .notifier_call = intel_iommu_memory_notifier,
3942 .priority = 0
3943};
3944
a5459cfe
AW
3945
3946static ssize_t intel_iommu_show_version(struct device *dev,
3947 struct device_attribute *attr,
3948 char *buf)
3949{
3950 struct intel_iommu *iommu = dev_get_drvdata(dev);
3951 u32 ver = readl(iommu->reg + DMAR_VER_REG);
3952 return sprintf(buf, "%d:%d\n",
3953 DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver));
3954}
3955static DEVICE_ATTR(version, S_IRUGO, intel_iommu_show_version, NULL);
3956
3957static ssize_t intel_iommu_show_address(struct device *dev,
3958 struct device_attribute *attr,
3959 char *buf)
3960{
3961 struct intel_iommu *iommu = dev_get_drvdata(dev);
3962 return sprintf(buf, "%llx\n", iommu->reg_phys);
3963}
3964static DEVICE_ATTR(address, S_IRUGO, intel_iommu_show_address, NULL);
3965
3966static ssize_t intel_iommu_show_cap(struct device *dev,
3967 struct device_attribute *attr,
3968 char *buf)
3969{
3970 struct intel_iommu *iommu = dev_get_drvdata(dev);
3971 return sprintf(buf, "%llx\n", iommu->cap);
3972}
3973static DEVICE_ATTR(cap, S_IRUGO, intel_iommu_show_cap, NULL);
3974
3975static ssize_t intel_iommu_show_ecap(struct device *dev,
3976 struct device_attribute *attr,
3977 char *buf)
3978{
3979 struct intel_iommu *iommu = dev_get_drvdata(dev);
3980 return sprintf(buf, "%llx\n", iommu->ecap);
3981}
3982static DEVICE_ATTR(ecap, S_IRUGO, intel_iommu_show_ecap, NULL);
3983
3984static struct attribute *intel_iommu_attrs[] = {
3985 &dev_attr_version.attr,
3986 &dev_attr_address.attr,
3987 &dev_attr_cap.attr,
3988 &dev_attr_ecap.attr,
3989 NULL,
3990};
3991
3992static struct attribute_group intel_iommu_group = {
3993 .name = "intel-iommu",
3994 .attrs = intel_iommu_attrs,
3995};
3996
3997const struct attribute_group *intel_iommu_groups[] = {
3998 &intel_iommu_group,
3999 NULL,
4000};
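/*
 * These attributes are registered per unit through iommu_device_create()
 * in intel_iommu_init() below.  Assuming the usual "dmar%d" unit naming,
 * they typically appear as, for example,
 * /sys/class/iommu/dmar0/intel-iommu/{version,address,cap,ecap}.
 */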
4001
ba395927
KA
4002int __init intel_iommu_init(void)
4003{
9bdc531e 4004 int ret = -ENODEV;
3a93c841 4005 struct dmar_drhd_unit *drhd;
7c919779 4006 struct intel_iommu *iommu;
ba395927 4007
a59b50e9
JC
4008 /* VT-d is required for a TXT/tboot launch, so enforce that */
4009 force_on = tboot_force_iommu();
4010
3a5670e8
JL
4011 if (iommu_init_mempool()) {
4012 if (force_on)
4013 panic("tboot: Failed to initialize iommu memory\n");
4014 return -ENOMEM;
4015 }
4016
4017 down_write(&dmar_global_lock);
a59b50e9
JC
4018 if (dmar_table_init()) {
4019 if (force_on)
4020 panic("tboot: Failed to initialize DMAR table\n");
9bdc531e 4021 goto out_free_dmar;
a59b50e9 4022 }
ba395927 4023
3a93c841
TI
4024 /*
4025 * Disable translation if already enabled prior to OS handover.
4026 */
7c919779 4027 for_each_active_iommu(iommu, drhd)
3a93c841
TI
4028 if (iommu->gcmd & DMA_GCMD_TE)
4029 iommu_disable_translation(iommu);
3a93c841 4030
c2c7286a 4031 if (dmar_dev_scope_init() < 0) {
a59b50e9
JC
4032 if (force_on)
4033 panic("tboot: Failed to initialize DMAR device scope\n");
9bdc531e 4034 goto out_free_dmar;
a59b50e9 4035 }
1886e8a9 4036
75f1cdf1 4037 if (no_iommu || dmar_disabled)
9bdc531e 4038 goto out_free_dmar;
2ae21010 4039
318fe7df
SS
4040 if (list_empty(&dmar_rmrr_units))
4041 printk(KERN_INFO "DMAR: No RMRR found\n");
4042
4043 if (list_empty(&dmar_atsr_units))
4044 printk(KERN_INFO "DMAR: No ATSR found\n");
4045
51a63e67
JC
4046 if (dmar_init_reserved_ranges()) {
4047 if (force_on)
4048 panic("tboot: Failed to reserve iommu ranges\n");
3a5670e8 4049 goto out_free_reserved_range;
51a63e67 4050 }
ba395927
KA
4051
4052 init_no_remapping_devices();
4053
b779260b 4054 ret = init_dmars();
ba395927 4055 if (ret) {
a59b50e9
JC
4056 if (force_on)
4057 panic("tboot: Failed to initialize DMARs\n");
ba395927 4058 printk(KERN_ERR "IOMMU: dmar init failed\n");
9bdc531e 4059 goto out_free_reserved_range;
ba395927 4060 }
3a5670e8 4061 up_write(&dmar_global_lock);
ba395927
KA
4062 printk(KERN_INFO
4063 "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");
4064
5e0d2a6f 4065 init_timer(&unmap_timer);
75f1cdf1
FT
4066#ifdef CONFIG_SWIOTLB
4067 swiotlb = 0;
4068#endif
19943b0e 4069 dma_ops = &intel_dma_ops;
4ed0d3e6 4070
134fac3f 4071 init_iommu_pm_ops();
a8bcbb0d 4072
a5459cfe
AW
4073 for_each_active_iommu(iommu, drhd)
4074 iommu->iommu_dev = iommu_device_create(NULL, iommu,
4075 intel_iommu_groups,
4076 iommu->name);
4077
4236d97d 4078 bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
99dcaded 4079 bus_register_notifier(&pci_bus_type, &device_nb);
75f05569
JL
4080 if (si_domain && !hw_pass_through)
4081 register_memory_notifier(&intel_iommu_memory_nb);
99dcaded 4082
8bc1f85c
ED
4083 intel_iommu_enabled = 1;
4084
ba395927 4085 return 0;
9bdc531e
JL
4086
4087out_free_reserved_range:
4088 put_iova_domain(&reserved_iova_list);
9bdc531e
JL
4089out_free_dmar:
4090 intel_iommu_free_dmars();
3a5670e8
JL
4091 up_write(&dmar_global_lock);
4092 iommu_exit_mempool();
9bdc531e 4093 return ret;
ba395927 4094}
e820482c 4095
579305f7
AW
4096static int iommu_detach_dev_cb(struct pci_dev *pdev, u16 alias, void *opaque)
4097{
4098 struct intel_iommu *iommu = opaque;
4099
4100 iommu_detach_dev(iommu, PCI_BUS_NUM(alias), alias & 0xff);
4101 return 0;
4102}
4103
4104/*
4105 * NB - intel-iommu lacks any sort of reference counting for the users of
4106 * dependent devices. If multiple endpoints have intersecting dependent
4107 * devices, unbinding the driver from any one of them will possibly leave
4108 * the others unable to operate.
4109 */
3199aa6b 4110static void iommu_detach_dependent_devices(struct intel_iommu *iommu,
0bcb3e28 4111 struct device *dev)
3199aa6b 4112{
0bcb3e28 4113 if (!iommu || !dev || !dev_is_pci(dev))
3199aa6b
HW
4114 return;
4115
579305f7 4116 pci_for_each_dma_alias(to_pci_dev(dev), &iommu_detach_dev_cb, iommu);
3199aa6b
HW
4117}
4118
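/*
 * Detach one device from @domain: unlink its device_domain_info, tear
 * down its context mapping and those of its DMA aliases, and, if it was
 * the last device behind this IOMMU, clear the IOMMU from the domain's
 * bitmap.  For domains that are neither VM nor static-identity domains
 * the per-IOMMU domain id is released as well.
 */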
2c2e2c38 4119static void domain_remove_one_dev_info(struct dmar_domain *domain,
bf9c9eda 4120 struct device *dev)
c7151a8d 4121{
bca2b916 4122 struct device_domain_info *info, *tmp;
c7151a8d
WH
4123 struct intel_iommu *iommu;
4124 unsigned long flags;
4125 int found = 0;
156baca8 4126 u8 bus, devfn;
c7151a8d 4127
bf9c9eda 4128 iommu = device_to_iommu(dev, &bus, &devfn);
c7151a8d
WH
4129 if (!iommu)
4130 return;
4131
4132 spin_lock_irqsave(&device_domain_lock, flags);
bca2b916 4133 list_for_each_entry_safe(info, tmp, &domain->devices, link) {
bf9c9eda
DW
4134 if (info->iommu == iommu && info->bus == bus &&
4135 info->devfn == devfn) {
109b9b04 4136 unlink_domain_info(info);
c7151a8d
WH
4137 spin_unlock_irqrestore(&device_domain_lock, flags);
4138
93a23a72 4139 iommu_disable_dev_iotlb(info);
c7151a8d 4140 iommu_detach_dev(iommu, info->bus, info->devfn);
bf9c9eda 4141 iommu_detach_dependent_devices(iommu, dev);
c7151a8d
WH
4142 free_devinfo_mem(info);
4143
4144 spin_lock_irqsave(&device_domain_lock, flags);
4145
4146 if (found)
4147 break;
4148 else
4149 continue;
4150 }
4151
 4152		/* if there are no other devices under the same iommu
 4153		 * owned by this domain, clear this iommu from iommu_bmp and
 4154		 * update the iommu count and coherency
4155 */
8bbc4410 4156 if (info->iommu == iommu)
c7151a8d
WH
4157 found = 1;
4158 }
4159
3e7abe25
RD
4160 spin_unlock_irqrestore(&device_domain_lock, flags);
4161
c7151a8d
WH
4162 if (found == 0) {
4163 unsigned long tmp_flags;
4164 spin_lock_irqsave(&domain->iommu_lock, tmp_flags);
1b198bb0 4165 clear_bit(iommu->seq_id, domain->iommu_bmp);
c7151a8d 4166 domain->iommu_count--;
58c610bd 4167 domain_update_iommu_cap(domain);
c7151a8d 4168 spin_unlock_irqrestore(&domain->iommu_lock, tmp_flags);
a97590e5 4169
9b4554b2
AW
4170 if (!(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) &&
4171 !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY)) {
4172 spin_lock_irqsave(&iommu->lock, tmp_flags);
4173 clear_bit(domain->id, iommu->domain_ids);
4174 iommu->domains[domain->id] = NULL;
4175 spin_unlock_irqrestore(&iommu->lock, tmp_flags);
4176 }
c7151a8d 4177 }
c7151a8d
WH
4178}
4179
2c2e2c38 4180static int md_domain_init(struct dmar_domain *domain, int guest_width)
5e98c4b1
WH
4181{
4182 int adjust_width;
4183
4184 init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
5e98c4b1
WH
4185 domain_reserve_special_ranges(domain);
4186
4187 /* calculate AGAW */
4188 domain->gaw = guest_width;
4189 adjust_width = guestwidth_to_adjustwidth(guest_width);
4190 domain->agaw = width_to_agaw(adjust_width);
4191
5e98c4b1 4192 domain->iommu_coherency = 0;
c5b15255 4193 domain->iommu_snooping = 0;
6dd9a7c7 4194 domain->iommu_superpage = 0;
fe40f1e0 4195 domain->max_addr = 0;
5e98c4b1
WH
4196
4197 /* always allocate the top pgd */
4c923d47 4198 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
5e98c4b1
WH
4199 if (!domain->pgd)
4200 return -ENOMEM;
4201 domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
4202 return 0;
4203}
4204
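/*
 * IOMMU-API domain allocation: this backs iommu_domain_alloc(), e.g. for
 * a caller doing iommu_domain_alloc(&pci_bus_type).  It allocates a
 * dmar_domain with the default 48-bit guest address width and reports
 * the resulting aperture through domain->geometry.
 */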
5d450806 4205static int intel_iommu_domain_init(struct iommu_domain *domain)
38717946 4206{
5d450806 4207 struct dmar_domain *dmar_domain;
38717946 4208
92d03cc8 4209 dmar_domain = alloc_domain(true);
5d450806 4210 if (!dmar_domain) {
38717946 4211 printk(KERN_ERR
5d450806
JR
4212 "intel_iommu_domain_init: dmar_domain == NULL\n");
4213 return -ENOMEM;
38717946 4214 }
2c2e2c38 4215 if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
38717946 4216 printk(KERN_ERR
5d450806 4217 "intel_iommu_domain_init() failed\n");
92d03cc8 4218 domain_exit(dmar_domain);
5d450806 4219 return -ENOMEM;
38717946 4220 }
8140a95d 4221 domain_update_iommu_cap(dmar_domain);
5d450806 4222 domain->priv = dmar_domain;
faa3d6f5 4223
8a0e715b
JR
4224 domain->geometry.aperture_start = 0;
4225 domain->geometry.aperture_end = __DOMAIN_MAX_ADDR(dmar_domain->gaw);
4226 domain->geometry.force_aperture = true;
4227
5d450806 4228 return 0;
38717946 4229}
38717946 4230
5d450806 4231static void intel_iommu_domain_destroy(struct iommu_domain *domain)
38717946 4232{
5d450806
JR
4233 struct dmar_domain *dmar_domain = domain->priv;
4234
4235 domain->priv = NULL;
92d03cc8 4236 domain_exit(dmar_domain);
38717946 4237}
38717946 4238
4c5478c9
JR
4239static int intel_iommu_attach_device(struct iommu_domain *domain,
4240 struct device *dev)
38717946 4241{
4c5478c9 4242 struct dmar_domain *dmar_domain = domain->priv;
fe40f1e0
WH
4243 struct intel_iommu *iommu;
4244 int addr_width;
156baca8 4245 u8 bus, devfn;
faa3d6f5 4246
7207d8f9
DW
4247 /* normally dev is not mapped */
4248 if (unlikely(domain_context_mapped(dev))) {
faa3d6f5
WH
4249 struct dmar_domain *old_domain;
4250
1525a29a 4251 old_domain = find_domain(dev);
faa3d6f5 4252 if (old_domain) {
2c2e2c38
FY
4253 if (dmar_domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE ||
4254 dmar_domain->flags & DOMAIN_FLAG_STATIC_IDENTITY)
bf9c9eda 4255 domain_remove_one_dev_info(old_domain, dev);
faa3d6f5
WH
4256 else
4257 domain_remove_dev_info(old_domain);
4258 }
4259 }
4260
156baca8 4261 iommu = device_to_iommu(dev, &bus, &devfn);
fe40f1e0
WH
4262 if (!iommu)
4263 return -ENODEV;
4264
4265 /* check if this iommu agaw is sufficient for max mapped address */
4266 addr_width = agaw_to_width(iommu->agaw);
a99c47a2
TL
4267 if (addr_width > cap_mgaw(iommu->cap))
4268 addr_width = cap_mgaw(iommu->cap);
4269
4270 if (dmar_domain->max_addr > (1LL << addr_width)) {
4271 printk(KERN_ERR "%s: iommu width (%d) is not "
fe40f1e0 4272 "sufficient for the mapped address (%llx)\n",
a99c47a2 4273 __func__, addr_width, dmar_domain->max_addr);
fe40f1e0
WH
4274 return -EFAULT;
4275 }
a99c47a2
TL
4276 dmar_domain->gaw = addr_width;
4277
4278 /*
4279 * Knock out extra levels of page tables if necessary
4280 */
4281 while (iommu->agaw < dmar_domain->agaw) {
4282 struct dma_pte *pte;
4283
4284 pte = dmar_domain->pgd;
4285 if (dma_pte_present(pte)) {
25cbff16
SY
4286 dmar_domain->pgd = (struct dma_pte *)
4287 phys_to_virt(dma_pte_addr(pte));
7a661013 4288 free_pgtable_page(pte);
a99c47a2
TL
4289 }
4290 dmar_domain->agaw--;
4291 }
fe40f1e0 4292
5913c9bf 4293 return domain_add_dev_info(dmar_domain, dev, CONTEXT_TT_MULTI_LEVEL);
38717946 4294}
38717946 4295
4c5478c9
JR
4296static void intel_iommu_detach_device(struct iommu_domain *domain,
4297 struct device *dev)
38717946 4298{
4c5478c9 4299 struct dmar_domain *dmar_domain = domain->priv;
4c5478c9 4300
bf9c9eda 4301 domain_remove_one_dev_info(dmar_domain, dev);
faa3d6f5 4302}
c7151a8d 4303
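/*
 * Map @size bytes at @iova to physical address @hpa: translate
 * IOMMU_READ/WRITE/CACHE into DMA PTE bits, grow max_addr within the
 * domain's guest address width, round the size up to whole VT-d pages
 * and install the PTEs via domain_pfn_mapping().  A typical caller would
 * do something like
 *	iommu_map(domain, iova, hpa, size, IOMMU_READ | IOMMU_WRITE);
 * which the IOMMU core routes here through intel_iommu_ops.map.
 */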
b146a1c9
JR
4304static int intel_iommu_map(struct iommu_domain *domain,
4305 unsigned long iova, phys_addr_t hpa,
5009065d 4306 size_t size, int iommu_prot)
faa3d6f5 4307{
dde57a21 4308 struct dmar_domain *dmar_domain = domain->priv;
fe40f1e0 4309 u64 max_addr;
dde57a21 4310 int prot = 0;
faa3d6f5 4311 int ret;
fe40f1e0 4312
dde57a21
JR
4313 if (iommu_prot & IOMMU_READ)
4314 prot |= DMA_PTE_READ;
4315 if (iommu_prot & IOMMU_WRITE)
4316 prot |= DMA_PTE_WRITE;
9cf06697
SY
4317 if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
4318 prot |= DMA_PTE_SNP;
dde57a21 4319
163cc52c 4320 max_addr = iova + size;
dde57a21 4321 if (dmar_domain->max_addr < max_addr) {
fe40f1e0
WH
4322 u64 end;
4323
4324 /* check if minimum agaw is sufficient for mapped address */
8954da1f 4325 end = __DOMAIN_MAX_ADDR(dmar_domain->gaw) + 1;
fe40f1e0 4326 if (end < max_addr) {
8954da1f 4327 printk(KERN_ERR "%s: iommu width (%d) is not "
fe40f1e0 4328 "sufficient for the mapped address (%llx)\n",
8954da1f 4329 __func__, dmar_domain->gaw, max_addr);
fe40f1e0
WH
4330 return -EFAULT;
4331 }
dde57a21 4332 dmar_domain->max_addr = max_addr;
fe40f1e0 4333 }
ad051221
DW
4334 /* Round up size to next multiple of PAGE_SIZE, if it and
4335 the low bits of hpa would take us onto the next page */
88cb6a74 4336 size = aligned_nrpages(hpa, size);
ad051221
DW
4337 ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
4338 hpa >> VTD_PAGE_SHIFT, size, prot);
faa3d6f5 4339 return ret;
38717946 4340}
38717946 4341
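/*
 * Unmap may be asked for less than was actually mapped when a large page
 * is in use, so the size is first rounded up to the mapping level.  After
 * the page tables are cleared, the IOTLB of every IOMMU this domain is
 * attached to is flushed using that IOMMU's own id for the domain.
 */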
5009065d 4342static size_t intel_iommu_unmap(struct iommu_domain *domain,
ea8ea460 4343 unsigned long iova, size_t size)
38717946 4344{
dde57a21 4345 struct dmar_domain *dmar_domain = domain->priv;
ea8ea460
DW
4346 struct page *freelist = NULL;
4347 struct intel_iommu *iommu;
4348 unsigned long start_pfn, last_pfn;
4349 unsigned int npages;
4350 int iommu_id, num, ndomains, level = 0;
5cf0a76f
DW
4351
4352 /* Cope with horrid API which requires us to unmap more than the
4353 size argument if it happens to be a large-page mapping. */
4354 if (!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level))
4355 BUG();
4356
4357 if (size < VTD_PAGE_SIZE << level_to_offset_bits(level))
4358 size = VTD_PAGE_SIZE << level_to_offset_bits(level);
4b99d352 4359
ea8ea460
DW
4360 start_pfn = iova >> VTD_PAGE_SHIFT;
4361 last_pfn = (iova + size - 1) >> VTD_PAGE_SHIFT;
4362
4363 freelist = domain_unmap(dmar_domain, start_pfn, last_pfn);
4364
4365 npages = last_pfn - start_pfn + 1;
4366
4367 for_each_set_bit(iommu_id, dmar_domain->iommu_bmp, g_num_of_iommus) {
4368 iommu = g_iommus[iommu_id];
4369
4370 /*
4371 * find bit position of dmar_domain
4372 */
4373 ndomains = cap_ndoms(iommu->cap);
4374 for_each_set_bit(num, iommu->domain_ids, ndomains) {
4375 if (iommu->domains[num] == dmar_domain)
4376 iommu_flush_iotlb_psi(iommu, num, start_pfn,
4377 npages, !freelist, 0);
4378 }
4379
4380 }
4381
4382 dma_free_pagelist(freelist);
fe40f1e0 4383
163cc52c
DW
4384 if (dmar_domain->max_addr == iova + size)
4385 dmar_domain->max_addr = iova;
b146a1c9 4386
5cf0a76f 4387 return size;
38717946 4388}
38717946 4389
d14d6577 4390static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
bb5547ac 4391 dma_addr_t iova)
38717946 4392{
d14d6577 4393 struct dmar_domain *dmar_domain = domain->priv;
38717946 4394 struct dma_pte *pte;
5cf0a76f 4395 int level = 0;
faa3d6f5 4396 u64 phys = 0;
38717946 4397
5cf0a76f 4398 pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level);
38717946 4399 if (pte)
faa3d6f5 4400 phys = dma_pte_addr(pte);
38717946 4401
faa3d6f5 4402 return phys;
38717946 4403}
a8bcbb0d 4404
dbb9fd86
SY
4405static int intel_iommu_domain_has_cap(struct iommu_domain *domain,
4406 unsigned long cap)
4407{
4408 struct dmar_domain *dmar_domain = domain->priv;
4409
4410 if (cap == IOMMU_CAP_CACHE_COHERENCY)
4411 return dmar_domain->iommu_snooping;
323f99cb 4412 if (cap == IOMMU_CAP_INTR_REMAP)
95a02e97 4413 return irq_remapping_enabled;
dbb9fd86
SY
4414
4415 return 0;
4416}
4417
abdfdde2
AW
4418static int intel_iommu_add_device(struct device *dev)
4419{
a5459cfe 4420 struct intel_iommu *iommu;
abdfdde2 4421 struct iommu_group *group;
156baca8 4422 u8 bus, devfn;
70ae6f0d 4423
a5459cfe
AW
4424 iommu = device_to_iommu(dev, &bus, &devfn);
4425 if (!iommu)
70ae6f0d
AW
4426 return -ENODEV;
4427
a5459cfe 4428 iommu_device_link(iommu->iommu_dev, dev);
a4ff1fc2 4429
e17f9ff4 4430 group = iommu_group_get_for_dev(dev);
783f157b 4431
e17f9ff4
AW
4432 if (IS_ERR(group))
4433 return PTR_ERR(group);
bcb71abe 4434
abdfdde2 4435 iommu_group_put(group);
e17f9ff4 4436 return 0;
abdfdde2 4437}
70ae6f0d 4438
abdfdde2
AW
4439static void intel_iommu_remove_device(struct device *dev)
4440{
a5459cfe
AW
4441 struct intel_iommu *iommu;
4442 u8 bus, devfn;
4443
4444 iommu = device_to_iommu(dev, &bus, &devfn);
4445 if (!iommu)
4446 return;
4447
abdfdde2 4448 iommu_group_remove_device(dev);
a5459cfe
AW
4449
4450 iommu_device_unlink(iommu->iommu_dev, dev);
70ae6f0d
AW
4451}
4452
b22f6434 4453static const struct iommu_ops intel_iommu_ops = {
a8bcbb0d
JR
4454 .domain_init = intel_iommu_domain_init,
4455 .domain_destroy = intel_iommu_domain_destroy,
4456 .attach_dev = intel_iommu_attach_device,
4457 .detach_dev = intel_iommu_detach_device,
b146a1c9
JR
4458 .map = intel_iommu_map,
4459 .unmap = intel_iommu_unmap,
a8bcbb0d 4460 .iova_to_phys = intel_iommu_iova_to_phys,
dbb9fd86 4461 .domain_has_cap = intel_iommu_domain_has_cap,
abdfdde2
AW
4462 .add_device = intel_iommu_add_device,
4463 .remove_device = intel_iommu_remove_device,
6d1c56a9 4464 .pgsize_bitmap = INTEL_IOMMU_PGSIZES,
a8bcbb0d 4465};
9af88143 4466
9452618e
DV
4467static void quirk_iommu_g4x_gfx(struct pci_dev *dev)
4468{
4469 /* G4x/GM45 integrated gfx dmar support is totally busted. */
4470 printk(KERN_INFO "DMAR: Disabling IOMMU for graphics on this chipset\n");
4471 dmar_map_gfx = 0;
4472}
4473
4474DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_g4x_gfx);
4475DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_g4x_gfx);
4476DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_g4x_gfx);
4477DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_g4x_gfx);
4478DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_g4x_gfx);
4479DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_g4x_gfx);
4480DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_g4x_gfx);
4481
d34d6517 4482static void quirk_iommu_rwbf(struct pci_dev *dev)
9af88143
DW
4483{
4484 /*
4485 * Mobile 4 Series Chipset neglects to set RWBF capability,
210561ff 4486 * but needs it. Same seems to hold for the desktop versions.
9af88143
DW
4487 */
4488 printk(KERN_INFO "DMAR: Forcing write-buffer flush capability\n");
4489 rwbf_quirk = 1;
4490}
4491
4492DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
210561ff
DV
4493DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_rwbf);
4494DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_rwbf);
4495DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_rwbf);
4496DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_rwbf);
4497DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_rwbf);
4498DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_rwbf);
e0fc7e0b 4499
eecfd57f
AJ
4500#define GGC 0x52
4501#define GGC_MEMORY_SIZE_MASK (0xf << 8)
4502#define GGC_MEMORY_SIZE_NONE (0x0 << 8)
4503#define GGC_MEMORY_SIZE_1M (0x1 << 8)
4504#define GGC_MEMORY_SIZE_2M (0x3 << 8)
4505#define GGC_MEMORY_VT_ENABLED (0x8 << 8)
4506#define GGC_MEMORY_SIZE_2M_VT (0x9 << 8)
4507#define GGC_MEMORY_SIZE_3M_VT (0xa << 8)
4508#define GGC_MEMORY_SIZE_4M_VT (0xb << 8)
4509
d34d6517 4510static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
9eecabcb
DW
4511{
4512 unsigned short ggc;
4513
eecfd57f 4514 if (pci_read_config_word(dev, GGC, &ggc))
9eecabcb
DW
4515 return;
4516
eecfd57f 4517 if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
9eecabcb
DW
4518 printk(KERN_INFO "DMAR: BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
4519 dmar_map_gfx = 0;
6fbcfb3e
DW
4520 } else if (dmar_map_gfx) {
4521 /* we have to ensure the gfx device is idle before we flush */
4522 printk(KERN_INFO "DMAR: Disabling batched IOTLB flush on Ironlake\n");
4523 intel_iommu_strict = 1;
4524 }
9eecabcb
DW
4525}
4526DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
4527DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt);
4528DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
4529DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);
4530
e0fc7e0b
DW
4531/* On Tylersburg chipsets, some BIOSes have been known to enable the
4532 ISOCH DMAR unit for the Azalia sound device, but not give it any
4533 TLB entries, which causes it to deadlock. Check for that. We do
4534 this in a function called from init_dmars(), instead of in a PCI
4535 quirk, because we don't want to print the obnoxious "BIOS broken"
4536 message if VT-d is actually disabled.
4537*/
4538static void __init check_tylersburg_isoch(void)
4539{
4540 struct pci_dev *pdev;
4541 uint32_t vtisochctrl;
4542
4543 /* If there's no Azalia in the system anyway, forget it. */
4544 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
4545 if (!pdev)
4546 return;
4547 pci_dev_put(pdev);
4548
4549 /* System Management Registers. Might be hidden, in which case
4550 we can't do the sanity check. But that's OK, because the
4551 known-broken BIOSes _don't_ actually hide it, so far. */
4552 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
4553 if (!pdev)
4554 return;
4555
4556 if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
4557 pci_dev_put(pdev);
4558 return;
4559 }
4560
4561 pci_dev_put(pdev);
4562
4563 /* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */
4564 if (vtisochctrl & 1)
4565 return;
4566
4567 /* Drop all bits other than the number of TLB entries */
4568 vtisochctrl &= 0x1c;
4569
4570 /* If we have the recommended number of TLB entries (16), fine. */
4571 if (vtisochctrl == 0x10)
4572 return;
4573
4574 /* Zero TLB entries? You get to ride the short bus to school. */
4575 if (!vtisochctrl) {
4576 WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
4577 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
4578 dmi_get_system_info(DMI_BIOS_VENDOR),
4579 dmi_get_system_info(DMI_BIOS_VERSION),
4580 dmi_get_system_info(DMI_PRODUCT_VERSION));
4581 iommu_identity_mapping |= IDENTMAP_AZALIA;
4582 return;
4583 }
4584
4585 printk(KERN_WARNING "DMAR: Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n",
4586 vtisochctrl);
4587}