drivers/iommu/intel-iommu.c (git blame at "iommu/vt-d: Check for NULL pointer when freeing IOMMU data structure")
ba395927
KA
1/*
2 * Copyright (c) 2006, Intel Corporation.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License along with
14 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15 * Place - Suite 330, Boston, MA 02111-1307 USA.
16 *
98bcef56 17 * Copyright (C) 2006-2008 Intel Corporation
18 * Author: Ashok Raj <ashok.raj@intel.com>
19 * Author: Shaohua Li <shaohua.li@intel.com>
20 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
5b6985ce 21 * Author: Fenghua Yu <fenghua.yu@intel.com>
ba395927
KA
22 */
23
24#include <linux/init.h>
25#include <linux/bitmap.h>
5e0d2a6f 26#include <linux/debugfs.h>
54485c30 27#include <linux/export.h>
ba395927
KA
28#include <linux/slab.h>
29#include <linux/irq.h>
30#include <linux/interrupt.h>
ba395927
KA
31#include <linux/spinlock.h>
32#include <linux/pci.h>
33#include <linux/dmar.h>
34#include <linux/dma-mapping.h>
35#include <linux/mempool.h>
5e0d2a6f 36#include <linux/timer.h>
38717946 37#include <linux/iova.h>
5d450806 38#include <linux/iommu.h>
38717946 39#include <linux/intel-iommu.h>
134fac3f 40#include <linux/syscore_ops.h>
69575d38 41#include <linux/tboot.h>
adb2fe02 42#include <linux/dmi.h>
5cdede24 43#include <linux/pci-ats.h>
0ee332c1 44#include <linux/memblock.h>
8a8f422d 45#include <asm/irq_remapping.h>
ba395927 46#include <asm/cacheflush.h>
46a7fa27 47#include <asm/iommu.h>
ba395927 48
078e1ee2 49#include "irq_remapping.h"
61e015ac 50#include "pci.h"
078e1ee2 51
5b6985ce
FY
52#define ROOT_SIZE VTD_PAGE_SIZE
53#define CONTEXT_SIZE VTD_PAGE_SIZE
54
ba395927
KA
55#define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
56#define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
e0fc7e0b 57#define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)
ba395927
KA
58
59#define IOAPIC_RANGE_START (0xfee00000)
60#define IOAPIC_RANGE_END (0xfeefffff)
61#define IOVA_START_ADDR (0x1000)
62
63#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
64
4ed0d3e6 65#define MAX_AGAW_WIDTH 64
5c645b35 66#define MAX_AGAW_PFN_WIDTH (MAX_AGAW_WIDTH - VTD_PAGE_SHIFT)
4ed0d3e6 67
2ebe3151
DW
68#define __DOMAIN_MAX_PFN(gaw) ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
69#define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1)
70
71/* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
72 to match. That way, we can use 'unsigned long' for PFNs with impunity. */
73#define DOMAIN_MAX_PFN(gaw) ((unsigned long) min_t(uint64_t, \
74 __DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
75#define DOMAIN_MAX_ADDR(gaw) (((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)
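/*
 * Illustrative example (not part of the original file): with the default
 * 48-bit guest address width, __DOMAIN_MAX_PFN(48) is (1ULL << 36) - 1,
 * i.e. 0xfffffffff.  On a 64-bit build DOMAIN_MAX_PFN(48) keeps that value
 * and DOMAIN_MAX_ADDR(48) is 0xfffffffff000; on a 32-bit build the min_t()
 * above clamps the PFN to ULONG_MAX (0xffffffff), which is what lets PFN
 * arithmetic safely stay in 'unsigned long'.
 */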
ba395927 76
f27be03b 77#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
284901a9 78#define DMA_32BIT_PFN IOVA_PFN(DMA_BIT_MASK(32))
6a35528a 79#define DMA_64BIT_PFN IOVA_PFN(DMA_BIT_MASK(64))
5e0d2a6f 80
df08cdc7
AM
81/* page table handling */
82#define LEVEL_STRIDE (9)
83#define LEVEL_MASK (((u64)1 << LEVEL_STRIDE) - 1)
84
6d1c56a9
OBC
85/*
 86 * This bitmap is used to advertise the page sizes our hardware supports
87 * to the IOMMU core, which will then use this information to split
88 * physically contiguous memory regions it is mapping into page sizes
89 * that we support.
90 *
91 * Traditionally the IOMMU core just handed us the mappings directly,
92 * after making sure the size is an order of a 4KiB page and that the
93 * mapping has natural alignment.
94 *
95 * To retain this behavior, we currently advertise that we support
96 * all page sizes that are an order of 4KiB.
97 *
98 * If at some point we'd like to utilize the IOMMU core's new behavior,
99 * we could change this to advertise the real page sizes we support.
100 */
101#define INTEL_IOMMU_PGSIZES (~0xFFFUL)
102
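/*
 * Illustrative sketch (not part of the original file): in the IOMMU core's
 * pgsize_bitmap convention, bit n set means a page size of (1UL << n) bytes
 * is supported.  ~0xFFFUL therefore sets bits 12, 13, 14, ... which
 * advertises 4KiB, 8KiB, 16KiB and every larger power-of-two size, matching
 * the "any order of 4KiB" behaviour described above.
 */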
df08cdc7
AM
103static inline int agaw_to_level(int agaw)
104{
105 return agaw + 2;
106}
107
108static inline int agaw_to_width(int agaw)
109{
5c645b35 110 return min_t(int, 30 + agaw * LEVEL_STRIDE, MAX_AGAW_WIDTH);
df08cdc7
AM
111}
112
113static inline int width_to_agaw(int width)
114{
5c645b35 115 return DIV_ROUND_UP(width - 30, LEVEL_STRIDE);
df08cdc7
AM
116}
117
118static inline unsigned int level_to_offset_bits(int level)
119{
120 return (level - 1) * LEVEL_STRIDE;
121}
122
123static inline int pfn_level_offset(unsigned long pfn, int level)
124{
125 return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
126}
127
128static inline unsigned long level_mask(int level)
129{
130 return -1UL << level_to_offset_bits(level);
131}
132
133static inline unsigned long level_size(int level)
134{
135 return 1UL << level_to_offset_bits(level);
136}
137
138static inline unsigned long align_to_level(unsigned long pfn, int level)
139{
140 return (pfn + level_size(level) - 1) & level_mask(level);
141}
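/*
 * Illustrative example (not part of the original file): each page-table
 * level indexes 9 bits of the DMA PFN.  For a 4-level table (agaw 2,
 * i.e. a 48-bit address width), DMA pfn 0x12345678 decomposes as
 *
 *	pfn_level_offset(0x12345678, 4) == 0x002
 *	pfn_level_offset(0x12345678, 3) == 0x08d
 *	pfn_level_offset(0x12345678, 2) == 0x02b
 *	pfn_level_offset(0x12345678, 1) == 0x078
 *
 * and align_to_level(pfn, 2) rounds the pfn up to the next 512-page
 * (2MiB) boundary.
 */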
fd18de50 142
6dd9a7c7
YS
143static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
144{
5c645b35 145 return 1 << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH);
6dd9a7c7
YS
146}
147
dd4e8319
DW
148/* VT-d pages must always be _smaller_ than MM pages. Otherwise things
149 are never going to work. */
150static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
151{
152 return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
153}
154
155static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
156{
157 return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
158}
159static inline unsigned long page_to_dma_pfn(struct page *pg)
160{
161 return mm_to_dma_pfn(page_to_pfn(pg));
162}
163static inline unsigned long virt_to_dma_pfn(void *p)
164{
165 return page_to_dma_pfn(virt_to_page(p));
166}
167
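/*
 * Illustrative note (not part of the original file): with 4KiB MM pages
 * (the x86 case) PAGE_SHIFT and VTD_PAGE_SHIFT are both 12, so the shifts
 * above are zero and mm_to_dma_pfn(0x12345) == dma_to_mm_pfn(0x12345) ==
 * 0x12345.  The helpers only do real work on configurations where MM pages
 * are larger than the 4KiB VT-d granule.
 */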
d9630fe9
WH
168/* global iommu list, set NULL for ignored DMAR units */
169static struct intel_iommu **g_iommus;
170
e0fc7e0b 171static void __init check_tylersburg_isoch(void);
9af88143
DW
172static int rwbf_quirk;
173
b779260b
JC
174/*
175 * set to 1 to panic kernel if can't successfully enable VT-d
176 * (used when kernel is launched w/ TXT)
177 */
178static int force_on = 0;
179
46b08e1a
MM
180/*
181 * 0: Present
182 * 1-11: Reserved
183 * 12-63: Context Ptr (12 - (haw-1))
184 * 64-127: Reserved
185 */
186struct root_entry {
187 u64 val;
188 u64 rsvd1;
189};
190#define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
191static inline bool root_present(struct root_entry *root)
192{
193 return (root->val & 1);
194}
195static inline void set_root_present(struct root_entry *root)
196{
197 root->val |= 1;
198}
199static inline void set_root_value(struct root_entry *root, unsigned long value)
200{
201 root->val |= value & VTD_PAGE_MASK;
202}
203
204static inline struct context_entry *
205get_context_addr_from_root(struct root_entry *root)
206{
207 return (struct context_entry *)
208 (root_present(root)?phys_to_virt(
209 root->val & VTD_PAGE_MASK) :
210 NULL);
211}
212
7a8fc25e
MM
213/*
214 * low 64 bits:
215 * 0: present
216 * 1: fault processing disable
217 * 2-3: translation type
218 * 12-63: address space root
219 * high 64 bits:
220 * 0-2: address width
221 * 3-6: aval
222 * 8-23: domain id
223 */
224struct context_entry {
225 u64 lo;
226 u64 hi;
227};
c07e7d21
MM
228
229static inline bool context_present(struct context_entry *context)
230{
231 return (context->lo & 1);
232}
233static inline void context_set_present(struct context_entry *context)
234{
235 context->lo |= 1;
236}
237
238static inline void context_set_fault_enable(struct context_entry *context)
239{
240 context->lo &= (((u64)-1) << 2) | 1;
241}
242
c07e7d21
MM
243static inline void context_set_translation_type(struct context_entry *context,
244 unsigned long value)
245{
246 context->lo &= (((u64)-1) << 4) | 3;
247 context->lo |= (value & 3) << 2;
248}
249
250static inline void context_set_address_root(struct context_entry *context,
251 unsigned long value)
252{
253 context->lo |= value & VTD_PAGE_MASK;
254}
255
256static inline void context_set_address_width(struct context_entry *context,
257 unsigned long value)
258{
259 context->hi |= value & 7;
260}
261
262static inline void context_set_domain_id(struct context_entry *context,
263 unsigned long value)
264{
265 context->hi |= (value & ((1 << 16) - 1)) << 8;
266}
267
268static inline void context_clear_entry(struct context_entry *context)
269{
270 context->lo = 0;
271 context->hi = 0;
272}
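/*
 * Illustrative example (not part of the original file): mapping a device
 * into domain 5 with a 4-level (48-bit, agaw 2) page table whose pgd sits
 * at physical address 0x12340000 ends up, via the helpers above, with
 *
 *	lo == 0x12340001   (address root | present; fault processing enabled)
 *	hi == 0x00000502   (domain id 5 in bits 8-23, address width 2)
 *
 * assuming multi-level translation (type 00 in bits 2-3).
 */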
7a8fc25e 273
622ba12a
MM
274/*
275 * 0: readable
276 * 1: writable
277 * 2-6: reserved
278 * 7: super page
9cf06697
SY
279 * 8-10: available
280 * 11: snoop behavior
622ba12a
MM
 281 * 12-63: Host physical address
282 */
283struct dma_pte {
284 u64 val;
285};
622ba12a 286
19c239ce
MM
287static inline void dma_clear_pte(struct dma_pte *pte)
288{
289 pte->val = 0;
290}
291
19c239ce
MM
292static inline u64 dma_pte_addr(struct dma_pte *pte)
293{
c85994e4
DW
294#ifdef CONFIG_64BIT
295 return pte->val & VTD_PAGE_MASK;
296#else
297 /* Must have a full atomic 64-bit read */
1a8bd481 298 return __cmpxchg64(&pte->val, 0ULL, 0ULL) & VTD_PAGE_MASK;
c85994e4 299#endif
19c239ce
MM
300}
301
19c239ce
MM
302static inline bool dma_pte_present(struct dma_pte *pte)
303{
304 return (pte->val & 3) != 0;
305}
622ba12a 306
4399c8bf
AK
307static inline bool dma_pte_superpage(struct dma_pte *pte)
308{
309 return (pte->val & (1 << 7));
310}
311
75e6bf96
DW
312static inline int first_pte_in_page(struct dma_pte *pte)
313{
314 return !((unsigned long)pte & ~VTD_PAGE_MASK);
315}
316
2c2e2c38
FY
317/*
 318 * This domain is a static identity mapping domain.
 319 * 1. This domain creates a static 1:1 mapping to all usable memory.
 320 * 2. It maps to each iommu if successful.
 321 * 3. Each iommu maps to this domain if successful.
322 */
19943b0e
DW
323static struct dmar_domain *si_domain;
324static int hw_pass_through = 1;
2c2e2c38 325
3b5410e7 326/* devices under the same p2p bridge are owned in one domain */
cdc7b837 327#define DOMAIN_FLAG_P2P_MULTIPLE_DEVICES (1 << 0)
3b5410e7 328
1ce28feb
WH
 329/* domain represents a virtual machine; more than one device
 330 * across iommus may be owned by one domain, e.g. a kvm guest.
331 */
332#define DOMAIN_FLAG_VIRTUAL_MACHINE (1 << 1)
333
2c2e2c38
FY
 334/* si_domain contains multiple devices */
335#define DOMAIN_FLAG_STATIC_IDENTITY (1 << 2)
336
1b198bb0
MT
337/* define the limit of IOMMUs supported in each domain */
338#ifdef CONFIG_X86
339# define IOMMU_UNITS_SUPPORTED MAX_IO_APICS
340#else
341# define IOMMU_UNITS_SUPPORTED 64
342#endif
343
99126f7c
MM
344struct dmar_domain {
345 int id; /* domain id */
4c923d47 346 int nid; /* node id */
1b198bb0
MT
347 DECLARE_BITMAP(iommu_bmp, IOMMU_UNITS_SUPPORTED);
348 /* bitmap of iommus this domain uses*/
99126f7c
MM
349
350 struct list_head devices; /* all devices' list */
351 struct iova_domain iovad; /* iova's that belong to this domain */
352
353 struct dma_pte *pgd; /* virtual address */
99126f7c
MM
354 int gaw; /* max guest address width */
355
356 /* adjusted guest address width, 0 is level 2 30-bit */
357 int agaw;
358
3b5410e7 359 int flags; /* flags to find out type of domain */
8e604097
WH
360
361 int iommu_coherency;/* indicate coherency of iommu access */
58c610bd 362 int iommu_snooping; /* indicate snooping control feature*/
c7151a8d 363 int iommu_count; /* reference count of iommu */
6dd9a7c7
YS
364 int iommu_superpage;/* Level of superpages supported:
365 0 == 4KiB (no superpages), 1 == 2MiB,
366 2 == 1GiB, 3 == 512GiB, 4 == 1TiB */
c7151a8d 367 spinlock_t iommu_lock; /* protect iommu set in domain */
fe40f1e0 368 u64 max_addr; /* maximum mapped address */
99126f7c
MM
369};
370
a647dacb
MM
371/* PCI domain-device relationship */
372struct device_domain_info {
373 struct list_head link; /* link to domain siblings */
374 struct list_head global; /* link to global list */
276dbf99
DW
375 int segment; /* PCI domain */
376 u8 bus; /* PCI bus number */
a647dacb 377 u8 devfn; /* PCI devfn number */
45e829ea 378 struct pci_dev *dev; /* it's NULL for PCIe-to-PCI bridge */
93a23a72 379 struct intel_iommu *iommu; /* IOMMU used by this device */
a647dacb
MM
380 struct dmar_domain *domain; /* pointer to domain */
381};
382
b94e4117
JL
383struct dmar_rmrr_unit {
384 struct list_head list; /* list of rmrr units */
385 struct acpi_dmar_header *hdr; /* ACPI header */
386 u64 base_address; /* reserved base address*/
387 u64 end_address; /* reserved end address */
388 struct pci_dev **devices; /* target devices */
389 int devices_cnt; /* target device count */
390};
391
392struct dmar_atsr_unit {
393 struct list_head list; /* list of ATSR units */
394 struct acpi_dmar_header *hdr; /* ACPI header */
395 struct pci_dev **devices; /* target devices */
396 int devices_cnt; /* target device count */
397 u8 include_all:1; /* include all ports */
398};
399
400static LIST_HEAD(dmar_atsr_units);
401static LIST_HEAD(dmar_rmrr_units);
402
403#define for_each_rmrr_units(rmrr) \
404 list_for_each_entry(rmrr, &dmar_rmrr_units, list)
405
5e0d2a6f 406static void flush_unmaps_timeout(unsigned long data);
407
b707cb02 408static DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0);
5e0d2a6f 409
80b20dd8 410#define HIGH_WATER_MARK 250
411struct deferred_flush_tables {
412 int next;
413 struct iova *iova[HIGH_WATER_MARK];
414 struct dmar_domain *domain[HIGH_WATER_MARK];
415};
416
417static struct deferred_flush_tables *deferred_flush;
418
5e0d2a6f 419/* bitmap for indexing intel_iommus */
5e0d2a6f 420static int g_num_of_iommus;
421
422static DEFINE_SPINLOCK(async_umap_flush_lock);
423static LIST_HEAD(unmaps_to_do);
424
425static int timer_on;
426static long list_size;
5e0d2a6f 427
92d03cc8 428static void domain_exit(struct dmar_domain *domain);
ba395927 429static void domain_remove_dev_info(struct dmar_domain *domain);
b94e4117
JL
430static void domain_remove_one_dev_info(struct dmar_domain *domain,
431 struct pci_dev *pdev);
92d03cc8
JL
432static void iommu_detach_dependent_devices(struct intel_iommu *iommu,
433 struct pci_dev *pdev);
ba395927 434
d3f13810 435#ifdef CONFIG_INTEL_IOMMU_DEFAULT_ON
0cd5c3c8
KM
436int dmar_disabled = 0;
437#else
438int dmar_disabled = 1;
d3f13810 439#endif /*CONFIG_INTEL_IOMMU_DEFAULT_ON*/
0cd5c3c8 440
8bc1f85c
ED
441int intel_iommu_enabled = 0;
442EXPORT_SYMBOL_GPL(intel_iommu_enabled);
443
2d9e667e 444static int dmar_map_gfx = 1;
7d3b03ce 445static int dmar_forcedac;
5e0d2a6f 446static int intel_iommu_strict;
6dd9a7c7 447static int intel_iommu_superpage = 1;
ba395927 448
c0771df8
DW
449int intel_iommu_gfx_mapped;
450EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);
451
ba395927
KA
452#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
453static DEFINE_SPINLOCK(device_domain_lock);
454static LIST_HEAD(device_domain_list);
455
a8bcbb0d
JR
456static struct iommu_ops intel_iommu_ops;
457
ba395927
KA
458static int __init intel_iommu_setup(char *str)
459{
460 if (!str)
461 return -EINVAL;
462 while (*str) {
0cd5c3c8
KM
463 if (!strncmp(str, "on", 2)) {
464 dmar_disabled = 0;
465 printk(KERN_INFO "Intel-IOMMU: enabled\n");
466 } else if (!strncmp(str, "off", 3)) {
ba395927 467 dmar_disabled = 1;
0cd5c3c8 468 printk(KERN_INFO "Intel-IOMMU: disabled\n");
ba395927
KA
469 } else if (!strncmp(str, "igfx_off", 8)) {
470 dmar_map_gfx = 0;
471 printk(KERN_INFO
472 "Intel-IOMMU: disable GFX device mapping\n");
7d3b03ce 473 } else if (!strncmp(str, "forcedac", 8)) {
5e0d2a6f 474 printk(KERN_INFO
7d3b03ce
KA
475 "Intel-IOMMU: Forcing DAC for PCI devices\n");
476 dmar_forcedac = 1;
5e0d2a6f 477 } else if (!strncmp(str, "strict", 6)) {
478 printk(KERN_INFO
479 "Intel-IOMMU: disable batched IOTLB flush\n");
480 intel_iommu_strict = 1;
6dd9a7c7
YS
481 } else if (!strncmp(str, "sp_off", 6)) {
482 printk(KERN_INFO
483 "Intel-IOMMU: disable supported super page\n");
484 intel_iommu_superpage = 0;
ba395927
KA
485 }
486
487 str += strcspn(str, ",");
488 while (*str == ',')
489 str++;
490 }
491 return 0;
492}
493__setup("intel_iommu=", intel_iommu_setup);
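/*
 * Usage example (illustrative, not part of the original file): the parser
 * above accepts a comma-separated list on the kernel command line, e.g.
 *
 *	intel_iommu=on,strict,sp_off
 *
 * which sets dmar_disabled = 0, disables batched IOTLB flushing and turns
 * off superpage support, while
 *
 *	intel_iommu=igfx_off
 *
 * sets dmar_map_gfx = 0 so the integrated graphics device is not put
 * behind DMA remapping.
 */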
494
495static struct kmem_cache *iommu_domain_cache;
496static struct kmem_cache *iommu_devinfo_cache;
497static struct kmem_cache *iommu_iova_cache;
498
4c923d47 499static inline void *alloc_pgtable_page(int node)
eb3fa7cb 500{
4c923d47
SS
501 struct page *page;
502 void *vaddr = NULL;
eb3fa7cb 503
4c923d47
SS
504 page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
505 if (page)
506 vaddr = page_address(page);
eb3fa7cb 507 return vaddr;
ba395927
KA
508}
509
510static inline void free_pgtable_page(void *vaddr)
511{
512 free_page((unsigned long)vaddr);
513}
514
515static inline void *alloc_domain_mem(void)
516{
354bb65e 517 return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC);
ba395927
KA
518}
519
38717946 520static void free_domain_mem(void *vaddr)
ba395927
KA
521{
522 kmem_cache_free(iommu_domain_cache, vaddr);
523}
524
525static inline void * alloc_devinfo_mem(void)
526{
354bb65e 527 return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC);
ba395927
KA
528}
529
530static inline void free_devinfo_mem(void *vaddr)
531{
532 kmem_cache_free(iommu_devinfo_cache, vaddr);
533}
534
535struct iova *alloc_iova_mem(void)
536{
354bb65e 537 return kmem_cache_alloc(iommu_iova_cache, GFP_ATOMIC);
ba395927
KA
538}
539
540void free_iova_mem(struct iova *iova)
541{
542 kmem_cache_free(iommu_iova_cache, iova);
543}
544
1b573683 545
4ed0d3e6 546static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
1b573683
WH
547{
548 unsigned long sagaw;
549 int agaw = -1;
550
551 sagaw = cap_sagaw(iommu->cap);
4ed0d3e6 552 for (agaw = width_to_agaw(max_gaw);
1b573683
WH
553 agaw >= 0; agaw--) {
554 if (test_bit(agaw, &sagaw))
555 break;
556 }
557
558 return agaw;
559}
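/*
 * Worked example (illustrative, not part of the original file): for the
 * default 48-bit domain width, width_to_agaw(48) is 2.  If cap_sagaw()
 * reports 0x4 (only the 4-level, 48-bit table supported) the loop returns
 * agaw 2 immediately; if it reports 0x2 (only 3-level, 39-bit) the loop
 * falls back to agaw 1, and the context-mapping code later skips the top
 * page-table levels for such an iommu.
 */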
560
4ed0d3e6
FY
561/*
562 * Calculate max SAGAW for each iommu.
563 */
564int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
565{
566 return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
567}
568
569/*
570 * calculate agaw for each iommu.
571 * "SAGAW" may be different across iommus, use a default agaw, and
572 * get a supported less agaw for iommus that don't support the default agaw.
573 */
574int iommu_calculate_agaw(struct intel_iommu *iommu)
575{
576 return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
577}
578
2c2e2c38 579/* This function only returns a single iommu in a domain */
8c11e798
WH
580static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
581{
582 int iommu_id;
583
2c2e2c38 584 /* si_domain and vm domain should not get here. */
1ce28feb 585 BUG_ON(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE);
2c2e2c38 586 BUG_ON(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY);
1ce28feb 587
1b198bb0 588 iommu_id = find_first_bit(domain->iommu_bmp, g_num_of_iommus);
8c11e798
WH
589 if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
590 return NULL;
591
592 return g_iommus[iommu_id];
593}
594
8e604097
WH
595static void domain_update_iommu_coherency(struct dmar_domain *domain)
596{
597 int i;
598
2e12bc29
AW
599 i = find_first_bit(domain->iommu_bmp, g_num_of_iommus);
600
601 domain->iommu_coherency = i < g_num_of_iommus ? 1 : 0;
8e604097 602
1b198bb0 603 for_each_set_bit(i, domain->iommu_bmp, g_num_of_iommus) {
8e604097
WH
604 if (!ecap_coherent(g_iommus[i]->ecap)) {
605 domain->iommu_coherency = 0;
606 break;
607 }
8e604097
WH
608 }
609}
610
58c610bd
SY
611static void domain_update_iommu_snooping(struct dmar_domain *domain)
612{
613 int i;
614
615 domain->iommu_snooping = 1;
616
1b198bb0 617 for_each_set_bit(i, domain->iommu_bmp, g_num_of_iommus) {
58c610bd
SY
618 if (!ecap_sc_support(g_iommus[i]->ecap)) {
619 domain->iommu_snooping = 0;
620 break;
621 }
58c610bd
SY
622 }
623}
624
6dd9a7c7
YS
625static void domain_update_iommu_superpage(struct dmar_domain *domain)
626{
8140a95d
AK
627 struct dmar_drhd_unit *drhd;
628 struct intel_iommu *iommu = NULL;
629 int mask = 0xf;
6dd9a7c7
YS
630
631 if (!intel_iommu_superpage) {
632 domain->iommu_superpage = 0;
633 return;
634 }
635
8140a95d
AK
636 /* set iommu_superpage to the smallest common denominator */
637 for_each_active_iommu(iommu, drhd) {
638 mask &= cap_super_page_val(iommu->cap);
6dd9a7c7
YS
639 if (!mask) {
640 break;
641 }
642 }
643 domain->iommu_superpage = fls(mask);
644}
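/*
 * Worked example (illustrative, not part of the original file):
 * cap_super_page_val() bit 0 means 2MiB pages, bit 1 means 1GiB pages.
 * With one iommu reporting 0x3 (2MiB + 1GiB) and another reporting 0x1
 * (2MiB only), the loop leaves mask == 0x1 and fls(0x1) == 1, so the
 * domain is limited to 2MiB superpages.  If any active iommu reports 0,
 * mask collapses to 0 and superpages are not used at all.
 */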
645
58c610bd
SY
646/* Some capabilities may be different across iommus */
647static void domain_update_iommu_cap(struct dmar_domain *domain)
648{
649 domain_update_iommu_coherency(domain);
650 domain_update_iommu_snooping(domain);
6dd9a7c7 651 domain_update_iommu_superpage(domain);
58c610bd
SY
652}
653
276dbf99 654static struct intel_iommu *device_to_iommu(int segment, u8 bus, u8 devfn)
c7151a8d
WH
655{
656 struct dmar_drhd_unit *drhd = NULL;
657 int i;
658
7c919779 659 for_each_active_drhd_unit(drhd) {
276dbf99
DW
660 if (segment != drhd->segment)
661 continue;
c7151a8d 662
924b6231 663 for (i = 0; i < drhd->devices_cnt; i++) {
288e4877
DH
664 if (drhd->devices[i] &&
665 drhd->devices[i]->bus->number == bus &&
c7151a8d
WH
666 drhd->devices[i]->devfn == devfn)
667 return drhd->iommu;
4958c5dc
DW
668 if (drhd->devices[i] &&
669 drhd->devices[i]->subordinate &&
924b6231 670 drhd->devices[i]->subordinate->number <= bus &&
b918c62e 671 drhd->devices[i]->subordinate->busn_res.end >= bus)
924b6231
DW
672 return drhd->iommu;
673 }
c7151a8d
WH
674
675 if (drhd->include_all)
676 return drhd->iommu;
677 }
678
679 return NULL;
680}
681
5331fe6f
WH
682static void domain_flush_cache(struct dmar_domain *domain,
683 void *addr, int size)
684{
685 if (!domain->iommu_coherency)
686 clflush_cache_range(addr, size);
687}
688
ba395927
KA
689/* Gets context entry for a given bus and devfn */
690static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
691 u8 bus, u8 devfn)
692{
693 struct root_entry *root;
694 struct context_entry *context;
695 unsigned long phy_addr;
696 unsigned long flags;
697
698 spin_lock_irqsave(&iommu->lock, flags);
699 root = &iommu->root_entry[bus];
700 context = get_context_addr_from_root(root);
701 if (!context) {
4c923d47
SS
702 context = (struct context_entry *)
703 alloc_pgtable_page(iommu->node);
ba395927
KA
704 if (!context) {
705 spin_unlock_irqrestore(&iommu->lock, flags);
706 return NULL;
707 }
5b6985ce 708 __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
ba395927
KA
709 phy_addr = virt_to_phys((void *)context);
710 set_root_value(root, phy_addr);
711 set_root_present(root);
712 __iommu_flush_cache(iommu, root, sizeof(*root));
713 }
714 spin_unlock_irqrestore(&iommu->lock, flags);
715 return &context[devfn];
716}
717
718static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
719{
720 struct root_entry *root;
721 struct context_entry *context;
722 int ret;
723 unsigned long flags;
724
725 spin_lock_irqsave(&iommu->lock, flags);
726 root = &iommu->root_entry[bus];
727 context = get_context_addr_from_root(root);
728 if (!context) {
729 ret = 0;
730 goto out;
731 }
c07e7d21 732 ret = context_present(&context[devfn]);
ba395927
KA
733out:
734 spin_unlock_irqrestore(&iommu->lock, flags);
735 return ret;
736}
737
738static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
739{
740 struct root_entry *root;
741 struct context_entry *context;
742 unsigned long flags;
743
744 spin_lock_irqsave(&iommu->lock, flags);
745 root = &iommu->root_entry[bus];
746 context = get_context_addr_from_root(root);
747 if (context) {
c07e7d21 748 context_clear_entry(&context[devfn]);
ba395927
KA
749 __iommu_flush_cache(iommu, &context[devfn], \
750 sizeof(*context));
751 }
752 spin_unlock_irqrestore(&iommu->lock, flags);
753}
754
755static void free_context_table(struct intel_iommu *iommu)
756{
757 struct root_entry *root;
758 int i;
759 unsigned long flags;
760 struct context_entry *context;
761
762 spin_lock_irqsave(&iommu->lock, flags);
763 if (!iommu->root_entry) {
764 goto out;
765 }
766 for (i = 0; i < ROOT_ENTRY_NR; i++) {
767 root = &iommu->root_entry[i];
768 context = get_context_addr_from_root(root);
769 if (context)
770 free_pgtable_page(context);
771 }
772 free_pgtable_page(iommu->root_entry);
773 iommu->root_entry = NULL;
774out:
775 spin_unlock_irqrestore(&iommu->lock, flags);
776}
777
b026fd28 778static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
4399c8bf 779 unsigned long pfn, int target_level)
ba395927 780{
b026fd28 781 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
ba395927
KA
782 struct dma_pte *parent, *pte = NULL;
783 int level = agaw_to_level(domain->agaw);
4399c8bf 784 int offset;
ba395927
KA
785
786 BUG_ON(!domain->pgd);
f9423606
JS
787
788 if (addr_width < BITS_PER_LONG && pfn >> addr_width)
789 /* Address beyond IOMMU's addressing capabilities. */
790 return NULL;
791
ba395927
KA
792 parent = domain->pgd;
793
ba395927
KA
794 while (level > 0) {
795 void *tmp_page;
796
b026fd28 797 offset = pfn_level_offset(pfn, level);
ba395927 798 pte = &parent[offset];
4399c8bf 799 if (!target_level && (dma_pte_superpage(pte) || !dma_pte_present(pte)))
6dd9a7c7
YS
800 break;
801 if (level == target_level)
ba395927
KA
802 break;
803
19c239ce 804 if (!dma_pte_present(pte)) {
c85994e4
DW
805 uint64_t pteval;
806
4c923d47 807 tmp_page = alloc_pgtable_page(domain->nid);
ba395927 808
206a73c1 809 if (!tmp_page)
ba395927 810 return NULL;
206a73c1 811
c85994e4 812 domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
64de5af0 813 pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
c85994e4
DW
814 if (cmpxchg64(&pte->val, 0ULL, pteval)) {
815 /* Someone else set it while we were thinking; use theirs. */
816 free_pgtable_page(tmp_page);
817 } else {
818 dma_pte_addr(pte);
819 domain_flush_cache(domain, pte, sizeof(*pte));
820 }
ba395927 821 }
19c239ce 822 parent = phys_to_virt(dma_pte_addr(pte));
ba395927
KA
823 level--;
824 }
825
ba395927
KA
826 return pte;
827}
828
6dd9a7c7 829
ba395927 830/* return address's pte at specific level */
90dcfb5e
DW
831static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
832 unsigned long pfn,
6dd9a7c7 833 int level, int *large_page)
ba395927
KA
834{
835 struct dma_pte *parent, *pte = NULL;
836 int total = agaw_to_level(domain->agaw);
837 int offset;
838
839 parent = domain->pgd;
840 while (level <= total) {
90dcfb5e 841 offset = pfn_level_offset(pfn, total);
ba395927
KA
842 pte = &parent[offset];
843 if (level == total)
844 return pte;
845
6dd9a7c7
YS
846 if (!dma_pte_present(pte)) {
847 *large_page = total;
ba395927 848 break;
6dd9a7c7
YS
849 }
850
851 if (pte->val & DMA_PTE_LARGE_PAGE) {
852 *large_page = total;
853 return pte;
854 }
855
19c239ce 856 parent = phys_to_virt(dma_pte_addr(pte));
ba395927
KA
857 total--;
858 }
859 return NULL;
860}
861
ba395927 862/* clear last level pte, a tlb flush should follow */
292827cb 863static int dma_pte_clear_range(struct dmar_domain *domain,
595badf5
DW
864 unsigned long start_pfn,
865 unsigned long last_pfn)
ba395927 866{
04b18e65 867 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
6dd9a7c7 868 unsigned int large_page = 1;
310a5ab9 869 struct dma_pte *first_pte, *pte;
66eae846 870
04b18e65 871 BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
595badf5 872 BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
59c36286 873 BUG_ON(start_pfn > last_pfn);
ba395927 874
04b18e65 875 /* we don't need lock here; nobody else touches the iova range */
59c36286 876 do {
6dd9a7c7
YS
877 large_page = 1;
878 first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
310a5ab9 879 if (!pte) {
6dd9a7c7 880 start_pfn = align_to_level(start_pfn + 1, large_page + 1);
310a5ab9
DW
881 continue;
882 }
6dd9a7c7 883 do {
310a5ab9 884 dma_clear_pte(pte);
6dd9a7c7 885 start_pfn += lvl_to_nr_pages(large_page);
310a5ab9 886 pte++;
75e6bf96
DW
887 } while (start_pfn <= last_pfn && !first_pte_in_page(pte));
888
310a5ab9
DW
889 domain_flush_cache(domain, first_pte,
890 (void *)pte - (void *)first_pte);
59c36286
DW
891
892 } while (start_pfn && start_pfn <= last_pfn);
292827cb 893
5c645b35 894 return min_t(int, (large_page - 1) * 9, MAX_AGAW_PFN_WIDTH);
ba395927
KA
895}
896
3269ee0b
AW
897static void dma_pte_free_level(struct dmar_domain *domain, int level,
898 struct dma_pte *pte, unsigned long pfn,
899 unsigned long start_pfn, unsigned long last_pfn)
900{
901 pfn = max(start_pfn, pfn);
902 pte = &pte[pfn_level_offset(pfn, level)];
903
904 do {
905 unsigned long level_pfn;
906 struct dma_pte *level_pte;
907
908 if (!dma_pte_present(pte) || dma_pte_superpage(pte))
909 goto next;
910
911 level_pfn = pfn & level_mask(level - 1);
912 level_pte = phys_to_virt(dma_pte_addr(pte));
913
914 if (level > 2)
915 dma_pte_free_level(domain, level - 1, level_pte,
916 level_pfn, start_pfn, last_pfn);
917
918 /* If range covers entire pagetable, free it */
919 if (!(start_pfn > level_pfn ||
08336fd2 920 last_pfn < level_pfn + level_size(level) - 1)) {
3269ee0b
AW
921 dma_clear_pte(pte);
922 domain_flush_cache(domain, pte, sizeof(*pte));
923 free_pgtable_page(level_pte);
924 }
925next:
926 pfn += level_size(level);
927 } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
928}
929
ba395927
KA
930/* free page table pages. last level pte should already be cleared */
931static void dma_pte_free_pagetable(struct dmar_domain *domain,
d794dc9b
DW
932 unsigned long start_pfn,
933 unsigned long last_pfn)
ba395927 934{
6660c63a 935 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
ba395927 936
6660c63a
DW
937 BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
938 BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
59c36286 939 BUG_ON(start_pfn > last_pfn);
ba395927 940
f3a0a52f 941 /* We don't need lock here; nobody else touches the iova range */
3269ee0b
AW
942 dma_pte_free_level(domain, agaw_to_level(domain->agaw),
943 domain->pgd, 0, start_pfn, last_pfn);
6660c63a 944
ba395927 945 /* free pgd */
d794dc9b 946 if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
ba395927
KA
947 free_pgtable_page(domain->pgd);
948 domain->pgd = NULL;
949 }
950}
951
952/* iommu handling */
953static int iommu_alloc_root_entry(struct intel_iommu *iommu)
954{
955 struct root_entry *root;
956 unsigned long flags;
957
4c923d47 958 root = (struct root_entry *)alloc_pgtable_page(iommu->node);
ba395927
KA
959 if (!root)
960 return -ENOMEM;
961
5b6985ce 962 __iommu_flush_cache(iommu, root, ROOT_SIZE);
ba395927
KA
963
964 spin_lock_irqsave(&iommu->lock, flags);
965 iommu->root_entry = root;
966 spin_unlock_irqrestore(&iommu->lock, flags);
967
968 return 0;
969}
970
ba395927
KA
971static void iommu_set_root_entry(struct intel_iommu *iommu)
972{
973 void *addr;
c416daa9 974 u32 sts;
ba395927
KA
975 unsigned long flag;
976
977 addr = iommu->root_entry;
978
1f5b3c3f 979 raw_spin_lock_irqsave(&iommu->register_lock, flag);
ba395927
KA
980 dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));
981
c416daa9 982 writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);
ba395927
KA
983
984 /* Make sure hardware complete it */
985 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 986 readl, (sts & DMA_GSTS_RTPS), sts);
ba395927 987
1f5b3c3f 988 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
989}
990
991static void iommu_flush_write_buffer(struct intel_iommu *iommu)
992{
993 u32 val;
994 unsigned long flag;
995
9af88143 996 if (!rwbf_quirk && !cap_rwbf(iommu->cap))
ba395927 997 return;
ba395927 998
1f5b3c3f 999 raw_spin_lock_irqsave(&iommu->register_lock, flag);
462b60f6 1000 writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);
ba395927
KA
1001
1002 /* Make sure hardware complete it */
1003 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1004 readl, (!(val & DMA_GSTS_WBFS)), val);
ba395927 1005
1f5b3c3f 1006 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1007}
1008
 1009/* return value determines if we need a write buffer flush */
4c25a2c1
DW
1010static void __iommu_flush_context(struct intel_iommu *iommu,
1011 u16 did, u16 source_id, u8 function_mask,
1012 u64 type)
ba395927
KA
1013{
1014 u64 val = 0;
1015 unsigned long flag;
1016
ba395927
KA
1017 switch (type) {
1018 case DMA_CCMD_GLOBAL_INVL:
1019 val = DMA_CCMD_GLOBAL_INVL;
1020 break;
1021 case DMA_CCMD_DOMAIN_INVL:
1022 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
1023 break;
1024 case DMA_CCMD_DEVICE_INVL:
1025 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
1026 | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
1027 break;
1028 default:
1029 BUG();
1030 }
1031 val |= DMA_CCMD_ICC;
1032
1f5b3c3f 1033 raw_spin_lock_irqsave(&iommu->register_lock, flag);
ba395927
KA
1034 dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
1035
1036 /* Make sure hardware complete it */
1037 IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
1038 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
1039
1f5b3c3f 1040 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1041}
1042
ba395927 1043/* return value determines if we need a write buffer flush */
1f0ef2aa
DW
1044static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
1045 u64 addr, unsigned int size_order, u64 type)
ba395927
KA
1046{
1047 int tlb_offset = ecap_iotlb_offset(iommu->ecap);
1048 u64 val = 0, val_iva = 0;
1049 unsigned long flag;
1050
ba395927
KA
1051 switch (type) {
1052 case DMA_TLB_GLOBAL_FLUSH:
1053 /* global flush doesn't need set IVA_REG */
1054 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
1055 break;
1056 case DMA_TLB_DSI_FLUSH:
1057 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1058 break;
1059 case DMA_TLB_PSI_FLUSH:
1060 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1061 /* Note: always flush non-leaf currently */
1062 val_iva = size_order | addr;
1063 break;
1064 default:
1065 BUG();
1066 }
1067 /* Note: set drain read/write */
1068#if 0
1069 /*
 1070 * This is probably meant to be super secure. Looks like we can
1071 * ignore it without any impact.
1072 */
1073 if (cap_read_drain(iommu->cap))
1074 val |= DMA_TLB_READ_DRAIN;
1075#endif
1076 if (cap_write_drain(iommu->cap))
1077 val |= DMA_TLB_WRITE_DRAIN;
1078
1f5b3c3f 1079 raw_spin_lock_irqsave(&iommu->register_lock, flag);
ba395927
KA
1080 /* Note: Only uses first TLB reg currently */
1081 if (val_iva)
1082 dmar_writeq(iommu->reg + tlb_offset, val_iva);
1083 dmar_writeq(iommu->reg + tlb_offset + 8, val);
1084
1085 /* Make sure hardware complete it */
1086 IOMMU_WAIT_OP(iommu, tlb_offset + 8,
1087 dmar_readq, (!(val & DMA_TLB_IVT)), val);
1088
1f5b3c3f 1089 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1090
1091 /* check IOTLB invalidation granularity */
1092 if (DMA_TLB_IAIG(val) == 0)
1093 printk(KERN_ERR"IOMMU: flush IOTLB failed\n");
1094 if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
1095 pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
5b6985ce
FY
1096 (unsigned long long)DMA_TLB_IIRG(type),
1097 (unsigned long long)DMA_TLB_IAIG(val));
ba395927
KA
1098}
1099
93a23a72
YZ
1100static struct device_domain_info *iommu_support_dev_iotlb(
1101 struct dmar_domain *domain, int segment, u8 bus, u8 devfn)
1102{
1103 int found = 0;
1104 unsigned long flags;
1105 struct device_domain_info *info;
1106 struct intel_iommu *iommu = device_to_iommu(segment, bus, devfn);
1107
1108 if (!ecap_dev_iotlb_support(iommu->ecap))
1109 return NULL;
1110
1111 if (!iommu->qi)
1112 return NULL;
1113
1114 spin_lock_irqsave(&device_domain_lock, flags);
1115 list_for_each_entry(info, &domain->devices, link)
1116 if (info->bus == bus && info->devfn == devfn) {
1117 found = 1;
1118 break;
1119 }
1120 spin_unlock_irqrestore(&device_domain_lock, flags);
1121
1122 if (!found || !info->dev)
1123 return NULL;
1124
1125 if (!pci_find_ext_capability(info->dev, PCI_EXT_CAP_ID_ATS))
1126 return NULL;
1127
1128 if (!dmar_find_matched_atsr_unit(info->dev))
1129 return NULL;
1130
1131 info->iommu = iommu;
1132
1133 return info;
1134}
1135
1136static void iommu_enable_dev_iotlb(struct device_domain_info *info)
ba395927 1137{
93a23a72
YZ
1138 if (!info)
1139 return;
1140
1141 pci_enable_ats(info->dev, VTD_PAGE_SHIFT);
1142}
1143
1144static void iommu_disable_dev_iotlb(struct device_domain_info *info)
1145{
1146 if (!info->dev || !pci_ats_enabled(info->dev))
1147 return;
1148
1149 pci_disable_ats(info->dev);
1150}
1151
1152static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
1153 u64 addr, unsigned mask)
1154{
1155 u16 sid, qdep;
1156 unsigned long flags;
1157 struct device_domain_info *info;
1158
1159 spin_lock_irqsave(&device_domain_lock, flags);
1160 list_for_each_entry(info, &domain->devices, link) {
1161 if (!info->dev || !pci_ats_enabled(info->dev))
1162 continue;
1163
1164 sid = info->bus << 8 | info->devfn;
1165 qdep = pci_ats_queue_depth(info->dev);
1166 qi_flush_dev_iotlb(info->iommu, sid, qdep, addr, mask);
1167 }
1168 spin_unlock_irqrestore(&device_domain_lock, flags);
1169}
1170
1f0ef2aa 1171static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
82653633 1172 unsigned long pfn, unsigned int pages, int map)
ba395927 1173{
9dd2fe89 1174 unsigned int mask = ilog2(__roundup_pow_of_two(pages));
03d6a246 1175 uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
ba395927 1176
ba395927
KA
1177 BUG_ON(pages == 0);
1178
ba395927 1179 /*
9dd2fe89
YZ
 1180 * Fall back to domain selective flush if there is no PSI support or the size is
1181 * too big.
ba395927
KA
1182 * PSI requires page size to be 2 ^ x, and the base address is naturally
1183 * aligned to the size
1184 */
9dd2fe89
YZ
1185 if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap))
1186 iommu->flush.flush_iotlb(iommu, did, 0, 0,
1f0ef2aa 1187 DMA_TLB_DSI_FLUSH);
9dd2fe89
YZ
1188 else
1189 iommu->flush.flush_iotlb(iommu, did, addr, mask,
1190 DMA_TLB_PSI_FLUSH);
bf92df30
YZ
1191
1192 /*
82653633
NA
1193 * In caching mode, changes of pages from non-present to present require
1194 * flush. However, device IOTLB doesn't need to be flushed in this case.
bf92df30 1195 */
82653633 1196 if (!cap_caching_mode(iommu->cap) || !map)
93a23a72 1197 iommu_flush_dev_iotlb(iommu->domains[did], addr, mask);
ba395927
KA
1198}
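/*
 * Worked example (illustrative, not part of the original file): flushing
 * 10 pages rounds up to 16, so mask == 4 and the PSI invalidates a
 * 16-page (64KiB) naturally aligned region containing the mapping.  If
 * the hardware's MAMV field (cap_max_amask_val) is smaller than the
 * required mask, or PSI is not supported at all, the code above falls
 * back to a domain-selective flush instead.
 */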
1199
f8bab735 1200static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
1201{
1202 u32 pmen;
1203 unsigned long flags;
1204
1f5b3c3f 1205 raw_spin_lock_irqsave(&iommu->register_lock, flags);
f8bab735 1206 pmen = readl(iommu->reg + DMAR_PMEN_REG);
1207 pmen &= ~DMA_PMEN_EPM;
1208 writel(pmen, iommu->reg + DMAR_PMEN_REG);
1209
1210 /* wait for the protected region status bit to clear */
1211 IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
1212 readl, !(pmen & DMA_PMEN_PRS), pmen);
1213
1f5b3c3f 1214 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
f8bab735 1215}
1216
ba395927
KA
1217static int iommu_enable_translation(struct intel_iommu *iommu)
1218{
1219 u32 sts;
1220 unsigned long flags;
1221
1f5b3c3f 1222 raw_spin_lock_irqsave(&iommu->register_lock, flags);
c416daa9
DW
1223 iommu->gcmd |= DMA_GCMD_TE;
1224 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
ba395927
KA
1225
1226 /* Make sure hardware complete it */
1227 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1228 readl, (sts & DMA_GSTS_TES), sts);
ba395927 1229
1f5b3c3f 1230 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
ba395927
KA
1231 return 0;
1232}
1233
1234static int iommu_disable_translation(struct intel_iommu *iommu)
1235{
1236 u32 sts;
1237 unsigned long flag;
1238
1f5b3c3f 1239 raw_spin_lock_irqsave(&iommu->register_lock, flag);
ba395927
KA
1240 iommu->gcmd &= ~DMA_GCMD_TE;
1241 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1242
1243 /* Make sure hardware complete it */
1244 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1245 readl, (!(sts & DMA_GSTS_TES)), sts);
ba395927 1246
1f5b3c3f 1247 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1248 return 0;
1249}
1250
3460a6d9 1251
ba395927
KA
1252static int iommu_init_domains(struct intel_iommu *iommu)
1253{
1254 unsigned long ndomains;
1255 unsigned long nlongs;
1256
1257 ndomains = cap_ndoms(iommu->cap);
852bdb04
JL
1258 pr_debug("IOMMU%d: Number of Domains supported <%ld>\n",
1259 iommu->seq_id, ndomains);
ba395927
KA
1260 nlongs = BITS_TO_LONGS(ndomains);
1261
94a91b50
DD
1262 spin_lock_init(&iommu->lock);
1263
ba395927
KA
1264 /* TBD: there might be 64K domains,
1265 * consider other allocation for future chip
1266 */
1267 iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1268 if (!iommu->domain_ids) {
852bdb04
JL
1269 pr_err("IOMMU%d: allocating domain id array failed\n",
1270 iommu->seq_id);
ba395927
KA
1271 return -ENOMEM;
1272 }
1273 iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *),
1274 GFP_KERNEL);
1275 if (!iommu->domains) {
852bdb04
JL
1276 pr_err("IOMMU%d: allocating domain array failed\n",
1277 iommu->seq_id);
1278 kfree(iommu->domain_ids);
1279 iommu->domain_ids = NULL;
ba395927
KA
1280 return -ENOMEM;
1281 }
1282
1283 /*
1284 * if Caching mode is set, then invalid translations are tagged
1285 * with domainid 0. Hence we need to pre-allocate it.
1286 */
1287 if (cap_caching_mode(iommu->cap))
1288 set_bit(0, iommu->domain_ids);
1289 return 0;
1290}
ba395927 1291
a868e6b7 1292static void free_dmar_iommu(struct intel_iommu *iommu)
ba395927
KA
1293{
1294 struct dmar_domain *domain;
5ced12af 1295 int i, count;
c7151a8d 1296 unsigned long flags;
ba395927 1297
94a91b50 1298 if ((iommu->domains) && (iommu->domain_ids)) {
a45946ab 1299 for_each_set_bit(i, iommu->domain_ids, cap_ndoms(iommu->cap)) {
a4eaa86c
JL
1300 /*
1301 * Domain id 0 is reserved for invalid translation
1302 * if hardware supports caching mode.
1303 */
1304 if (cap_caching_mode(iommu->cap) && i == 0)
1305 continue;
1306
94a91b50
DD
1307 domain = iommu->domains[i];
1308 clear_bit(i, iommu->domain_ids);
1309
1310 spin_lock_irqsave(&domain->iommu_lock, flags);
5ced12af
JL
1311 count = --domain->iommu_count;
1312 spin_unlock_irqrestore(&domain->iommu_lock, flags);
92d03cc8
JL
1313 if (count == 0)
1314 domain_exit(domain);
5e98c4b1 1315 }
ba395927
KA
1316 }
1317
1318 if (iommu->gcmd & DMA_GCMD_TE)
1319 iommu_disable_translation(iommu);
1320
ba395927
KA
1321 kfree(iommu->domains);
1322 kfree(iommu->domain_ids);
a868e6b7
JL
1323 iommu->domains = NULL;
1324 iommu->domain_ids = NULL;
ba395927 1325
d9630fe9
WH
1326 g_iommus[iommu->seq_id] = NULL;
1327
ba395927
KA
1328 /* free context mapping */
1329 free_context_table(iommu);
ba395927
KA
1330}
1331
92d03cc8 1332static struct dmar_domain *alloc_domain(bool vm)
ba395927 1333{
92d03cc8
JL
1334 /* domain id for virtual machine, it won't be set in context */
1335 static atomic_t vm_domid = ATOMIC_INIT(0);
ba395927 1336 struct dmar_domain *domain;
ba395927
KA
1337
1338 domain = alloc_domain_mem();
1339 if (!domain)
1340 return NULL;
1341
4c923d47 1342 domain->nid = -1;
92d03cc8 1343 domain->iommu_count = 0;
1b198bb0 1344 memset(domain->iommu_bmp, 0, sizeof(domain->iommu_bmp));
2c2e2c38 1345 domain->flags = 0;
92d03cc8
JL
1346 spin_lock_init(&domain->iommu_lock);
1347 INIT_LIST_HEAD(&domain->devices);
1348 if (vm) {
1349 domain->id = atomic_inc_return(&vm_domid);
1350 domain->flags = DOMAIN_FLAG_VIRTUAL_MACHINE;
1351 }
2c2e2c38
FY
1352
1353 return domain;
1354}
1355
1356static int iommu_attach_domain(struct dmar_domain *domain,
1357 struct intel_iommu *iommu)
1358{
1359 int num;
1360 unsigned long ndomains;
1361 unsigned long flags;
1362
ba395927
KA
1363 ndomains = cap_ndoms(iommu->cap);
1364
1365 spin_lock_irqsave(&iommu->lock, flags);
2c2e2c38 1366
ba395927
KA
1367 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1368 if (num >= ndomains) {
1369 spin_unlock_irqrestore(&iommu->lock, flags);
ba395927 1370 printk(KERN_ERR "IOMMU: no free domain ids\n");
2c2e2c38 1371 return -ENOMEM;
ba395927
KA
1372 }
1373
ba395927 1374 domain->id = num;
9ebd682e 1375 domain->iommu_count++;
2c2e2c38 1376 set_bit(num, iommu->domain_ids);
1b198bb0 1377 set_bit(iommu->seq_id, domain->iommu_bmp);
ba395927
KA
1378 iommu->domains[num] = domain;
1379 spin_unlock_irqrestore(&iommu->lock, flags);
1380
2c2e2c38 1381 return 0;
ba395927
KA
1382}
1383
2c2e2c38
FY
1384static void iommu_detach_domain(struct dmar_domain *domain,
1385 struct intel_iommu *iommu)
ba395927
KA
1386{
1387 unsigned long flags;
2c2e2c38 1388 int num, ndomains;
ba395927 1389
8c11e798 1390 spin_lock_irqsave(&iommu->lock, flags);
2c2e2c38 1391 ndomains = cap_ndoms(iommu->cap);
a45946ab 1392 for_each_set_bit(num, iommu->domain_ids, ndomains) {
2c2e2c38 1393 if (iommu->domains[num] == domain) {
92d03cc8
JL
1394 clear_bit(num, iommu->domain_ids);
1395 iommu->domains[num] = NULL;
2c2e2c38
FY
1396 break;
1397 }
2c2e2c38 1398 }
8c11e798 1399 spin_unlock_irqrestore(&iommu->lock, flags);
ba395927
KA
1400}
1401
1402static struct iova_domain reserved_iova_list;
8a443df4 1403static struct lock_class_key reserved_rbtree_key;
ba395927 1404
51a63e67 1405static int dmar_init_reserved_ranges(void)
ba395927
KA
1406{
1407 struct pci_dev *pdev = NULL;
1408 struct iova *iova;
1409 int i;
ba395927 1410
f661197e 1411 init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN);
ba395927 1412
8a443df4
MG
1413 lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1414 &reserved_rbtree_key);
1415
ba395927
KA
1416 /* IOAPIC ranges shouldn't be accessed by DMA */
1417 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1418 IOVA_PFN(IOAPIC_RANGE_END));
51a63e67 1419 if (!iova) {
ba395927 1420 printk(KERN_ERR "Reserve IOAPIC range failed\n");
51a63e67
JC
1421 return -ENODEV;
1422 }
ba395927
KA
1423
1424 /* Reserve all PCI MMIO to avoid peer-to-peer access */
1425 for_each_pci_dev(pdev) {
1426 struct resource *r;
1427
1428 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1429 r = &pdev->resource[i];
1430 if (!r->flags || !(r->flags & IORESOURCE_MEM))
1431 continue;
1a4a4551
DW
1432 iova = reserve_iova(&reserved_iova_list,
1433 IOVA_PFN(r->start),
1434 IOVA_PFN(r->end));
51a63e67 1435 if (!iova) {
ba395927 1436 printk(KERN_ERR "Reserve iova failed\n");
51a63e67
JC
1437 return -ENODEV;
1438 }
ba395927
KA
1439 }
1440 }
51a63e67 1441 return 0;
ba395927
KA
1442}
1443
1444static void domain_reserve_special_ranges(struct dmar_domain *domain)
1445{
1446 copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1447}
1448
1449static inline int guestwidth_to_adjustwidth(int gaw)
1450{
1451 int agaw;
1452 int r = (gaw - 12) % 9;
1453
1454 if (r == 0)
1455 agaw = gaw;
1456 else
1457 agaw = gaw + 9 - r;
1458 if (agaw > 64)
1459 agaw = 64;
1460 return agaw;
1461}
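/*
 * Worked example (illustrative, not part of the original file): the
 * adjusted width must be 12 plus a multiple of 9 so that it maps onto a
 * whole number of 9-bit page-table levels.  gaw 48 is already aligned
 * ((48 - 12) % 9 == 0) and stays 48; gaw 40 rounds up to 48; gaw 39
 * stays 39 (a 3-level table).
 */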
1462
1463static int domain_init(struct dmar_domain *domain, int guest_width)
1464{
1465 struct intel_iommu *iommu;
1466 int adjust_width, agaw;
1467 unsigned long sagaw;
1468
f661197e 1469 init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
ba395927
KA
1470 domain_reserve_special_ranges(domain);
1471
1472 /* calculate AGAW */
8c11e798 1473 iommu = domain_get_iommu(domain);
ba395927
KA
1474 if (guest_width > cap_mgaw(iommu->cap))
1475 guest_width = cap_mgaw(iommu->cap);
1476 domain->gaw = guest_width;
1477 adjust_width = guestwidth_to_adjustwidth(guest_width);
1478 agaw = width_to_agaw(adjust_width);
1479 sagaw = cap_sagaw(iommu->cap);
1480 if (!test_bit(agaw, &sagaw)) {
1481 /* hardware doesn't support it, choose a bigger one */
1482 pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw);
1483 agaw = find_next_bit(&sagaw, 5, agaw);
1484 if (agaw >= 5)
1485 return -ENODEV;
1486 }
1487 domain->agaw = agaw;
ba395927 1488
8e604097
WH
1489 if (ecap_coherent(iommu->ecap))
1490 domain->iommu_coherency = 1;
1491 else
1492 domain->iommu_coherency = 0;
1493
58c610bd
SY
1494 if (ecap_sc_support(iommu->ecap))
1495 domain->iommu_snooping = 1;
1496 else
1497 domain->iommu_snooping = 0;
1498
6dd9a7c7 1499 domain->iommu_superpage = fls(cap_super_page_val(iommu->cap));
4c923d47 1500 domain->nid = iommu->node;
c7151a8d 1501
ba395927 1502 /* always allocate the top pgd */
4c923d47 1503 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
ba395927
KA
1504 if (!domain->pgd)
1505 return -ENOMEM;
5b6985ce 1506 __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
ba395927
KA
1507 return 0;
1508}
1509
1510static void domain_exit(struct dmar_domain *domain)
1511{
2c2e2c38
FY
1512 struct dmar_drhd_unit *drhd;
1513 struct intel_iommu *iommu;
ba395927
KA
1514
 1515 /* Domain 0 is reserved, so don't process it */
1516 if (!domain)
1517 return;
1518
7b668357
AW
1519 /* Flush any lazy unmaps that may reference this domain */
1520 if (!intel_iommu_strict)
1521 flush_unmaps_timeout(0);
1522
92d03cc8 1523 /* remove associated devices */
ba395927 1524 domain_remove_dev_info(domain);
92d03cc8 1525
ba395927
KA
1526 /* destroy iovas */
1527 put_iova_domain(&domain->iovad);
ba395927
KA
1528
1529 /* clear ptes */
595badf5 1530 dma_pte_clear_range(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
ba395927
KA
1531
1532 /* free page tables */
d794dc9b 1533 dma_pte_free_pagetable(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
ba395927 1534
92d03cc8 1535 /* clear attached or cached domains */
2c2e2c38 1536 for_each_active_iommu(iommu, drhd)
92d03cc8
JL
1537 if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE ||
1538 test_bit(iommu->seq_id, domain->iommu_bmp))
2c2e2c38
FY
1539 iommu_detach_domain(domain, iommu);
1540
ba395927
KA
1541 free_domain_mem(domain);
1542}
1543
4ed0d3e6
FY
1544static int domain_context_mapping_one(struct dmar_domain *domain, int segment,
1545 u8 bus, u8 devfn, int translation)
ba395927
KA
1546{
1547 struct context_entry *context;
ba395927 1548 unsigned long flags;
5331fe6f 1549 struct intel_iommu *iommu;
ea6606b0
WH
1550 struct dma_pte *pgd;
1551 unsigned long num;
1552 unsigned long ndomains;
1553 int id;
1554 int agaw;
93a23a72 1555 struct device_domain_info *info = NULL;
ba395927
KA
1556
1557 pr_debug("Set context mapping for %02x:%02x.%d\n",
1558 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
4ed0d3e6 1559
ba395927 1560 BUG_ON(!domain->pgd);
4ed0d3e6
FY
1561 BUG_ON(translation != CONTEXT_TT_PASS_THROUGH &&
1562 translation != CONTEXT_TT_MULTI_LEVEL);
5331fe6f 1563
276dbf99 1564 iommu = device_to_iommu(segment, bus, devfn);
5331fe6f
WH
1565 if (!iommu)
1566 return -ENODEV;
1567
ba395927
KA
1568 context = device_to_context_entry(iommu, bus, devfn);
1569 if (!context)
1570 return -ENOMEM;
1571 spin_lock_irqsave(&iommu->lock, flags);
c07e7d21 1572 if (context_present(context)) {
ba395927
KA
1573 spin_unlock_irqrestore(&iommu->lock, flags);
1574 return 0;
1575 }
1576
ea6606b0
WH
1577 id = domain->id;
1578 pgd = domain->pgd;
1579
2c2e2c38
FY
1580 if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE ||
1581 domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) {
ea6606b0
WH
1582 int found = 0;
1583
1584 /* find an available domain id for this device in iommu */
1585 ndomains = cap_ndoms(iommu->cap);
a45946ab 1586 for_each_set_bit(num, iommu->domain_ids, ndomains) {
ea6606b0
WH
1587 if (iommu->domains[num] == domain) {
1588 id = num;
1589 found = 1;
1590 break;
1591 }
ea6606b0
WH
1592 }
1593
1594 if (found == 0) {
1595 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1596 if (num >= ndomains) {
1597 spin_unlock_irqrestore(&iommu->lock, flags);
1598 printk(KERN_ERR "IOMMU: no free domain ids\n");
1599 return -EFAULT;
1600 }
1601
1602 set_bit(num, iommu->domain_ids);
1603 iommu->domains[num] = domain;
1604 id = num;
1605 }
1606
1607 /* Skip top levels of page tables for
1608 * iommu which has less agaw than default.
1672af11 1609 * Unnecessary for PT mode.
ea6606b0 1610 */
1672af11
CW
1611 if (translation != CONTEXT_TT_PASS_THROUGH) {
1612 for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) {
1613 pgd = phys_to_virt(dma_pte_addr(pgd));
1614 if (!dma_pte_present(pgd)) {
1615 spin_unlock_irqrestore(&iommu->lock, flags);
1616 return -ENOMEM;
1617 }
ea6606b0
WH
1618 }
1619 }
1620 }
1621
1622 context_set_domain_id(context, id);
4ed0d3e6 1623
93a23a72
YZ
1624 if (translation != CONTEXT_TT_PASS_THROUGH) {
1625 info = iommu_support_dev_iotlb(domain, segment, bus, devfn);
1626 translation = info ? CONTEXT_TT_DEV_IOTLB :
1627 CONTEXT_TT_MULTI_LEVEL;
1628 }
4ed0d3e6
FY
1629 /*
1630 * In pass through mode, AW must be programmed to indicate the largest
1631 * AGAW value supported by hardware. And ASR is ignored by hardware.
1632 */
93a23a72 1633 if (unlikely(translation == CONTEXT_TT_PASS_THROUGH))
4ed0d3e6 1634 context_set_address_width(context, iommu->msagaw);
93a23a72
YZ
1635 else {
1636 context_set_address_root(context, virt_to_phys(pgd));
1637 context_set_address_width(context, iommu->agaw);
1638 }
4ed0d3e6
FY
1639
1640 context_set_translation_type(context, translation);
c07e7d21
MM
1641 context_set_fault_enable(context);
1642 context_set_present(context);
5331fe6f 1643 domain_flush_cache(domain, context, sizeof(*context));
ba395927 1644
4c25a2c1
DW
1645 /*
1646 * It's a non-present to present mapping. If hardware doesn't cache
 1647 * non-present entries we only need to flush the write-buffer. If it
1648 * _does_ cache non-present entries, then it does so in the special
1649 * domain #0, which we have to flush:
1650 */
1651 if (cap_caching_mode(iommu->cap)) {
1652 iommu->flush.flush_context(iommu, 0,
1653 (((u16)bus) << 8) | devfn,
1654 DMA_CCMD_MASK_NOBIT,
1655 DMA_CCMD_DEVICE_INVL);
82653633 1656 iommu->flush.flush_iotlb(iommu, domain->id, 0, 0, DMA_TLB_DSI_FLUSH);
4c25a2c1 1657 } else {
ba395927 1658 iommu_flush_write_buffer(iommu);
4c25a2c1 1659 }
93a23a72 1660 iommu_enable_dev_iotlb(info);
ba395927 1661 spin_unlock_irqrestore(&iommu->lock, flags);
c7151a8d
WH
1662
1663 spin_lock_irqsave(&domain->iommu_lock, flags);
1b198bb0 1664 if (!test_and_set_bit(iommu->seq_id, domain->iommu_bmp)) {
c7151a8d 1665 domain->iommu_count++;
4c923d47
SS
1666 if (domain->iommu_count == 1)
1667 domain->nid = iommu->node;
58c610bd 1668 domain_update_iommu_cap(domain);
c7151a8d
WH
1669 }
1670 spin_unlock_irqrestore(&domain->iommu_lock, flags);
ba395927
KA
1671 return 0;
1672}
1673
1674static int
4ed0d3e6
FY
1675domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev,
1676 int translation)
ba395927
KA
1677{
1678 int ret;
1679 struct pci_dev *tmp, *parent;
1680
276dbf99 1681 ret = domain_context_mapping_one(domain, pci_domain_nr(pdev->bus),
4ed0d3e6
FY
1682 pdev->bus->number, pdev->devfn,
1683 translation);
ba395927
KA
1684 if (ret)
1685 return ret;
1686
1687 /* dependent device mapping */
1688 tmp = pci_find_upstream_pcie_bridge(pdev);
1689 if (!tmp)
1690 return 0;
1691 /* Secondary interface's bus number and devfn 0 */
1692 parent = pdev->bus->self;
1693 while (parent != tmp) {
276dbf99
DW
1694 ret = domain_context_mapping_one(domain,
1695 pci_domain_nr(parent->bus),
1696 parent->bus->number,
4ed0d3e6 1697 parent->devfn, translation);
ba395927
KA
1698 if (ret)
1699 return ret;
1700 parent = parent->bus->self;
1701 }
45e829ea 1702 if (pci_is_pcie(tmp)) /* this is a PCIe-to-PCI bridge */
ba395927 1703 return domain_context_mapping_one(domain,
276dbf99 1704 pci_domain_nr(tmp->subordinate),
4ed0d3e6
FY
1705 tmp->subordinate->number, 0,
1706 translation);
ba395927
KA
1707 else /* this is a legacy PCI bridge */
1708 return domain_context_mapping_one(domain,
276dbf99
DW
1709 pci_domain_nr(tmp->bus),
1710 tmp->bus->number,
4ed0d3e6
FY
1711 tmp->devfn,
1712 translation);
ba395927
KA
1713}
1714
5331fe6f 1715static int domain_context_mapped(struct pci_dev *pdev)
ba395927
KA
1716{
1717 int ret;
1718 struct pci_dev *tmp, *parent;
5331fe6f
WH
1719 struct intel_iommu *iommu;
1720
276dbf99
DW
1721 iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
1722 pdev->devfn);
5331fe6f
WH
1723 if (!iommu)
1724 return -ENODEV;
ba395927 1725
276dbf99 1726 ret = device_context_mapped(iommu, pdev->bus->number, pdev->devfn);
ba395927
KA
1727 if (!ret)
1728 return ret;
1729 /* dependent device mapping */
1730 tmp = pci_find_upstream_pcie_bridge(pdev);
1731 if (!tmp)
1732 return ret;
1733 /* Secondary interface's bus number and devfn 0 */
1734 parent = pdev->bus->self;
1735 while (parent != tmp) {
8c11e798 1736 ret = device_context_mapped(iommu, parent->bus->number,
276dbf99 1737 parent->devfn);
ba395927
KA
1738 if (!ret)
1739 return ret;
1740 parent = parent->bus->self;
1741 }
5f4d91a1 1742 if (pci_is_pcie(tmp))
276dbf99
DW
1743 return device_context_mapped(iommu, tmp->subordinate->number,
1744 0);
ba395927 1745 else
276dbf99
DW
1746 return device_context_mapped(iommu, tmp->bus->number,
1747 tmp->devfn);
ba395927
KA
1748}
1749
f532959b
FY
1750/* Returns a number of VTD pages, but aligned to MM page size */
1751static inline unsigned long aligned_nrpages(unsigned long host_addr,
1752 size_t size)
1753{
1754 host_addr &= ~PAGE_MASK;
1755 return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
1756}
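/*
 * Worked example (illustrative, not part of the original file), assuming
 * 4KiB MM pages: a buffer starting at offset 0xff0 within its page with
 * size 0x20 gives host_addr + size == 0x1010, which PAGE_ALIGN() rounds
 * to 0x2000, i.e. 2 VT-d pages: the 0x20 bytes straddle a page boundary
 * even though they would fit in a single page if aligned.
 */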
1757
6dd9a7c7
YS
1758/* Return largest possible superpage level for a given mapping */
1759static inline int hardware_largepage_caps(struct dmar_domain *domain,
1760 unsigned long iov_pfn,
1761 unsigned long phy_pfn,
1762 unsigned long pages)
1763{
1764 int support, level = 1;
1765 unsigned long pfnmerge;
1766
1767 support = domain->iommu_superpage;
1768
1769 /* To use a large page, the virtual *and* physical addresses
1770 must be aligned to 2MiB/1GiB/etc. Lower bits set in either
1771 of them will mean we have to use smaller pages. So just
1772 merge them and check both at once. */
1773 pfnmerge = iov_pfn | phy_pfn;
1774
1775 while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
1776 pages >>= VTD_STRIDE_SHIFT;
1777 if (!pages)
1778 break;
1779 pfnmerge >>= VTD_STRIDE_SHIFT;
1780 level++;
1781 support--;
1782 }
1783 return level;
1784}
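/*
 * Editor's illustrative note, not part of the original source: with
 * domain->iommu_superpage == 2 (2MiB and 1GiB pages usable), iov_pfn ==
 * 0x200, phy_pfn == 0x400 and pages == 1024, pfnmerge is 0x600 with its
 * low 9 bits clear, so one loop iteration runs: pages becomes 2,
 * pfnmerge becomes 3, level becomes 2.  The next alignment check fails,
 * so level 2 (2MiB superpages) is returned.
 */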
1785
9051aa02
DW
1786static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1787 struct scatterlist *sg, unsigned long phys_pfn,
1788 unsigned long nr_pages, int prot)
e1605495
DW
1789{
1790 struct dma_pte *first_pte = NULL, *pte = NULL;
9051aa02 1791 phys_addr_t uninitialized_var(pteval);
e1605495 1792 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
9051aa02 1793 unsigned long sg_res;
6dd9a7c7
YS
1794 unsigned int largepage_lvl = 0;
1795 unsigned long lvl_pages = 0;
e1605495
DW
1796
1797 BUG_ON(addr_width < BITS_PER_LONG && (iov_pfn + nr_pages - 1) >> addr_width);
1798
1799 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
1800 return -EINVAL;
1801
1802 prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
1803
9051aa02
DW
1804 if (sg)
1805 sg_res = 0;
1806 else {
1807 sg_res = nr_pages + 1;
1808 pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
1809 }
1810
6dd9a7c7 1811 while (nr_pages > 0) {
c85994e4
DW
1812 uint64_t tmp;
1813
e1605495 1814 if (!sg_res) {
f532959b 1815 sg_res = aligned_nrpages(sg->offset, sg->length);
e1605495
DW
1816 sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset;
1817 sg->dma_length = sg->length;
1818 pteval = page_to_phys(sg_page(sg)) | prot;
6dd9a7c7 1819 phys_pfn = pteval >> VTD_PAGE_SHIFT;
e1605495 1820 }
6dd9a7c7 1821
e1605495 1822 if (!pte) {
6dd9a7c7
YS
1823 largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res);
1824
1825 first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, largepage_lvl);
e1605495
DW
1826 if (!pte)
1827 return -ENOMEM;
6dd9a7c7 1828 /* It is a large page */
6491d4d0 1829 if (largepage_lvl > 1) {
6dd9a7c7 1830 pteval |= DMA_PTE_LARGE_PAGE;
6491d4d0
WD
1831 /* Ensure that old small page tables are removed to make room
1832 for superpage, if they exist. */
1833 dma_pte_clear_range(domain, iov_pfn,
1834 iov_pfn + lvl_to_nr_pages(largepage_lvl) - 1);
1835 dma_pte_free_pagetable(domain, iov_pfn,
1836 iov_pfn + lvl_to_nr_pages(largepage_lvl) - 1);
1837 } else {
6dd9a7c7 1838 pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
6491d4d0 1839 }
6dd9a7c7 1840
e1605495
DW
1841 }
1842 /* We don't need a lock here, nobody else
1843 * touches the iova range
1844 */
7766a3fb 1845 tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
c85994e4 1846 if (tmp) {
1bf20f0d 1847 static int dumps = 5;
c85994e4
DW
1848 printk(KERN_CRIT "ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
1849 iov_pfn, tmp, (unsigned long long)pteval);
1bf20f0d
DW
1850 if (dumps) {
1851 dumps--;
1852 debug_dma_dump_mappings(NULL);
1853 }
1854 WARN_ON(1);
1855 }
6dd9a7c7
YS
1856
1857 lvl_pages = lvl_to_nr_pages(largepage_lvl);
1858
1859 BUG_ON(nr_pages < lvl_pages);
1860 BUG_ON(sg_res < lvl_pages);
1861
1862 nr_pages -= lvl_pages;
1863 iov_pfn += lvl_pages;
1864 phys_pfn += lvl_pages;
1865 pteval += lvl_pages * VTD_PAGE_SIZE;
1866 sg_res -= lvl_pages;
1867
1868 /* If the next PTE would be the first in a new page, then we
1869 need to flush the cache on the entries we've just written.
1870 And then we'll need to recalculate 'pte', so clear it and
1871 let it get set again in the if (!pte) block above.
1872
1873 If we're done (!nr_pages) we need to flush the cache too.
1874
1875 Also if we've been setting superpages, we may need to
1876 recalculate 'pte' and switch back to smaller pages for the
1877 end of the mapping, if the trailing size is not enough to
1878 use another superpage (i.e. sg_res < lvl_pages). */
e1605495 1879 pte++;
6dd9a7c7
YS
1880 if (!nr_pages || first_pte_in_page(pte) ||
1881 (largepage_lvl > 1 && sg_res < lvl_pages)) {
e1605495
DW
1882 domain_flush_cache(domain, first_pte,
1883 (void *)pte - (void *)first_pte);
1884 pte = NULL;
1885 }
6dd9a7c7
YS
1886
1887 if (!sg_res && nr_pages)
e1605495
DW
1888 sg = sg_next(sg);
1889 }
1890 return 0;
1891}
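/*
 * Editor's illustrative note, not part of the original source: with
 * 4KiB pages a page of PTEs holds 512 entries, so when mapping, say,
 * 1000 small pages starting at the first slot of a PTE page, the loop
 * above writes 512 PTEs, sees first_pte_in_page(pte) become true,
 * flushes that PTE page via domain_flush_cache() and clears 'pte'; the
 * next iteration re-walks the page table for the remaining 488 pages,
 * and the final flush is triggered by !nr_pages.
 */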
1892
9051aa02
DW
1893static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1894 struct scatterlist *sg, unsigned long nr_pages,
1895 int prot)
ba395927 1896{
9051aa02
DW
1897 return __domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot);
1898}
6f6a00e4 1899
9051aa02
DW
1900static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1901 unsigned long phys_pfn, unsigned long nr_pages,
1902 int prot)
1903{
1904 return __domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot);
ba395927
KA
1905}
1906
c7151a8d 1907static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn)
ba395927 1908{
c7151a8d
WH
1909 if (!iommu)
1910 return;
8c11e798
WH
1911
1912 clear_context_table(iommu, bus, devfn);
1913 iommu->flush.flush_context(iommu, 0, 0, 0,
4c25a2c1 1914 DMA_CCMD_GLOBAL_INVL);
1f0ef2aa 1915 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
ba395927
KA
1916}
1917
109b9b04
DW
1918static inline void unlink_domain_info(struct device_domain_info *info)
1919{
1920 assert_spin_locked(&device_domain_lock);
1921 list_del(&info->link);
1922 list_del(&info->global);
1923 if (info->dev)
1924 info->dev->dev.archdata.iommu = NULL;
1925}
1926
ba395927
KA
1927static void domain_remove_dev_info(struct dmar_domain *domain)
1928{
1929 struct device_domain_info *info;
92d03cc8 1930 unsigned long flags, flags2;
c7151a8d 1931 struct intel_iommu *iommu;
ba395927
KA
1932
1933 spin_lock_irqsave(&device_domain_lock, flags);
1934 while (!list_empty(&domain->devices)) {
1935 info = list_entry(domain->devices.next,
1936 struct device_domain_info, link);
109b9b04 1937 unlink_domain_info(info);
ba395927
KA
1938 spin_unlock_irqrestore(&device_domain_lock, flags);
1939
93a23a72 1940 iommu_disable_dev_iotlb(info);
276dbf99 1941 iommu = device_to_iommu(info->segment, info->bus, info->devfn);
c7151a8d 1942 iommu_detach_dev(iommu, info->bus, info->devfn);
ba395927 1943
92d03cc8
JL
1944 if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) {
1945 iommu_detach_dependent_devices(iommu, info->dev);
1946 /* clear this iommu in iommu_bmp, update iommu count
1947 * and capabilities
1948 */
1949 spin_lock_irqsave(&domain->iommu_lock, flags2);
1950 if (test_and_clear_bit(iommu->seq_id,
1951 domain->iommu_bmp)) {
1952 domain->iommu_count--;
1953 domain_update_iommu_cap(domain);
1954 }
1955 spin_unlock_irqrestore(&domain->iommu_lock, flags2);
1956 }
1957
1958 free_devinfo_mem(info);
ba395927
KA
1959 spin_lock_irqsave(&device_domain_lock, flags);
1960 }
1961 spin_unlock_irqrestore(&device_domain_lock, flags);
1962}
1963
1964/*
1965 * find_domain
358dd8ac 1966 * Note: we use struct pci_dev->dev.archdata.iommu to store the info
ba395927 1967 */
38717946 1968static struct dmar_domain *
ba395927
KA
1969find_domain(struct pci_dev *pdev)
1970{
1971 struct device_domain_info *info;
1972
1973 /* No lock here, assumes no domain exit in normal case */
358dd8ac 1974 info = pdev->dev.archdata.iommu;
ba395927
KA
1975 if (info)
1976 return info->domain;
1977 return NULL;
1978}
1979
745f2586
JL
1980static inline struct dmar_domain *
1981dmar_search_domain_by_dev_info(int segment, int bus, int devfn)
1982{
1983 struct device_domain_info *info;
1984
1985 list_for_each_entry(info, &device_domain_list, global)
1986 if (info->segment == segment && info->bus == bus &&
1987 info->devfn == devfn)
1988 return info->domain;
1989
1990 return NULL;
1991}
1992
1993static int dmar_insert_dev_info(int segment, int bus, int devfn,
1994 struct pci_dev *dev, struct dmar_domain **domp)
1995{
1996 struct dmar_domain *found, *domain = *domp;
1997 struct device_domain_info *info;
1998 unsigned long flags;
1999
2000 info = alloc_devinfo_mem();
2001 if (!info)
2002 return -ENOMEM;
2003
2004 info->segment = segment;
2005 info->bus = bus;
2006 info->devfn = devfn;
2007 info->dev = dev;
2008 info->domain = domain;
2009 if (!dev)
2010 domain->flags |= DOMAIN_FLAG_P2P_MULTIPLE_DEVICES;
2011
2012 spin_lock_irqsave(&device_domain_lock, flags);
2013 if (dev)
2014 found = find_domain(dev);
2015 else
2016 found = dmar_search_domain_by_dev_info(segment, bus, devfn);
2017 if (found) {
2018 spin_unlock_irqrestore(&device_domain_lock, flags);
2019 free_devinfo_mem(info);
2020 if (found != domain) {
2021 domain_exit(domain);
2022 *domp = found;
2023 }
2024 } else {
2025 list_add(&info->link, &domain->devices);
2026 list_add(&info->global, &device_domain_list);
2027 if (dev)
2028 dev->dev.archdata.iommu = info;
2029 spin_unlock_irqrestore(&device_domain_lock, flags);
2030 }
2031
2032 return 0;
2033}
2034
ba395927
KA
2035/* domain is initialized */
2036static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
2037{
e85bb5d4 2038 struct dmar_domain *domain, *free = NULL;
ba395927
KA
2039 struct intel_iommu *iommu;
2040 struct dmar_drhd_unit *drhd;
ba395927
KA
2041 struct pci_dev *dev_tmp;
2042 unsigned long flags;
2043 int bus = 0, devfn = 0;
276dbf99 2044 int segment;
ba395927
KA
2045
2046 domain = find_domain(pdev);
2047 if (domain)
2048 return domain;
2049
276dbf99
DW
2050 segment = pci_domain_nr(pdev->bus);
2051
ba395927
KA
2052 dev_tmp = pci_find_upstream_pcie_bridge(pdev);
2053 if (dev_tmp) {
5f4d91a1 2054 if (pci_is_pcie(dev_tmp)) {
ba395927
KA
2055 bus = dev_tmp->subordinate->number;
2056 devfn = 0;
2057 } else {
2058 bus = dev_tmp->bus->number;
2059 devfn = dev_tmp->devfn;
2060 }
2061 spin_lock_irqsave(&device_domain_lock, flags);
745f2586 2062 domain = dmar_search_domain_by_dev_info(segment, bus, devfn);
ba395927
KA
2063 spin_unlock_irqrestore(&device_domain_lock, flags);
2064 /* pcie-pci bridge already has a domain, use it */
745f2586 2065 if (domain)
ba395927 2066 goto found_domain;
ba395927
KA
2067 }
2068
ba395927
KA
2069 drhd = dmar_find_matched_drhd_unit(pdev);
2070 if (!drhd) {
2071 printk(KERN_ERR "IOMMU: can't find DMAR for device %s\n",
2072 pci_name(pdev));
2073 return NULL;
2074 }
2075 iommu = drhd->iommu;
2076
745f2586 2077 /* Allocate and initialize a new domain for the device */
92d03cc8 2078 domain = alloc_domain(false);
745f2586
JL
2079 if (!domain)
2080 goto error;
2081 if (iommu_attach_domain(domain, iommu)) {
2fe9723d 2082 free_domain_mem(domain);
ba395927 2083 goto error;
2c2e2c38 2084 }
e85bb5d4
JL
2085 free = domain;
2086 if (domain_init(domain, gaw))
ba395927 2087 goto error;
ba395927
KA
2088
2089 /* register pcie-to-pci device */
2090 if (dev_tmp) {
e85bb5d4 2091 if (dmar_insert_dev_info(segment, bus, devfn, NULL, &domain))
ba395927 2092 goto error;
e85bb5d4
JL
2093 else
2094 free = NULL;
ba395927
KA
2095 }
2096
2097found_domain:
745f2586
JL
2098 if (dmar_insert_dev_info(segment, pdev->bus->number, pdev->devfn,
2099 pdev, &domain) == 0)
ba395927 2100 return domain;
ba395927 2101error:
e85bb5d4
JL
2102 if (free)
2103 domain_exit(free);
ba395927
KA
2104 /* recheck it here, maybe others set it */
2105 return find_domain(pdev);
2106}
2107
2c2e2c38 2108static int iommu_identity_mapping;
e0fc7e0b
DW
2109#define IDENTMAP_ALL 1
2110#define IDENTMAP_GFX 2
2111#define IDENTMAP_AZALIA 4
2c2e2c38 2112
b213203e
DW
2113static int iommu_domain_identity_map(struct dmar_domain *domain,
2114 unsigned long long start,
2115 unsigned long long end)
ba395927 2116{
c5395d5c
DW
2117 unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
2118 unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;
2119
2120 if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
2121 dma_to_mm_pfn(last_vpfn))) {
ba395927 2122 printk(KERN_ERR "IOMMU: reserve iova failed\n");
b213203e 2123 return -ENOMEM;
ba395927
KA
2124 }
2125
c5395d5c
DW
2126 pr_debug("Mapping reserved region %llx-%llx for domain %d\n",
2127 start, end, domain->id);
ba395927
KA
2128 /*
2129 * RMRR range might have overlap with physical memory range,
2130 * clear it first
2131 */
c5395d5c 2132 dma_pte_clear_range(domain, first_vpfn, last_vpfn);
ba395927 2133
c5395d5c
DW
2134 return domain_pfn_mapping(domain, first_vpfn, first_vpfn,
2135 last_vpfn - first_vpfn + 1,
61df7443 2136 DMA_PTE_READ|DMA_PTE_WRITE);
b213203e
DW
2137}
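/*
 * Editor's illustrative note, not part of the original source: for an
 * RMRR covering 0x1000000-0x1ffffff (16MiB-32MiB), first_vpfn is 0x1000
 * and last_vpfn is 0x1fff, so 0x1000 4KiB pages are reserved in the
 * domain's iova allocator and identity-mapped (vPFN == pPFN) with
 * read/write permission.
 */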
2138
2139static int iommu_prepare_identity_map(struct pci_dev *pdev,
2140 unsigned long long start,
2141 unsigned long long end)
2142{
2143 struct dmar_domain *domain;
2144 int ret;
2145
c7ab48d2 2146 domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
b213203e
DW
2147 if (!domain)
2148 return -ENOMEM;
2149
19943b0e
DW
2150 /* For _hardware_ passthrough, don't bother. But for software
2151 passthrough, we do it anyway -- it may indicate a memory
2152 range which is reserved in E820 and so didn't get set
2153 up to start with in si_domain */
2154 if (domain == si_domain && hw_pass_through) {
2155 printk("Ignoring identity map for HW passthrough device %s [0x%Lx - 0x%Lx]\n",
2156 pci_name(pdev), start, end);
2157 return 0;
2158 }
2159
2160 printk(KERN_INFO
2161 "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
2162 pci_name(pdev), start, end);
2ff729f5 2163
5595b528
DW
2164 if (end < start) {
2165 WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n"
2166 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2167 dmi_get_system_info(DMI_BIOS_VENDOR),
2168 dmi_get_system_info(DMI_BIOS_VERSION),
2169 dmi_get_system_info(DMI_PRODUCT_VERSION));
2170 ret = -EIO;
2171 goto error;
2172 }
2173
2ff729f5
DW
2174 if (end >> agaw_to_width(domain->agaw)) {
2175 WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n"
2176 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2177 agaw_to_width(domain->agaw),
2178 dmi_get_system_info(DMI_BIOS_VENDOR),
2179 dmi_get_system_info(DMI_BIOS_VERSION),
2180 dmi_get_system_info(DMI_PRODUCT_VERSION));
2181 ret = -EIO;
2182 goto error;
2183 }
19943b0e 2184
b213203e 2185 ret = iommu_domain_identity_map(domain, start, end);
ba395927
KA
2186 if (ret)
2187 goto error;
2188
2189 /* context entry init */
4ed0d3e6 2190 ret = domain_context_mapping(domain, pdev, CONTEXT_TT_MULTI_LEVEL);
b213203e
DW
2191 if (ret)
2192 goto error;
2193
2194 return 0;
2195
2196 error:
ba395927
KA
2197 domain_exit(domain);
2198 return ret;
ba395927
KA
2199}
2200
2201static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
2202 struct pci_dev *pdev)
2203{
358dd8ac 2204 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
ba395927
KA
2205 return 0;
2206 return iommu_prepare_identity_map(pdev, rmrr->base_address,
70e535d1 2207 rmrr->end_address);
ba395927
KA
2208}
2209
d3f13810 2210#ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA
49a0429e
KA
2211static inline void iommu_prepare_isa(void)
2212{
2213 struct pci_dev *pdev;
2214 int ret;
2215
2216 pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
2217 if (!pdev)
2218 return;
2219
c7ab48d2 2220 printk(KERN_INFO "IOMMU: Prepare 0-16MiB unity mapping for LPC\n");
70e535d1 2221 ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024 - 1);
49a0429e
KA
2222
2223 if (ret)
c7ab48d2
DW
2224 printk(KERN_ERR "IOMMU: Failed to create 0-16MiB identity map; "
2225 "floppy might not work\n");
49a0429e
KA
2226
2227}
2228#else
2229static inline void iommu_prepare_isa(void)
2230{
2231 return;
2232}
d3f13810 2233#endif /* !CONFIG_INTEL_IOMMU_FLOPPY_WA */
49a0429e 2234
2c2e2c38 2235static int md_domain_init(struct dmar_domain *domain, int guest_width);
c7ab48d2 2236
071e1374 2237static int __init si_domain_init(int hw)
2c2e2c38
FY
2238{
2239 struct dmar_drhd_unit *drhd;
2240 struct intel_iommu *iommu;
c7ab48d2 2241 int nid, ret = 0;
2c2e2c38 2242
92d03cc8 2243 si_domain = alloc_domain(false);
2c2e2c38
FY
2244 if (!si_domain)
2245 return -EFAULT;
2246
92d03cc8
JL
2247 si_domain->flags = DOMAIN_FLAG_STATIC_IDENTITY;
2248
2c2e2c38
FY
2249 for_each_active_iommu(iommu, drhd) {
2250 ret = iommu_attach_domain(si_domain, iommu);
2251 if (ret) {
2252 domain_exit(si_domain);
2253 return -EFAULT;
2254 }
2255 }
2256
2257 if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2258 domain_exit(si_domain);
2259 return -EFAULT;
2260 }
2261
9544c003
JL
2262 pr_debug("IOMMU: identity mapping domain is domain %d\n",
2263 si_domain->id);
2c2e2c38 2264
19943b0e
DW
2265 if (hw)
2266 return 0;
2267
c7ab48d2 2268 for_each_online_node(nid) {
5dfe8660
TH
2269 unsigned long start_pfn, end_pfn;
2270 int i;
2271
2272 for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
2273 ret = iommu_domain_identity_map(si_domain,
2274 PFN_PHYS(start_pfn), PFN_PHYS(end_pfn));
2275 if (ret)
2276 return ret;
2277 }
c7ab48d2
DW
2278 }
2279
2c2e2c38
FY
2280 return 0;
2281}
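/*
 * Editor's illustrative note, not part of the original source: in the
 * software-passthrough case the loop above walks every present memory
 * range of every online node and identity-maps it into si_domain, so a
 * range of PFNs 0x10000-0x20000 ends up with DMA addresses
 * 0x10000000-0x20000000 resolving to the identical physical addresses.
 */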
2282
2c2e2c38
FY
2283static int identity_mapping(struct pci_dev *pdev)
2284{
2285 struct device_domain_info *info;
2286
2287 if (likely(!iommu_identity_mapping))
2288 return 0;
2289
cb452a40
MT
2290 info = pdev->dev.archdata.iommu;
2291 if (info && info != DUMMY_DEVICE_DOMAIN_INFO)
2292 return (info->domain == si_domain);
2c2e2c38 2293
2c2e2c38
FY
2294 return 0;
2295}
2296
2297static int domain_add_dev_info(struct dmar_domain *domain,
5fe60f4e
DW
2298 struct pci_dev *pdev,
2299 int translation)
2c2e2c38
FY
2300{
2301 struct device_domain_info *info;
2302 unsigned long flags;
5fe60f4e 2303 int ret;
2c2e2c38
FY
2304
2305 info = alloc_devinfo_mem();
2306 if (!info)
2307 return -ENOMEM;
2308
2309 info->segment = pci_domain_nr(pdev->bus);
2310 info->bus = pdev->bus->number;
2311 info->devfn = pdev->devfn;
2312 info->dev = pdev;
2313 info->domain = domain;
2314
2315 spin_lock_irqsave(&device_domain_lock, flags);
2316 list_add(&info->link, &domain->devices);
2317 list_add(&info->global, &device_domain_list);
2318 pdev->dev.archdata.iommu = info;
2319 spin_unlock_irqrestore(&device_domain_lock, flags);
2320
e2ad23d0
DW
2321 ret = domain_context_mapping(domain, pdev, translation);
2322 if (ret) {
2323 spin_lock_irqsave(&device_domain_lock, flags);
109b9b04 2324 unlink_domain_info(info);
e2ad23d0
DW
2325 spin_unlock_irqrestore(&device_domain_lock, flags);
2326 free_devinfo_mem(info);
2327 return ret;
2328 }
2329
2c2e2c38
FY
2330 return 0;
2331}
2332
ea2447f7
TM
2333static bool device_has_rmrr(struct pci_dev *dev)
2334{
2335 struct dmar_rmrr_unit *rmrr;
2336 int i;
2337
2338 for_each_rmrr_units(rmrr) {
2339 for (i = 0; i < rmrr->devices_cnt; i++) {
2340 /*
2341 * Return TRUE if this RMRR contains the device that
2342 * is passed in.
2343 */
2344 if (rmrr->devices[i] == dev)
2345 return true;
2346 }
2347 }
2348 return false;
2349}
2350
6941af28
DW
2351static int iommu_should_identity_map(struct pci_dev *pdev, int startup)
2352{
ea2447f7
TM
2353
2354 /*
2355 * We want to prevent any device associated with an RMRR from
2356 * getting placed into the SI Domain. This is done because
2357 * problems exist when devices are moved in and out of domains
2358 * and their respective RMRR info is lost. We exempt USB devices
2359 * from this process due to their usage of RMRRs that are known
2360 * to not be needed after BIOS hand-off to OS.
2361 */
2362 if (device_has_rmrr(pdev) &&
2363 (pdev->class >> 8) != PCI_CLASS_SERIAL_USB)
2364 return 0;
2365
e0fc7e0b
DW
2366 if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
2367 return 1;
2368
2369 if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
2370 return 1;
2371
2372 if (!(iommu_identity_mapping & IDENTMAP_ALL))
2373 return 0;
6941af28 2374
3dfc813d
DW
2375 /*
2376 * We want to start off with all devices in the 1:1 domain, and
2377 * take them out later if we find they can't access all of memory.
2378 *
2379 * However, we can't do this for PCI devices behind bridges,
2380 * because all PCI devices behind the same bridge will end up
2381 * with the same source-id on their transactions.
2382 *
2383 * Practically speaking, we can't change things around for these
2384 * devices at run-time, because we can't be sure there'll be no
2385 * DMA transactions in flight for any of their siblings.
2386 *
2387 * So PCI devices (unless they're on the root bus) as well as
2388 * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of
2389 * the 1:1 domain, just in _case_ one of their siblings turns out
2390 * not to be able to map all of memory.
2391 */
5f4d91a1 2392 if (!pci_is_pcie(pdev)) {
3dfc813d
DW
2393 if (!pci_is_root_bus(pdev->bus))
2394 return 0;
2395 if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI)
2396 return 0;
62f87c0e 2397 } else if (pci_pcie_type(pdev) == PCI_EXP_TYPE_PCI_BRIDGE)
3dfc813d
DW
2398 return 0;
2399
2400 /*
2401 * At boot time, we don't yet know if devices will be 64-bit capable.
2402 * Assume that they will -- if they turn out not to be, then we can
2403 * take them out of the 1:1 domain later.
2404 */
8fcc5372
CW
2405 if (!startup) {
2406 /*
2407 * If the device's dma_mask is less than the system's memory
2408 * size then this is not a candidate for identity mapping.
2409 */
2410 u64 dma_mask = pdev->dma_mask;
2411
2412 if (pdev->dev.coherent_dma_mask &&
2413 pdev->dev.coherent_dma_mask < dma_mask)
2414 dma_mask = pdev->dev.coherent_dma_mask;
2415
2416 return dma_mask >= dma_get_required_mask(&pdev->dev);
2417 }
6941af28
DW
2418
2419 return 1;
2420}
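/*
 * Editor's illustrative note, not part of the original source: a PCIe
 * device limited to a 32-bit dma_mask on a machine with more than 4GiB
 * of RAM fails the dma_get_required_mask() comparison in the !startup
 * path above, so it is kept out of (or later dropped from) the identity
 * domain and gets ordinary IOVA translation instead.
 */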
2421
071e1374 2422static int __init iommu_prepare_static_identity_mapping(int hw)
2c2e2c38 2423{
2c2e2c38
FY
2424 struct pci_dev *pdev = NULL;
2425 int ret;
2426
19943b0e 2427 ret = si_domain_init(hw);
2c2e2c38
FY
2428 if (ret)
2429 return -EFAULT;
2430
2c2e2c38 2431 for_each_pci_dev(pdev) {
6941af28 2432 if (iommu_should_identity_map(pdev, 1)) {
5fe60f4e 2433 ret = domain_add_dev_info(si_domain, pdev,
eae460b6
MT
2434 hw ? CONTEXT_TT_PASS_THROUGH :
2435 CONTEXT_TT_MULTI_LEVEL);
2436 if (ret) {
2437 /* device not associated with an iommu */
2438 if (ret == -ENODEV)
2439 continue;
62edf5dc 2440 return ret;
eae460b6
MT
2441 }
2442 pr_info("IOMMU: %s identity mapping for device %s\n",
2443 hw ? "hardware" : "software", pci_name(pdev));
62edf5dc 2444 }
2c2e2c38
FY
2445 }
2446
2447 return 0;
2448}
2449
b779260b 2450static int __init init_dmars(void)
ba395927
KA
2451{
2452 struct dmar_drhd_unit *drhd;
2453 struct dmar_rmrr_unit *rmrr;
2454 struct pci_dev *pdev;
2455 struct intel_iommu *iommu;
9d783ba0 2456 int i, ret;
2c2e2c38 2457
ba395927
KA
2458 /*
2459 * for each drhd
2460 * allocate root
2461 * initialize and program root entry to not present
2462 * endfor
2463 */
2464 for_each_drhd_unit(drhd) {
5e0d2a6f 2465 /*
2466 * lock not needed as this is only incremented in the single
2467 * threaded kernel __init code path all other access are read
2468 * only
2469 */
1b198bb0
MT
2470 if (g_num_of_iommus < IOMMU_UNITS_SUPPORTED) {
2471 g_num_of_iommus++;
2472 continue;
2473 }
2474 printk_once(KERN_ERR "intel-iommu: exceeded %d IOMMUs\n",
2475 IOMMU_UNITS_SUPPORTED);
5e0d2a6f 2476 }
2477
d9630fe9
WH
2478 g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
2479 GFP_KERNEL);
2480 if (!g_iommus) {
2481 printk(KERN_ERR "Allocating global iommu array failed\n");
2482 ret = -ENOMEM;
2483 goto error;
2484 }
2485
80b20dd8 2486 deferred_flush = kzalloc(g_num_of_iommus *
2487 sizeof(struct deferred_flush_tables), GFP_KERNEL);
2488 if (!deferred_flush) {
5e0d2a6f 2489 ret = -ENOMEM;
989d51fc 2490 goto free_g_iommus;
5e0d2a6f 2491 }
2492
7c919779 2493 for_each_active_iommu(iommu, drhd) {
d9630fe9 2494 g_iommus[iommu->seq_id] = iommu;
ba395927 2495
e61d98d8
SS
2496 ret = iommu_init_domains(iommu);
2497 if (ret)
989d51fc 2498 goto free_iommu;
e61d98d8 2499
ba395927
KA
2500 /*
2501 * TBD:
2502 * we could share the same root & context tables
25985edc 2503 * among all IOMMUs. Need to split it later.
ba395927
KA
2504 */
2505 ret = iommu_alloc_root_entry(iommu);
2506 if (ret) {
2507 printk(KERN_ERR "IOMMU: allocate root entry failed\n");
989d51fc 2508 goto free_iommu;
ba395927 2509 }
4ed0d3e6 2510 if (!ecap_pass_through(iommu->ecap))
19943b0e 2511 hw_pass_through = 0;
ba395927
KA
2512 }
2513
1531a6a6
SS
2514 /*
2515 * Start from a sane iommu hardware state.
2516 */
7c919779 2517 for_each_active_iommu(iommu, drhd) {
1531a6a6
SS
2518 /*
2519 * If the queued invalidation is already initialized by us
2520 * (for example, while enabling interrupt-remapping) then
2521 * we got the things already rolling from a sane state.
2522 */
2523 if (iommu->qi)
2524 continue;
2525
2526 /*
2527 * Clear any previous faults.
2528 */
2529 dmar_fault(-1, iommu);
2530 /*
2531 * Disable queued invalidation if supported and already enabled
2532 * before OS handover.
2533 */
2534 dmar_disable_qi(iommu);
2535 }
2536
7c919779 2537 for_each_active_iommu(iommu, drhd) {
a77b67d4
YS
2538 if (dmar_enable_qi(iommu)) {
2539 /*
2540 * Queued Invalidate not enabled, use Register Based
2541 * Invalidate
2542 */
2543 iommu->flush.flush_context = __iommu_flush_context;
2544 iommu->flush.flush_iotlb = __iommu_flush_iotlb;
680a7524 2545 printk(KERN_INFO "IOMMU %d 0x%Lx: using Register based "
b4e0f9eb 2546 "invalidation\n",
680a7524 2547 iommu->seq_id,
b4e0f9eb 2548 (unsigned long long)drhd->reg_base_addr);
a77b67d4
YS
2549 } else {
2550 iommu->flush.flush_context = qi_flush_context;
2551 iommu->flush.flush_iotlb = qi_flush_iotlb;
680a7524 2552 printk(KERN_INFO "IOMMU %d 0x%Lx: using Queued "
b4e0f9eb 2553 "invalidation\n",
680a7524 2554 iommu->seq_id,
b4e0f9eb 2555 (unsigned long long)drhd->reg_base_addr);
a77b67d4
YS
2556 }
2557 }
2558
19943b0e 2559 if (iommu_pass_through)
e0fc7e0b
DW
2560 iommu_identity_mapping |= IDENTMAP_ALL;
2561
d3f13810 2562#ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA
e0fc7e0b 2563 iommu_identity_mapping |= IDENTMAP_GFX;
19943b0e 2564#endif
e0fc7e0b
DW
2565
2566 check_tylersburg_isoch();
2567
ba395927 2568 /*
19943b0e
DW
2569 * If pass through is not set or not enabled, setup context entries for
2570 * identity mappings for rmrr, gfx, and isa and may fall back to static
2571 * identity mapping if iommu_identity_mapping is set.
ba395927 2572 */
19943b0e
DW
2573 if (iommu_identity_mapping) {
2574 ret = iommu_prepare_static_identity_mapping(hw_pass_through);
4ed0d3e6 2575 if (ret) {
19943b0e 2576 printk(KERN_CRIT "Failed to setup IOMMU pass-through\n");
989d51fc 2577 goto free_iommu;
ba395927
KA
2578 }
2579 }
ba395927 2580 /*
19943b0e
DW
2581 * For each rmrr
2582 * for each dev attached to rmrr
2583 * do
2584 * locate drhd for dev, alloc domain for dev
2585 * allocate free domain
2586 * allocate page table entries for rmrr
2587 * if context not allocated for bus
2588 * allocate and init context
2589 * set present in root table for this bus
2590 * init context with domain, translation etc
2591 * endfor
2592 * endfor
ba395927 2593 */
19943b0e
DW
2594 printk(KERN_INFO "IOMMU: Setting RMRR:\n");
2595 for_each_rmrr_units(rmrr) {
2596 for (i = 0; i < rmrr->devices_cnt; i++) {
2597 pdev = rmrr->devices[i];
2598 /*
2599 * some BIOSes list non-existent devices in the DMAR
2600 * table.
2601 */
2602 if (!pdev)
2603 continue;
2604 ret = iommu_prepare_rmrr_dev(rmrr, pdev);
2605 if (ret)
2606 printk(KERN_ERR
2607 "IOMMU: mapping reserved region failed\n");
ba395927 2608 }
4ed0d3e6 2609 }
49a0429e 2610
19943b0e
DW
2611 iommu_prepare_isa();
2612
ba395927
KA
2613 /*
2614 * for each drhd
2615 * enable fault log
2616 * global invalidate context cache
2617 * global invalidate iotlb
2618 * enable translation
2619 */
7c919779 2620 for_each_iommu(iommu, drhd) {
51a63e67
JC
2621 if (drhd->ignored) {
2622 /*
2623 * we always have to disable PMRs or DMA may fail on
2624 * this device
2625 */
2626 if (force_on)
7c919779 2627 iommu_disable_protect_mem_regions(iommu);
ba395927 2628 continue;
51a63e67 2629 }
ba395927
KA
2630
2631 iommu_flush_write_buffer(iommu);
2632
3460a6d9
KA
2633 ret = dmar_set_interrupt(iommu);
2634 if (ret)
989d51fc 2635 goto free_iommu;
3460a6d9 2636
ba395927
KA
2637 iommu_set_root_entry(iommu);
2638
4c25a2c1 2639 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
1f0ef2aa 2640 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
f8bab735 2641
ba395927
KA
2642 ret = iommu_enable_translation(iommu);
2643 if (ret)
989d51fc 2644 goto free_iommu;
b94996c9
DW
2645
2646 iommu_disable_protect_mem_regions(iommu);
ba395927
KA
2647 }
2648
2649 return 0;
989d51fc
JL
2650
2651free_iommu:
7c919779 2652 for_each_active_iommu(iommu, drhd)
a868e6b7 2653 free_dmar_iommu(iommu);
9bdc531e 2654 kfree(deferred_flush);
989d51fc 2655free_g_iommus:
d9630fe9 2656 kfree(g_iommus);
989d51fc 2657error:
ba395927
KA
2658 return ret;
2659}
2660
5a5e02a6 2661/* This takes a number of _MM_ pages, not VTD pages */
875764de
DW
2662static struct iova *intel_alloc_iova(struct device *dev,
2663 struct dmar_domain *domain,
2664 unsigned long nrpages, uint64_t dma_mask)
ba395927 2665{
ba395927 2666 struct pci_dev *pdev = to_pci_dev(dev);
ba395927 2667 struct iova *iova = NULL;
ba395927 2668
875764de
DW
2669 /* Restrict dma_mask to the width that the iommu can handle */
2670 dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask);
2671
2672 if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) {
ba395927
KA
2673 /*
2674 * First try to allocate an io virtual address in
284901a9 2675 * DMA_BIT_MASK(32) and if that fails then try allocating
3609801e 2676 * from a higher range
ba395927 2677 */
875764de
DW
2678 iova = alloc_iova(&domain->iovad, nrpages,
2679 IOVA_PFN(DMA_BIT_MASK(32)), 1);
2680 if (iova)
2681 return iova;
2682 }
2683 iova = alloc_iova(&domain->iovad, nrpages, IOVA_PFN(dma_mask), 1);
2684 if (unlikely(!iova)) {
2685 printk(KERN_ERR "Allocating %ld-page iova for %s failed\n",
2686 nrpages, pci_name(pdev));
f76aec76
KA
2687 return NULL;
2688 }
2689
2690 return iova;
2691}
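/*
 * Editor's illustrative note, not part of the original source: unless
 * dmar_forcedac is set, a device advertising a 64-bit dma_mask first
 * gets an allocation attempt below 4GiB; only if that range is
 * exhausted does the allocator fall back to the full mask, which avoids
 * dual-address-cycle (DAC) transactions on conventional PCI where
 * possible.
 */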
2692
147202aa 2693static struct dmar_domain *__get_valid_domain_for_dev(struct pci_dev *pdev)
f76aec76
KA
2694{
2695 struct dmar_domain *domain;
2696 int ret;
2697
2698 domain = get_domain_for_dev(pdev,
2699 DEFAULT_DOMAIN_ADDRESS_WIDTH);
2700 if (!domain) {
2701 printk(KERN_ERR
2702 "Allocating domain for %s failed", pci_name(pdev));
4fe05bbc 2703 return NULL;
ba395927
KA
2704 }
2705
2706 /* make sure context mapping is ok */
5331fe6f 2707 if (unlikely(!domain_context_mapped(pdev))) {
4ed0d3e6
FY
2708 ret = domain_context_mapping(domain, pdev,
2709 CONTEXT_TT_MULTI_LEVEL);
f76aec76
KA
2710 if (ret) {
2711 printk(KERN_ERR
2712 "Domain context map for %s failed",
2713 pci_name(pdev));
4fe05bbc 2714 return NULL;
f76aec76 2715 }
ba395927
KA
2716 }
2717
f76aec76
KA
2718 return domain;
2719}
2720
147202aa
DW
2721static inline struct dmar_domain *get_valid_domain_for_dev(struct pci_dev *dev)
2722{
2723 struct device_domain_info *info;
2724
2725 /* No lock here, assumes no domain exit in normal case */
2726 info = dev->dev.archdata.iommu;
2727 if (likely(info))
2728 return info->domain;
2729
2730 return __get_valid_domain_for_dev(dev);
2731}
2732
2c2e2c38
FY
2733static int iommu_dummy(struct pci_dev *pdev)
2734{
2735 return pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
2736}
2737
2738/* Check if the pdev needs to go through non-identity map and unmap process.*/
73676832 2739static int iommu_no_mapping(struct device *dev)
2c2e2c38 2740{
73676832 2741 struct pci_dev *pdev;
2c2e2c38
FY
2742 int found;
2743
dbad0864 2744 if (unlikely(!dev_is_pci(dev)))
73676832
DW
2745 return 1;
2746
2747 pdev = to_pci_dev(dev);
1e4c64c4
DW
2748 if (iommu_dummy(pdev))
2749 return 1;
2750
2c2e2c38 2751 if (!iommu_identity_mapping)
1e4c64c4 2752 return 0;
2c2e2c38
FY
2753
2754 found = identity_mapping(pdev);
2755 if (found) {
6941af28 2756 if (iommu_should_identity_map(pdev, 0))
2c2e2c38
FY
2757 return 1;
2758 else {
2759 /*
2760 * 32 bit DMA is removed from si_domain and fall back
2761 * to non-identity mapping.
2762 */
2763 domain_remove_one_dev_info(si_domain, pdev);
2764 printk(KERN_INFO "32bit %s uses non-identity mapping\n",
2765 pci_name(pdev));
2766 return 0;
2767 }
2768 } else {
2769 /*
2770 * In case of a detached 64 bit DMA device from vm, the device
2771 * is put into si_domain for identity mapping.
2772 */
6941af28 2773 if (iommu_should_identity_map(pdev, 0)) {
2c2e2c38 2774 int ret;
5fe60f4e
DW
2775 ret = domain_add_dev_info(si_domain, pdev,
2776 hw_pass_through ?
2777 CONTEXT_TT_PASS_THROUGH :
2778 CONTEXT_TT_MULTI_LEVEL);
2c2e2c38
FY
2779 if (!ret) {
2780 printk(KERN_INFO "64bit %s uses identity mapping\n",
2781 pci_name(pdev));
2782 return 1;
2783 }
2784 }
2785 }
2786
1e4c64c4 2787 return 0;
2c2e2c38
FY
2788}
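/*
 * Editor's illustrative note, not part of the original source: this
 * check runs on every map/unmap, so the identity-mapping decision can
 * change at run time: a device whose effective dma_mask turns out to be
 * 32-bit fails iommu_should_identity_map() here and is moved out of
 * si_domain, while a 64-bit-capable device detached from a VM domain is
 * moved back in.
 */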
2789
bb9e6d65
FT
2790static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
2791 size_t size, int dir, u64 dma_mask)
f76aec76
KA
2792{
2793 struct pci_dev *pdev = to_pci_dev(hwdev);
f76aec76 2794 struct dmar_domain *domain;
5b6985ce 2795 phys_addr_t start_paddr;
f76aec76
KA
2796 struct iova *iova;
2797 int prot = 0;
6865f0d1 2798 int ret;
8c11e798 2799 struct intel_iommu *iommu;
33041ec0 2800 unsigned long paddr_pfn = paddr >> PAGE_SHIFT;
f76aec76
KA
2801
2802 BUG_ON(dir == DMA_NONE);
2c2e2c38 2803
73676832 2804 if (iommu_no_mapping(hwdev))
6865f0d1 2805 return paddr;
f76aec76
KA
2806
2807 domain = get_valid_domain_for_dev(pdev);
2808 if (!domain)
2809 return 0;
2810
8c11e798 2811 iommu = domain_get_iommu(domain);
88cb6a74 2812 size = aligned_nrpages(paddr, size);
f76aec76 2813
c681d0ba 2814 iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size), dma_mask);
f76aec76
KA
2815 if (!iova)
2816 goto error;
2817
ba395927
KA
2818 /*
2819 * Check if DMAR supports zero-length reads on write only
2820 * mappings..
2821 */
2822 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
8c11e798 2823 !cap_zlr(iommu->cap))
ba395927
KA
2824 prot |= DMA_PTE_READ;
2825 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
2826 prot |= DMA_PTE_WRITE;
2827 /*
6865f0d1 2828 * paddr - (paddr + size) might be partial page, we should map the whole
ba395927 2829 * page. Note: if two part of one page are separately mapped, we
6865f0d1 2830 * might have two guest_addr mapping to the same host paddr, but this
ba395927
KA
2831 * is not a big problem
2832 */
0ab36de2 2833 ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova->pfn_lo),
33041ec0 2834 mm_to_dma_pfn(paddr_pfn), size, prot);
ba395927
KA
2835 if (ret)
2836 goto error;
2837
1f0ef2aa
DW
2838 /* it's a non-present to present mapping. Only flush if caching mode */
2839 if (cap_caching_mode(iommu->cap))
82653633 2840 iommu_flush_iotlb_psi(iommu, domain->id, mm_to_dma_pfn(iova->pfn_lo), size, 1);
1f0ef2aa 2841 else
8c11e798 2842 iommu_flush_write_buffer(iommu);
f76aec76 2843
03d6a246
DW
2844 start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
2845 start_paddr += paddr & ~PAGE_MASK;
2846 return start_paddr;
ba395927 2847
ba395927 2848error:
f76aec76
KA
2849 if (iova)
2850 __free_iova(&domain->iovad, iova);
4cf2e75d 2851 printk(KERN_ERR"Device %s request: %zx@%llx dir %d --- failed\n",
5b6985ce 2852 pci_name(pdev), size, (unsigned long long)paddr, dir);
ba395927
KA
2853 return 0;
2854}
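/*
 * Editor's illustrative note, not part of the original source: mapping
 * paddr 0x12345678 with size 0x100 needs one 4KiB page; if the iova
 * allocator returns pfn_lo == 0xffffe, the handle given back to the
 * driver is (0xffffe << 12) + 0x678 = 0xffffe678, i.e. the page-aligned
 * IOVA plus the original offset within the page.
 */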
2855
ffbbef5c
FT
2856static dma_addr_t intel_map_page(struct device *dev, struct page *page,
2857 unsigned long offset, size_t size,
2858 enum dma_data_direction dir,
2859 struct dma_attrs *attrs)
bb9e6d65 2860{
ffbbef5c
FT
2861 return __intel_map_single(dev, page_to_phys(page) + offset, size,
2862 dir, to_pci_dev(dev)->dma_mask);
bb9e6d65
FT
2863}
2864
5e0d2a6f 2865static void flush_unmaps(void)
2866{
80b20dd8 2867 int i, j;
5e0d2a6f 2868
5e0d2a6f 2869 timer_on = 0;
2870
2871 /* just flush them all */
2872 for (i = 0; i < g_num_of_iommus; i++) {
a2bb8459
WH
2873 struct intel_iommu *iommu = g_iommus[i];
2874 if (!iommu)
2875 continue;
c42d9f32 2876
9dd2fe89
YZ
2877 if (!deferred_flush[i].next)
2878 continue;
2879
78d5f0f5
NA
2880 /* In caching mode, global flushes make emulation expensive */
2881 if (!cap_caching_mode(iommu->cap))
2882 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
93a23a72 2883 DMA_TLB_GLOBAL_FLUSH);
9dd2fe89 2884 for (j = 0; j < deferred_flush[i].next; j++) {
93a23a72
YZ
2885 unsigned long mask;
2886 struct iova *iova = deferred_flush[i].iova[j];
78d5f0f5
NA
2887 struct dmar_domain *domain = deferred_flush[i].domain[j];
2888
2889 /* On real hardware multiple invalidations are expensive */
2890 if (cap_caching_mode(iommu->cap))
2891 iommu_flush_iotlb_psi(iommu, domain->id,
2892 iova->pfn_lo, iova->pfn_hi - iova->pfn_lo + 1, 0);
2893 else {
2894 mask = ilog2(mm_to_dma_pfn(iova->pfn_hi - iova->pfn_lo + 1));
2895 iommu_flush_dev_iotlb(deferred_flush[i].domain[j],
2896 (uint64_t)iova->pfn_lo << PAGE_SHIFT, mask);
2897 }
93a23a72 2898 __free_iova(&deferred_flush[i].domain[j]->iovad, iova);
80b20dd8 2899 }
9dd2fe89 2900 deferred_flush[i].next = 0;
5e0d2a6f 2901 }
2902
5e0d2a6f 2903 list_size = 0;
5e0d2a6f 2904}
2905
2906static void flush_unmaps_timeout(unsigned long data)
2907{
80b20dd8 2908 unsigned long flags;
2909
2910 spin_lock_irqsave(&async_umap_flush_lock, flags);
5e0d2a6f 2911 flush_unmaps();
80b20dd8 2912 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
5e0d2a6f 2913}
2914
2915static void add_unmap(struct dmar_domain *dom, struct iova *iova)
2916{
2917 unsigned long flags;
80b20dd8 2918 int next, iommu_id;
8c11e798 2919 struct intel_iommu *iommu;
5e0d2a6f 2920
2921 spin_lock_irqsave(&async_umap_flush_lock, flags);
80b20dd8 2922 if (list_size == HIGH_WATER_MARK)
2923 flush_unmaps();
2924
8c11e798
WH
2925 iommu = domain_get_iommu(dom);
2926 iommu_id = iommu->seq_id;
c42d9f32 2927
80b20dd8 2928 next = deferred_flush[iommu_id].next;
2929 deferred_flush[iommu_id].domain[next] = dom;
2930 deferred_flush[iommu_id].iova[next] = iova;
2931 deferred_flush[iommu_id].next++;
5e0d2a6f 2932
2933 if (!timer_on) {
2934 mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10));
2935 timer_on = 1;
2936 }
2937 list_size++;
2938 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
2939}
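/*
 * Editor's illustrative note, not part of the original source: unmaps
 * are batched per IOMMU in deferred_flush[] and drained either by the
 * 10ms timer armed above or as soon as list_size reaches
 * HIGH_WATER_MARK, so a burst of, say, 100 network-buffer unmaps
 * typically costs a single IOTLB flush per IOMMU (in the non-caching
 * case) instead of 100 separate flushes.
 */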
2940
ffbbef5c
FT
2941static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
2942 size_t size, enum dma_data_direction dir,
2943 struct dma_attrs *attrs)
ba395927 2944{
ba395927 2945 struct pci_dev *pdev = to_pci_dev(dev);
f76aec76 2946 struct dmar_domain *domain;
d794dc9b 2947 unsigned long start_pfn, last_pfn;
ba395927 2948 struct iova *iova;
8c11e798 2949 struct intel_iommu *iommu;
ba395927 2950
73676832 2951 if (iommu_no_mapping(dev))
f76aec76 2952 return;
2c2e2c38 2953
ba395927
KA
2954 domain = find_domain(pdev);
2955 BUG_ON(!domain);
2956
8c11e798
WH
2957 iommu = domain_get_iommu(domain);
2958
ba395927 2959 iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
85b98276
DW
2960 if (WARN_ONCE(!iova, "Driver unmaps unmatched page at PFN %llx\n",
2961 (unsigned long long)dev_addr))
ba395927 2962 return;
ba395927 2963
d794dc9b
DW
2964 start_pfn = mm_to_dma_pfn(iova->pfn_lo);
2965 last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
ba395927 2966
d794dc9b
DW
2967 pr_debug("Device %s unmapping: pfn %lx-%lx\n",
2968 pci_name(pdev), start_pfn, last_pfn);
ba395927 2969
f76aec76 2970 /* clear the whole page */
d794dc9b
DW
2971 dma_pte_clear_range(domain, start_pfn, last_pfn);
2972
f76aec76 2973 /* free page tables */
d794dc9b
DW
2974 dma_pte_free_pagetable(domain, start_pfn, last_pfn);
2975
5e0d2a6f 2976 if (intel_iommu_strict) {
03d6a246 2977 iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
82653633 2978 last_pfn - start_pfn + 1, 0);
5e0d2a6f 2979 /* free iova */
2980 __free_iova(&domain->iovad, iova);
2981 } else {
2982 add_unmap(domain, iova);
2983 /*
2984 * queue up the release of the unmap to save the roughly 1/6th of
2985 * the CPU time used up by the iotlb flush operation...
2986 */
5e0d2a6f 2987 }
ba395927
KA
2988}
2989
d7ab5c46 2990static void *intel_alloc_coherent(struct device *hwdev, size_t size,
baa676fc
AP
2991 dma_addr_t *dma_handle, gfp_t flags,
2992 struct dma_attrs *attrs)
ba395927
KA
2993{
2994 void *vaddr;
2995 int order;
2996
5b6985ce 2997 size = PAGE_ALIGN(size);
ba395927 2998 order = get_order(size);
e8bb910d
AW
2999
3000 if (!iommu_no_mapping(hwdev))
3001 flags &= ~(GFP_DMA | GFP_DMA32);
3002 else if (hwdev->coherent_dma_mask < dma_get_required_mask(hwdev)) {
3003 if (hwdev->coherent_dma_mask < DMA_BIT_MASK(32))
3004 flags |= GFP_DMA;
3005 else
3006 flags |= GFP_DMA32;
3007 }
ba395927
KA
3008
3009 vaddr = (void *)__get_free_pages(flags, order);
3010 if (!vaddr)
3011 return NULL;
3012 memset(vaddr, 0, size);
3013
bb9e6d65
FT
3014 *dma_handle = __intel_map_single(hwdev, virt_to_bus(vaddr), size,
3015 DMA_BIDIRECTIONAL,
3016 hwdev->coherent_dma_mask);
ba395927
KA
3017 if (*dma_handle)
3018 return vaddr;
3019 free_pages((unsigned long)vaddr, order);
3020 return NULL;
3021}
3022
d7ab5c46 3023static void intel_free_coherent(struct device *hwdev, size_t size, void *vaddr,
baa676fc 3024 dma_addr_t dma_handle, struct dma_attrs *attrs)
ba395927
KA
3025{
3026 int order;
3027
5b6985ce 3028 size = PAGE_ALIGN(size);
ba395927
KA
3029 order = get_order(size);
3030
0db9b7ae 3031 intel_unmap_page(hwdev, dma_handle, size, DMA_BIDIRECTIONAL, NULL);
ba395927
KA
3032 free_pages((unsigned long)vaddr, order);
3033}
3034
d7ab5c46
FT
3035static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
3036 int nelems, enum dma_data_direction dir,
3037 struct dma_attrs *attrs)
ba395927 3038{
ba395927
KA
3039 struct pci_dev *pdev = to_pci_dev(hwdev);
3040 struct dmar_domain *domain;
d794dc9b 3041 unsigned long start_pfn, last_pfn;
f76aec76 3042 struct iova *iova;
8c11e798 3043 struct intel_iommu *iommu;
ba395927 3044
73676832 3045 if (iommu_no_mapping(hwdev))
ba395927
KA
3046 return;
3047
3048 domain = find_domain(pdev);
8c11e798
WH
3049 BUG_ON(!domain);
3050
3051 iommu = domain_get_iommu(domain);
ba395927 3052
c03ab37c 3053 iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address));
85b98276
DW
3054 if (WARN_ONCE(!iova, "Driver unmaps unmatched sglist at PFN %llx\n",
3055 (unsigned long long)sglist[0].dma_address))
f76aec76 3056 return;
f76aec76 3057
d794dc9b
DW
3058 start_pfn = mm_to_dma_pfn(iova->pfn_lo);
3059 last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
f76aec76
KA
3060
3061 /* clear the whole page */
d794dc9b
DW
3062 dma_pte_clear_range(domain, start_pfn, last_pfn);
3063
f76aec76 3064 /* free page tables */
d794dc9b 3065 dma_pte_free_pagetable(domain, start_pfn, last_pfn);
f76aec76 3066
acea0018
DW
3067 if (intel_iommu_strict) {
3068 iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
82653633 3069 last_pfn - start_pfn + 1, 0);
acea0018
DW
3070 /* free iova */
3071 __free_iova(&domain->iovad, iova);
3072 } else {
3073 add_unmap(domain, iova);
3074 /*
3075 * queue up the release of the unmap to save the roughly 1/6th of
3076 * the CPU time used up by the iotlb flush operation...
3077 */
3078 }
ba395927
KA
3079}
3080
ba395927 3081static int intel_nontranslate_map_sg(struct device *hddev,
c03ab37c 3082 struct scatterlist *sglist, int nelems, int dir)
ba395927
KA
3083{
3084 int i;
c03ab37c 3085 struct scatterlist *sg;
ba395927 3086
c03ab37c 3087 for_each_sg(sglist, sg, nelems, i) {
12d4d40e 3088 BUG_ON(!sg_page(sg));
4cf2e75d 3089 sg->dma_address = page_to_phys(sg_page(sg)) + sg->offset;
c03ab37c 3090 sg->dma_length = sg->length;
ba395927
KA
3091 }
3092 return nelems;
3093}
3094
d7ab5c46
FT
3095static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
3096 enum dma_data_direction dir, struct dma_attrs *attrs)
ba395927 3097{
ba395927 3098 int i;
ba395927
KA
3099 struct pci_dev *pdev = to_pci_dev(hwdev);
3100 struct dmar_domain *domain;
f76aec76
KA
3101 size_t size = 0;
3102 int prot = 0;
f76aec76
KA
3103 struct iova *iova = NULL;
3104 int ret;
c03ab37c 3105 struct scatterlist *sg;
b536d24d 3106 unsigned long start_vpfn;
8c11e798 3107 struct intel_iommu *iommu;
ba395927
KA
3108
3109 BUG_ON(dir == DMA_NONE);
73676832 3110 if (iommu_no_mapping(hwdev))
c03ab37c 3111 return intel_nontranslate_map_sg(hwdev, sglist, nelems, dir);
ba395927 3112
f76aec76
KA
3113 domain = get_valid_domain_for_dev(pdev);
3114 if (!domain)
3115 return 0;
3116
8c11e798
WH
3117 iommu = domain_get_iommu(domain);
3118
b536d24d 3119 for_each_sg(sglist, sg, nelems, i)
88cb6a74 3120 size += aligned_nrpages(sg->offset, sg->length);
f76aec76 3121
5a5e02a6
DW
3122 iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size),
3123 pdev->dma_mask);
f76aec76 3124 if (!iova) {
c03ab37c 3125 sglist->dma_length = 0;
f76aec76
KA
3126 return 0;
3127 }
3128
3129 /*
3130 * Check if DMAR supports zero-length reads on write only
3131 * mappings..
3132 */
3133 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
8c11e798 3134 !cap_zlr(iommu->cap))
f76aec76
KA
3135 prot |= DMA_PTE_READ;
3136 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3137 prot |= DMA_PTE_WRITE;
3138
b536d24d 3139 start_vpfn = mm_to_dma_pfn(iova->pfn_lo);
e1605495 3140
f532959b 3141 ret = domain_sg_mapping(domain, start_vpfn, sglist, size, prot);
e1605495
DW
3142 if (unlikely(ret)) {
3143 /* clear the page */
3144 dma_pte_clear_range(domain, start_vpfn,
3145 start_vpfn + size - 1);
3146 /* free page tables */
3147 dma_pte_free_pagetable(domain, start_vpfn,
3148 start_vpfn + size - 1);
3149 /* free iova */
3150 __free_iova(&domain->iovad, iova);
3151 return 0;
ba395927
KA
3152 }
3153
1f0ef2aa
DW
3154 /* it's a non-present to present mapping. Only flush if caching mode */
3155 if (cap_caching_mode(iommu->cap))
82653633 3156 iommu_flush_iotlb_psi(iommu, domain->id, start_vpfn, size, 1);
1f0ef2aa 3157 else
8c11e798 3158 iommu_flush_write_buffer(iommu);
1f0ef2aa 3159
ba395927
KA
3160 return nelems;
3161}
3162
dfb805e8
FT
3163static int intel_mapping_error(struct device *dev, dma_addr_t dma_addr)
3164{
3165 return !dma_addr;
3166}
3167
160c1d8e 3168struct dma_map_ops intel_dma_ops = {
baa676fc
AP
3169 .alloc = intel_alloc_coherent,
3170 .free = intel_free_coherent,
ba395927
KA
3171 .map_sg = intel_map_sg,
3172 .unmap_sg = intel_unmap_sg,
ffbbef5c
FT
3173 .map_page = intel_map_page,
3174 .unmap_page = intel_unmap_page,
dfb805e8 3175 .mapping_error = intel_mapping_error,
ba395927
KA
3176};
3177
3178static inline int iommu_domain_cache_init(void)
3179{
3180 int ret = 0;
3181
3182 iommu_domain_cache = kmem_cache_create("iommu_domain",
3183 sizeof(struct dmar_domain),
3184 0,
3185 SLAB_HWCACHE_ALIGN,
3186
3187 NULL);
3188 if (!iommu_domain_cache) {
3189 printk(KERN_ERR "Couldn't create iommu_domain cache\n");
3190 ret = -ENOMEM;
3191 }
3192
3193 return ret;
3194}
3195
3196static inline int iommu_devinfo_cache_init(void)
3197{
3198 int ret = 0;
3199
3200 iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
3201 sizeof(struct device_domain_info),
3202 0,
3203 SLAB_HWCACHE_ALIGN,
ba395927
KA
3204 NULL);
3205 if (!iommu_devinfo_cache) {
3206 printk(KERN_ERR "Couldn't create devinfo cache\n");
3207 ret = -ENOMEM;
3208 }
3209
3210 return ret;
3211}
3212
3213static inline int iommu_iova_cache_init(void)
3214{
3215 int ret = 0;
3216
3217 iommu_iova_cache = kmem_cache_create("iommu_iova",
3218 sizeof(struct iova),
3219 0,
3220 SLAB_HWCACHE_ALIGN,
ba395927
KA
3221 NULL);
3222 if (!iommu_iova_cache) {
3223 printk(KERN_ERR "Couldn't create iova cache\n");
3224 ret = -ENOMEM;
3225 }
3226
3227 return ret;
3228}
3229
3230static int __init iommu_init_mempool(void)
3231{
3232 int ret;
3233 ret = iommu_iova_cache_init();
3234 if (ret)
3235 return ret;
3236
3237 ret = iommu_domain_cache_init();
3238 if (ret)
3239 goto domain_error;
3240
3241 ret = iommu_devinfo_cache_init();
3242 if (!ret)
3243 return ret;
3244
3245 kmem_cache_destroy(iommu_domain_cache);
3246domain_error:
3247 kmem_cache_destroy(iommu_iova_cache);
3248
3249 return -ENOMEM;
3250}
3251
3252static void __init iommu_exit_mempool(void)
3253{
3254 kmem_cache_destroy(iommu_devinfo_cache);
3255 kmem_cache_destroy(iommu_domain_cache);
3256 kmem_cache_destroy(iommu_iova_cache);
3257
3258}
3259
556ab45f
DW
3260static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
3261{
3262 struct dmar_drhd_unit *drhd;
3263 u32 vtbar;
3264 int rc;
3265
3266 /* We know that this device on this chipset has its own IOMMU.
3267 * If we find it under a different IOMMU, then the BIOS is lying
3268 * to us. Hope that the IOMMU for this device is actually
3269 * disabled, and it needs no translation...
3270 */
3271 rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
3272 if (rc) {
3273 /* "can't" happen */
3274 dev_info(&pdev->dev, "failed to run vt-d quirk\n");
3275 return;
3276 }
3277 vtbar &= 0xffff0000;
3278
3279 /* we know that this iommu should be at offset 0xa000 from vtbar */
3280 drhd = dmar_find_matched_drhd_unit(pdev);
3281 if (WARN_TAINT_ONCE(!drhd || drhd->reg_base_addr - vtbar != 0xa000,
3282 TAINT_FIRMWARE_WORKAROUND,
3283 "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n"))
3284 pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
3285}
3286DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu);
3287
ba395927
KA
3288static void __init init_no_remapping_devices(void)
3289{
3290 struct dmar_drhd_unit *drhd;
3291
3292 for_each_drhd_unit(drhd) {
3293 if (!drhd->include_all) {
3294 int i;
3295 for (i = 0; i < drhd->devices_cnt; i++)
3296 if (drhd->devices[i] != NULL)
3297 break;
3298 /* ignore DMAR unit if no pci devices exist */
3299 if (i == drhd->devices_cnt)
3300 drhd->ignored = 1;
3301 }
3302 }
3303
7c919779 3304 for_each_active_drhd_unit(drhd) {
ba395927 3305 int i;
7c919779 3306 if (drhd->include_all)
ba395927
KA
3307 continue;
3308
3309 for (i = 0; i < drhd->devices_cnt; i++)
3310 if (drhd->devices[i] &&
c0771df8 3311 !IS_GFX_DEVICE(drhd->devices[i]))
ba395927
KA
3312 break;
3313
3314 if (i < drhd->devices_cnt)
3315 continue;
3316
c0771df8
DW
3317 /* This IOMMU has *only* gfx devices. Either bypass it or
3318 set the gfx_mapped flag, as appropriate */
3319 if (dmar_map_gfx) {
3320 intel_iommu_gfx_mapped = 1;
3321 } else {
3322 drhd->ignored = 1;
3323 for (i = 0; i < drhd->devices_cnt; i++) {
3324 if (!drhd->devices[i])
3325 continue;
3326 drhd->devices[i]->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
3327 }
ba395927
KA
3328 }
3329 }
3330}
3331
f59c7b69
FY
3332#ifdef CONFIG_SUSPEND
3333static int init_iommu_hw(void)
3334{
3335 struct dmar_drhd_unit *drhd;
3336 struct intel_iommu *iommu = NULL;
3337
3338 for_each_active_iommu(iommu, drhd)
3339 if (iommu->qi)
3340 dmar_reenable_qi(iommu);
3341
b779260b
JC
3342 for_each_iommu(iommu, drhd) {
3343 if (drhd->ignored) {
3344 /*
3345 * we always have to disable PMRs or DMA may fail on
3346 * this device
3347 */
3348 if (force_on)
3349 iommu_disable_protect_mem_regions(iommu);
3350 continue;
3351 }
3352
f59c7b69
FY
3353 iommu_flush_write_buffer(iommu);
3354
3355 iommu_set_root_entry(iommu);
3356
3357 iommu->flush.flush_context(iommu, 0, 0, 0,
1f0ef2aa 3358 DMA_CCMD_GLOBAL_INVL);
f59c7b69 3359 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
1f0ef2aa 3360 DMA_TLB_GLOBAL_FLUSH);
b779260b
JC
3361 if (iommu_enable_translation(iommu))
3362 return 1;
b94996c9 3363 iommu_disable_protect_mem_regions(iommu);
f59c7b69
FY
3364 }
3365
3366 return 0;
3367}
3368
3369static void iommu_flush_all(void)
3370{
3371 struct dmar_drhd_unit *drhd;
3372 struct intel_iommu *iommu;
3373
3374 for_each_active_iommu(iommu, drhd) {
3375 iommu->flush.flush_context(iommu, 0, 0, 0,
1f0ef2aa 3376 DMA_CCMD_GLOBAL_INVL);
f59c7b69 3377 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
1f0ef2aa 3378 DMA_TLB_GLOBAL_FLUSH);
f59c7b69
FY
3379 }
3380}
3381
134fac3f 3382static int iommu_suspend(void)
f59c7b69
FY
3383{
3384 struct dmar_drhd_unit *drhd;
3385 struct intel_iommu *iommu = NULL;
3386 unsigned long flag;
3387
3388 for_each_active_iommu(iommu, drhd) {
3389 iommu->iommu_state = kzalloc(sizeof(u32) * MAX_SR_DMAR_REGS,
3390 GFP_ATOMIC);
3391 if (!iommu->iommu_state)
3392 goto nomem;
3393 }
3394
3395 iommu_flush_all();
3396
3397 for_each_active_iommu(iommu, drhd) {
3398 iommu_disable_translation(iommu);
3399
1f5b3c3f 3400 raw_spin_lock_irqsave(&iommu->register_lock, flag);
f59c7b69
FY
3401
3402 iommu->iommu_state[SR_DMAR_FECTL_REG] =
3403 readl(iommu->reg + DMAR_FECTL_REG);
3404 iommu->iommu_state[SR_DMAR_FEDATA_REG] =
3405 readl(iommu->reg + DMAR_FEDATA_REG);
3406 iommu->iommu_state[SR_DMAR_FEADDR_REG] =
3407 readl(iommu->reg + DMAR_FEADDR_REG);
3408 iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
3409 readl(iommu->reg + DMAR_FEUADDR_REG);
3410
1f5b3c3f 3411 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
f59c7b69
FY
3412 }
3413 return 0;
3414
3415nomem:
3416 for_each_active_iommu(iommu, drhd)
3417 kfree(iommu->iommu_state);
3418
3419 return -ENOMEM;
3420}
3421
134fac3f 3422static void iommu_resume(void)
f59c7b69
FY
3423{
3424 struct dmar_drhd_unit *drhd;
3425 struct intel_iommu *iommu = NULL;
3426 unsigned long flag;
3427
3428 if (init_iommu_hw()) {
b779260b
JC
3429 if (force_on)
3430 panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
3431 else
3432 WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
134fac3f 3433 return;
f59c7b69
FY
3434 }
3435
3436 for_each_active_iommu(iommu, drhd) {
3437
1f5b3c3f 3438 raw_spin_lock_irqsave(&iommu->register_lock, flag);
f59c7b69
FY
3439
3440 writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
3441 iommu->reg + DMAR_FECTL_REG);
3442 writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
3443 iommu->reg + DMAR_FEDATA_REG);
3444 writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
3445 iommu->reg + DMAR_FEADDR_REG);
3446 writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
3447 iommu->reg + DMAR_FEUADDR_REG);
3448
1f5b3c3f 3449 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
f59c7b69
FY
3450 }
3451
3452 for_each_active_iommu(iommu, drhd)
3453 kfree(iommu->iommu_state);
f59c7b69
FY
3454}
3455
134fac3f 3456static struct syscore_ops iommu_syscore_ops = {
f59c7b69
FY
3457 .resume = iommu_resume,
3458 .suspend = iommu_suspend,
3459};
3460
134fac3f 3461static void __init init_iommu_pm_ops(void)
f59c7b69 3462{
134fac3f 3463 register_syscore_ops(&iommu_syscore_ops);
f59c7b69
FY
3464}
3465
3466#else
99592ba4 3467static inline void init_iommu_pm_ops(void) {}
f59c7b69
FY
3468#endif /* CONFIG_PM */
3469
318fe7df
SS
3470static void __init dmar_register_rmrr_unit(struct dmar_rmrr_unit *rmrr)
3471{
3472 list_add(&rmrr->list, &dmar_rmrr_units);
3473}
3474
3475
3476int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header)
3477{
3478 struct acpi_dmar_reserved_memory *rmrr;
3479 struct dmar_rmrr_unit *rmrru;
3480
3481 rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
3482 if (!rmrru)
3483 return -ENOMEM;
3484
3485 rmrru->hdr = header;
3486 rmrr = (struct acpi_dmar_reserved_memory *)header;
3487 rmrru->base_address = rmrr->base_address;
3488 rmrru->end_address = rmrr->end_address;
3489
3490 dmar_register_rmrr_unit(rmrru);
3491 return 0;
3492}
3493
3494static int __init
3495rmrr_parse_dev(struct dmar_rmrr_unit *rmrru)
3496{
3497 struct acpi_dmar_reserved_memory *rmrr;
318fe7df
SS
3498
3499 rmrr = (struct acpi_dmar_reserved_memory *) rmrru->hdr;
9bdc531e
JL
3500 return dmar_parse_dev_scope((void *)(rmrr + 1),
3501 ((void *)rmrr) + rmrr->header.length,
3502 &rmrru->devices_cnt, &rmrru->devices,
3503 rmrr->segment);
318fe7df
SS
3504}
3505
318fe7df
SS
3506int __init dmar_parse_one_atsr(struct acpi_dmar_header *hdr)
3507{
3508 struct acpi_dmar_atsr *atsr;
3509 struct dmar_atsr_unit *atsru;
3510
3511 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
3512 atsru = kzalloc(sizeof(*atsru), GFP_KERNEL);
3513 if (!atsru)
3514 return -ENOMEM;
3515
3516 atsru->hdr = hdr;
3517 atsru->include_all = atsr->flags & 0x1;
3518
3519 list_add(&atsru->list, &dmar_atsr_units);
3520
3521 return 0;
3522}
3523
3524static int __init atsr_parse_dev(struct dmar_atsr_unit *atsru)
3525{
318fe7df
SS
3526 struct acpi_dmar_atsr *atsr;
3527
3528 if (atsru->include_all)
3529 return 0;
3530
3531 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
9bdc531e
JL
3532 return dmar_parse_dev_scope((void *)(atsr + 1),
3533 (void *)atsr + atsr->header.length,
3534 &atsru->devices_cnt, &atsru->devices,
3535 atsr->segment);
3536}
3537
3538static void intel_iommu_free_atsr(struct dmar_atsr_unit *atsru)
3539{
3540 dmar_free_dev_scope(&atsru->devices, &atsru->devices_cnt);
3541 kfree(atsru);
3542}
3543
3544static void intel_iommu_free_dmars(void)
3545{
3546 struct dmar_rmrr_unit *rmrru, *rmrr_n;
3547 struct dmar_atsr_unit *atsru, *atsr_n;
3548
3549 list_for_each_entry_safe(rmrru, rmrr_n, &dmar_rmrr_units, list) {
3550 list_del(&rmrru->list);
3551 dmar_free_dev_scope(&rmrru->devices, &rmrru->devices_cnt);
3552 kfree(rmrru);
318fe7df
SS
3553 }
3554
9bdc531e
JL
3555 list_for_each_entry_safe(atsru, atsr_n, &dmar_atsr_units, list) {
3556 list_del(&atsru->list);
3557 intel_iommu_free_atsr(atsru);
3558 }
318fe7df
SS
3559}
3560
3561int dmar_find_matched_atsr_unit(struct pci_dev *dev)
3562{
3563 int i;
3564 struct pci_bus *bus;
3565 struct acpi_dmar_atsr *atsr;
3566 struct dmar_atsr_unit *atsru;
3567
3568 dev = pci_physfn(dev);
3569
3570 list_for_each_entry(atsru, &dmar_atsr_units, list) {
3571 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
3572 if (atsr->segment == pci_domain_nr(dev->bus))
3573 goto found;
3574 }
3575
3576 return 0;
3577
3578found:
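	/*
	 * Walk up from the device towards the root complex: ATS is only
	 * allowed if the root port above the device is listed in this
	 * ATSR's device scope (or the ATSR covers all ports).
	 */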
3579 for (bus = dev->bus; bus; bus = bus->parent) {
3580 struct pci_dev *bridge = bus->self;
3581
3582 if (!bridge || !pci_is_pcie(bridge) ||
62f87c0e 3583 pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE)
318fe7df
SS
3584 return 0;
3585
62f87c0e 3586 if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT) {
318fe7df
SS
3587 for (i = 0; i < atsru->devices_cnt; i++)
3588 if (atsru->devices[i] == bridge)
3589 return 1;
3590 break;
3591 }
3592 }
3593
3594 if (atsru->include_all)
3595 return 1;
3596
3597 return 0;
3598}
3599
c8f369ab 3600int __init dmar_parse_rmrr_atsr_dev(void)
318fe7df 3601{
9bdc531e
JL
3602 struct dmar_rmrr_unit *rmrr;
3603 struct dmar_atsr_unit *atsr;
318fe7df
SS
3604 int ret = 0;
3605
9bdc531e 3606 list_for_each_entry(rmrr, &dmar_rmrr_units, list) {
318fe7df
SS
3607 ret = rmrr_parse_dev(rmrr);
3608 if (ret)
3609 return ret;
3610 }
3611
9bdc531e 3612 list_for_each_entry(atsr, &dmar_atsr_units, list) {
318fe7df
SS
3613 ret = atsr_parse_dev(atsr);
3614 if (ret)
3615 return ret;
3616 }
3617
3618 return ret;
3619}
3620
99dcaded
FY
3621/*
3622 * Here we only respond to a device being unbound from its driver.
3623 *
3624 * A newly added device is not attached to its DMAR domain here yet; that
3625 * happens when the device is first mapped to an iova.
3626 */
3627static int device_notifier(struct notifier_block *nb,
3628 unsigned long action, void *data)
3629{
3630 struct device *dev = data;
3631 struct pci_dev *pdev = to_pci_dev(dev);
3632 struct dmar_domain *domain;
3633
816997d0 3634 if (iommu_dummy(pdev))
44cd613c
DW
3635 return 0;
3636
7e7dfab7
JL
3637 if (action != BUS_NOTIFY_UNBOUND_DRIVER &&
3638 action != BUS_NOTIFY_DEL_DEVICE)
3639 return 0;
3640
99dcaded
FY
3641 domain = find_domain(pdev);
3642 if (!domain)
3643 return 0;
3644
7e7dfab7
JL
3645 domain_remove_one_dev_info(domain, pdev);
3646 if (!(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) &&
3647 !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) &&
3648 list_empty(&domain->devices))
3649 domain_exit(domain);
a97590e5 3650
99dcaded
FY
3651 return 0;
3652}
3653
3654static struct notifier_block device_nb = {
3655 .notifier_call = device_notifier,
3656};
3657
ba395927
KA
3658int __init intel_iommu_init(void)
3659{
9bdc531e 3660 int ret = -ENODEV;
3a93c841 3661 struct dmar_drhd_unit *drhd;
7c919779 3662 struct intel_iommu *iommu;
ba395927 3663
a59b50e9
JC
3664 /* VT-d is required for a TXT/tboot launch, so enforce that */
3665 force_on = tboot_force_iommu();
3666
3667 if (dmar_table_init()) {
3668 if (force_on)
3669 panic("tboot: Failed to initialize DMAR table\n");
9bdc531e 3670 goto out_free_dmar;
a59b50e9 3671 }
ba395927 3672
3a93c841
TI
3673 /*
3674 * Disable translation if already enabled prior to OS handover.
3675 */
7c919779 3676 for_each_active_iommu(iommu, drhd)
3a93c841
TI
3677 if (iommu->gcmd & DMA_GCMD_TE)
3678 iommu_disable_translation(iommu);
3a93c841 3679
c2c7286a 3680 if (dmar_dev_scope_init() < 0) {
a59b50e9
JC
3681 if (force_on)
3682 panic("tboot: Failed to initialize DMAR device scope\n");
9bdc531e 3683 goto out_free_dmar;
a59b50e9 3684 }
1886e8a9 3685
75f1cdf1 3686 if (no_iommu || dmar_disabled)
9bdc531e 3687 goto out_free_dmar;
2ae21010 3688
51a63e67
JC
3689 if (iommu_init_mempool()) {
3690 if (force_on)
3691 panic("tboot: Failed to initialize iommu memory\n");
9bdc531e 3692 goto out_free_dmar;
51a63e67
JC
3693 }
3694
318fe7df
SS
3695 if (list_empty(&dmar_rmrr_units))
3696 printk(KERN_INFO "DMAR: No RMRR found\n");
3697
3698 if (list_empty(&dmar_atsr_units))
3699 printk(KERN_INFO "DMAR: No ATSR found\n");
3700
51a63e67
JC
3701 if (dmar_init_reserved_ranges()) {
3702 if (force_on)
3703 panic("tboot: Failed to reserve iommu ranges\n");
9bdc531e 3704 goto out_free_mempool;
51a63e67 3705 }
ba395927
KA
3706
3707 init_no_remapping_devices();
3708
b779260b 3709 ret = init_dmars();
ba395927 3710 if (ret) {
a59b50e9
JC
3711 if (force_on)
3712 panic("tboot: Failed to initialize DMARs\n");
ba395927 3713 printk(KERN_ERR "IOMMU: dmar init failed\n");
9bdc531e 3714 goto out_free_reserved_range;
ba395927
KA
3715 }
3716 printk(KERN_INFO
3717 "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");
3718
5e0d2a6f 3719 init_timer(&unmap_timer);
75f1cdf1
FT
3720#ifdef CONFIG_SWIOTLB
3721 swiotlb = 0;
3722#endif
19943b0e 3723 dma_ops = &intel_dma_ops;
4ed0d3e6 3724
134fac3f 3725 init_iommu_pm_ops();
a8bcbb0d 3726
4236d97d 3727 bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
a8bcbb0d 3728
99dcaded
FY
3729 bus_register_notifier(&pci_bus_type, &device_nb);
3730
8bc1f85c
ED
3731 intel_iommu_enabled = 1;
3732
ba395927 3733 return 0;
9bdc531e
JL
3734
3735out_free_reserved_range:
3736 put_iova_domain(&reserved_iova_list);
3737out_free_mempool:
3738 iommu_exit_mempool();
3739out_free_dmar:
3740 intel_iommu_free_dmars();
3741 return ret;
ba395927 3742}
e820482c 3743
3199aa6b
HW
3744static void iommu_detach_dependent_devices(struct intel_iommu *iommu,
3745 struct pci_dev *pdev)
3746{
3747 struct pci_dev *tmp, *parent;
3748
3749 if (!iommu || !pdev)
3750 return;
3751
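	/*
	 * "Dependent devices" are the bridges between the device and its
	 * upstream PCIe-to-PCI bridge: context entries were also programmed
	 * for them (and for the bridge's secondary bus, devfn 0) when the
	 * device was attached, so tear those down here as well.
	 */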
3752 /* dependent device detach */
3753 tmp = pci_find_upstream_pcie_bridge(pdev);
3754 /* Secondary interface's bus number and devfn 0 */
3755 if (tmp) {
3756 parent = pdev->bus->self;
3757 while (parent != tmp) {
3758 iommu_detach_dev(iommu, parent->bus->number,
276dbf99 3759 parent->devfn);
3199aa6b
HW
3760 parent = parent->bus->self;
3761 }
45e829ea 3762 if (pci_is_pcie(tmp)) /* this is a PCIe-to-PCI bridge */
3199aa6b
HW
3763 iommu_detach_dev(iommu,
3764 tmp->subordinate->number, 0);
3765 else /* this is a legacy PCI bridge */
276dbf99
DW
3766 iommu_detach_dev(iommu, tmp->bus->number,
3767 tmp->devfn);
3199aa6b
HW
3768 }
3769}
3770
2c2e2c38 3771static void domain_remove_one_dev_info(struct dmar_domain *domain,
c7151a8d
WH
3772 struct pci_dev *pdev)
3773{
bca2b916 3774 struct device_domain_info *info, *tmp;
c7151a8d
WH
3775 struct intel_iommu *iommu;
3776 unsigned long flags;
3777 int found = 0;
c7151a8d 3778
276dbf99
DW
3779 iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
3780 pdev->devfn);
c7151a8d
WH
3781 if (!iommu)
3782 return;
3783
3784 spin_lock_irqsave(&device_domain_lock, flags);
bca2b916 3785 list_for_each_entry_safe(info, tmp, &domain->devices, link) {
8519dc44
MH
3786 if (info->segment == pci_domain_nr(pdev->bus) &&
3787 info->bus == pdev->bus->number &&
c7151a8d 3788 info->devfn == pdev->devfn) {
109b9b04 3789 unlink_domain_info(info);
c7151a8d
WH
3790 spin_unlock_irqrestore(&device_domain_lock, flags);
3791
93a23a72 3792 iommu_disable_dev_iotlb(info);
c7151a8d 3793 iommu_detach_dev(iommu, info->bus, info->devfn);
3199aa6b 3794 iommu_detach_dependent_devices(iommu, pdev);
c7151a8d
WH
3795 free_devinfo_mem(info);
3796
3797 spin_lock_irqsave(&device_domain_lock, flags);
3798
3799 if (found)
3800 break;
3801 else
3802 continue;
3803 }
3804
3805 /* if there are no other devices under the same iommu
3806 * owned by this domain, clear this iommu in iommu_bmp,
3807 * then update the iommu count and coherency
3808 */
276dbf99
DW
3809 if (iommu == device_to_iommu(info->segment, info->bus,
3810 info->devfn))
c7151a8d
WH
3811 found = 1;
3812 }
3813
3e7abe25
RD
3814 spin_unlock_irqrestore(&device_domain_lock, flags);
3815
c7151a8d
WH
3816 if (found == 0) {
3817 unsigned long tmp_flags;
3818 spin_lock_irqsave(&domain->iommu_lock, tmp_flags);
1b198bb0 3819 clear_bit(iommu->seq_id, domain->iommu_bmp);
c7151a8d 3820 domain->iommu_count--;
58c610bd 3821 domain_update_iommu_cap(domain);
c7151a8d 3822 spin_unlock_irqrestore(&domain->iommu_lock, tmp_flags);
a97590e5 3823
9b4554b2
AW
3824 if (!(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) &&
3825 !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY)) {
3826 spin_lock_irqsave(&iommu->lock, tmp_flags);
3827 clear_bit(domain->id, iommu->domain_ids);
3828 iommu->domains[domain->id] = NULL;
3829 spin_unlock_irqrestore(&iommu->lock, tmp_flags);
3830 }
c7151a8d 3831 }
c7151a8d
WH
3832}
3833
2c2e2c38 3834static int md_domain_init(struct dmar_domain *domain, int guest_width)
5e98c4b1
WH
3835{
3836 int adjust_width;
3837
3838 init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
5e98c4b1
WH
3839 domain_reserve_special_ranges(domain);
3840
3841 /* calculate AGAW */
3842 domain->gaw = guest_width;
3843 adjust_width = guestwidth_to_adjustwidth(guest_width);
3844 domain->agaw = width_to_agaw(adjust_width);
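	/* e.g. the default 48-bit guest width results in a 4-level page table */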
3845
5e98c4b1 3846 domain->iommu_coherency = 0;
c5b15255 3847 domain->iommu_snooping = 0;
6dd9a7c7 3848 domain->iommu_superpage = 0;
fe40f1e0 3849 domain->max_addr = 0;
4c923d47 3850 domain->nid = -1;
5e98c4b1
WH
3851
3852 /* always allocate the top pgd */
4c923d47 3853 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
5e98c4b1
WH
3854 if (!domain->pgd)
3855 return -ENOMEM;
3856 domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
3857 return 0;
3858}
3859
5d450806 3860static int intel_iommu_domain_init(struct iommu_domain *domain)
38717946 3861{
5d450806 3862 struct dmar_domain *dmar_domain;
38717946 3863
92d03cc8 3864 dmar_domain = alloc_domain(true);
5d450806 3865 if (!dmar_domain) {
38717946 3866 printk(KERN_ERR
5d450806
JR
3867 "intel_iommu_domain_init: dmar_domain == NULL\n");
3868 return -ENOMEM;
38717946 3869 }
2c2e2c38 3870 if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
38717946 3871 printk(KERN_ERR
5d450806 3872 "intel_iommu_domain_init() failed\n");
92d03cc8 3873 domain_exit(dmar_domain);
5d450806 3874 return -ENOMEM;
38717946 3875 }
8140a95d 3876 domain_update_iommu_cap(dmar_domain);
5d450806 3877 domain->priv = dmar_domain;
faa3d6f5 3878
8a0e715b
JR
3879 domain->geometry.aperture_start = 0;
3880 domain->geometry.aperture_end = __DOMAIN_MAX_ADDR(dmar_domain->gaw);
3881 domain->geometry.force_aperture = true;
3882
5d450806 3883 return 0;
38717946 3884}
38717946 3885
5d450806 3886static void intel_iommu_domain_destroy(struct iommu_domain *domain)
38717946 3887{
5d450806
JR
3888 struct dmar_domain *dmar_domain = domain->priv;
3889
3890 domain->priv = NULL;
92d03cc8 3891 domain_exit(dmar_domain);
38717946 3892}
38717946 3893
4c5478c9
JR
3894static int intel_iommu_attach_device(struct iommu_domain *domain,
3895 struct device *dev)
38717946 3896{
4c5478c9
JR
3897 struct dmar_domain *dmar_domain = domain->priv;
3898 struct pci_dev *pdev = to_pci_dev(dev);
fe40f1e0
WH
3899 struct intel_iommu *iommu;
3900 int addr_width;
faa3d6f5
WH
3901
3902 /* normally pdev is not mapped */
3903 if (unlikely(domain_context_mapped(pdev))) {
3904 struct dmar_domain *old_domain;
3905
3906 old_domain = find_domain(pdev);
3907 if (old_domain) {
2c2e2c38
FY
3908 if (dmar_domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE ||
3909 dmar_domain->flags & DOMAIN_FLAG_STATIC_IDENTITY)
3910 domain_remove_one_dev_info(old_domain, pdev);
faa3d6f5
WH
3911 else
3912 domain_remove_dev_info(old_domain);
3913 }
3914 }
3915
276dbf99
DW
3916 iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
3917 pdev->devfn);
fe40f1e0
WH
3918 if (!iommu)
3919 return -ENODEV;
3920
3921 /* check if this iommu agaw is sufficient for max mapped address */
3922 addr_width = agaw_to_width(iommu->agaw);
a99c47a2
TL
3923 if (addr_width > cap_mgaw(iommu->cap))
3924 addr_width = cap_mgaw(iommu->cap);
3925
3926 if (dmar_domain->max_addr > (1LL << addr_width)) {
3927 printk(KERN_ERR "%s: iommu width (%d) is not "
fe40f1e0 3928 "sufficient for the mapped address (%llx)\n",
a99c47a2 3929 __func__, addr_width, dmar_domain->max_addr);
fe40f1e0
WH
3930 return -EFAULT;
3931 }
a99c47a2
TL
3932 dmar_domain->gaw = addr_width;
3933
3934 /*
3935 * Knock out extra levels of page tables if necessary
3936 */
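	/*
	 * e.g. if the domain was built with a 4-level table but this iommu
	 * only supports a 3-level one, descend through the first entry of
	 * the top level and free the now-unused top table.
	 */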
3937 while (iommu->agaw < dmar_domain->agaw) {
3938 struct dma_pte *pte;
3939
3940 pte = dmar_domain->pgd;
3941 if (dma_pte_present(pte)) {
25cbff16
SY
3942 dmar_domain->pgd = (struct dma_pte *)
3943 phys_to_virt(dma_pte_addr(pte));
7a661013 3944 free_pgtable_page(pte);
a99c47a2
TL
3945 }
3946 dmar_domain->agaw--;
3947 }
fe40f1e0 3948
5fe60f4e 3949 return domain_add_dev_info(dmar_domain, pdev, CONTEXT_TT_MULTI_LEVEL);
38717946 3950}
38717946 3951
4c5478c9
JR
3952static void intel_iommu_detach_device(struct iommu_domain *domain,
3953 struct device *dev)
38717946 3954{
4c5478c9
JR
3955 struct dmar_domain *dmar_domain = domain->priv;
3956 struct pci_dev *pdev = to_pci_dev(dev);
3957
2c2e2c38 3958 domain_remove_one_dev_info(dmar_domain, pdev);
faa3d6f5 3959}
c7151a8d 3960
b146a1c9
JR
3961static int intel_iommu_map(struct iommu_domain *domain,
3962 unsigned long iova, phys_addr_t hpa,
5009065d 3963 size_t size, int iommu_prot)
faa3d6f5 3964{
dde57a21 3965 struct dmar_domain *dmar_domain = domain->priv;
fe40f1e0 3966 u64 max_addr;
dde57a21 3967 int prot = 0;
faa3d6f5 3968 int ret;
fe40f1e0 3969
dde57a21
JR
3970 if (iommu_prot & IOMMU_READ)
3971 prot |= DMA_PTE_READ;
3972 if (iommu_prot & IOMMU_WRITE)
3973 prot |= DMA_PTE_WRITE;
9cf06697
SY
3974 if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
3975 prot |= DMA_PTE_SNP;
dde57a21 3976
163cc52c 3977 max_addr = iova + size;
dde57a21 3978 if (dmar_domain->max_addr < max_addr) {
fe40f1e0
WH
3979 u64 end;
3980
3981 /* check if minimum agaw is sufficient for mapped address */
8954da1f 3982 end = __DOMAIN_MAX_ADDR(dmar_domain->gaw) + 1;
fe40f1e0 3983 if (end < max_addr) {
8954da1f 3984 printk(KERN_ERR "%s: iommu width (%d) is not "
fe40f1e0 3985 "sufficient for the mapped address (%llx)\n",
8954da1f 3986 __func__, dmar_domain->gaw, max_addr);
fe40f1e0
WH
3987 return -EFAULT;
3988 }
dde57a21 3989 dmar_domain->max_addr = max_addr;
fe40f1e0 3990 }
ad051221
DW
3991 /* Round up size to the next multiple of PAGE_SIZE if it, combined
3992 with the low bits of hpa, would take us onto the next page */
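	/* e.g. with 4KiB pages, hpa 0x1ff0 and size 0x20 cross a page
	   boundary, so the region is counted as two pages */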
88cb6a74 3993 size = aligned_nrpages(hpa, size);
ad051221
DW
3994 ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
3995 hpa >> VTD_PAGE_SHIFT, size, prot);
faa3d6f5 3996 return ret;
38717946 3997}
38717946 3998
5009065d
OBC
3999static size_t intel_iommu_unmap(struct iommu_domain *domain,
4000 unsigned long iova, size_t size)
38717946 4001{
dde57a21 4002 struct dmar_domain *dmar_domain = domain->priv;
292827cb 4003 int order;
4b99d352 4004
292827cb 4005 order = dma_pte_clear_range(dmar_domain, iova >> VTD_PAGE_SHIFT,
163cc52c 4006 (iova + size - 1) >> VTD_PAGE_SHIFT);
fe40f1e0 4007
163cc52c
DW
4008 if (dmar_domain->max_addr == iova + size)
4009 dmar_domain->max_addr = iova;
b146a1c9 4010
5009065d 4011 return PAGE_SIZE << order;
38717946 4012}
38717946 4013
d14d6577 4014static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
bb5547ac 4015 dma_addr_t iova)
38717946 4016{
d14d6577 4017 struct dmar_domain *dmar_domain = domain->priv;
38717946 4018 struct dma_pte *pte;
faa3d6f5 4019 u64 phys = 0;
38717946 4020
6dd9a7c7 4021 pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, 0);
38717946 4022 if (pte)
faa3d6f5 4023 phys = dma_pte_addr(pte);
38717946 4024
faa3d6f5 4025 return phys;
38717946 4026}
a8bcbb0d 4027
dbb9fd86
SY
4028static int intel_iommu_domain_has_cap(struct iommu_domain *domain,
4029 unsigned long cap)
4030{
4031 struct dmar_domain *dmar_domain = domain->priv;
4032
4033 if (cap == IOMMU_CAP_CACHE_COHERENCY)
4034 return dmar_domain->iommu_snooping;
323f99cb 4035 if (cap == IOMMU_CAP_INTR_REMAP)
95a02e97 4036 return irq_remapping_enabled;
dbb9fd86
SY
4037
4038 return 0;
4039}
4040
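/*
 * ACS capabilities required for a device to be isolated in its own group:
 * Source Validation, Request Redirect, Completion Redirect and Upstream
 * Forwarding.
 */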
783f157b 4041#define REQ_ACS_FLAGS (PCI_ACS_SV | PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_UF)
70ae6f0d 4042
abdfdde2
AW
4043static int intel_iommu_add_device(struct device *dev)
4044{
4045 struct pci_dev *pdev = to_pci_dev(dev);
3da4af0a 4046 struct pci_dev *bridge, *dma_pdev = NULL;
abdfdde2
AW
4047 struct iommu_group *group;
4048 int ret;
70ae6f0d 4049
abdfdde2
AW
4050 if (!device_to_iommu(pci_domain_nr(pdev->bus),
4051 pdev->bus->number, pdev->devfn))
70ae6f0d
AW
4052 return -ENODEV;
4053
4054 bridge = pci_find_upstream_pcie_bridge(pdev);
4055 if (bridge) {
abdfdde2
AW
4056 if (pci_is_pcie(bridge))
4057 dma_pdev = pci_get_domain_bus_and_slot(
4058 pci_domain_nr(pdev->bus),
4059 bridge->subordinate->number, 0);
3da4af0a 4060 if (!dma_pdev)
abdfdde2
AW
4061 dma_pdev = pci_dev_get(bridge);
4062 } else
4063 dma_pdev = pci_dev_get(pdev);
4064
a4ff1fc2 4065 /* Account for quirked devices */
783f157b
AW
4066 swap_pci_ref(&dma_pdev, pci_get_dma_source(dma_pdev));
4067
a4ff1fc2
AW
4068 /*
4069 * If it's a multifunction device that does not support our
c14d2690
AW
4070 * required ACS flags, add to the same group as the lowest numbered
4071 * function that also does not support the required ACS flags.
a4ff1fc2 4072 */
783f157b 4073 if (dma_pdev->multifunction &&
c14d2690
AW
4074 !pci_acs_enabled(dma_pdev, REQ_ACS_FLAGS)) {
4075 u8 i, slot = PCI_SLOT(dma_pdev->devfn);
4076
4077 for (i = 0; i < 8; i++) {
4078 struct pci_dev *tmp;
4079
4080 tmp = pci_get_slot(dma_pdev->bus, PCI_DEVFN(slot, i));
4081 if (!tmp)
4082 continue;
4083
4084 if (!pci_acs_enabled(tmp, REQ_ACS_FLAGS)) {
4085 swap_pci_ref(&dma_pdev, tmp);
4086 break;
4087 }
4088 pci_dev_put(tmp);
4089 }
4090 }
783f157b 4091
a4ff1fc2
AW
4092 /*
4093 * Devices on the root bus go through the iommu. If that's not us,
4094 * find the next upstream device and test ACS up to the root bus.
4095 * Finding the next device may require skipping virtual buses.
4096 */
783f157b 4097 while (!pci_is_root_bus(dma_pdev->bus)) {
a4ff1fc2
AW
4098 struct pci_bus *bus = dma_pdev->bus;
4099
4100 while (!bus->self) {
4101 if (!pci_is_root_bus(bus))
4102 bus = bus->parent;
4103 else
4104 goto root_bus;
4105 }
4106
4107 if (pci_acs_path_enabled(bus->self, NULL, REQ_ACS_FLAGS))
783f157b
AW
4108 break;
4109
a4ff1fc2 4110 swap_pci_ref(&dma_pdev, pci_dev_get(bus->self));
783f157b
AW
4111 }
4112
a4ff1fc2 4113root_bus:
abdfdde2
AW
4114 group = iommu_group_get(&dma_pdev->dev);
4115 pci_dev_put(dma_pdev);
4116 if (!group) {
4117 group = iommu_group_alloc();
4118 if (IS_ERR(group))
4119 return PTR_ERR(group);
70ae6f0d
AW
4120 }
4121
abdfdde2 4122 ret = iommu_group_add_device(group, dev);
bcb71abe 4123
abdfdde2
AW
4124 iommu_group_put(group);
4125 return ret;
4126}
70ae6f0d 4127
abdfdde2
AW
4128static void intel_iommu_remove_device(struct device *dev)
4129{
4130 iommu_group_remove_device(dev);
70ae6f0d
AW
4131}
4132
a8bcbb0d
JR
4133static struct iommu_ops intel_iommu_ops = {
4134 .domain_init = intel_iommu_domain_init,
4135 .domain_destroy = intel_iommu_domain_destroy,
4136 .attach_dev = intel_iommu_attach_device,
4137 .detach_dev = intel_iommu_detach_device,
b146a1c9
JR
4138 .map = intel_iommu_map,
4139 .unmap = intel_iommu_unmap,
a8bcbb0d 4140 .iova_to_phys = intel_iommu_iova_to_phys,
dbb9fd86 4141 .domain_has_cap = intel_iommu_domain_has_cap,
abdfdde2
AW
4142 .add_device = intel_iommu_add_device,
4143 .remove_device = intel_iommu_remove_device,
6d1c56a9 4144 .pgsize_bitmap = INTEL_IOMMU_PGSIZES,
a8bcbb0d 4145};
9af88143 4146
9452618e
DV
4147static void quirk_iommu_g4x_gfx(struct pci_dev *dev)
4148{
4149 /* G4x/GM45 integrated gfx dmar support is totally busted. */
4150 printk(KERN_INFO "DMAR: Disabling IOMMU for graphics on this chipset\n");
4151 dmar_map_gfx = 0;
4152}
4153
4154DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_g4x_gfx);
4155DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_g4x_gfx);
4156DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_g4x_gfx);
4157DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_g4x_gfx);
4158DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_g4x_gfx);
4159DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_g4x_gfx);
4160DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_g4x_gfx);
4161
d34d6517 4162static void quirk_iommu_rwbf(struct pci_dev *dev)
9af88143
DW
4163{
4164 /*
4165 * Mobile 4 Series Chipset neglects to set RWBF capability,
210561ff 4166 * but needs it. Same seems to hold for the desktop versions.
9af88143
DW
4167 */
4168 printk(KERN_INFO "DMAR: Forcing write-buffer flush capability\n");
4169 rwbf_quirk = 1;
4170}
4171
4172DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
210561ff
DV
4173DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_rwbf);
4174DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_rwbf);
4175DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_rwbf);
4176DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_rwbf);
4177DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_rwbf);
4178DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_rwbf);
e0fc7e0b 4179
eecfd57f
AJ
4180#define GGC 0x52
4181#define GGC_MEMORY_SIZE_MASK (0xf << 8)
4182#define GGC_MEMORY_SIZE_NONE (0x0 << 8)
4183#define GGC_MEMORY_SIZE_1M (0x1 << 8)
4184#define GGC_MEMORY_SIZE_2M (0x3 << 8)
4185#define GGC_MEMORY_VT_ENABLED (0x8 << 8)
4186#define GGC_MEMORY_SIZE_2M_VT (0x9 << 8)
4187#define GGC_MEMORY_SIZE_3M_VT (0xa << 8)
4188#define GGC_MEMORY_SIZE_4M_VT (0xb << 8)
4189
d34d6517 4190static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
9eecabcb
DW
4191{
4192 unsigned short ggc;
4193
eecfd57f 4194 if (pci_read_config_word(dev, GGC, &ggc))
9eecabcb
DW
4195 return;
4196
eecfd57f 4197 if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
9eecabcb
DW
4198 printk(KERN_INFO "DMAR: BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
4199 dmar_map_gfx = 0;
6fbcfb3e
DW
4200 } else if (dmar_map_gfx) {
4201 /* we have to ensure the gfx device is idle before we flush */
4202 printk(KERN_INFO "DMAR: Disabling batched IOTLB flush on Ironlake\n");
4203 intel_iommu_strict = 1;
4204 }
9eecabcb
DW
4205}
4206DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
4207DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt);
4208DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
4209DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);
4210
e0fc7e0b
DW
4211/* On Tylersburg chipsets, some BIOSes have been known to enable the
4212 ISOCH DMAR unit for the Azalia sound device, but not give it any
4213 TLB entries, which causes it to deadlock. Check for that. We do
4214 this in a function called from init_dmars(), instead of in a PCI
4215 quirk, because we don't want to print the obnoxious "BIOS broken"
4216 message if VT-d is actually disabled.
4217*/
4218static void __init check_tylersburg_isoch(void)
4219{
4220 struct pci_dev *pdev;
4221 uint32_t vtisochctrl;
4222
4223 /* If there's no Azalia in the system anyway, forget it. */
4224 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
4225 if (!pdev)
4226 return;
4227 pci_dev_put(pdev);
4228
4229 /* System Management Registers. Might be hidden, in which case
4230 we can't do the sanity check. But that's OK, because the
4231 known-broken BIOSes _don't_ actually hide it, so far. */
4232 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
4233 if (!pdev)
4234 return;
4235
4236 if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
4237 pci_dev_put(pdev);
4238 return;
4239 }
4240
4241 pci_dev_put(pdev);
4242
4243 /* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */
4244 if (vtisochctrl & 1)
4245 return;
4246
4247 /* Drop all bits other than the number of TLB entries */
4248 vtisochctrl &= 0x1c;
4249
4250 /* If we have the recommended number of TLB entries (16), fine. */
4251 if (vtisochctrl == 0x10)
4252 return;
4253
4254 /* Zero TLB entries? You get to ride the short bus to school. */
4255 if (!vtisochctrl) {
4256 WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
4257 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
4258 dmi_get_system_info(DMI_BIOS_VENDOR),
4259 dmi_get_system_info(DMI_BIOS_VERSION),
4260 dmi_get_system_info(DMI_PRODUCT_VERSION));
4261 iommu_identity_mapping |= IDENTMAP_AZALIA;
4262 return;
4263 }
4264
4265 printk(KERN_WARNING "DMAR: Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n",
4266 vtisochctrl);
4267}