drivers/iommu/intel-iommu.c
1/*
2 * Copyright (c) 2006, Intel Corporation.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License along with
14 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15 * Place - Suite 330, Boston, MA 02111-1307 USA.
16 *
17 * Copyright (C) 2006-2008 Intel Corporation
18 * Author: Ashok Raj <ashok.raj@intel.com>
19 * Author: Shaohua Li <shaohua.li@intel.com>
20 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
21 * Author: Fenghua Yu <fenghua.yu@intel.com>
22 */
23
24#include <linux/init.h>
25#include <linux/bitmap.h>
26#include <linux/debugfs.h>
27#include <linux/export.h>
28#include <linux/slab.h>
29#include <linux/irq.h>
30#include <linux/interrupt.h>
31#include <linux/spinlock.h>
32#include <linux/pci.h>
33#include <linux/dmar.h>
34#include <linux/dma-mapping.h>
35#include <linux/mempool.h>
36#include <linux/timer.h>
37#include <linux/iova.h>
38#include <linux/iommu.h>
39#include <linux/intel-iommu.h>
40#include <linux/syscore_ops.h>
41#include <linux/tboot.h>
42#include <linux/dmi.h>
43#include <linux/pci-ats.h>
44#include <linux/memblock.h>
45#include <asm/irq_remapping.h>
46#include <asm/cacheflush.h>
47#include <asm/iommu.h>
48
49#include "irq_remapping.h"
50#include "pci.h"
51
52#define ROOT_SIZE VTD_PAGE_SIZE
53#define CONTEXT_SIZE VTD_PAGE_SIZE
54
55#define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
56#define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
57#define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)
58
59#define IOAPIC_RANGE_START (0xfee00000)
60#define IOAPIC_RANGE_END (0xfeefffff)
61#define IOVA_START_ADDR (0x1000)
62
63#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
64
65#define MAX_AGAW_WIDTH 64
66#define MAX_AGAW_PFN_WIDTH (MAX_AGAW_WIDTH - VTD_PAGE_SHIFT)
67
68#define __DOMAIN_MAX_PFN(gaw) ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
69#define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1)
70
71/* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
72 to match. That way, we can use 'unsigned long' for PFNs with impunity. */
73#define DOMAIN_MAX_PFN(gaw) ((unsigned long) min_t(uint64_t, \
74 __DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
75#define DOMAIN_MAX_ADDR(gaw) (((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)
76
77#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
78#define DMA_32BIT_PFN IOVA_PFN(DMA_BIT_MASK(32))
79#define DMA_64BIT_PFN IOVA_PFN(DMA_BIT_MASK(64))
80
81/* page table handling */
82#define LEVEL_STRIDE (9)
83#define LEVEL_MASK (((u64)1 << LEVEL_STRIDE) - 1)
84
85/*
86 * This bitmap is used to advertise the page sizes our hardware supports
87 * to the IOMMU core, which will then use this information to split
88 * physically contiguous memory regions it is mapping into page sizes
89 * that we support.
90 *
91 * Traditionally the IOMMU core just handed us the mappings directly,
92 * after making sure the size is an order of a 4KiB page and that the
93 * mapping has natural alignment.
94 *
95 * To retain this behavior, we currently advertise that we support
96 * all page sizes that are an order of 4KiB.
97 *
98 * If at some point we'd like to utilize the IOMMU core's new behavior,
99 * we could change this to advertise the real page sizes we support.
100 */
101#define INTEL_IOMMU_PGSIZES (~0xFFFUL)
102
103static inline int agaw_to_level(int agaw)
104{
105 return agaw + 2;
106}
107
108static inline int agaw_to_width(int agaw)
109{
110 return min_t(int, 30 + agaw * LEVEL_STRIDE, MAX_AGAW_WIDTH);
111}
112
113static inline int width_to_agaw(int width)
114{
115 return DIV_ROUND_UP(width - 30, LEVEL_STRIDE);
116}
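/*
 * Illustrative sketch, not part of the driver: how the helpers above tie a
 * guest address width to the AGAW encoding and the page-table depth, using
 * the default 48-bit domain width defined earlier in this file.
 */
static inline void example_agaw_arithmetic(void)
{
	int agaw  = width_to_agaw(DEFAULT_DOMAIN_ADDRESS_WIDTH); /* DIV_ROUND_UP(48 - 30, 9) == 2 */
	int width = agaw_to_width(agaw);	/* min(30 + 2 * 9, 64) == 48 */
	int level = agaw_to_level(agaw);	/* 2 + 2 == 4, i.e. a 4-level page table */

	(void)width;
	(void)level;
}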
117
118static inline unsigned int level_to_offset_bits(int level)
119{
120 return (level - 1) * LEVEL_STRIDE;
121}
122
123static inline int pfn_level_offset(unsigned long pfn, int level)
124{
125 return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
126}
127
128static inline unsigned long level_mask(int level)
129{
130 return -1UL << level_to_offset_bits(level);
131}
132
133static inline unsigned long level_size(int level)
134{
135 return 1UL << level_to_offset_bits(level);
136}
137
138static inline unsigned long align_to_level(unsigned long pfn, int level)
139{
140 return (pfn + level_size(level) - 1) & level_mask(level);
141}
142
143static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
144{
145 return 1 << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH);
146}
147
148/* VT-d pages must always be _smaller_ than MM pages. Otherwise things
149 are never going to work. */
150static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
151{
152 return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
153}
154
155static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
156{
157 return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
158}
159static inline unsigned long page_to_dma_pfn(struct page *pg)
160{
161 return mm_to_dma_pfn(page_to_pfn(pg));
162}
163static inline unsigned long virt_to_dma_pfn(void *p)
164{
165 return page_to_dma_pfn(virt_to_page(p));
166}
167
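/*
 * Illustrative sketch, not part of the driver: on x86 the MM page size and
 * the 4KiB VT-d page size match, so the conversions above are identity; on
 * a larger-page configuration they become a shift by
 * (PAGE_SHIFT - VTD_PAGE_SHIFT), e.g. one 64KiB MM page spans sixteen 4KiB
 * VT-d pages. Round-tripping a page's pfn must give it back:
 */
static inline void example_pfn_round_trip(struct page *pg)
{
	unsigned long dma_pfn = page_to_dma_pfn(pg);	/* first VT-d pfn backing @pg */
	unsigned long mm_pfn = dma_to_mm_pfn(dma_pfn);	/* back to the MM pfn */

	WARN_ON(mm_pfn != page_to_pfn(pg));
}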
168/* global iommu list, set NULL for ignored DMAR units */
169static struct intel_iommu **g_iommus;
170
171static void __init check_tylersburg_isoch(void);
172static int rwbf_quirk;
173
174/*
175 * set to 1 to panic the kernel if VT-d cannot be successfully enabled
176 * (used when kernel is launched w/ TXT)
177 */
178static int force_on = 0;
179
180/*
181 * 0: Present
182 * 1-11: Reserved
183 * 12-63: Context Ptr (12 - (haw-1))
184 * 64-127: Reserved
185 */
186struct root_entry {
187 u64 val;
188 u64 rsvd1;
189};
190#define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
191static inline bool root_present(struct root_entry *root)
192{
193 return (root->val & 1);
194}
195static inline void set_root_present(struct root_entry *root)
196{
197 root->val |= 1;
198}
199static inline void set_root_value(struct root_entry *root, unsigned long value)
200{
201 root->val |= value & VTD_PAGE_MASK;
202}
203
204static inline struct context_entry *
205get_context_addr_from_root(struct root_entry *root)
206{
207 return (struct context_entry *)
208 (root_present(root)?phys_to_virt(
209 root->val & VTD_PAGE_MASK) :
210 NULL);
211}
212
213/*
214 * low 64 bits:
215 * 0: present
216 * 1: fault processing disable
217 * 2-3: translation type
218 * 12-63: address space root
219 * high 64 bits:
220 * 0-2: address width
221 * 3-6: aval
222 * 8-23: domain id
223 */
224struct context_entry {
225 u64 lo;
226 u64 hi;
227};
228
229static inline bool context_present(struct context_entry *context)
230{
231 return (context->lo & 1);
232}
233static inline void context_set_present(struct context_entry *context)
234{
235 context->lo |= 1;
236}
237
238static inline void context_set_fault_enable(struct context_entry *context)
239{
240 context->lo &= (((u64)-1) << 2) | 1;
241}
242
243static inline void context_set_translation_type(struct context_entry *context,
244 unsigned long value)
245{
246 context->lo &= (((u64)-1) << 4) | 3;
247 context->lo |= (value & 3) << 2;
248}
249
250static inline void context_set_address_root(struct context_entry *context,
251 unsigned long value)
252{
253 context->lo |= value & VTD_PAGE_MASK;
254}
255
256static inline void context_set_address_width(struct context_entry *context,
257 unsigned long value)
258{
259 context->hi |= value & 7;
260}
261
262static inline void context_set_domain_id(struct context_entry *context,
263 unsigned long value)
264{
265 context->hi |= (value & ((1 << 16) - 1)) << 8;
266}
267
268static inline void context_clear_entry(struct context_entry *context)
269{
270 context->lo = 0;
271 context->hi = 0;
272}
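/*
 * Illustrative sketch, not part of the driver: composing a context entry for
 * domain id 5 with a 4-level (agaw 2) page table, in the same order that
 * domain_context_mapping_one() uses further down. The values 5 and 2 are
 * assumed example inputs.
 */
static inline void example_compose_context_entry(struct context_entry *ce,
						 void *pgd)
{
	context_clear_entry(ce);
	context_set_domain_id(ce, 5);			/* hi bits 8-23 */
	context_set_address_width(ce, 2);		/* hi bits 0-2: agaw */
	context_set_address_root(ce, virt_to_phys(pgd)); /* lo bits 12-63 */
	context_set_translation_type(ce, CONTEXT_TT_MULTI_LEVEL);
	context_set_fault_enable(ce);
	context_set_present(ce);			/* lo bit 0: entry is live */
}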
273
274/*
275 * 0: readable
276 * 1: writable
277 * 2-6: reserved
278 * 7: super page
279 * 8-10: available
280 * 11: snoop behavior
281 * 12-63: Host physical address
282 */
283struct dma_pte {
284 u64 val;
285};
286
287static inline void dma_clear_pte(struct dma_pte *pte)
288{
289 pte->val = 0;
290}
291
292static inline u64 dma_pte_addr(struct dma_pte *pte)
293{
294#ifdef CONFIG_64BIT
295 return pte->val & VTD_PAGE_MASK;
296#else
297 /* Must have a full atomic 64-bit read */
298 return __cmpxchg64(&pte->val, 0ULL, 0ULL) & VTD_PAGE_MASK;
299#endif
300}
301
302static inline bool dma_pte_present(struct dma_pte *pte)
303{
304 return (pte->val & 3) != 0;
305}
306
307static inline bool dma_pte_superpage(struct dma_pte *pte)
308{
309 return (pte->val & (1 << 7));
310}
311
312static inline int first_pte_in_page(struct dma_pte *pte)
313{
314 return !((unsigned long)pte & ~VTD_PAGE_MASK);
315}
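/*
 * Illustrative sketch, not part of the driver: a leaf PTE value under the
 * bit layout documented above, here a writable 2MiB superpage backed by
 * host physical address 0x40000000 (an assumed example address).
 */
static inline u64 example_superpage_pte_val(void)
{
	u64 val = 0x40000000ULL;		/* bits 12-63: host physical address */

	val |= DMA_PTE_READ | DMA_PTE_WRITE;	/* bits 0-1: read/write permission */
	val |= DMA_PTE_LARGE_PAGE;		/* bit 7: superpage */

	return val;	/* dma_pte_present() and dma_pte_superpage() both hold */
}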
316
317/*
318 * This domain is a static identity mapping domain.
319 * 1. This domain creates a static 1:1 mapping to all usable memory.
320 * 2. It maps to each iommu if successful.
321 * 3. Each iommu maps to this domain if successful.
322 */
323static struct dmar_domain *si_domain;
324static int hw_pass_through = 1;
325
326/* devices under the same p2p bridge are owned in one domain */
327#define DOMAIN_FLAG_P2P_MULTIPLE_DEVICES (1 << 0)
328
329/* domain represents a virtual machine; more than one device
330 * across iommus may be owned by one domain, e.g. a kvm guest.
331 */
332#define DOMAIN_FLAG_VIRTUAL_MACHINE (1 << 1)
333
334/* si_domain contains multiple devices */
335#define DOMAIN_FLAG_STATIC_IDENTITY (1 << 2)
336
337/* define the limit of IOMMUs supported in each domain */
338#ifdef CONFIG_X86
339# define IOMMU_UNITS_SUPPORTED MAX_IO_APICS
340#else
341# define IOMMU_UNITS_SUPPORTED 64
342#endif
343
344struct dmar_domain {
345 int id; /* domain id */
346 int nid; /* node id */
347 DECLARE_BITMAP(iommu_bmp, IOMMU_UNITS_SUPPORTED);
348 /* bitmap of iommus this domain uses*/
349
350 struct list_head devices; /* all devices' list */
351 struct iova_domain iovad; /* iova's that belong to this domain */
352
353 struct dma_pte *pgd; /* virtual address */
354 int gaw; /* max guest address width */
355
356 /* adjusted guest address width, 0 is level 2 30-bit */
357 int agaw;
358
359 int flags; /* flags to find out type of domain */
360
361 int iommu_coherency;/* indicate coherency of iommu access */
362 int iommu_snooping; /* indicate snooping control feature*/
363 int iommu_count; /* reference count of iommu */
364 int iommu_superpage;/* Level of superpages supported:
365 0 == 4KiB (no superpages), 1 == 2MiB,
366 2 == 1GiB, 3 == 512GiB, 4 == 1TiB */
367 spinlock_t iommu_lock; /* protect iommu set in domain */
368 u64 max_addr; /* maximum mapped address */
369};
370
371/* PCI domain-device relationship */
372struct device_domain_info {
373 struct list_head link; /* link to domain siblings */
374 struct list_head global; /* link to global list */
375 int segment; /* PCI domain */
376 u8 bus; /* PCI bus number */
377 u8 devfn; /* PCI devfn number */
378 struct pci_dev *dev; /* it's NULL for PCIe-to-PCI bridge */
379 struct intel_iommu *iommu; /* IOMMU used by this device */
380 struct dmar_domain *domain; /* pointer to domain */
381};
382
383struct dmar_rmrr_unit {
384 struct list_head list; /* list of rmrr units */
385 struct acpi_dmar_header *hdr; /* ACPI header */
386 u64 base_address; /* reserved base address*/
387 u64 end_address; /* reserved end address */
388 struct pci_dev **devices; /* target devices */
389 int devices_cnt; /* target device count */
390};
391
392struct dmar_atsr_unit {
393 struct list_head list; /* list of ATSR units */
394 struct acpi_dmar_header *hdr; /* ACPI header */
395 struct pci_dev **devices; /* target devices */
396 int devices_cnt; /* target device count */
397 u8 include_all:1; /* include all ports */
398};
399
400static LIST_HEAD(dmar_atsr_units);
401static LIST_HEAD(dmar_rmrr_units);
402
403#define for_each_rmrr_units(rmrr) \
404 list_for_each_entry(rmrr, &dmar_rmrr_units, list)
405
406static void flush_unmaps_timeout(unsigned long data);
407
408static DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0);
409
410#define HIGH_WATER_MARK 250
411struct deferred_flush_tables {
412 int next;
413 struct iova *iova[HIGH_WATER_MARK];
414 struct dmar_domain *domain[HIGH_WATER_MARK];
415};
416
417static struct deferred_flush_tables *deferred_flush;
418
419/* bitmap for indexing intel_iommus */
420static int g_num_of_iommus;
421
422static DEFINE_SPINLOCK(async_umap_flush_lock);
423static LIST_HEAD(unmaps_to_do);
424
425static int timer_on;
426static long list_size;
427
428static void domain_exit(struct dmar_domain *domain);
429static void domain_remove_dev_info(struct dmar_domain *domain);
430static void domain_remove_one_dev_info(struct dmar_domain *domain,
431 struct pci_dev *pdev);
432static void iommu_detach_dependent_devices(struct intel_iommu *iommu,
433 struct pci_dev *pdev);
434
435#ifdef CONFIG_INTEL_IOMMU_DEFAULT_ON
436int dmar_disabled = 0;
437#else
438int dmar_disabled = 1;
439#endif /*CONFIG_INTEL_IOMMU_DEFAULT_ON*/
440
441int intel_iommu_enabled = 0;
442EXPORT_SYMBOL_GPL(intel_iommu_enabled);
443
444static int dmar_map_gfx = 1;
445static int dmar_forcedac;
446static int intel_iommu_strict;
447static int intel_iommu_superpage = 1;
448
449int intel_iommu_gfx_mapped;
450EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);
451
452#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
453static DEFINE_SPINLOCK(device_domain_lock);
454static LIST_HEAD(device_domain_list);
455
456static struct iommu_ops intel_iommu_ops;
457
458static int __init intel_iommu_setup(char *str)
459{
460 if (!str)
461 return -EINVAL;
462 while (*str) {
463 if (!strncmp(str, "on", 2)) {
464 dmar_disabled = 0;
465 printk(KERN_INFO "Intel-IOMMU: enabled\n");
466 } else if (!strncmp(str, "off", 3)) {
467 dmar_disabled = 1;
468 printk(KERN_INFO "Intel-IOMMU: disabled\n");
469 } else if (!strncmp(str, "igfx_off", 8)) {
470 dmar_map_gfx = 0;
471 printk(KERN_INFO
472 "Intel-IOMMU: disable GFX device mapping\n");
473 } else if (!strncmp(str, "forcedac", 8)) {
474 printk(KERN_INFO
475 "Intel-IOMMU: Forcing DAC for PCI devices\n");
476 dmar_forcedac = 1;
477 } else if (!strncmp(str, "strict", 6)) {
478 printk(KERN_INFO
479 "Intel-IOMMU: disable batched IOTLB flush\n");
480 intel_iommu_strict = 1;
481 } else if (!strncmp(str, "sp_off", 6)) {
482 printk(KERN_INFO
483 "Intel-IOMMU: disable supported super page\n");
484 intel_iommu_superpage = 0;
485 }
486
487 str += strcspn(str, ",");
488 while (*str == ',')
489 str++;
490 }
491 return 0;
492}
493__setup("intel_iommu=", intel_iommu_setup);
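/*
 * Usage sketch (an assumed example, not taken from the kernel's parameter
 * documentation): the options parsed above are comma separated on the
 * kernel command line, e.g.
 *
 *	intel_iommu=on,strict,sp_off
 *
 * which enables DMA remapping, disables batched IOTLB flushing and turns
 * off superpage support.
 */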
494
495static struct kmem_cache *iommu_domain_cache;
496static struct kmem_cache *iommu_devinfo_cache;
497static struct kmem_cache *iommu_iova_cache;
498
499static inline void *alloc_pgtable_page(int node)
500{
501 struct page *page;
502 void *vaddr = NULL;
503
504 page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
505 if (page)
506 vaddr = page_address(page);
507 return vaddr;
508}
509
510static inline void free_pgtable_page(void *vaddr)
511{
512 free_page((unsigned long)vaddr);
513}
514
515static inline void *alloc_domain_mem(void)
516{
517 return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC);
518}
519
520static void free_domain_mem(void *vaddr)
521{
522 kmem_cache_free(iommu_domain_cache, vaddr);
523}
524
525static inline void * alloc_devinfo_mem(void)
526{
527 return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC);
528}
529
530static inline void free_devinfo_mem(void *vaddr)
531{
532 kmem_cache_free(iommu_devinfo_cache, vaddr);
533}
534
535struct iova *alloc_iova_mem(void)
536{
537 return kmem_cache_alloc(iommu_iova_cache, GFP_ATOMIC);
538}
539
540void free_iova_mem(struct iova *iova)
541{
542 kmem_cache_free(iommu_iova_cache, iova);
543}
544
545
546static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
547{
548 unsigned long sagaw;
549 int agaw = -1;
550
551 sagaw = cap_sagaw(iommu->cap);
552 for (agaw = width_to_agaw(max_gaw);
553 agaw >= 0; agaw--) {
554 if (test_bit(agaw, &sagaw))
555 break;
556 }
557
558 return agaw;
559}
560
561/*
562 * Calculate max SAGAW for each iommu.
563 */
564int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
565{
566 return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
567}
568
569/*
570 * calculate agaw for each iommu.
571 * "SAGAW" may be different across iommus, use a default agaw, and
572 * get a supported less agaw for iommus that don't support the default agaw.
573 */
574int iommu_calculate_agaw(struct intel_iommu *iommu)
575{
576 return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
577}
578
579/* This function only returns a single iommu in a domain */
580static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
581{
582 int iommu_id;
583
584 /* si_domain and vm domain should not get here. */
585 BUG_ON(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE);
586 BUG_ON(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY);
587
588 iommu_id = find_first_bit(domain->iommu_bmp, g_num_of_iommus);
589 if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
590 return NULL;
591
592 return g_iommus[iommu_id];
593}
594
595static void domain_update_iommu_coherency(struct dmar_domain *domain)
596{
597 int i;
598
599 i = find_first_bit(domain->iommu_bmp, g_num_of_iommus);
600
601 domain->iommu_coherency = i < g_num_of_iommus ? 1 : 0;
602
603 for_each_set_bit(i, domain->iommu_bmp, g_num_of_iommus) {
604 if (!ecap_coherent(g_iommus[i]->ecap)) {
605 domain->iommu_coherency = 0;
606 break;
607 }
608 }
609}
610
611static void domain_update_iommu_snooping(struct dmar_domain *domain)
612{
613 int i;
614
615 domain->iommu_snooping = 1;
616
617 for_each_set_bit(i, domain->iommu_bmp, g_num_of_iommus) {
618 if (!ecap_sc_support(g_iommus[i]->ecap)) {
619 domain->iommu_snooping = 0;
620 break;
621 }
622 }
623}
624
625static void domain_update_iommu_superpage(struct dmar_domain *domain)
626{
627 struct dmar_drhd_unit *drhd;
628 struct intel_iommu *iommu = NULL;
629 int mask = 0xf;
630
631 if (!intel_iommu_superpage) {
632 domain->iommu_superpage = 0;
633 return;
634 }
635
636 /* set iommu_superpage to the smallest common denominator */
637 for_each_active_iommu(iommu, drhd) {
638 mask &= cap_super_page_val(iommu->cap);
639 if (!mask) {
640 break;
641 }
642 }
643 domain->iommu_superpage = fls(mask);
644}
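/*
 * Illustrative sketch, not part of the driver: with two active IOMMUs where
 * one advertises 2MiB and 1GiB superpages (capability field 0x3) and the
 * other only 2MiB (0x1), the loop above intersects the masks:
 *
 *	mask = 0xf & 0x3 & 0x1 = 0x1
 *	domain->iommu_superpage = fls(0x1) = 1	(2MiB superpages only)
 */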
645
646/* Some capabilities may be different across iommus */
647static void domain_update_iommu_cap(struct dmar_domain *domain)
648{
649 domain_update_iommu_coherency(domain);
650 domain_update_iommu_snooping(domain);
651 domain_update_iommu_superpage(domain);
652}
653
654static struct intel_iommu *device_to_iommu(int segment, u8 bus, u8 devfn)
655{
656 struct dmar_drhd_unit *drhd = NULL;
b683b230
JL
657 struct intel_iommu *iommu;
658 struct pci_dev *dev;
c7151a8d
WH
659 int i;
660
b683b230 661 for_each_active_iommu(iommu, drhd) {
276dbf99
DW
662 if (segment != drhd->segment)
663 continue;
c7151a8d 664
b683b230
JL
665 for_each_active_dev_scope(drhd->devices,
666 drhd->devices_cnt, i, dev) {
667 if (dev->bus->number == bus && dev->devfn == devfn)
668 goto out;
669 if (dev->subordinate &&
670 dev->subordinate->number <= bus &&
671 dev->subordinate->busn_res.end >= bus)
672 goto out;
924b6231 673 }
c7151a8d
WH
674
675 if (drhd->include_all)
b683b230 676 goto out;
c7151a8d 677 }
b683b230
JL
678 iommu = NULL;
679out:
c7151a8d 680
b683b230 681 return iommu;
c7151a8d
WH
682}
683
5331fe6f
WH
684static void domain_flush_cache(struct dmar_domain *domain,
685 void *addr, int size)
686{
687 if (!domain->iommu_coherency)
688 clflush_cache_range(addr, size);
689}
690
ba395927
KA
691/* Gets context entry for a given bus and devfn */
692static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
693 u8 bus, u8 devfn)
694{
695 struct root_entry *root;
696 struct context_entry *context;
697 unsigned long phy_addr;
698 unsigned long flags;
699
700 spin_lock_irqsave(&iommu->lock, flags);
701 root = &iommu->root_entry[bus];
702 context = get_context_addr_from_root(root);
703 if (!context) {
4c923d47
SS
704 context = (struct context_entry *)
705 alloc_pgtable_page(iommu->node);
ba395927
KA
706 if (!context) {
707 spin_unlock_irqrestore(&iommu->lock, flags);
708 return NULL;
709 }
5b6985ce 710 __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
ba395927
KA
711 phy_addr = virt_to_phys((void *)context);
712 set_root_value(root, phy_addr);
713 set_root_present(root);
714 __iommu_flush_cache(iommu, root, sizeof(*root));
715 }
716 spin_unlock_irqrestore(&iommu->lock, flags);
717 return &context[devfn];
718}
719
720static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
721{
722 struct root_entry *root;
723 struct context_entry *context;
724 int ret;
725 unsigned long flags;
726
727 spin_lock_irqsave(&iommu->lock, flags);
728 root = &iommu->root_entry[bus];
729 context = get_context_addr_from_root(root);
730 if (!context) {
731 ret = 0;
732 goto out;
733 }
c07e7d21 734 ret = context_present(&context[devfn]);
ba395927
KA
735out:
736 spin_unlock_irqrestore(&iommu->lock, flags);
737 return ret;
738}
739
740static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
741{
742 struct root_entry *root;
743 struct context_entry *context;
744 unsigned long flags;
745
746 spin_lock_irqsave(&iommu->lock, flags);
747 root = &iommu->root_entry[bus];
748 context = get_context_addr_from_root(root);
749 if (context) {
c07e7d21 750 context_clear_entry(&context[devfn]);
ba395927
KA
751 __iommu_flush_cache(iommu, &context[devfn], \
752 sizeof(*context));
753 }
754 spin_unlock_irqrestore(&iommu->lock, flags);
755}
756
757static void free_context_table(struct intel_iommu *iommu)
758{
759 struct root_entry *root;
760 int i;
761 unsigned long flags;
762 struct context_entry *context;
763
764 spin_lock_irqsave(&iommu->lock, flags);
765 if (!iommu->root_entry) {
766 goto out;
767 }
768 for (i = 0; i < ROOT_ENTRY_NR; i++) {
769 root = &iommu->root_entry[i];
770 context = get_context_addr_from_root(root);
771 if (context)
772 free_pgtable_page(context);
773 }
774 free_pgtable_page(iommu->root_entry);
775 iommu->root_entry = NULL;
776out:
777 spin_unlock_irqrestore(&iommu->lock, flags);
778}
779
b026fd28 780static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
4399c8bf 781 unsigned long pfn, int target_level)
ba395927 782{
b026fd28 783 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
ba395927
KA
784 struct dma_pte *parent, *pte = NULL;
785 int level = agaw_to_level(domain->agaw);
4399c8bf 786 int offset;
ba395927
KA
787
788 BUG_ON(!domain->pgd);
f9423606
JS
789
790 if (addr_width < BITS_PER_LONG && pfn >> addr_width)
791 /* Address beyond IOMMU's addressing capabilities. */
792 return NULL;
793
ba395927
KA
794 parent = domain->pgd;
795
ba395927
KA
796 while (level > 0) {
797 void *tmp_page;
798
b026fd28 799 offset = pfn_level_offset(pfn, level);
ba395927 800 pte = &parent[offset];
4399c8bf 801 if (!target_level && (dma_pte_superpage(pte) || !dma_pte_present(pte)))
6dd9a7c7
YS
802 break;
803 if (level == target_level)
ba395927
KA
804 break;
805
19c239ce 806 if (!dma_pte_present(pte)) {
c85994e4
DW
807 uint64_t pteval;
808
4c923d47 809 tmp_page = alloc_pgtable_page(domain->nid);
ba395927 810
206a73c1 811 if (!tmp_page)
ba395927 812 return NULL;
206a73c1 813
c85994e4 814 domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
64de5af0 815 pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
c85994e4
DW
816 if (cmpxchg64(&pte->val, 0ULL, pteval)) {
817 /* Someone else set it while we were thinking; use theirs. */
818 free_pgtable_page(tmp_page);
819 } else {
820 dma_pte_addr(pte);
821 domain_flush_cache(domain, pte, sizeof(*pte));
822 }
ba395927 823 }
19c239ce 824 parent = phys_to_virt(dma_pte_addr(pte));
ba395927
KA
825 level--;
826 }
827
828 return pte;
829}
830
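/*
 * Illustrative walk, not part of the driver: for a 48-bit (agaw 2, 4-level)
 * domain, pfn_to_dma_pte() above starts at level 4 and consumes 9 bits of
 * the pfn per level:
 *
 *	level 4: (pfn >> 27) & 0x1ff
 *	level 3: (pfn >> 18) & 0x1ff
 *	level 2: (pfn >>  9) & 0x1ff
 *	level 1:  pfn        & 0x1ff	(the 4KiB leaf)
 *
 * target_level selects how deep the walk goes: 1 for a 4KiB leaf, 2 to stop
 * at a 2MiB superpage slot.
 */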
831
832/* return address's pte at a specific level */
833static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
834 unsigned long pfn,
6dd9a7c7 835 int level, int *large_page)
ba395927
KA
836{
837 struct dma_pte *parent, *pte = NULL;
838 int total = agaw_to_level(domain->agaw);
839 int offset;
840
841 parent = domain->pgd;
842 while (level <= total) {
90dcfb5e 843 offset = pfn_level_offset(pfn, total);
ba395927
KA
844 pte = &parent[offset];
845 if (level == total)
846 return pte;
847
6dd9a7c7
YS
848 if (!dma_pte_present(pte)) {
849 *large_page = total;
ba395927 850 break;
6dd9a7c7
YS
851 }
852
853 if (pte->val & DMA_PTE_LARGE_PAGE) {
854 *large_page = total;
855 return pte;
856 }
857
19c239ce 858 parent = phys_to_virt(dma_pte_addr(pte));
ba395927
KA
859 total--;
860 }
861 return NULL;
862}
863
864/* clear last level pte; a TLB flush should follow */
865static int dma_pte_clear_range(struct dmar_domain *domain,
866 unsigned long start_pfn,
867 unsigned long last_pfn)
ba395927 868{
04b18e65 869 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
6dd9a7c7 870 unsigned int large_page = 1;
310a5ab9 871 struct dma_pte *first_pte, *pte;
66eae846 872
04b18e65 873 BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
595badf5 874 BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
59c36286 875 BUG_ON(start_pfn > last_pfn);
ba395927 876
04b18e65 877 /* we don't need lock here; nobody else touches the iova range */
59c36286 878 do {
6dd9a7c7
YS
879 large_page = 1;
880 first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
310a5ab9 881 if (!pte) {
6dd9a7c7 882 start_pfn = align_to_level(start_pfn + 1, large_page + 1);
310a5ab9
DW
883 continue;
884 }
6dd9a7c7 885 do {
310a5ab9 886 dma_clear_pte(pte);
6dd9a7c7 887 start_pfn += lvl_to_nr_pages(large_page);
310a5ab9 888 pte++;
75e6bf96
DW
889 } while (start_pfn <= last_pfn && !first_pte_in_page(pte));
890
310a5ab9
DW
891 domain_flush_cache(domain, first_pte,
892 (void *)pte - (void *)first_pte);
59c36286
DW
893
894 } while (start_pfn && start_pfn <= last_pfn);
292827cb 895
5c645b35 896 return min_t(int, (large_page - 1) * 9, MAX_AGAW_PFN_WIDTH);
ba395927
KA
897}
898
3269ee0b
AW
899static void dma_pte_free_level(struct dmar_domain *domain, int level,
900 struct dma_pte *pte, unsigned long pfn,
901 unsigned long start_pfn, unsigned long last_pfn)
902{
903 pfn = max(start_pfn, pfn);
904 pte = &pte[pfn_level_offset(pfn, level)];
905
906 do {
907 unsigned long level_pfn;
908 struct dma_pte *level_pte;
909
910 if (!dma_pte_present(pte) || dma_pte_superpage(pte))
911 goto next;
912
913 level_pfn = pfn & level_mask(level - 1);
914 level_pte = phys_to_virt(dma_pte_addr(pte));
915
916 if (level > 2)
917 dma_pte_free_level(domain, level - 1, level_pte,
918 level_pfn, start_pfn, last_pfn);
919
920 /* If range covers entire pagetable, free it */
921 if (!(start_pfn > level_pfn ||
08336fd2 922 last_pfn < level_pfn + level_size(level) - 1)) {
3269ee0b
AW
923 dma_clear_pte(pte);
924 domain_flush_cache(domain, pte, sizeof(*pte));
925 free_pgtable_page(level_pte);
926 }
927next:
928 pfn += level_size(level);
929 } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
930}
931
ba395927
KA
932/* free page table pages. last level pte should already be cleared */
933static void dma_pte_free_pagetable(struct dmar_domain *domain,
d794dc9b
DW
934 unsigned long start_pfn,
935 unsigned long last_pfn)
ba395927 936{
6660c63a 937 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
ba395927 938
6660c63a
DW
939 BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
940 BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
59c36286 941 BUG_ON(start_pfn > last_pfn);
ba395927 942
f3a0a52f 943 /* We don't need lock here; nobody else touches the iova range */
3269ee0b
AW
944 dma_pte_free_level(domain, agaw_to_level(domain->agaw),
945 domain->pgd, 0, start_pfn, last_pfn);
6660c63a 946
ba395927 947 /* free pgd */
d794dc9b 948 if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
ba395927
KA
949 free_pgtable_page(domain->pgd);
950 domain->pgd = NULL;
951 }
952}
953
954/* iommu handling */
955static int iommu_alloc_root_entry(struct intel_iommu *iommu)
956{
957 struct root_entry *root;
958 unsigned long flags;
959
4c923d47 960 root = (struct root_entry *)alloc_pgtable_page(iommu->node);
ba395927
KA
961 if (!root)
962 return -ENOMEM;
963
5b6985ce 964 __iommu_flush_cache(iommu, root, ROOT_SIZE);
ba395927
KA
965
966 spin_lock_irqsave(&iommu->lock, flags);
967 iommu->root_entry = root;
968 spin_unlock_irqrestore(&iommu->lock, flags);
969
970 return 0;
971}
972
ba395927
KA
973static void iommu_set_root_entry(struct intel_iommu *iommu)
974{
975 void *addr;
c416daa9 976 u32 sts;
ba395927
KA
977 unsigned long flag;
978
979 addr = iommu->root_entry;
980
1f5b3c3f 981 raw_spin_lock_irqsave(&iommu->register_lock, flag);
ba395927
KA
982 dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));
983
c416daa9 984 writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);
ba395927
KA
985
986 /* Make sure hardware complete it */
987 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 988 readl, (sts & DMA_GSTS_RTPS), sts);
ba395927 989
1f5b3c3f 990 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
991}
992
993static void iommu_flush_write_buffer(struct intel_iommu *iommu)
994{
995 u32 val;
996 unsigned long flag;
997
9af88143 998 if (!rwbf_quirk && !cap_rwbf(iommu->cap))
ba395927 999 return;
ba395927 1000
1f5b3c3f 1001 raw_spin_lock_irqsave(&iommu->register_lock, flag);
462b60f6 1002 writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);
ba395927
KA
1003
1004 /* Make sure hardware complete it */
1005 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1006 readl, (!(val & DMA_GSTS_WBFS)), val);
ba395927 1007
1f5b3c3f 1008 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1009}
1010
1011/* return value determines if we need a write buffer flush */
4c25a2c1
DW
1012static void __iommu_flush_context(struct intel_iommu *iommu,
1013 u16 did, u16 source_id, u8 function_mask,
1014 u64 type)
ba395927
KA
1015{
1016 u64 val = 0;
1017 unsigned long flag;
1018
ba395927
KA
1019 switch (type) {
1020 case DMA_CCMD_GLOBAL_INVL:
1021 val = DMA_CCMD_GLOBAL_INVL;
1022 break;
1023 case DMA_CCMD_DOMAIN_INVL:
1024 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
1025 break;
1026 case DMA_CCMD_DEVICE_INVL:
1027 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
1028 | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
1029 break;
1030 default:
1031 BUG();
1032 }
1033 val |= DMA_CCMD_ICC;
1034
1f5b3c3f 1035 raw_spin_lock_irqsave(&iommu->register_lock, flag);
ba395927
KA
1036 dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
1037
1038 /* Make sure hardware complete it */
1039 IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
1040 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
1041
1f5b3c3f 1042 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1043}
1044
1045/* return value determines if we need a write buffer flush */
1046static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
1047 u64 addr, unsigned int size_order, u64 type)
ba395927
KA
1048{
1049 int tlb_offset = ecap_iotlb_offset(iommu->ecap);
1050 u64 val = 0, val_iva = 0;
1051 unsigned long flag;
1052
ba395927
KA
1053 switch (type) {
1054 case DMA_TLB_GLOBAL_FLUSH:
1055 /* global flush doesn't need set IVA_REG */
1056 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
1057 break;
1058 case DMA_TLB_DSI_FLUSH:
1059 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1060 break;
1061 case DMA_TLB_PSI_FLUSH:
1062 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1063 /* Note: always flush non-leaf currently */
1064 val_iva = size_order | addr;
1065 break;
1066 default:
1067 BUG();
1068 }
1069 /* Note: set drain read/write */
1070#if 0
1071 /*
1072 * This is probably to be super secure.. Looks like we can
1073 * ignore it without any impact.
1074 */
1075 if (cap_read_drain(iommu->cap))
1076 val |= DMA_TLB_READ_DRAIN;
1077#endif
1078 if (cap_write_drain(iommu->cap))
1079 val |= DMA_TLB_WRITE_DRAIN;
1080
1f5b3c3f 1081 raw_spin_lock_irqsave(&iommu->register_lock, flag);
ba395927
KA
1082 /* Note: Only uses first TLB reg currently */
1083 if (val_iva)
1084 dmar_writeq(iommu->reg + tlb_offset, val_iva);
1085 dmar_writeq(iommu->reg + tlb_offset + 8, val);
1086
1087 /* Make sure hardware complete it */
1088 IOMMU_WAIT_OP(iommu, tlb_offset + 8,
1089 dmar_readq, (!(val & DMA_TLB_IVT)), val);
1090
1f5b3c3f 1091 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1092
1093 /* check IOTLB invalidation granularity */
1094 if (DMA_TLB_IAIG(val) == 0)
1095 printk(KERN_ERR"IOMMU: flush IOTLB failed\n");
1096 if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
1097 pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
5b6985ce
FY
1098 (unsigned long long)DMA_TLB_IIRG(type),
1099 (unsigned long long)DMA_TLB_IAIG(val));
ba395927
KA
1100}
1101
93a23a72
YZ
1102static struct device_domain_info *iommu_support_dev_iotlb(
1103 struct dmar_domain *domain, int segment, u8 bus, u8 devfn)
1104{
1105 int found = 0;
1106 unsigned long flags;
1107 struct device_domain_info *info;
1108 struct intel_iommu *iommu = device_to_iommu(segment, bus, devfn);
1109
1110 if (!ecap_dev_iotlb_support(iommu->ecap))
1111 return NULL;
1112
1113 if (!iommu->qi)
1114 return NULL;
1115
1116 spin_lock_irqsave(&device_domain_lock, flags);
1117 list_for_each_entry(info, &domain->devices, link)
1118 if (info->bus == bus && info->devfn == devfn) {
1119 found = 1;
1120 break;
1121 }
1122 spin_unlock_irqrestore(&device_domain_lock, flags);
1123
1124 if (!found || !info->dev)
1125 return NULL;
1126
1127 if (!pci_find_ext_capability(info->dev, PCI_EXT_CAP_ID_ATS))
1128 return NULL;
1129
1130 if (!dmar_find_matched_atsr_unit(info->dev))
1131 return NULL;
1132
1133 info->iommu = iommu;
1134
1135 return info;
1136}
1137
1138static void iommu_enable_dev_iotlb(struct device_domain_info *info)
ba395927 1139{
93a23a72
YZ
1140 if (!info)
1141 return;
1142
1143 pci_enable_ats(info->dev, VTD_PAGE_SHIFT);
1144}
1145
1146static void iommu_disable_dev_iotlb(struct device_domain_info *info)
1147{
1148 if (!info->dev || !pci_ats_enabled(info->dev))
1149 return;
1150
1151 pci_disable_ats(info->dev);
1152}
1153
1154static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
1155 u64 addr, unsigned mask)
1156{
1157 u16 sid, qdep;
1158 unsigned long flags;
1159 struct device_domain_info *info;
1160
1161 spin_lock_irqsave(&device_domain_lock, flags);
1162 list_for_each_entry(info, &domain->devices, link) {
1163 if (!info->dev || !pci_ats_enabled(info->dev))
1164 continue;
1165
1166 sid = info->bus << 8 | info->devfn;
1167 qdep = pci_ats_queue_depth(info->dev);
1168 qi_flush_dev_iotlb(info->iommu, sid, qdep, addr, mask);
1169 }
1170 spin_unlock_irqrestore(&device_domain_lock, flags);
1171}
1172
1173static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
1174 unsigned long pfn, unsigned int pages, int map)
1175{
1176 unsigned int mask = ilog2(__roundup_pow_of_two(pages));
1177 uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
1178
1179 BUG_ON(pages == 0);
1180
ba395927 1181 /*
9dd2fe89
YZ
1182 * Fallback to domain selective flush if no PSI support or the size is
1183 * too big.
ba395927
KA
1184 * PSI requires page size to be 2 ^ x, and the base address is naturally
1185 * aligned to the size
1186 */
9dd2fe89
YZ
1187 if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap))
1188 iommu->flush.flush_iotlb(iommu, did, 0, 0,
1f0ef2aa 1189 DMA_TLB_DSI_FLUSH);
9dd2fe89
YZ
1190 else
1191 iommu->flush.flush_iotlb(iommu, did, addr, mask,
1192 DMA_TLB_PSI_FLUSH);
bf92df30
YZ
1193
1194 /*
82653633
NA
1195 * In caching mode, changes of pages from non-present to present require
1196 * flush. However, device IOTLB doesn't need to be flushed in this case.
bf92df30 1197 */
82653633 1198 if (!cap_caching_mode(iommu->cap) || !map)
93a23a72 1199 iommu_flush_dev_iotlb(iommu->domains[did], addr, mask);
1200}
1201
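/*
 * Illustrative sketch, not part of the driver: the PSI mask computed in
 * iommu_flush_iotlb_psi() above must cover the request with one naturally
 * aligned power-of-two block. Flushing 9 pages, for example, gives
 *
 *	mask = ilog2(__roundup_pow_of_two(9)) = ilog2(16) = 4
 *
 * so the hardware invalidates 2^4 = 16 pages, and the code falls back to a
 * domain-selective flush when the mask exceeds the capability's maximum.
 */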
1202static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
1203{
1204 u32 pmen;
1205 unsigned long flags;
1206
1f5b3c3f 1207 raw_spin_lock_irqsave(&iommu->register_lock, flags);
f8bab735 1208 pmen = readl(iommu->reg + DMAR_PMEN_REG);
1209 pmen &= ~DMA_PMEN_EPM;
1210 writel(pmen, iommu->reg + DMAR_PMEN_REG);
1211
1212 /* wait for the protected region status bit to clear */
1213 IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
1214 readl, !(pmen & DMA_PMEN_PRS), pmen);
1215
1f5b3c3f 1216 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
f8bab735 1217}
1218
ba395927
KA
1219static int iommu_enable_translation(struct intel_iommu *iommu)
1220{
1221 u32 sts;
1222 unsigned long flags;
1223
1f5b3c3f 1224 raw_spin_lock_irqsave(&iommu->register_lock, flags);
c416daa9
DW
1225 iommu->gcmd |= DMA_GCMD_TE;
1226 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
ba395927
KA
1227
1228 /* Make sure hardware complete it */
1229 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1230 readl, (sts & DMA_GSTS_TES), sts);
ba395927 1231
1f5b3c3f 1232 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
ba395927
KA
1233 return 0;
1234}
1235
1236static int iommu_disable_translation(struct intel_iommu *iommu)
1237{
1238 u32 sts;
1239 unsigned long flag;
1240
1f5b3c3f 1241 raw_spin_lock_irqsave(&iommu->register_lock, flag);
ba395927
KA
1242 iommu->gcmd &= ~DMA_GCMD_TE;
1243 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1244
1245 /* Make sure hardware complete it */
1246 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1247 readl, (!(sts & DMA_GSTS_TES)), sts);
ba395927 1248
1f5b3c3f 1249 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1250 return 0;
1251}
1252
3460a6d9 1253
ba395927
KA
1254static int iommu_init_domains(struct intel_iommu *iommu)
1255{
1256 unsigned long ndomains;
1257 unsigned long nlongs;
1258
1259 ndomains = cap_ndoms(iommu->cap);
852bdb04
JL
1260 pr_debug("IOMMU%d: Number of Domains supported <%ld>\n",
1261 iommu->seq_id, ndomains);
ba395927
KA
1262 nlongs = BITS_TO_LONGS(ndomains);
1263
94a91b50
DD
1264 spin_lock_init(&iommu->lock);
1265
ba395927
KA
1266 /* TBD: there might be 64K domains,
1267 * consider other allocation for future chip
1268 */
1269 iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1270 if (!iommu->domain_ids) {
852bdb04
JL
1271 pr_err("IOMMU%d: allocating domain id array failed\n",
1272 iommu->seq_id);
ba395927
KA
1273 return -ENOMEM;
1274 }
1275 iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *),
1276 GFP_KERNEL);
1277 if (!iommu->domains) {
852bdb04
JL
1278 pr_err("IOMMU%d: allocating domain array failed\n",
1279 iommu->seq_id);
1280 kfree(iommu->domain_ids);
1281 iommu->domain_ids = NULL;
ba395927
KA
1282 return -ENOMEM;
1283 }
1284
1285 /*
1286 * if Caching mode is set, then invalid translations are tagged
1287 * with domainid 0. Hence we need to pre-allocate it.
1288 */
1289 if (cap_caching_mode(iommu->cap))
1290 set_bit(0, iommu->domain_ids);
1291 return 0;
1292}
ba395927 1293
a868e6b7 1294static void free_dmar_iommu(struct intel_iommu *iommu)
ba395927
KA
1295{
1296 struct dmar_domain *domain;
5ced12af 1297 int i, count;
c7151a8d 1298 unsigned long flags;
ba395927 1299
94a91b50 1300 if ((iommu->domains) && (iommu->domain_ids)) {
a45946ab 1301 for_each_set_bit(i, iommu->domain_ids, cap_ndoms(iommu->cap)) {
a4eaa86c
JL
1302 /*
1303 * Domain id 0 is reserved for invalid translation
1304 * if hardware supports caching mode.
1305 */
1306 if (cap_caching_mode(iommu->cap) && i == 0)
1307 continue;
1308
94a91b50
DD
1309 domain = iommu->domains[i];
1310 clear_bit(i, iommu->domain_ids);
1311
1312 spin_lock_irqsave(&domain->iommu_lock, flags);
5ced12af
JL
1313 count = --domain->iommu_count;
1314 spin_unlock_irqrestore(&domain->iommu_lock, flags);
92d03cc8
JL
1315 if (count == 0)
1316 domain_exit(domain);
5e98c4b1 1317 }
ba395927
KA
1318 }
1319
1320 if (iommu->gcmd & DMA_GCMD_TE)
1321 iommu_disable_translation(iommu);
1322
ba395927
KA
1323 kfree(iommu->domains);
1324 kfree(iommu->domain_ids);
a868e6b7
JL
1325 iommu->domains = NULL;
1326 iommu->domain_ids = NULL;
ba395927 1327
d9630fe9
WH
1328 g_iommus[iommu->seq_id] = NULL;
1329
ba395927
KA
1330 /* free context mapping */
1331 free_context_table(iommu);
ba395927
KA
1332}
1333
92d03cc8 1334static struct dmar_domain *alloc_domain(bool vm)
ba395927 1335{
92d03cc8
JL
1336 /* domain id for virtual machine, it won't be set in context */
1337 static atomic_t vm_domid = ATOMIC_INIT(0);
ba395927 1338 struct dmar_domain *domain;
ba395927
KA
1339
1340 domain = alloc_domain_mem();
1341 if (!domain)
1342 return NULL;
1343
4c923d47 1344 domain->nid = -1;
92d03cc8 1345 domain->iommu_count = 0;
1b198bb0 1346 memset(domain->iommu_bmp, 0, sizeof(domain->iommu_bmp));
2c2e2c38 1347 domain->flags = 0;
92d03cc8
JL
1348 spin_lock_init(&domain->iommu_lock);
1349 INIT_LIST_HEAD(&domain->devices);
1350 if (vm) {
1351 domain->id = atomic_inc_return(&vm_domid);
1352 domain->flags = DOMAIN_FLAG_VIRTUAL_MACHINE;
1353 }
2c2e2c38
FY
1354
1355 return domain;
1356}
1357
1358static int iommu_attach_domain(struct dmar_domain *domain,
1359 struct intel_iommu *iommu)
1360{
1361 int num;
1362 unsigned long ndomains;
1363 unsigned long flags;
1364
ba395927
KA
1365 ndomains = cap_ndoms(iommu->cap);
1366
1367 spin_lock_irqsave(&iommu->lock, flags);
2c2e2c38 1368
ba395927
KA
1369 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1370 if (num >= ndomains) {
1371 spin_unlock_irqrestore(&iommu->lock, flags);
ba395927 1372 printk(KERN_ERR "IOMMU: no free domain ids\n");
2c2e2c38 1373 return -ENOMEM;
ba395927
KA
1374 }
1375
ba395927 1376 domain->id = num;
9ebd682e 1377 domain->iommu_count++;
2c2e2c38 1378 set_bit(num, iommu->domain_ids);
1b198bb0 1379 set_bit(iommu->seq_id, domain->iommu_bmp);
ba395927
KA
1380 iommu->domains[num] = domain;
1381 spin_unlock_irqrestore(&iommu->lock, flags);
1382
2c2e2c38 1383 return 0;
ba395927
KA
1384}
1385
2c2e2c38
FY
1386static void iommu_detach_domain(struct dmar_domain *domain,
1387 struct intel_iommu *iommu)
ba395927
KA
1388{
1389 unsigned long flags;
2c2e2c38 1390 int num, ndomains;
ba395927 1391
8c11e798 1392 spin_lock_irqsave(&iommu->lock, flags);
2c2e2c38 1393 ndomains = cap_ndoms(iommu->cap);
a45946ab 1394 for_each_set_bit(num, iommu->domain_ids, ndomains) {
2c2e2c38 1395 if (iommu->domains[num] == domain) {
92d03cc8
JL
1396 clear_bit(num, iommu->domain_ids);
1397 iommu->domains[num] = NULL;
2c2e2c38
FY
1398 break;
1399 }
2c2e2c38 1400 }
8c11e798 1401 spin_unlock_irqrestore(&iommu->lock, flags);
ba395927
KA
1402}
1403
1404static struct iova_domain reserved_iova_list;
8a443df4 1405static struct lock_class_key reserved_rbtree_key;
ba395927 1406
51a63e67 1407static int dmar_init_reserved_ranges(void)
ba395927
KA
1408{
1409 struct pci_dev *pdev = NULL;
1410 struct iova *iova;
1411 int i;
ba395927 1412
f661197e 1413 init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN);
ba395927 1414
8a443df4
MG
1415 lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1416 &reserved_rbtree_key);
1417
ba395927
KA
1418 /* IOAPIC ranges shouldn't be accessed by DMA */
1419 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1420 IOVA_PFN(IOAPIC_RANGE_END));
51a63e67 1421 if (!iova) {
ba395927 1422 printk(KERN_ERR "Reserve IOAPIC range failed\n");
51a63e67
JC
1423 return -ENODEV;
1424 }
ba395927
KA
1425
1426 /* Reserve all PCI MMIO to avoid peer-to-peer access */
1427 for_each_pci_dev(pdev) {
1428 struct resource *r;
1429
1430 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1431 r = &pdev->resource[i];
1432 if (!r->flags || !(r->flags & IORESOURCE_MEM))
1433 continue;
1a4a4551
DW
1434 iova = reserve_iova(&reserved_iova_list,
1435 IOVA_PFN(r->start),
1436 IOVA_PFN(r->end));
51a63e67 1437 if (!iova) {
ba395927 1438 printk(KERN_ERR "Reserve iova failed\n");
51a63e67
JC
1439 return -ENODEV;
1440 }
ba395927
KA
1441 }
1442 }
51a63e67 1443 return 0;
1444}
1445
1446static void domain_reserve_special_ranges(struct dmar_domain *domain)
1447{
1448 copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1449}
1450
1451static inline int guestwidth_to_adjustwidth(int gaw)
1452{
1453 int agaw;
1454 int r = (gaw - 12) % 9;
1455
1456 if (r == 0)
1457 agaw = gaw;
1458 else
1459 agaw = gaw + 9 - r;
1460 if (agaw > 64)
1461 agaw = 64;
1462 return agaw;
1463}
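/*
 * Illustrative sketch, not part of the driver: guestwidth_to_adjustwidth()
 * above rounds a guest address width up to the next 12 + 9*n value that the
 * page-table format can express, e.g.
 *
 *	gaw = 40: r = (40 - 12) % 9 = 1, so agaw = 40 + 9 - 1 = 48
 *	gaw = 48: r = (48 - 12) % 9 = 0, so agaw = 48
 */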
1464
1465static int domain_init(struct dmar_domain *domain, int guest_width)
1466{
1467 struct intel_iommu *iommu;
1468 int adjust_width, agaw;
1469 unsigned long sagaw;
1470
f661197e 1471 init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
ba395927
KA
1472 domain_reserve_special_ranges(domain);
1473
1474 /* calculate AGAW */
8c11e798 1475 iommu = domain_get_iommu(domain);
ba395927
KA
1476 if (guest_width > cap_mgaw(iommu->cap))
1477 guest_width = cap_mgaw(iommu->cap);
1478 domain->gaw = guest_width;
1479 adjust_width = guestwidth_to_adjustwidth(guest_width);
1480 agaw = width_to_agaw(adjust_width);
1481 sagaw = cap_sagaw(iommu->cap);
1482 if (!test_bit(agaw, &sagaw)) {
1483 /* hardware doesn't support it, choose a bigger one */
1484 pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw);
1485 agaw = find_next_bit(&sagaw, 5, agaw);
1486 if (agaw >= 5)
1487 return -ENODEV;
1488 }
1489 domain->agaw = agaw;
ba395927 1490
8e604097
WH
1491 if (ecap_coherent(iommu->ecap))
1492 domain->iommu_coherency = 1;
1493 else
1494 domain->iommu_coherency = 0;
1495
58c610bd
SY
1496 if (ecap_sc_support(iommu->ecap))
1497 domain->iommu_snooping = 1;
1498 else
1499 domain->iommu_snooping = 0;
1500
6dd9a7c7 1501 domain->iommu_superpage = fls(cap_super_page_val(iommu->cap));
4c923d47 1502 domain->nid = iommu->node;
c7151a8d 1503
ba395927 1504 /* always allocate the top pgd */
4c923d47 1505 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
ba395927
KA
1506 if (!domain->pgd)
1507 return -ENOMEM;
5b6985ce 1508 __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
ba395927
KA
1509 return 0;
1510}
1511
1512static void domain_exit(struct dmar_domain *domain)
1513{
2c2e2c38
FY
1514 struct dmar_drhd_unit *drhd;
1515 struct intel_iommu *iommu;
ba395927
KA
1516
1517 /* Domain 0 is reserved, so don't process it */
1518 if (!domain)
1519 return;
1520
7b668357
AW
1521 /* Flush any lazy unmaps that may reference this domain */
1522 if (!intel_iommu_strict)
1523 flush_unmaps_timeout(0);
1524
92d03cc8 1525 /* remove associated devices */
ba395927 1526 domain_remove_dev_info(domain);
92d03cc8 1527
ba395927
KA
1528 /* destroy iovas */
1529 put_iova_domain(&domain->iovad);
ba395927
KA
1530
1531 /* clear ptes */
595badf5 1532 dma_pte_clear_range(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
ba395927
KA
1533
1534 /* free page tables */
d794dc9b 1535 dma_pte_free_pagetable(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
ba395927 1536
92d03cc8 1537 /* clear attached or cached domains */
2c2e2c38 1538 for_each_active_iommu(iommu, drhd)
92d03cc8
JL
1539 if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE ||
1540 test_bit(iommu->seq_id, domain->iommu_bmp))
2c2e2c38
FY
1541 iommu_detach_domain(domain, iommu);
1542
ba395927
KA
1543 free_domain_mem(domain);
1544}
1545
4ed0d3e6
FY
1546static int domain_context_mapping_one(struct dmar_domain *domain, int segment,
1547 u8 bus, u8 devfn, int translation)
ba395927
KA
1548{
1549 struct context_entry *context;
ba395927 1550 unsigned long flags;
5331fe6f 1551 struct intel_iommu *iommu;
ea6606b0
WH
1552 struct dma_pte *pgd;
1553 unsigned long num;
1554 unsigned long ndomains;
1555 int id;
1556 int agaw;
93a23a72 1557 struct device_domain_info *info = NULL;
ba395927
KA
1558
1559 pr_debug("Set context mapping for %02x:%02x.%d\n",
1560 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
4ed0d3e6 1561
ba395927 1562 BUG_ON(!domain->pgd);
4ed0d3e6
FY
1563 BUG_ON(translation != CONTEXT_TT_PASS_THROUGH &&
1564 translation != CONTEXT_TT_MULTI_LEVEL);
5331fe6f 1565
276dbf99 1566 iommu = device_to_iommu(segment, bus, devfn);
5331fe6f
WH
1567 if (!iommu)
1568 return -ENODEV;
1569
ba395927
KA
1570 context = device_to_context_entry(iommu, bus, devfn);
1571 if (!context)
1572 return -ENOMEM;
1573 spin_lock_irqsave(&iommu->lock, flags);
c07e7d21 1574 if (context_present(context)) {
ba395927
KA
1575 spin_unlock_irqrestore(&iommu->lock, flags);
1576 return 0;
1577 }
1578
ea6606b0
WH
1579 id = domain->id;
1580 pgd = domain->pgd;
1581
2c2e2c38
FY
1582 if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE ||
1583 domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) {
ea6606b0
WH
1584 int found = 0;
1585
1586 /* find an available domain id for this device in iommu */
1587 ndomains = cap_ndoms(iommu->cap);
a45946ab 1588 for_each_set_bit(num, iommu->domain_ids, ndomains) {
ea6606b0
WH
1589 if (iommu->domains[num] == domain) {
1590 id = num;
1591 found = 1;
1592 break;
1593 }
ea6606b0
WH
1594 }
1595
1596 if (found == 0) {
1597 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1598 if (num >= ndomains) {
1599 spin_unlock_irqrestore(&iommu->lock, flags);
1600 printk(KERN_ERR "IOMMU: no free domain ids\n");
1601 return -EFAULT;
1602 }
1603
1604 set_bit(num, iommu->domain_ids);
1605 iommu->domains[num] = domain;
1606 id = num;
1607 }
1608
1609 /* Skip top levels of page tables for
1610 * an iommu which has less agaw than the default.
1611 * Unnecessary for PT mode.
1612 */
1613 if (translation != CONTEXT_TT_PASS_THROUGH) {
1614 for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) {
1615 pgd = phys_to_virt(dma_pte_addr(pgd));
1616 if (!dma_pte_present(pgd)) {
1617 spin_unlock_irqrestore(&iommu->lock, flags);
1618 return -ENOMEM;
1619 }
ea6606b0
WH
1620 }
1621 }
1622 }
1623
1624 context_set_domain_id(context, id);
4ed0d3e6 1625
93a23a72
YZ
1626 if (translation != CONTEXT_TT_PASS_THROUGH) {
1627 info = iommu_support_dev_iotlb(domain, segment, bus, devfn);
1628 translation = info ? CONTEXT_TT_DEV_IOTLB :
1629 CONTEXT_TT_MULTI_LEVEL;
1630 }
4ed0d3e6
FY
1631 /*
1632 * In pass through mode, AW must be programmed to indicate the largest
1633 * AGAW value supported by hardware. And ASR is ignored by hardware.
1634 */
93a23a72 1635 if (unlikely(translation == CONTEXT_TT_PASS_THROUGH))
4ed0d3e6 1636 context_set_address_width(context, iommu->msagaw);
93a23a72
YZ
1637 else {
1638 context_set_address_root(context, virt_to_phys(pgd));
1639 context_set_address_width(context, iommu->agaw);
1640 }
4ed0d3e6
FY
1641
1642 context_set_translation_type(context, translation);
c07e7d21
MM
1643 context_set_fault_enable(context);
1644 context_set_present(context);
5331fe6f 1645 domain_flush_cache(domain, context, sizeof(*context));
ba395927 1646
4c25a2c1
DW
1647 /*
1648 * It's a non-present to present mapping. If hardware doesn't cache
1649 * non-present entries we only need to flush the write-buffer. If it
1650 * _does_ cache non-present entries, then it does so in the special
1651 * domain #0, which we have to flush:
1652 */
1653 if (cap_caching_mode(iommu->cap)) {
1654 iommu->flush.flush_context(iommu, 0,
1655 (((u16)bus) << 8) | devfn,
1656 DMA_CCMD_MASK_NOBIT,
1657 DMA_CCMD_DEVICE_INVL);
82653633 1658 iommu->flush.flush_iotlb(iommu, domain->id, 0, 0, DMA_TLB_DSI_FLUSH);
4c25a2c1 1659 } else {
ba395927 1660 iommu_flush_write_buffer(iommu);
4c25a2c1 1661 }
93a23a72 1662 iommu_enable_dev_iotlb(info);
ba395927 1663 spin_unlock_irqrestore(&iommu->lock, flags);
c7151a8d
WH
1664
1665 spin_lock_irqsave(&domain->iommu_lock, flags);
1b198bb0 1666 if (!test_and_set_bit(iommu->seq_id, domain->iommu_bmp)) {
c7151a8d 1667 domain->iommu_count++;
4c923d47
SS
1668 if (domain->iommu_count == 1)
1669 domain->nid = iommu->node;
58c610bd 1670 domain_update_iommu_cap(domain);
c7151a8d
WH
1671 }
1672 spin_unlock_irqrestore(&domain->iommu_lock, flags);
ba395927
KA
1673 return 0;
1674}
1675
1676static int
4ed0d3e6
FY
1677domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev,
1678 int translation)
ba395927
KA
1679{
1680 int ret;
1681 struct pci_dev *tmp, *parent;
1682
276dbf99 1683 ret = domain_context_mapping_one(domain, pci_domain_nr(pdev->bus),
4ed0d3e6
FY
1684 pdev->bus->number, pdev->devfn,
1685 translation);
ba395927
KA
1686 if (ret)
1687 return ret;
1688
1689 /* dependent device mapping */
1690 tmp = pci_find_upstream_pcie_bridge(pdev);
1691 if (!tmp)
1692 return 0;
1693 /* Secondary interface's bus number and devfn 0 */
1694 parent = pdev->bus->self;
1695 while (parent != tmp) {
276dbf99
DW
1696 ret = domain_context_mapping_one(domain,
1697 pci_domain_nr(parent->bus),
1698 parent->bus->number,
4ed0d3e6 1699 parent->devfn, translation);
ba395927
KA
1700 if (ret)
1701 return ret;
1702 parent = parent->bus->self;
1703 }
45e829ea 1704 if (pci_is_pcie(tmp)) /* this is a PCIe-to-PCI bridge */
ba395927 1705 return domain_context_mapping_one(domain,
276dbf99 1706 pci_domain_nr(tmp->subordinate),
4ed0d3e6
FY
1707 tmp->subordinate->number, 0,
1708 translation);
ba395927
KA
1709 else /* this is a legacy PCI bridge */
1710 return domain_context_mapping_one(domain,
276dbf99
DW
1711 pci_domain_nr(tmp->bus),
1712 tmp->bus->number,
4ed0d3e6
FY
1713 tmp->devfn,
1714 translation);
ba395927
KA
1715}
1716
5331fe6f 1717static int domain_context_mapped(struct pci_dev *pdev)
ba395927
KA
1718{
1719 int ret;
1720 struct pci_dev *tmp, *parent;
5331fe6f
WH
1721 struct intel_iommu *iommu;
1722
276dbf99
DW
1723 iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
1724 pdev->devfn);
5331fe6f
WH
1725 if (!iommu)
1726 return -ENODEV;
ba395927 1727
276dbf99 1728 ret = device_context_mapped(iommu, pdev->bus->number, pdev->devfn);
ba395927
KA
1729 if (!ret)
1730 return ret;
1731 /* dependent device mapping */
1732 tmp = pci_find_upstream_pcie_bridge(pdev);
1733 if (!tmp)
1734 return ret;
1735 /* Secondary interface's bus number and devfn 0 */
1736 parent = pdev->bus->self;
1737 while (parent != tmp) {
8c11e798 1738 ret = device_context_mapped(iommu, parent->bus->number,
276dbf99 1739 parent->devfn);
ba395927
KA
1740 if (!ret)
1741 return ret;
1742 parent = parent->bus->self;
1743 }
5f4d91a1 1744 if (pci_is_pcie(tmp))
276dbf99
DW
1745 return device_context_mapped(iommu, tmp->subordinate->number,
1746 0);
ba395927 1747 else
276dbf99
DW
1748 return device_context_mapped(iommu, tmp->bus->number,
1749 tmp->devfn);
ba395927
KA
1750}
1751
1752/* Returns a number of VTD pages, but aligned to MM page size */
1753static inline unsigned long aligned_nrpages(unsigned long host_addr,
1754 size_t size)
1755{
1756 host_addr &= ~PAGE_MASK;
1757 return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
1758}
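/*
 * Illustrative sketch, not part of the driver: aligned_nrpages() above
 * counts whole VT-d pages including the partial pages at both ends. For a
 * 0x1e00-byte buffer starting at offset 0x200 into a page:
 *
 *	PAGE_ALIGN(0x200 + 0x1e00) >> VTD_PAGE_SHIFT = 0x2000 >> 12 = 2
 */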
1759
1760/* Return largest possible superpage level for a given mapping */
1761static inline int hardware_largepage_caps(struct dmar_domain *domain,
1762 unsigned long iov_pfn,
1763 unsigned long phy_pfn,
1764 unsigned long pages)
1765{
1766 int support, level = 1;
1767 unsigned long pfnmerge;
1768
1769 support = domain->iommu_superpage;
1770
1771 /* To use a large page, the virtual *and* physical addresses
1772 must be aligned to 2MiB/1GiB/etc. Lower bits set in either
1773 of them will mean we have to use smaller pages. So just
1774 merge them and check both at once. */
1775 pfnmerge = iov_pfn | phy_pfn;
1776
1777 while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
1778 pages >>= VTD_STRIDE_SHIFT;
1779 if (!pages)
1780 break;
1781 pfnmerge >>= VTD_STRIDE_SHIFT;
1782 level++;
1783 support--;
1784 }
1785 return level;
1786}
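/* Rough example (assuming the usual 9-bit stride per level): with
 * domain->iommu_superpage == 1, iov_pfn == phy_pfn == 0x200 and
 * pages == 0x400, the merged PFN is 2MiB-aligned, so the loop runs once
 * and returns level 2 (a 2MiB superpage); a misaligned PFN in either
 * argument, or fewer than 512 remaining pages, leaves the level at 1. */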
1787
9051aa02
DW
1788static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1789 struct scatterlist *sg, unsigned long phys_pfn,
1790 unsigned long nr_pages, int prot)
e1605495
DW
1791{
1792 struct dma_pte *first_pte = NULL, *pte = NULL;
9051aa02 1793 phys_addr_t uninitialized_var(pteval);
e1605495 1794 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
9051aa02 1795 unsigned long sg_res;
6dd9a7c7
YS
1796 unsigned int largepage_lvl = 0;
1797 unsigned long lvl_pages = 0;
e1605495
DW
1798
1799 BUG_ON(addr_width < BITS_PER_LONG && (iov_pfn + nr_pages - 1) >> addr_width);
1800
1801 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
1802 return -EINVAL;
1803
1804 prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
1805
9051aa02
DW
1806 if (sg)
1807 sg_res = 0;
1808 else {
1809 sg_res = nr_pages + 1;
1810 pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
1811 }
1812
6dd9a7c7 1813 while (nr_pages > 0) {
c85994e4
DW
1814 uint64_t tmp;
1815
e1605495 1816 if (!sg_res) {
f532959b 1817 sg_res = aligned_nrpages(sg->offset, sg->length);
e1605495
DW
1818 sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset;
1819 sg->dma_length = sg->length;
1820 pteval = page_to_phys(sg_page(sg)) | prot;
6dd9a7c7 1821 phys_pfn = pteval >> VTD_PAGE_SHIFT;
e1605495 1822 }
6dd9a7c7 1823
e1605495 1824 if (!pte) {
6dd9a7c7
YS
1825 largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res);
1826
1827 first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, largepage_lvl);
e1605495
DW
1828 if (!pte)
1829 return -ENOMEM;
6dd9a7c7 1830 /* It is a large page */
6491d4d0 1831 if (largepage_lvl > 1) {
6dd9a7c7 1832 pteval |= DMA_PTE_LARGE_PAGE;
6491d4d0
WD
1833 /* Ensure that old small page tables are removed to make room
1834 for superpage, if they exist. */
1835 dma_pte_clear_range(domain, iov_pfn,
1836 iov_pfn + lvl_to_nr_pages(largepage_lvl) - 1);
1837 dma_pte_free_pagetable(domain, iov_pfn,
1838 iov_pfn + lvl_to_nr_pages(largepage_lvl) - 1);
1839 } else {
6dd9a7c7 1840 pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
6491d4d0 1841 }
6dd9a7c7 1842
e1605495
DW
1843 }
 1844 /* We don't need a lock here; nobody else
1845 * touches the iova range
1846 */
7766a3fb 1847 tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
c85994e4 1848 if (tmp) {
1bf20f0d 1849 static int dumps = 5;
c85994e4
DW
1850 printk(KERN_CRIT "ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
1851 iov_pfn, tmp, (unsigned long long)pteval);
1bf20f0d
DW
1852 if (dumps) {
1853 dumps--;
1854 debug_dma_dump_mappings(NULL);
1855 }
1856 WARN_ON(1);
1857 }
6dd9a7c7
YS
1858
1859 lvl_pages = lvl_to_nr_pages(largepage_lvl);
1860
1861 BUG_ON(nr_pages < lvl_pages);
1862 BUG_ON(sg_res < lvl_pages);
1863
1864 nr_pages -= lvl_pages;
1865 iov_pfn += lvl_pages;
1866 phys_pfn += lvl_pages;
1867 pteval += lvl_pages * VTD_PAGE_SIZE;
1868 sg_res -= lvl_pages;
1869
1870 /* If the next PTE would be the first in a new page, then we
1871 need to flush the cache on the entries we've just written.
1872 And then we'll need to recalculate 'pte', so clear it and
1873 let it get set again in the if (!pte) block above.
1874
1875 If we're done (!nr_pages) we need to flush the cache too.
1876
1877 Also if we've been setting superpages, we may need to
1878 recalculate 'pte' and switch back to smaller pages for the
1879 end of the mapping, if the trailing size is not enough to
1880 use another superpage (i.e. sg_res < lvl_pages). */
e1605495 1881 pte++;
6dd9a7c7
YS
1882 if (!nr_pages || first_pte_in_page(pte) ||
1883 (largepage_lvl > 1 && sg_res < lvl_pages)) {
e1605495
DW
1884 domain_flush_cache(domain, first_pte,
1885 (void *)pte - (void *)first_pte);
1886 pte = NULL;
1887 }
6dd9a7c7
YS
1888
1889 if (!sg_res && nr_pages)
e1605495
DW
1890 sg = sg_next(sg);
1891 }
1892 return 0;
1893}
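/* Summary of the loop above: PTEs are set with cmpxchg64_local() so an
 * already-present entry is warned about instead of being overwritten, the
 * CPU cache is flushed once per filled PTE page (or at the very end, or
 * when a superpage run ends early), and largepage_lvl is re-evaluated each
 * time 'pte' is reset, so one mapping can mix superpages with trailing
 * 4KiB pages. */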
1894
9051aa02
DW
1895static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1896 struct scatterlist *sg, unsigned long nr_pages,
1897 int prot)
ba395927 1898{
9051aa02
DW
1899 return __domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot);
1900}
6f6a00e4 1901
9051aa02
DW
1902static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1903 unsigned long phys_pfn, unsigned long nr_pages,
1904 int prot)
1905{
1906 return __domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot);
ba395927
KA
1907}
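/* Both wrappers funnel into __domain_mapping(): domain_sg_mapping() walks
 * a scatterlist (the physical PFN comes from each sg entry), while
 * domain_pfn_mapping() maps one physically contiguous run, as used by the
 * identity/RMRR setup in iommu_domain_identity_map() below. */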
1908
c7151a8d 1909static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn)
ba395927 1910{
c7151a8d
WH
1911 if (!iommu)
1912 return;
8c11e798
WH
1913
1914 clear_context_table(iommu, bus, devfn);
1915 iommu->flush.flush_context(iommu, 0, 0, 0,
4c25a2c1 1916 DMA_CCMD_GLOBAL_INVL);
1f0ef2aa 1917 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
ba395927
KA
1918}
1919
109b9b04
DW
1920static inline void unlink_domain_info(struct device_domain_info *info)
1921{
1922 assert_spin_locked(&device_domain_lock);
1923 list_del(&info->link);
1924 list_del(&info->global);
1925 if (info->dev)
1926 info->dev->dev.archdata.iommu = NULL;
1927}
1928
ba395927
KA
1929static void domain_remove_dev_info(struct dmar_domain *domain)
1930{
1931 struct device_domain_info *info;
92d03cc8 1932 unsigned long flags, flags2;
c7151a8d 1933 struct intel_iommu *iommu;
ba395927
KA
1934
1935 spin_lock_irqsave(&device_domain_lock, flags);
1936 while (!list_empty(&domain->devices)) {
1937 info = list_entry(domain->devices.next,
1938 struct device_domain_info, link);
109b9b04 1939 unlink_domain_info(info);
ba395927
KA
1940 spin_unlock_irqrestore(&device_domain_lock, flags);
1941
93a23a72 1942 iommu_disable_dev_iotlb(info);
276dbf99 1943 iommu = device_to_iommu(info->segment, info->bus, info->devfn);
c7151a8d 1944 iommu_detach_dev(iommu, info->bus, info->devfn);
ba395927 1945
92d03cc8
JL
1946 if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) {
1947 iommu_detach_dependent_devices(iommu, info->dev);
1948 /* clear this iommu in iommu_bmp, update iommu count
1949 * and capabilities
1950 */
1951 spin_lock_irqsave(&domain->iommu_lock, flags2);
1952 if (test_and_clear_bit(iommu->seq_id,
1953 domain->iommu_bmp)) {
1954 domain->iommu_count--;
1955 domain_update_iommu_cap(domain);
1956 }
1957 spin_unlock_irqrestore(&domain->iommu_lock, flags2);
1958 }
1959
1960 free_devinfo_mem(info);
ba395927
KA
1961 spin_lock_irqsave(&device_domain_lock, flags);
1962 }
1963 spin_unlock_irqrestore(&device_domain_lock, flags);
1964}
1965
1966/*
1967 * find_domain
358dd8ac 1968 * Note: we use struct pci_dev->dev.archdata.iommu to store the info
ba395927 1969 */
38717946 1970static struct dmar_domain *
ba395927
KA
1971find_domain(struct pci_dev *pdev)
1972{
1973 struct device_domain_info *info;
1974
1975 /* No lock here, assumes no domain exit in normal case */
358dd8ac 1976 info = pdev->dev.archdata.iommu;
ba395927
KA
1977 if (info)
1978 return info->domain;
1979 return NULL;
1980}
1981
745f2586
JL
1982static inline struct dmar_domain *
1983dmar_search_domain_by_dev_info(int segment, int bus, int devfn)
1984{
1985 struct device_domain_info *info;
1986
1987 list_for_each_entry(info, &device_domain_list, global)
1988 if (info->segment == segment && info->bus == bus &&
1989 info->devfn == devfn)
1990 return info->domain;
1991
1992 return NULL;
1993}
1994
1995static int dmar_insert_dev_info(int segment, int bus, int devfn,
1996 struct pci_dev *dev, struct dmar_domain **domp)
1997{
1998 struct dmar_domain *found, *domain = *domp;
1999 struct device_domain_info *info;
2000 unsigned long flags;
2001
2002 info = alloc_devinfo_mem();
2003 if (!info)
2004 return -ENOMEM;
2005
2006 info->segment = segment;
2007 info->bus = bus;
2008 info->devfn = devfn;
2009 info->dev = dev;
2010 info->domain = domain;
2011 if (!dev)
2012 domain->flags |= DOMAIN_FLAG_P2P_MULTIPLE_DEVICES;
2013
2014 spin_lock_irqsave(&device_domain_lock, flags);
2015 if (dev)
2016 found = find_domain(dev);
2017 else
2018 found = dmar_search_domain_by_dev_info(segment, bus, devfn);
2019 if (found) {
2020 spin_unlock_irqrestore(&device_domain_lock, flags);
2021 free_devinfo_mem(info);
2022 if (found != domain) {
2023 domain_exit(domain);
2024 *domp = found;
2025 }
2026 } else {
2027 list_add(&info->link, &domain->devices);
2028 list_add(&info->global, &device_domain_list);
2029 if (dev)
2030 dev->dev.archdata.iommu = info;
2031 spin_unlock_irqrestore(&device_domain_lock, flags);
2032 }
2033
2034 return 0;
2035}
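/* Race handling above: the device_domain_info is allocated before taking
 * device_domain_lock; if another thread registered the device (or the
 * bridge's bus/devfn) first, the new info is freed and, when the existing
 * domain differs, the caller's freshly built domain is torn down with
 * domain_exit() and *domp is redirected to the winner. */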
2036
ba395927
KA
2037/* domain is initialized */
2038static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
2039{
e85bb5d4 2040 struct dmar_domain *domain, *free = NULL;
ba395927
KA
2041 struct intel_iommu *iommu;
2042 struct dmar_drhd_unit *drhd;
ba395927
KA
2043 struct pci_dev *dev_tmp;
2044 unsigned long flags;
2045 int bus = 0, devfn = 0;
276dbf99 2046 int segment;
ba395927
KA
2047
2048 domain = find_domain(pdev);
2049 if (domain)
2050 return domain;
2051
276dbf99
DW
2052 segment = pci_domain_nr(pdev->bus);
2053
ba395927
KA
2054 dev_tmp = pci_find_upstream_pcie_bridge(pdev);
2055 if (dev_tmp) {
5f4d91a1 2056 if (pci_is_pcie(dev_tmp)) {
ba395927
KA
2057 bus = dev_tmp->subordinate->number;
2058 devfn = 0;
2059 } else {
2060 bus = dev_tmp->bus->number;
2061 devfn = dev_tmp->devfn;
2062 }
2063 spin_lock_irqsave(&device_domain_lock, flags);
745f2586 2064 domain = dmar_search_domain_by_dev_info(segment, bus, devfn);
ba395927
KA
2065 spin_unlock_irqrestore(&device_domain_lock, flags);
 2066 /* pcie-pci bridge already has a domain, use it */
745f2586 2067 if (domain)
ba395927 2068 goto found_domain;
ba395927
KA
2069 }
2070
ba395927
KA
2071 drhd = dmar_find_matched_drhd_unit(pdev);
2072 if (!drhd) {
2073 printk(KERN_ERR "IOMMU: can't find DMAR for device %s\n",
2074 pci_name(pdev));
2075 return NULL;
2076 }
2077 iommu = drhd->iommu;
2078
745f2586 2079 /* Allocate and initialize a new domain for the device */
92d03cc8 2080 domain = alloc_domain(false);
745f2586
JL
2081 if (!domain)
2082 goto error;
2083 if (iommu_attach_domain(domain, iommu)) {
2fe9723d 2084 free_domain_mem(domain);
ba395927 2085 goto error;
2c2e2c38 2086 }
e85bb5d4
JL
2087 free = domain;
2088 if (domain_init(domain, gaw))
ba395927 2089 goto error;
ba395927
KA
2090
2091 /* register pcie-to-pci device */
2092 if (dev_tmp) {
e85bb5d4 2093 if (dmar_insert_dev_info(segment, bus, devfn, NULL, &domain))
ba395927 2094 goto error;
e85bb5d4
JL
2095 else
2096 free = NULL;
ba395927
KA
2097 }
2098
2099found_domain:
745f2586
JL
2100 if (dmar_insert_dev_info(segment, pdev->bus->number, pdev->devfn,
2101 pdev, &domain) == 0)
ba395927 2102 return domain;
ba395927 2103error:
e85bb5d4
JL
2104 if (free)
2105 domain_exit(free);
ba395927
KA
2106 /* recheck it here, maybe others set it */
2107 return find_domain(pdev);
2108}
2109
2c2e2c38 2110static int iommu_identity_mapping;
e0fc7e0b
DW
2111#define IDENTMAP_ALL 1
2112#define IDENTMAP_GFX 2
2113#define IDENTMAP_AZALIA 4
2c2e2c38 2114
b213203e
DW
2115static int iommu_domain_identity_map(struct dmar_domain *domain,
2116 unsigned long long start,
2117 unsigned long long end)
ba395927 2118{
c5395d5c
DW
2119 unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
2120 unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;
2121
2122 if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
2123 dma_to_mm_pfn(last_vpfn))) {
ba395927 2124 printk(KERN_ERR "IOMMU: reserve iova failed\n");
b213203e 2125 return -ENOMEM;
ba395927
KA
2126 }
2127
c5395d5c
DW
2128 pr_debug("Mapping reserved region %llx-%llx for domain %d\n",
2129 start, end, domain->id);
ba395927
KA
2130 /*
2131 * RMRR range might have overlap with physical memory range,
2132 * clear it first
2133 */
c5395d5c 2134 dma_pte_clear_range(domain, first_vpfn, last_vpfn);
ba395927 2135
c5395d5c
DW
2136 return domain_pfn_mapping(domain, first_vpfn, first_vpfn,
2137 last_vpfn - first_vpfn + 1,
61df7443 2138 DMA_PTE_READ|DMA_PTE_WRITE);
b213203e
DW
2139}
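/* Example (assuming 4KiB pages): an identity map of 0 .. 16MiB-1, as used
 * for the ISA/LPC floppy workaround further down, reserves vPFNs
 * 0x0-0xfff in the domain's iova allocator and installs 4096 one-to-one
 * PTEs (fewer if superpages can be used). */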
2140
2141static int iommu_prepare_identity_map(struct pci_dev *pdev,
2142 unsigned long long start,
2143 unsigned long long end)
2144{
2145 struct dmar_domain *domain;
2146 int ret;
2147
c7ab48d2 2148 domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
b213203e
DW
2149 if (!domain)
2150 return -ENOMEM;
2151
19943b0e
DW
2152 /* For _hardware_ passthrough, don't bother. But for software
2153 passthrough, we do it anyway -- it may indicate a memory
 2154 range which is reserved in E820 and so didn't get set
2155 up to start with in si_domain */
2156 if (domain == si_domain && hw_pass_through) {
2157 printk("Ignoring identity map for HW passthrough device %s [0x%Lx - 0x%Lx]\n",
2158 pci_name(pdev), start, end);
2159 return 0;
2160 }
2161
2162 printk(KERN_INFO
2163 "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
2164 pci_name(pdev), start, end);
2ff729f5 2165
5595b528
DW
2166 if (end < start) {
2167 WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n"
2168 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2169 dmi_get_system_info(DMI_BIOS_VENDOR),
2170 dmi_get_system_info(DMI_BIOS_VERSION),
2171 dmi_get_system_info(DMI_PRODUCT_VERSION));
2172 ret = -EIO;
2173 goto error;
2174 }
2175
2ff729f5
DW
2176 if (end >> agaw_to_width(domain->agaw)) {
2177 WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n"
2178 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2179 agaw_to_width(domain->agaw),
2180 dmi_get_system_info(DMI_BIOS_VENDOR),
2181 dmi_get_system_info(DMI_BIOS_VERSION),
2182 dmi_get_system_info(DMI_PRODUCT_VERSION));
2183 ret = -EIO;
2184 goto error;
2185 }
19943b0e 2186
b213203e 2187 ret = iommu_domain_identity_map(domain, start, end);
ba395927
KA
2188 if (ret)
2189 goto error;
2190
2191 /* context entry init */
4ed0d3e6 2192 ret = domain_context_mapping(domain, pdev, CONTEXT_TT_MULTI_LEVEL);
b213203e
DW
2193 if (ret)
2194 goto error;
2195
2196 return 0;
2197
2198 error:
ba395927
KA
2199 domain_exit(domain);
2200 return ret;
ba395927
KA
2201}
2202
2203static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
2204 struct pci_dev *pdev)
2205{
358dd8ac 2206 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
ba395927
KA
2207 return 0;
2208 return iommu_prepare_identity_map(pdev, rmrr->base_address,
70e535d1 2209 rmrr->end_address);
ba395927
KA
2210}
2211
d3f13810 2212#ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA
49a0429e
KA
2213static inline void iommu_prepare_isa(void)
2214{
2215 struct pci_dev *pdev;
2216 int ret;
2217
2218 pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
2219 if (!pdev)
2220 return;
2221
c7ab48d2 2222 printk(KERN_INFO "IOMMU: Prepare 0-16MiB unity mapping for LPC\n");
70e535d1 2223 ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024 - 1);
49a0429e
KA
2224
2225 if (ret)
c7ab48d2
DW
2226 printk(KERN_ERR "IOMMU: Failed to create 0-16MiB identity map; "
2227 "floppy might not work\n");
49a0429e
KA
2228
2229}
2230#else
2231static inline void iommu_prepare_isa(void)
2232{
2233 return;
2234}
d3f13810 2235#endif /* CONFIG_INTEL_IOMMU_FLOPPY_WA */
49a0429e 2236
2c2e2c38 2237static int md_domain_init(struct dmar_domain *domain, int guest_width);
c7ab48d2 2238
071e1374 2239static int __init si_domain_init(int hw)
2c2e2c38
FY
2240{
2241 struct dmar_drhd_unit *drhd;
2242 struct intel_iommu *iommu;
c7ab48d2 2243 int nid, ret = 0;
2c2e2c38 2244
92d03cc8 2245 si_domain = alloc_domain(false);
2c2e2c38
FY
2246 if (!si_domain)
2247 return -EFAULT;
2248
92d03cc8
JL
2249 si_domain->flags = DOMAIN_FLAG_STATIC_IDENTITY;
2250
2c2e2c38
FY
2251 for_each_active_iommu(iommu, drhd) {
2252 ret = iommu_attach_domain(si_domain, iommu);
2253 if (ret) {
2254 domain_exit(si_domain);
2255 return -EFAULT;
2256 }
2257 }
2258
2259 if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2260 domain_exit(si_domain);
2261 return -EFAULT;
2262 }
2263
9544c003
JL
2264 pr_debug("IOMMU: identity mapping domain is domain %d\n",
2265 si_domain->id);
2c2e2c38 2266
19943b0e
DW
2267 if (hw)
2268 return 0;
2269
c7ab48d2 2270 for_each_online_node(nid) {
5dfe8660
TH
2271 unsigned long start_pfn, end_pfn;
2272 int i;
2273
2274 for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
2275 ret = iommu_domain_identity_map(si_domain,
2276 PFN_PHYS(start_pfn), PFN_PHYS(end_pfn));
2277 if (ret)
2278 return ret;
2279 }
c7ab48d2
DW
2280 }
2281
2c2e2c38
FY
2282 return 0;
2283}
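/* The static identity (si) domain is attached to every active IOMMU and,
 * unless hardware pass-through is in use (hw != 0), is pre-populated with
 * 1:1 mappings for every memblock range of every online node, so devices
 * left in it can reach all of RAM without per-buffer mappings. */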
2284
2c2e2c38
FY
2285static int identity_mapping(struct pci_dev *pdev)
2286{
2287 struct device_domain_info *info;
2288
2289 if (likely(!iommu_identity_mapping))
2290 return 0;
2291
cb452a40
MT
2292 info = pdev->dev.archdata.iommu;
2293 if (info && info != DUMMY_DEVICE_DOMAIN_INFO)
2294 return (info->domain == si_domain);
2c2e2c38 2295
2c2e2c38
FY
2296 return 0;
2297}
2298
2299static int domain_add_dev_info(struct dmar_domain *domain,
5fe60f4e
DW
2300 struct pci_dev *pdev,
2301 int translation)
2c2e2c38
FY
2302{
2303 struct device_domain_info *info;
2304 unsigned long flags;
5fe60f4e 2305 int ret;
2c2e2c38
FY
2306
2307 info = alloc_devinfo_mem();
2308 if (!info)
2309 return -ENOMEM;
2310
2311 info->segment = pci_domain_nr(pdev->bus);
2312 info->bus = pdev->bus->number;
2313 info->devfn = pdev->devfn;
2314 info->dev = pdev;
2315 info->domain = domain;
2316
2317 spin_lock_irqsave(&device_domain_lock, flags);
2318 list_add(&info->link, &domain->devices);
2319 list_add(&info->global, &device_domain_list);
2320 pdev->dev.archdata.iommu = info;
2321 spin_unlock_irqrestore(&device_domain_lock, flags);
2322
e2ad23d0
DW
2323 ret = domain_context_mapping(domain, pdev, translation);
2324 if (ret) {
2325 spin_lock_irqsave(&device_domain_lock, flags);
109b9b04 2326 unlink_domain_info(info);
e2ad23d0
DW
2327 spin_unlock_irqrestore(&device_domain_lock, flags);
2328 free_devinfo_mem(info);
2329 return ret;
2330 }
2331
2c2e2c38
FY
2332 return 0;
2333}
2334
ea2447f7
TM
2335static bool device_has_rmrr(struct pci_dev *dev)
2336{
2337 struct dmar_rmrr_unit *rmrr;
b683b230 2338 struct pci_dev *tmp;
ea2447f7
TM
2339 int i;
2340
2341 for_each_rmrr_units(rmrr) {
b683b230
JL
2342 /*
2343 * Return TRUE if this RMRR contains the device that
2344 * is passed in.
2345 */
2346 for_each_active_dev_scope(rmrr->devices,
2347 rmrr->devices_cnt, i, tmp)
2348 if (tmp == dev) {
ea2447f7 2349 return true;
b683b230 2350 }
ea2447f7
TM
2351 }
2352 return false;
2353}
2354
6941af28
DW
2355static int iommu_should_identity_map(struct pci_dev *pdev, int startup)
2356{
ea2447f7
TM
2357
2358 /*
2359 * We want to prevent any device associated with an RMRR from
2360 * getting placed into the SI Domain. This is done because
2361 * problems exist when devices are moved in and out of domains
2362 * and their respective RMRR info is lost. We exempt USB devices
2363 * from this process due to their usage of RMRRs that are known
2364 * to not be needed after BIOS hand-off to OS.
2365 */
2366 if (device_has_rmrr(pdev) &&
2367 (pdev->class >> 8) != PCI_CLASS_SERIAL_USB)
2368 return 0;
2369
e0fc7e0b
DW
2370 if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
2371 return 1;
2372
2373 if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
2374 return 1;
2375
2376 if (!(iommu_identity_mapping & IDENTMAP_ALL))
2377 return 0;
6941af28 2378
3dfc813d
DW
2379 /*
2380 * We want to start off with all devices in the 1:1 domain, and
2381 * take them out later if we find they can't access all of memory.
2382 *
2383 * However, we can't do this for PCI devices behind bridges,
2384 * because all PCI devices behind the same bridge will end up
2385 * with the same source-id on their transactions.
2386 *
2387 * Practically speaking, we can't change things around for these
2388 * devices at run-time, because we can't be sure there'll be no
2389 * DMA transactions in flight for any of their siblings.
2390 *
2391 * So PCI devices (unless they're on the root bus) as well as
2392 * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of
2393 * the 1:1 domain, just in _case_ one of their siblings turns out
2394 * not to be able to map all of memory.
2395 */
5f4d91a1 2396 if (!pci_is_pcie(pdev)) {
3dfc813d
DW
2397 if (!pci_is_root_bus(pdev->bus))
2398 return 0;
2399 if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI)
2400 return 0;
62f87c0e 2401 } else if (pci_pcie_type(pdev) == PCI_EXP_TYPE_PCI_BRIDGE)
3dfc813d
DW
2402 return 0;
2403
2404 /*
2405 * At boot time, we don't yet know if devices will be 64-bit capable.
2406 * Assume that they will -- if they turn out not to be, then we can
2407 * take them out of the 1:1 domain later.
2408 */
8fcc5372
CW
2409 if (!startup) {
2410 /*
2411 * If the device's dma_mask is less than the system's memory
2412 * size then this is not a candidate for identity mapping.
2413 */
2414 u64 dma_mask = pdev->dma_mask;
2415
2416 if (pdev->dev.coherent_dma_mask &&
2417 pdev->dev.coherent_dma_mask < dma_mask)
2418 dma_mask = pdev->dev.coherent_dma_mask;
2419
2420 return dma_mask >= dma_get_required_mask(&pdev->dev);
2421 }
6941af28
DW
2422
2423 return 1;
2424}
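/* Decision order above: devices with an RMRR (other than USB) never get
 * the identity map; Azalia and graphics devices follow their IDENTMAP_*
 * flags; otherwise IDENTMAP_ALL must be set, legacy PCI devices behind a
 * bridge are excluded because siblings share a source-id, and at run time
 * (startup == 0) the device's DMA mask must still cover
 * dma_get_required_mask(). */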
2425
071e1374 2426static int __init iommu_prepare_static_identity_mapping(int hw)
2c2e2c38 2427{
2c2e2c38
FY
2428 struct pci_dev *pdev = NULL;
2429 int ret;
2430
19943b0e 2431 ret = si_domain_init(hw);
2c2e2c38
FY
2432 if (ret)
2433 return -EFAULT;
2434
2c2e2c38 2435 for_each_pci_dev(pdev) {
6941af28 2436 if (iommu_should_identity_map(pdev, 1)) {
5fe60f4e 2437 ret = domain_add_dev_info(si_domain, pdev,
eae460b6
MT
2438 hw ? CONTEXT_TT_PASS_THROUGH :
2439 CONTEXT_TT_MULTI_LEVEL);
2440 if (ret) {
2441 /* device not associated with an iommu */
2442 if (ret == -ENODEV)
2443 continue;
62edf5dc 2444 return ret;
eae460b6
MT
2445 }
2446 pr_info("IOMMU: %s identity mapping for device %s\n",
2447 hw ? "hardware" : "software", pci_name(pdev));
62edf5dc 2448 }
2c2e2c38
FY
2449 }
2450
2451 return 0;
2452}
2453
b779260b 2454static int __init init_dmars(void)
ba395927
KA
2455{
2456 struct dmar_drhd_unit *drhd;
2457 struct dmar_rmrr_unit *rmrr;
2458 struct pci_dev *pdev;
2459 struct intel_iommu *iommu;
9d783ba0 2460 int i, ret;
2c2e2c38 2461
ba395927
KA
2462 /*
2463 * for each drhd
2464 * allocate root
2465 * initialize and program root entry to not present
2466 * endfor
2467 */
2468 for_each_drhd_unit(drhd) {
5e0d2a6f 2469 /*
2470 * lock not needed as this is only incremented in the single
 2471 * threaded kernel __init code path; all other accesses are
 2472 * read-only
2473 */
1b198bb0
MT
2474 if (g_num_of_iommus < IOMMU_UNITS_SUPPORTED) {
2475 g_num_of_iommus++;
2476 continue;
2477 }
2478 printk_once(KERN_ERR "intel-iommu: exceeded %d IOMMUs\n",
2479 IOMMU_UNITS_SUPPORTED);
5e0d2a6f 2480 }
2481
d9630fe9
WH
2482 g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
2483 GFP_KERNEL);
2484 if (!g_iommus) {
2485 printk(KERN_ERR "Allocating global iommu array failed\n");
2486 ret = -ENOMEM;
2487 goto error;
2488 }
2489
80b20dd8 2490 deferred_flush = kzalloc(g_num_of_iommus *
2491 sizeof(struct deferred_flush_tables), GFP_KERNEL);
2492 if (!deferred_flush) {
5e0d2a6f 2493 ret = -ENOMEM;
989d51fc 2494 goto free_g_iommus;
5e0d2a6f 2495 }
2496
7c919779 2497 for_each_active_iommu(iommu, drhd) {
d9630fe9 2498 g_iommus[iommu->seq_id] = iommu;
ba395927 2499
e61d98d8
SS
2500 ret = iommu_init_domains(iommu);
2501 if (ret)
989d51fc 2502 goto free_iommu;
e61d98d8 2503
ba395927
KA
2504 /*
2505 * TBD:
2506 * we could share the same root & context tables
25985edc 2507 * among all IOMMUs. Need to split it later.
ba395927
KA
2508 */
2509 ret = iommu_alloc_root_entry(iommu);
2510 if (ret) {
2511 printk(KERN_ERR "IOMMU: allocate root entry failed\n");
989d51fc 2512 goto free_iommu;
ba395927 2513 }
4ed0d3e6 2514 if (!ecap_pass_through(iommu->ecap))
19943b0e 2515 hw_pass_through = 0;
ba395927
KA
2516 }
2517
1531a6a6
SS
2518 /*
2519 * Start from the sane iommu hardware state.
2520 */
7c919779 2521 for_each_active_iommu(iommu, drhd) {
1531a6a6
SS
2522 /*
2523 * If the queued invalidation is already initialized by us
2524 * (for example, while enabling interrupt-remapping) then
2525 * we got the things already rolling from a sane state.
2526 */
2527 if (iommu->qi)
2528 continue;
2529
2530 /*
2531 * Clear any previous faults.
2532 */
2533 dmar_fault(-1, iommu);
2534 /*
2535 * Disable queued invalidation if supported and already enabled
2536 * before OS handover.
2537 */
2538 dmar_disable_qi(iommu);
2539 }
2540
7c919779 2541 for_each_active_iommu(iommu, drhd) {
a77b67d4
YS
2542 if (dmar_enable_qi(iommu)) {
2543 /*
2544 * Queued Invalidate not enabled, use Register Based
2545 * Invalidate
2546 */
2547 iommu->flush.flush_context = __iommu_flush_context;
2548 iommu->flush.flush_iotlb = __iommu_flush_iotlb;
680a7524 2549 printk(KERN_INFO "IOMMU %d 0x%Lx: using Register based "
b4e0f9eb 2550 "invalidation\n",
680a7524 2551 iommu->seq_id,
b4e0f9eb 2552 (unsigned long long)drhd->reg_base_addr);
a77b67d4
YS
2553 } else {
2554 iommu->flush.flush_context = qi_flush_context;
2555 iommu->flush.flush_iotlb = qi_flush_iotlb;
680a7524 2556 printk(KERN_INFO "IOMMU %d 0x%Lx: using Queued "
b4e0f9eb 2557 "invalidation\n",
680a7524 2558 iommu->seq_id,
b4e0f9eb 2559 (unsigned long long)drhd->reg_base_addr);
a77b67d4
YS
2560 }
2561 }
2562
19943b0e 2563 if (iommu_pass_through)
e0fc7e0b
DW
2564 iommu_identity_mapping |= IDENTMAP_ALL;
2565
d3f13810 2566#ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA
e0fc7e0b 2567 iommu_identity_mapping |= IDENTMAP_GFX;
19943b0e 2568#endif
e0fc7e0b
DW
2569
2570 check_tylersburg_isoch();
2571
ba395927 2572 /*
19943b0e
DW
2573 * If pass through is not set or not enabled, setup context entries for
2574 * identity mappings for rmrr, gfx, and isa and may fall back to static
2575 * identity mapping if iommu_identity_mapping is set.
ba395927 2576 */
19943b0e
DW
2577 if (iommu_identity_mapping) {
2578 ret = iommu_prepare_static_identity_mapping(hw_pass_through);
4ed0d3e6 2579 if (ret) {
19943b0e 2580 printk(KERN_CRIT "Failed to setup IOMMU pass-through\n");
989d51fc 2581 goto free_iommu;
ba395927
KA
2582 }
2583 }
ba395927 2584 /*
19943b0e
DW
2585 * For each rmrr
2586 * for each dev attached to rmrr
2587 * do
2588 * locate drhd for dev, alloc domain for dev
2589 * allocate free domain
2590 * allocate page table entries for rmrr
2591 * if context not allocated for bus
2592 * allocate and init context
2593 * set present in root table for this bus
2594 * init context with domain, translation etc
2595 * endfor
2596 * endfor
ba395927 2597 */
19943b0e
DW
2598 printk(KERN_INFO "IOMMU: Setting RMRR:\n");
2599 for_each_rmrr_units(rmrr) {
b683b230
JL
 2600 /* some BIOSes list nonexistent devices in the DMAR table. */
2601 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
2602 i, pdev) {
19943b0e
DW
2603 ret = iommu_prepare_rmrr_dev(rmrr, pdev);
2604 if (ret)
2605 printk(KERN_ERR
2606 "IOMMU: mapping reserved region failed\n");
ba395927 2607 }
4ed0d3e6 2608 }
49a0429e 2609
19943b0e
DW
2610 iommu_prepare_isa();
2611
ba395927
KA
2612 /*
2613 * for each drhd
2614 * enable fault log
2615 * global invalidate context cache
2616 * global invalidate iotlb
2617 * enable translation
2618 */
7c919779 2619 for_each_iommu(iommu, drhd) {
51a63e67
JC
2620 if (drhd->ignored) {
2621 /*
2622 * we always have to disable PMRs or DMA may fail on
2623 * this device
2624 */
2625 if (force_on)
7c919779 2626 iommu_disable_protect_mem_regions(iommu);
ba395927 2627 continue;
51a63e67 2628 }
ba395927
KA
2629
2630 iommu_flush_write_buffer(iommu);
2631
3460a6d9
KA
2632 ret = dmar_set_interrupt(iommu);
2633 if (ret)
989d51fc 2634 goto free_iommu;
3460a6d9 2635
ba395927
KA
2636 iommu_set_root_entry(iommu);
2637
4c25a2c1 2638 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
1f0ef2aa 2639 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
f8bab735 2640
ba395927
KA
2641 ret = iommu_enable_translation(iommu);
2642 if (ret)
989d51fc 2643 goto free_iommu;
b94996c9
DW
2644
2645 iommu_disable_protect_mem_regions(iommu);
ba395927
KA
2646 }
2647
2648 return 0;
989d51fc
JL
2649
2650free_iommu:
7c919779 2651 for_each_active_iommu(iommu, drhd)
a868e6b7 2652 free_dmar_iommu(iommu);
9bdc531e 2653 kfree(deferred_flush);
989d51fc 2654free_g_iommus:
d9630fe9 2655 kfree(g_iommus);
989d51fc 2656error:
ba395927
KA
2657 return ret;
2658}
2659
5a5e02a6 2660/* This takes a number of _MM_ pages, not VTD pages */
875764de
DW
2661static struct iova *intel_alloc_iova(struct device *dev,
2662 struct dmar_domain *domain,
2663 unsigned long nrpages, uint64_t dma_mask)
ba395927 2664{
ba395927 2665 struct pci_dev *pdev = to_pci_dev(dev);
ba395927 2666 struct iova *iova = NULL;
ba395927 2667
875764de
DW
2668 /* Restrict dma_mask to the width that the iommu can handle */
2669 dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask);
2670
2671 if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) {
ba395927
KA
2672 /*
2673 * First try to allocate an io virtual address in
284901a9 2674 * DMA_BIT_MASK(32) and if that fails then try allocating
3609801e 2675 * from higher range
ba395927 2676 */
875764de
DW
2677 iova = alloc_iova(&domain->iovad, nrpages,
2678 IOVA_PFN(DMA_BIT_MASK(32)), 1);
2679 if (iova)
2680 return iova;
2681 }
2682 iova = alloc_iova(&domain->iovad, nrpages, IOVA_PFN(dma_mask), 1);
2683 if (unlikely(!iova)) {
2684 printk(KERN_ERR "Allocating %ld-page iova for %s failed",
2685 nrpages, pci_name(pdev));
f76aec76
KA
2686 return NULL;
2687 }
2688
2689 return iova;
2690}
2691
147202aa 2692static struct dmar_domain *__get_valid_domain_for_dev(struct pci_dev *pdev)
f76aec76
KA
2693{
2694 struct dmar_domain *domain;
2695 int ret;
2696
2697 domain = get_domain_for_dev(pdev,
2698 DEFAULT_DOMAIN_ADDRESS_WIDTH);
2699 if (!domain) {
2700 printk(KERN_ERR
2701 "Allocating domain for %s failed", pci_name(pdev));
4fe05bbc 2702 return NULL;
ba395927
KA
2703 }
2704
2705 /* make sure context mapping is ok */
5331fe6f 2706 if (unlikely(!domain_context_mapped(pdev))) {
4ed0d3e6
FY
2707 ret = domain_context_mapping(domain, pdev,
2708 CONTEXT_TT_MULTI_LEVEL);
f76aec76
KA
2709 if (ret) {
2710 printk(KERN_ERR
2711 "Domain context map for %s failed",
2712 pci_name(pdev));
4fe05bbc 2713 return NULL;
f76aec76 2714 }
ba395927
KA
2715 }
2716
f76aec76
KA
2717 return domain;
2718}
2719
147202aa
DW
2720static inline struct dmar_domain *get_valid_domain_for_dev(struct pci_dev *dev)
2721{
2722 struct device_domain_info *info;
2723
2724 /* No lock here, assumes no domain exit in normal case */
2725 info = dev->dev.archdata.iommu;
2726 if (likely(info))
2727 return info->domain;
2728
2729 return __get_valid_domain_for_dev(dev);
2730}
2731
2c2e2c38
FY
2732static int iommu_dummy(struct pci_dev *pdev)
2733{
2734 return pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
2735}
2736
2737/* Check if the pdev needs to go through non-identity map and unmap process.*/
73676832 2738static int iommu_no_mapping(struct device *dev)
2c2e2c38 2739{
73676832 2740 struct pci_dev *pdev;
2c2e2c38
FY
2741 int found;
2742
dbad0864 2743 if (unlikely(!dev_is_pci(dev)))
73676832
DW
2744 return 1;
2745
2746 pdev = to_pci_dev(dev);
1e4c64c4
DW
2747 if (iommu_dummy(pdev))
2748 return 1;
2749
2c2e2c38 2750 if (!iommu_identity_mapping)
1e4c64c4 2751 return 0;
2c2e2c38
FY
2752
2753 found = identity_mapping(pdev);
2754 if (found) {
6941af28 2755 if (iommu_should_identity_map(pdev, 0))
2c2e2c38
FY
2756 return 1;
2757 else {
2758 /*
2759 * 32 bit DMA is removed from si_domain and fall back
2760 * to non-identity mapping.
2761 */
2762 domain_remove_one_dev_info(si_domain, pdev);
2763 printk(KERN_INFO "32bit %s uses non-identity mapping\n",
2764 pci_name(pdev));
2765 return 0;
2766 }
2767 } else {
2768 /*
2769 * In case of a detached 64 bit DMA device from vm, the device
2770 * is put into si_domain for identity mapping.
2771 */
6941af28 2772 if (iommu_should_identity_map(pdev, 0)) {
2c2e2c38 2773 int ret;
5fe60f4e
DW
2774 ret = domain_add_dev_info(si_domain, pdev,
2775 hw_pass_through ?
2776 CONTEXT_TT_PASS_THROUGH :
2777 CONTEXT_TT_MULTI_LEVEL);
2c2e2c38
FY
2778 if (!ret) {
2779 printk(KERN_INFO "64bit %s uses identity mapping\n",
2780 pci_name(pdev));
2781 return 1;
2782 }
2783 }
2784 }
2785
1e4c64c4 2786 return 0;
2c2e2c38
FY
2787}
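/* A non-zero return means "bypass translation": non-PCI and dummy devices
 * always bypass, and identity-mapped devices stay that way only while
 * iommu_should_identity_map() still approves of them. The two branches
 * above also migrate devices lazily: a 32-bit-only device is dropped out
 * of si_domain, while a detached 64-bit-capable device is put back in. */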
2788
bb9e6d65
FT
2789static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
2790 size_t size, int dir, u64 dma_mask)
f76aec76
KA
2791{
2792 struct pci_dev *pdev = to_pci_dev(hwdev);
f76aec76 2793 struct dmar_domain *domain;
5b6985ce 2794 phys_addr_t start_paddr;
f76aec76
KA
2795 struct iova *iova;
2796 int prot = 0;
6865f0d1 2797 int ret;
8c11e798 2798 struct intel_iommu *iommu;
33041ec0 2799 unsigned long paddr_pfn = paddr >> PAGE_SHIFT;
f76aec76
KA
2800
2801 BUG_ON(dir == DMA_NONE);
2c2e2c38 2802
73676832 2803 if (iommu_no_mapping(hwdev))
6865f0d1 2804 return paddr;
f76aec76
KA
2805
2806 domain = get_valid_domain_for_dev(pdev);
2807 if (!domain)
2808 return 0;
2809
8c11e798 2810 iommu = domain_get_iommu(domain);
88cb6a74 2811 size = aligned_nrpages(paddr, size);
f76aec76 2812
c681d0ba 2813 iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size), dma_mask);
f76aec76
KA
2814 if (!iova)
2815 goto error;
2816
ba395927
KA
2817 /*
2818 * Check if DMAR supports zero-length reads on write only
2819 * mappings..
2820 */
2821 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
8c11e798 2822 !cap_zlr(iommu->cap))
ba395927
KA
2823 prot |= DMA_PTE_READ;
2824 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
2825 prot |= DMA_PTE_WRITE;
2826 /*
6865f0d1 2827 * paddr - (paddr + size) might be partial page, we should map the whole
ba395927 2828 * page. Note: if two part of one page are separately mapped, we
6865f0d1 2829 * might have two guest_addr mapping to the same host paddr, but this
ba395927
KA
2830 * is not a big problem
2831 */
0ab36de2 2832 ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova->pfn_lo),
33041ec0 2833 mm_to_dma_pfn(paddr_pfn), size, prot);
ba395927
KA
2834 if (ret)
2835 goto error;
2836
1f0ef2aa
DW
2837 /* it's a non-present to present mapping. Only flush if caching mode */
2838 if (cap_caching_mode(iommu->cap))
82653633 2839 iommu_flush_iotlb_psi(iommu, domain->id, mm_to_dma_pfn(iova->pfn_lo), size, 1);
1f0ef2aa 2840 else
8c11e798 2841 iommu_flush_write_buffer(iommu);
f76aec76 2842
03d6a246
DW
2843 start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
2844 start_paddr += paddr & ~PAGE_MASK;
2845 return start_paddr;
ba395927 2846
ba395927 2847error:
f76aec76
KA
2848 if (iova)
2849 __free_iova(&domain->iovad, iova);
4cf2e75d 2850 printk(KERN_ERR"Device %s request: %zx@%llx dir %d --- failed\n",
5b6985ce 2851 pci_name(pdev), size, (unsigned long long)paddr, dir);
ba395927
KA
2852 return 0;
2853}
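/* Example (assuming 4KiB pages): mapping paddr 0x12345678 with size 0x100
 * needs a single VT-d page; if the allocator hands back iova->pfn_lo ==
 * 0xfffff, the DMA address returned to the driver is
 * (0xfffff << 12) + 0x678, i.e. the sub-page offset of the original
 * buffer is preserved. */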
2854
ffbbef5c
FT
2855static dma_addr_t intel_map_page(struct device *dev, struct page *page,
2856 unsigned long offset, size_t size,
2857 enum dma_data_direction dir,
2858 struct dma_attrs *attrs)
bb9e6d65 2859{
ffbbef5c
FT
2860 return __intel_map_single(dev, page_to_phys(page) + offset, size,
2861 dir, to_pci_dev(dev)->dma_mask);
bb9e6d65
FT
2862}
2863
5e0d2a6f 2864static void flush_unmaps(void)
2865{
80b20dd8 2866 int i, j;
5e0d2a6f 2867
5e0d2a6f 2868 timer_on = 0;
2869
2870 /* just flush them all */
2871 for (i = 0; i < g_num_of_iommus; i++) {
a2bb8459
WH
2872 struct intel_iommu *iommu = g_iommus[i];
2873 if (!iommu)
2874 continue;
c42d9f32 2875
9dd2fe89
YZ
2876 if (!deferred_flush[i].next)
2877 continue;
2878
78d5f0f5
NA
 2879 /* In caching mode, global flushes make emulation expensive */
2880 if (!cap_caching_mode(iommu->cap))
2881 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
93a23a72 2882 DMA_TLB_GLOBAL_FLUSH);
9dd2fe89 2883 for (j = 0; j < deferred_flush[i].next; j++) {
93a23a72
YZ
2884 unsigned long mask;
2885 struct iova *iova = deferred_flush[i].iova[j];
78d5f0f5
NA
2886 struct dmar_domain *domain = deferred_flush[i].domain[j];
2887
2888 /* On real hardware multiple invalidations are expensive */
2889 if (cap_caching_mode(iommu->cap))
2890 iommu_flush_iotlb_psi(iommu, domain->id,
2891 iova->pfn_lo, iova->pfn_hi - iova->pfn_lo + 1, 0);
2892 else {
2893 mask = ilog2(mm_to_dma_pfn(iova->pfn_hi - iova->pfn_lo + 1));
2894 iommu_flush_dev_iotlb(deferred_flush[i].domain[j],
2895 (uint64_t)iova->pfn_lo << PAGE_SHIFT, mask);
2896 }
93a23a72 2897 __free_iova(&deferred_flush[i].domain[j]->iovad, iova);
80b20dd8 2898 }
9dd2fe89 2899 deferred_flush[i].next = 0;
5e0d2a6f 2900 }
2901
5e0d2a6f 2902 list_size = 0;
5e0d2a6f 2903}
2904
2905static void flush_unmaps_timeout(unsigned long data)
2906{
80b20dd8 2907 unsigned long flags;
2908
2909 spin_lock_irqsave(&async_umap_flush_lock, flags);
5e0d2a6f 2910 flush_unmaps();
80b20dd8 2911 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
5e0d2a6f 2912}
2913
2914static void add_unmap(struct dmar_domain *dom, struct iova *iova)
2915{
2916 unsigned long flags;
80b20dd8 2917 int next, iommu_id;
8c11e798 2918 struct intel_iommu *iommu;
5e0d2a6f 2919
2920 spin_lock_irqsave(&async_umap_flush_lock, flags);
80b20dd8 2921 if (list_size == HIGH_WATER_MARK)
2922 flush_unmaps();
2923
8c11e798
WH
2924 iommu = domain_get_iommu(dom);
2925 iommu_id = iommu->seq_id;
c42d9f32 2926
80b20dd8 2927 next = deferred_flush[iommu_id].next;
2928 deferred_flush[iommu_id].domain[next] = dom;
2929 deferred_flush[iommu_id].iova[next] = iova;
2930 deferred_flush[iommu_id].next++;
5e0d2a6f 2931
2932 if (!timer_on) {
2933 mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10));
2934 timer_on = 1;
2935 }
2936 list_size++;
2937 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
2938}
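/* Deferred-unmap design: in non-strict mode each freed IOVA is parked in
 * deferred_flush[] for its IOMMU and the IOTLB is invalidated in batches,
 * either when a queue reaches HIGH_WATER_MARK entries or when the 10ms
 * timer armed above fires, trading a short window of stale IOTLB entries
 * for far fewer invalidation operations. */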
2939
ffbbef5c
FT
2940static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
2941 size_t size, enum dma_data_direction dir,
2942 struct dma_attrs *attrs)
ba395927 2943{
ba395927 2944 struct pci_dev *pdev = to_pci_dev(dev);
f76aec76 2945 struct dmar_domain *domain;
d794dc9b 2946 unsigned long start_pfn, last_pfn;
ba395927 2947 struct iova *iova;
8c11e798 2948 struct intel_iommu *iommu;
ba395927 2949
73676832 2950 if (iommu_no_mapping(dev))
f76aec76 2951 return;
2c2e2c38 2952
ba395927
KA
2953 domain = find_domain(pdev);
2954 BUG_ON(!domain);
2955
8c11e798
WH
2956 iommu = domain_get_iommu(domain);
2957
ba395927 2958 iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
85b98276
DW
2959 if (WARN_ONCE(!iova, "Driver unmaps unmatched page at PFN %llx\n",
2960 (unsigned long long)dev_addr))
ba395927 2961 return;
ba395927 2962
d794dc9b
DW
2963 start_pfn = mm_to_dma_pfn(iova->pfn_lo);
2964 last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
ba395927 2965
d794dc9b
DW
2966 pr_debug("Device %s unmapping: pfn %lx-%lx\n",
2967 pci_name(pdev), start_pfn, last_pfn);
ba395927 2968
f76aec76 2969 /* clear the whole page */
d794dc9b
DW
2970 dma_pte_clear_range(domain, start_pfn, last_pfn);
2971
f76aec76 2972 /* free page tables */
d794dc9b
DW
2973 dma_pte_free_pagetable(domain, start_pfn, last_pfn);
2974
5e0d2a6f 2975 if (intel_iommu_strict) {
03d6a246 2976 iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
82653633 2977 last_pfn - start_pfn + 1, 0);
5e0d2a6f 2978 /* free iova */
2979 __free_iova(&domain->iovad, iova);
2980 } else {
2981 add_unmap(domain, iova);
2982 /*
 2983 * queue up the release of the unmap to save the ~1/6th of the
 2984 * CPU time used up by the iotlb flush operation...
2985 */
5e0d2a6f 2986 }
ba395927
KA
2987}
2988
d7ab5c46 2989static void *intel_alloc_coherent(struct device *hwdev, size_t size,
baa676fc
AP
2990 dma_addr_t *dma_handle, gfp_t flags,
2991 struct dma_attrs *attrs)
ba395927
KA
2992{
2993 void *vaddr;
2994 int order;
2995
5b6985ce 2996 size = PAGE_ALIGN(size);
ba395927 2997 order = get_order(size);
e8bb910d
AW
2998
2999 if (!iommu_no_mapping(hwdev))
3000 flags &= ~(GFP_DMA | GFP_DMA32);
3001 else if (hwdev->coherent_dma_mask < dma_get_required_mask(hwdev)) {
3002 if (hwdev->coherent_dma_mask < DMA_BIT_MASK(32))
3003 flags |= GFP_DMA;
3004 else
3005 flags |= GFP_DMA32;
3006 }
ba395927
KA
3007
3008 vaddr = (void *)__get_free_pages(flags, order);
3009 if (!vaddr)
3010 return NULL;
3011 memset(vaddr, 0, size);
3012
bb9e6d65
FT
3013 *dma_handle = __intel_map_single(hwdev, virt_to_bus(vaddr), size,
3014 DMA_BIDIRECTIONAL,
3015 hwdev->coherent_dma_mask);
ba395927
KA
3016 if (*dma_handle)
3017 return vaddr;
3018 free_pages((unsigned long)vaddr, order);
3019 return NULL;
3020}
3021
d7ab5c46 3022static void intel_free_coherent(struct device *hwdev, size_t size, void *vaddr,
baa676fc 3023 dma_addr_t dma_handle, struct dma_attrs *attrs)
ba395927
KA
3024{
3025 int order;
3026
5b6985ce 3027 size = PAGE_ALIGN(size);
ba395927
KA
3028 order = get_order(size);
3029
0db9b7ae 3030 intel_unmap_page(hwdev, dma_handle, size, DMA_BIDIRECTIONAL, NULL);
ba395927
KA
3031 free_pages((unsigned long)vaddr, order);
3032}
3033
d7ab5c46
FT
3034static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
3035 int nelems, enum dma_data_direction dir,
3036 struct dma_attrs *attrs)
ba395927 3037{
ba395927
KA
3038 struct pci_dev *pdev = to_pci_dev(hwdev);
3039 struct dmar_domain *domain;
d794dc9b 3040 unsigned long start_pfn, last_pfn;
f76aec76 3041 struct iova *iova;
8c11e798 3042 struct intel_iommu *iommu;
ba395927 3043
73676832 3044 if (iommu_no_mapping(hwdev))
ba395927
KA
3045 return;
3046
3047 domain = find_domain(pdev);
8c11e798
WH
3048 BUG_ON(!domain);
3049
3050 iommu = domain_get_iommu(domain);
ba395927 3051
c03ab37c 3052 iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address));
85b98276
DW
3053 if (WARN_ONCE(!iova, "Driver unmaps unmatched sglist at PFN %llx\n",
3054 (unsigned long long)sglist[0].dma_address))
f76aec76 3055 return;
f76aec76 3056
d794dc9b
DW
3057 start_pfn = mm_to_dma_pfn(iova->pfn_lo);
3058 last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
f76aec76
KA
3059
3060 /* clear the whole page */
d794dc9b
DW
3061 dma_pte_clear_range(domain, start_pfn, last_pfn);
3062
f76aec76 3063 /* free page tables */
d794dc9b 3064 dma_pte_free_pagetable(domain, start_pfn, last_pfn);
f76aec76 3065
acea0018
DW
3066 if (intel_iommu_strict) {
3067 iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
82653633 3068 last_pfn - start_pfn + 1, 0);
acea0018
DW
3069 /* free iova */
3070 __free_iova(&domain->iovad, iova);
3071 } else {
3072 add_unmap(domain, iova);
3073 /*
 3074 * queue up the release of the unmap to save the ~1/6th of the
 3075 * CPU time used up by the iotlb flush operation...
3076 */
3077 }
ba395927
KA
3078}
3079
ba395927 3080static int intel_nontranslate_map_sg(struct device *hddev,
c03ab37c 3081 struct scatterlist *sglist, int nelems, int dir)
ba395927
KA
3082{
3083 int i;
c03ab37c 3084 struct scatterlist *sg;
ba395927 3085
c03ab37c 3086 for_each_sg(sglist, sg, nelems, i) {
12d4d40e 3087 BUG_ON(!sg_page(sg));
4cf2e75d 3088 sg->dma_address = page_to_phys(sg_page(sg)) + sg->offset;
c03ab37c 3089 sg->dma_length = sg->length;
ba395927
KA
3090 }
3091 return nelems;
3092}
3093
d7ab5c46
FT
3094static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
3095 enum dma_data_direction dir, struct dma_attrs *attrs)
ba395927 3096{
ba395927 3097 int i;
ba395927
KA
3098 struct pci_dev *pdev = to_pci_dev(hwdev);
3099 struct dmar_domain *domain;
f76aec76
KA
3100 size_t size = 0;
3101 int prot = 0;
f76aec76
KA
3102 struct iova *iova = NULL;
3103 int ret;
c03ab37c 3104 struct scatterlist *sg;
b536d24d 3105 unsigned long start_vpfn;
8c11e798 3106 struct intel_iommu *iommu;
ba395927
KA
3107
3108 BUG_ON(dir == DMA_NONE);
73676832 3109 if (iommu_no_mapping(hwdev))
c03ab37c 3110 return intel_nontranslate_map_sg(hwdev, sglist, nelems, dir);
ba395927 3111
f76aec76
KA
3112 domain = get_valid_domain_for_dev(pdev);
3113 if (!domain)
3114 return 0;
3115
8c11e798
WH
3116 iommu = domain_get_iommu(domain);
3117
b536d24d 3118 for_each_sg(sglist, sg, nelems, i)
88cb6a74 3119 size += aligned_nrpages(sg->offset, sg->length);
f76aec76 3120
5a5e02a6
DW
3121 iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size),
3122 pdev->dma_mask);
f76aec76 3123 if (!iova) {
c03ab37c 3124 sglist->dma_length = 0;
f76aec76
KA
3125 return 0;
3126 }
3127
3128 /*
3129 * Check if DMAR supports zero-length reads on write only
3130 * mappings..
3131 */
3132 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
8c11e798 3133 !cap_zlr(iommu->cap))
f76aec76
KA
3134 prot |= DMA_PTE_READ;
3135 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3136 prot |= DMA_PTE_WRITE;
3137
b536d24d 3138 start_vpfn = mm_to_dma_pfn(iova->pfn_lo);
e1605495 3139
f532959b 3140 ret = domain_sg_mapping(domain, start_vpfn, sglist, size, prot);
e1605495
DW
3141 if (unlikely(ret)) {
3142 /* clear the page */
3143 dma_pte_clear_range(domain, start_vpfn,
3144 start_vpfn + size - 1);
3145 /* free page tables */
3146 dma_pte_free_pagetable(domain, start_vpfn,
3147 start_vpfn + size - 1);
3148 /* free iova */
3149 __free_iova(&domain->iovad, iova);
3150 return 0;
ba395927
KA
3151 }
3152
1f0ef2aa
DW
3153 /* it's a non-present to present mapping. Only flush if caching mode */
3154 if (cap_caching_mode(iommu->cap))
82653633 3155 iommu_flush_iotlb_psi(iommu, domain->id, start_vpfn, size, 1);
1f0ef2aa 3156 else
8c11e798 3157 iommu_flush_write_buffer(iommu);
1f0ef2aa 3158
ba395927
KA
3159 return nelems;
3160}
3161
dfb805e8
FT
3162static int intel_mapping_error(struct device *dev, dma_addr_t dma_addr)
3163{
3164 return !dma_addr;
3165}
3166
160c1d8e 3167struct dma_map_ops intel_dma_ops = {
baa676fc
AP
3168 .alloc = intel_alloc_coherent,
3169 .free = intel_free_coherent,
ba395927
KA
3170 .map_sg = intel_map_sg,
3171 .unmap_sg = intel_unmap_sg,
ffbbef5c
FT
3172 .map_page = intel_map_page,
3173 .unmap_page = intel_unmap_page,
dfb805e8 3174 .mapping_error = intel_mapping_error,
ba395927
KA
3175};
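/* These are the dma_map_ops the generic DMA API (dma_map_page(),
 * dma_map_sg(), dma_alloc_coherent(), ...) dispatches to once VT-d
 * translation is enabled; intel_map_sg() returns the number of mapped
 * entries or 0 on failure, and a zero dma_addr_t is the error cookie
 * recognised by intel_mapping_error(). */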
3176
3177static inline int iommu_domain_cache_init(void)
3178{
3179 int ret = 0;
3180
3181 iommu_domain_cache = kmem_cache_create("iommu_domain",
3182 sizeof(struct dmar_domain),
3183 0,
3184 SLAB_HWCACHE_ALIGN,
3185
3186 NULL);
3187 if (!iommu_domain_cache) {
3188 printk(KERN_ERR "Couldn't create iommu_domain cache\n");
3189 ret = -ENOMEM;
3190 }
3191
3192 return ret;
3193}
3194
3195static inline int iommu_devinfo_cache_init(void)
3196{
3197 int ret = 0;
3198
3199 iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
3200 sizeof(struct device_domain_info),
3201 0,
3202 SLAB_HWCACHE_ALIGN,
ba395927
KA
3203 NULL);
3204 if (!iommu_devinfo_cache) {
3205 printk(KERN_ERR "Couldn't create devinfo cache\n");
3206 ret = -ENOMEM;
3207 }
3208
3209 return ret;
3210}
3211
3212static inline int iommu_iova_cache_init(void)
3213{
3214 int ret = 0;
3215
3216 iommu_iova_cache = kmem_cache_create("iommu_iova",
3217 sizeof(struct iova),
3218 0,
3219 SLAB_HWCACHE_ALIGN,
ba395927
KA
3220 NULL);
3221 if (!iommu_iova_cache) {
3222 printk(KERN_ERR "Couldn't create iova cache\n");
3223 ret = -ENOMEM;
3224 }
3225
3226 return ret;
3227}
3228
3229static int __init iommu_init_mempool(void)
3230{
3231 int ret;
3232 ret = iommu_iova_cache_init();
3233 if (ret)
3234 return ret;
3235
3236 ret = iommu_domain_cache_init();
3237 if (ret)
3238 goto domain_error;
3239
3240 ret = iommu_devinfo_cache_init();
3241 if (!ret)
3242 return ret;
3243
3244 kmem_cache_destroy(iommu_domain_cache);
3245domain_error:
3246 kmem_cache_destroy(iommu_iova_cache);
3247
3248 return -ENOMEM;
3249}
3250
3251static void __init iommu_exit_mempool(void)
3252{
3253 kmem_cache_destroy(iommu_devinfo_cache);
3254 kmem_cache_destroy(iommu_domain_cache);
3255 kmem_cache_destroy(iommu_iova_cache);
3256
3257}
3258
556ab45f
DW
3259static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
3260{
3261 struct dmar_drhd_unit *drhd;
3262 u32 vtbar;
3263 int rc;
3264
3265 /* We know that this device on this chipset has its own IOMMU.
3266 * If we find it under a different IOMMU, then the BIOS is lying
3267 * to us. Hope that the IOMMU for this device is actually
3268 * disabled, and it needs no translation...
3269 */
3270 rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
3271 if (rc) {
3272 /* "can't" happen */
3273 dev_info(&pdev->dev, "failed to run vt-d quirk\n");
3274 return;
3275 }
3276 vtbar &= 0xffff0000;
3277
 3278 /* we know that this IOMMU should be at offset 0xa000 from vtbar */
3279 drhd = dmar_find_matched_drhd_unit(pdev);
3280 if (WARN_TAINT_ONCE(!drhd || drhd->reg_base_addr - vtbar != 0xa000,
3281 TAINT_FIRMWARE_WORKAROUND,
3282 "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n"))
3283 pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
3284}
3285DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu);
3286
ba395927
KA
3287static void __init init_no_remapping_devices(void)
3288{
3289 struct dmar_drhd_unit *drhd;
b683b230
JL
3290 struct pci_dev *dev;
3291 int i;
ba395927
KA
3292
3293 for_each_drhd_unit(drhd) {
3294 if (!drhd->include_all) {
b683b230
JL
3295 for_each_active_dev_scope(drhd->devices,
3296 drhd->devices_cnt, i, dev)
3297 break;
ba395927
KA
3298 /* ignore DMAR unit if no pci devices exist */
3299 if (i == drhd->devices_cnt)
3300 drhd->ignored = 1;
3301 }
3302 }
3303
7c919779 3304 for_each_active_drhd_unit(drhd) {
7c919779 3305 if (drhd->include_all)
ba395927
KA
3306 continue;
3307
b683b230
JL
3308 for_each_active_dev_scope(drhd->devices,
3309 drhd->devices_cnt, i, dev)
3310 if (!IS_GFX_DEVICE(dev))
ba395927 3311 break;
ba395927
KA
3312 if (i < drhd->devices_cnt)
3313 continue;
3314
c0771df8
DW
3315 /* This IOMMU has *only* gfx devices. Either bypass it or
3316 set the gfx_mapped flag, as appropriate */
3317 if (dmar_map_gfx) {
3318 intel_iommu_gfx_mapped = 1;
3319 } else {
3320 drhd->ignored = 1;
b683b230
JL
3321 for_each_active_dev_scope(drhd->devices,
3322 drhd->devices_cnt, i, dev)
3323 dev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
ba395927
KA
3324 }
3325 }
3326}
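/* Two cases are handled above: a DMAR unit whose scope contains no PCI
 * devices at all is simply ignored, and a unit that covers only graphics
 * devices is either left active with intel_iommu_gfx_mapped set (when
 * dmar_map_gfx is true) or ignored entirely, with its devices tagged
 * DUMMY_DEVICE_DOMAIN_INFO (e.g. after booting with intel_iommu=igfx_off). */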
3327
f59c7b69
FY
3328#ifdef CONFIG_SUSPEND
3329static int init_iommu_hw(void)
3330{
3331 struct dmar_drhd_unit *drhd;
3332 struct intel_iommu *iommu = NULL;
3333
3334 for_each_active_iommu(iommu, drhd)
3335 if (iommu->qi)
3336 dmar_reenable_qi(iommu);
3337
b779260b
JC
3338 for_each_iommu(iommu, drhd) {
3339 if (drhd->ignored) {
3340 /*
3341 * we always have to disable PMRs or DMA may fail on
3342 * this device
3343 */
3344 if (force_on)
3345 iommu_disable_protect_mem_regions(iommu);
3346 continue;
3347 }
3348
f59c7b69
FY
3349 iommu_flush_write_buffer(iommu);
3350
3351 iommu_set_root_entry(iommu);
3352
3353 iommu->flush.flush_context(iommu, 0, 0, 0,
1f0ef2aa 3354 DMA_CCMD_GLOBAL_INVL);
f59c7b69 3355 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
1f0ef2aa 3356 DMA_TLB_GLOBAL_FLUSH);
b779260b
JC
3357 if (iommu_enable_translation(iommu))
3358 return 1;
b94996c9 3359 iommu_disable_protect_mem_regions(iommu);
f59c7b69
FY
3360 }
3361
3362 return 0;
3363}
3364
3365static void iommu_flush_all(void)
3366{
3367 struct dmar_drhd_unit *drhd;
3368 struct intel_iommu *iommu;
3369
3370 for_each_active_iommu(iommu, drhd) {
3371 iommu->flush.flush_context(iommu, 0, 0, 0,
1f0ef2aa 3372 DMA_CCMD_GLOBAL_INVL);
f59c7b69 3373 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
1f0ef2aa 3374 DMA_TLB_GLOBAL_FLUSH);
f59c7b69
FY
3375 }
3376}
3377
134fac3f 3378static int iommu_suspend(void)
f59c7b69
FY
3379{
3380 struct dmar_drhd_unit *drhd;
3381 struct intel_iommu *iommu = NULL;
3382 unsigned long flag;
3383
3384 for_each_active_iommu(iommu, drhd) {
3385 iommu->iommu_state = kzalloc(sizeof(u32) * MAX_SR_DMAR_REGS,
3386 GFP_ATOMIC);
3387 if (!iommu->iommu_state)
3388 goto nomem;
3389 }
3390
3391 iommu_flush_all();
3392
3393 for_each_active_iommu(iommu, drhd) {
3394 iommu_disable_translation(iommu);
3395
1f5b3c3f 3396 raw_spin_lock_irqsave(&iommu->register_lock, flag);
f59c7b69
FY
3397
3398 iommu->iommu_state[SR_DMAR_FECTL_REG] =
3399 readl(iommu->reg + DMAR_FECTL_REG);
3400 iommu->iommu_state[SR_DMAR_FEDATA_REG] =
3401 readl(iommu->reg + DMAR_FEDATA_REG);
3402 iommu->iommu_state[SR_DMAR_FEADDR_REG] =
3403 readl(iommu->reg + DMAR_FEADDR_REG);
3404 iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
3405 readl(iommu->reg + DMAR_FEUADDR_REG);
3406
1f5b3c3f 3407 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
f59c7b69
FY
3408 }
3409 return 0;
3410
3411nomem:
3412 for_each_active_iommu(iommu, drhd)
3413 kfree(iommu->iommu_state);
3414
3415 return -ENOMEM;
3416}
3417
134fac3f 3418static void iommu_resume(void)
f59c7b69
FY
3419{
3420 struct dmar_drhd_unit *drhd;
3421 struct intel_iommu *iommu = NULL;
3422 unsigned long flag;
3423
3424 if (init_iommu_hw()) {
b779260b
JC
3425 if (force_on)
3426 panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
3427 else
3428 WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
134fac3f 3429 return;
f59c7b69
FY
3430 }
3431
3432 for_each_active_iommu(iommu, drhd) {
3433
1f5b3c3f 3434 raw_spin_lock_irqsave(&iommu->register_lock, flag);
f59c7b69
FY
3435
3436 writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
3437 iommu->reg + DMAR_FECTL_REG);
3438 writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
3439 iommu->reg + DMAR_FEDATA_REG);
3440 writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
3441 iommu->reg + DMAR_FEADDR_REG);
3442 writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
3443 iommu->reg + DMAR_FEUADDR_REG);
3444
1f5b3c3f 3445 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
f59c7b69
FY
3446 }
3447
3448 for_each_active_iommu(iommu, drhd)
3449 kfree(iommu->iommu_state);
f59c7b69
FY
3450}
3451
134fac3f 3452static struct syscore_ops iommu_syscore_ops = {
f59c7b69
FY
3453 .resume = iommu_resume,
3454 .suspend = iommu_suspend,
3455};
3456
134fac3f 3457static void __init init_iommu_pm_ops(void)
f59c7b69 3458{
134fac3f 3459 register_syscore_ops(&iommu_syscore_ops);
f59c7b69
FY
3460}
3461
3462#else
99592ba4 3463static inline void init_iommu_pm_ops(void) {}
f59c7b69
FY
3464#endif /* CONFIG_PM */
3465
318fe7df
SS
3466static void __init dmar_register_rmrr_unit(struct dmar_rmrr_unit *rmrr)
3467{
3468 list_add(&rmrr->list, &dmar_rmrr_units);
3469}
3470
3471
3472int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header)
3473{
3474 struct acpi_dmar_reserved_memory *rmrr;
3475 struct dmar_rmrr_unit *rmrru;
3476
3477 rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
3478 if (!rmrru)
3479 return -ENOMEM;
3480
3481 rmrru->hdr = header;
3482 rmrr = (struct acpi_dmar_reserved_memory *)header;
3483 rmrru->base_address = rmrr->base_address;
3484 rmrru->end_address = rmrr->end_address;
3485
3486 dmar_register_rmrr_unit(rmrru);
3487 return 0;
3488}
3489
3490static int __init
3491rmrr_parse_dev(struct dmar_rmrr_unit *rmrru)
3492{
3493 struct acpi_dmar_reserved_memory *rmrr;
318fe7df
SS
3494
3495 rmrr = (struct acpi_dmar_reserved_memory *) rmrru->hdr;
9bdc531e
JL
3496 return dmar_parse_dev_scope((void *)(rmrr + 1),
3497 ((void *)rmrr) + rmrr->header.length,
3498 &rmrru->devices_cnt, &rmrru->devices,
3499 rmrr->segment);
318fe7df
SS
3500}
3501
318fe7df
SS
3502int __init dmar_parse_one_atsr(struct acpi_dmar_header *hdr)
3503{
3504 struct acpi_dmar_atsr *atsr;
3505 struct dmar_atsr_unit *atsru;
3506
3507 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
3508 atsru = kzalloc(sizeof(*atsru), GFP_KERNEL);
3509 if (!atsru)
3510 return -ENOMEM;
3511
3512 atsru->hdr = hdr;
3513 atsru->include_all = atsr->flags & 0x1;
3514
3515 list_add(&atsru->list, &dmar_atsr_units);
3516
3517 return 0;
3518}
3519
3520static int __init atsr_parse_dev(struct dmar_atsr_unit *atsru)
3521{
318fe7df
SS
3522 struct acpi_dmar_atsr *atsr;
3523
3524 if (atsru->include_all)
3525 return 0;
3526
3527 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
9bdc531e
JL
3528 return dmar_parse_dev_scope((void *)(atsr + 1),
3529 (void *)atsr + atsr->header.length,
3530 &atsru->devices_cnt, &atsru->devices,
3531 atsr->segment);
3532}
3533
3534static void intel_iommu_free_atsr(struct dmar_atsr_unit *atsru)
3535{
3536 dmar_free_dev_scope(&atsru->devices, &atsru->devices_cnt);
3537 kfree(atsru);
3538}
3539
3540static void intel_iommu_free_dmars(void)
3541{
3542 struct dmar_rmrr_unit *rmrru, *rmrr_n;
3543 struct dmar_atsr_unit *atsru, *atsr_n;
3544
3545 list_for_each_entry_safe(rmrru, rmrr_n, &dmar_rmrr_units, list) {
3546 list_del(&rmrru->list);
3547 dmar_free_dev_scope(&rmrru->devices, &rmrru->devices_cnt);
3548 kfree(rmrru);
318fe7df
SS
3549 }
3550
9bdc531e
JL
3551 list_for_each_entry_safe(atsru, atsr_n, &dmar_atsr_units, list) {
3552 list_del(&atsru->list);
3553 intel_iommu_free_atsr(atsru);
3554 }
318fe7df
SS
3555}
3556
3557int dmar_find_matched_atsr_unit(struct pci_dev *dev)
3558{
b683b230 3559 int i, ret = 1;
318fe7df 3560 struct pci_bus *bus;
b683b230 3561 struct pci_dev *bridge = NULL, *tmp;
318fe7df
SS
3562 struct acpi_dmar_atsr *atsr;
3563 struct dmar_atsr_unit *atsru;
3564
3565 dev = pci_physfn(dev);
318fe7df 3566 for (bus = dev->bus; bus; bus = bus->parent) {
b5f82ddf 3567 bridge = bus->self;
318fe7df 3568 if (!bridge || !pci_is_pcie(bridge) ||
62f87c0e 3569 pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE)
318fe7df 3570 return 0;
b5f82ddf 3571 if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT)
318fe7df 3572 break;
318fe7df 3573 }
b5f82ddf
JL
3574 if (!bridge)
3575 return 0;
318fe7df 3576
b5f82ddf
JL
3577 list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
3578 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
3579 if (atsr->segment != pci_domain_nr(dev->bus))
3580 continue;
3581
b683b230
JL
3582 for_each_dev_scope(atsru->devices, atsru->devices_cnt, i, tmp)
3583 if (tmp == bridge)
3584 goto out;
b5f82ddf
JL
3585
3586 if (atsru->include_all)
b683b230 3587 goto out;
b5f82ddf 3588 }
b683b230
JL
3589 ret = 0;
3590out:
318fe7df 3591
b683b230 3592 return ret;
318fe7df
SS
3593}
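/*
 * Returns 1 if the device sits below a root port listed in an ATSR for
 * its segment (or an include_all ATSR exists for that segment), and 0
 * otherwise -- i.e. whether ATS can be assumed usable on the path to the
 * device.
 */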
3594
c8f369ab 3595int __init dmar_parse_rmrr_atsr_dev(void)
318fe7df 3596{
9bdc531e
JL
3597 struct dmar_rmrr_unit *rmrr;
3598 struct dmar_atsr_unit *atsr;
b683b230 3599 int ret;
318fe7df 3600
9bdc531e 3601 list_for_each_entry(rmrr, &dmar_rmrr_units, list) {
318fe7df
SS
3602 ret = rmrr_parse_dev(rmrr);
3603 if (ret)
3604 return ret;
3605 }
3606
9bdc531e 3607 list_for_each_entry(atsr, &dmar_atsr_units, list) {
318fe7df
SS
3608 ret = atsr_parse_dev(atsr);
3609 if (ret)
3610 return ret;
3611 }
3612
b683b230 3613 return 0;
318fe7df
SS
3614}
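/*
 * Second-stage parse: resolve the ACPI device scopes recorded for each
 * RMRR and ATSR into pci_dev references.  This has to wait until PCI
 * enumeration has run, hence the separate pass.
 */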
3615
99dcaded
FY
3616/*
3617 * Here we only respond to a device being unbound from its driver.
3618 *
3619 * A newly added device is not attached to its DMAR domain here yet; that
3620 * happens when the device is first mapped to an iova.
3621 */
3622static int device_notifier(struct notifier_block *nb,
3623 unsigned long action, void *data)
3624{
3625 struct device *dev = data;
3626 struct pci_dev *pdev = to_pci_dev(dev);
3627 struct dmar_domain *domain;
3628
816997d0 3629 if (iommu_dummy(pdev))
44cd613c
DW
3630 return 0;
3631
7e7dfab7
JL
3632 if (action != BUS_NOTIFY_UNBOUND_DRIVER &&
3633 action != BUS_NOTIFY_DEL_DEVICE)
3634 return 0;
3635
99dcaded
FY
3636 domain = find_domain(pdev);
3637 if (!domain)
3638 return 0;
3639
3a5670e8 3640 down_read(&dmar_global_lock);
7e7dfab7
JL
3641 domain_remove_one_dev_info(domain, pdev);
3642 if (!(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) &&
3643 !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) &&
3644 list_empty(&domain->devices))
3645 domain_exit(domain);
3a5670e8 3646 up_read(&dmar_global_lock);
a97590e5 3647
99dcaded
FY
3648 return 0;
3649}
3650
3651static struct notifier_block device_nb = {
3652 .notifier_call = device_notifier,
3653};
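/*
 * Registered on the PCI bus notifier chain in intel_iommu_init() below,
 * so the domain teardown above runs whenever a driver is unbound from a
 * device or the device is removed.
 */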
3654
ba395927
KA
3655int __init intel_iommu_init(void)
3656{
9bdc531e 3657 int ret = -ENODEV;
3a93c841 3658 struct dmar_drhd_unit *drhd;
7c919779 3659 struct intel_iommu *iommu;
ba395927 3660
a59b50e9
JC
3661 /* VT-d is required for a TXT/tboot launch, so enforce that */
3662 force_on = tboot_force_iommu();
3663
3a5670e8
JL
3664 if (iommu_init_mempool()) {
3665 if (force_on)
3666 panic("tboot: Failed to initialize iommu memory\n");
3667 return -ENOMEM;
3668 }
3669
3670 down_write(&dmar_global_lock);
a59b50e9
JC
3671 if (dmar_table_init()) {
3672 if (force_on)
3673 panic("tboot: Failed to initialize DMAR table\n");
9bdc531e 3674 goto out_free_dmar;
a59b50e9 3675 }
ba395927 3676
3a93c841
TI
3677 /*
3678 * Disable translation if already enabled prior to OS handover.
3679 */
7c919779 3680 for_each_active_iommu(iommu, drhd)
3a93c841
TI
3681 if (iommu->gcmd & DMA_GCMD_TE)
3682 iommu_disable_translation(iommu);
3a93c841 3683
c2c7286a 3684 if (dmar_dev_scope_init() < 0) {
a59b50e9
JC
3685 if (force_on)
3686 panic("tboot: Failed to initialize DMAR device scope\n");
9bdc531e 3687 goto out_free_dmar;
a59b50e9 3688 }
1886e8a9 3689
75f1cdf1 3690 if (no_iommu || dmar_disabled)
9bdc531e 3691 goto out_free_dmar;
2ae21010 3692
318fe7df
SS
3693 if (list_empty(&dmar_rmrr_units))
3694 printk(KERN_INFO "DMAR: No RMRR found\n");
3695
3696 if (list_empty(&dmar_atsr_units))
3697 printk(KERN_INFO "DMAR: No ATSR found\n");
3698
51a63e67
JC
3699 if (dmar_init_reserved_ranges()) {
3700 if (force_on)
3701 panic("tboot: Failed to reserve iommu ranges\n");
3a5670e8 3702 goto out_free_reserved_range;
51a63e67 3703 }
ba395927
KA
3704
3705 init_no_remapping_devices();
3706
b779260b 3707 ret = init_dmars();
ba395927 3708 if (ret) {
a59b50e9
JC
3709 if (force_on)
3710 panic("tboot: Failed to initialize DMARs\n");
ba395927 3711 printk(KERN_ERR "IOMMU: dmar init failed\n");
9bdc531e 3712 goto out_free_reserved_range;
ba395927 3713 }
3a5670e8 3714 up_write(&dmar_global_lock);
ba395927
KA
3715 printk(KERN_INFO
3716 "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");
3717
5e0d2a6f 3718 init_timer(&unmap_timer);
75f1cdf1
FT
3719#ifdef CONFIG_SWIOTLB
3720 swiotlb = 0;
3721#endif
19943b0e 3722 dma_ops = &intel_dma_ops;
4ed0d3e6 3723
134fac3f 3724 init_iommu_pm_ops();
a8bcbb0d 3725
4236d97d 3726 bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
a8bcbb0d 3727
99dcaded
FY
3728 bus_register_notifier(&pci_bus_type, &device_nb);
3729
8bc1f85c
ED
3730 intel_iommu_enabled = 1;
3731
ba395927 3732 return 0;
9bdc531e
JL
3733
3734out_free_reserved_range:
3735 put_iova_domain(&reserved_iova_list);
9bdc531e
JL
3736out_free_dmar:
3737 intel_iommu_free_dmars();
3a5670e8
JL
3738 up_write(&dmar_global_lock);
3739 iommu_exit_mempool();
9bdc531e 3740 return ret;
ba395927 3741}
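/*
 * Note the locking above: dmar_global_lock is held for write across DMAR
 * table parsing, device-scope setup and init_dmars(), and is dropped
 * before the DMA ops, PM ops, IOMMU ops and the bus notifier are
 * installed.
 */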
e820482c 3742
3199aa6b
HW
3743static void iommu_detach_dependent_devices(struct intel_iommu *iommu,
3744 struct pci_dev *pdev)
3745{
3746 struct pci_dev *tmp, *parent;
3747
3748 if (!iommu || !pdev)
3749 return;
3750
3751 /* dependent device detach */
3752 tmp = pci_find_upstream_pcie_bridge(pdev);
3753 /* Secondary interface's bus number and devfn 0 */
3754 if (tmp) {
3755 parent = pdev->bus->self;
3756 while (parent != tmp) {
3757 iommu_detach_dev(iommu, parent->bus->number,
276dbf99 3758 parent->devfn);
3199aa6b
HW
3759 parent = parent->bus->self;
3760 }
45e829ea 3761 if (pci_is_pcie(tmp)) /* this is a PCIe-to-PCI bridge */
3199aa6b
HW
3762 iommu_detach_dev(iommu,
3763 tmp->subordinate->number, 0);
3764 else /* this is a legacy PCI bridge */
276dbf99
DW
3765 iommu_detach_dev(iommu, tmp->bus->number,
3766 tmp->devfn);
3199aa6b
HW
3767 }
3768}
3769
2c2e2c38 3770static void domain_remove_one_dev_info(struct dmar_domain *domain,
c7151a8d
WH
3771 struct pci_dev *pdev)
3772{
bca2b916 3773 struct device_domain_info *info, *tmp;
c7151a8d
WH
3774 struct intel_iommu *iommu;
3775 unsigned long flags;
3776 int found = 0;
c7151a8d 3777
276dbf99
DW
3778 iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
3779 pdev->devfn);
c7151a8d
WH
3780 if (!iommu)
3781 return;
3782
3783 spin_lock_irqsave(&device_domain_lock, flags);
bca2b916 3784 list_for_each_entry_safe(info, tmp, &domain->devices, link) {
8519dc44
MH
3785 if (info->segment == pci_domain_nr(pdev->bus) &&
3786 info->bus == pdev->bus->number &&
c7151a8d 3787 info->devfn == pdev->devfn) {
109b9b04 3788 unlink_domain_info(info);
c7151a8d
WH
3789 spin_unlock_irqrestore(&device_domain_lock, flags);
3790
93a23a72 3791 iommu_disable_dev_iotlb(info);
c7151a8d 3792 iommu_detach_dev(iommu, info->bus, info->devfn);
3199aa6b 3793 iommu_detach_dependent_devices(iommu, pdev);
c7151a8d
WH
3794 free_devinfo_mem(info);
3795
3796 spin_lock_irqsave(&device_domain_lock, flags);
3797
3798 if (found)
3799 break;
3800 else
3801 continue;
3802 }
3803
3804 /* if there are no other devices under the same iommu
3805 * owned by this domain, clear this iommu in iommu_bmp,
3806 * then update the iommu count and coherency
3807 */
276dbf99
DW
3808 if (iommu == device_to_iommu(info->segment, info->bus,
3809 info->devfn))
c7151a8d
WH
3810 found = 1;
3811 }
3812
3e7abe25
RD
3813 spin_unlock_irqrestore(&device_domain_lock, flags);
3814
c7151a8d
WH
3815 if (found == 0) {
3816 unsigned long tmp_flags;
3817 spin_lock_irqsave(&domain->iommu_lock, tmp_flags);
1b198bb0 3818 clear_bit(iommu->seq_id, domain->iommu_bmp);
c7151a8d 3819 domain->iommu_count--;
58c610bd 3820 domain_update_iommu_cap(domain);
c7151a8d 3821 spin_unlock_irqrestore(&domain->iommu_lock, tmp_flags);
a97590e5 3822
9b4554b2
AW
3823 if (!(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) &&
3824 !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY)) {
3825 spin_lock_irqsave(&iommu->lock, tmp_flags);
3826 clear_bit(domain->id, iommu->domain_ids);
3827 iommu->domains[domain->id] = NULL;
3828 spin_unlock_irqrestore(&iommu->lock, tmp_flags);
3829 }
c7151a8d 3830 }
c7151a8d
WH
3831}
3832
2c2e2c38 3833static int md_domain_init(struct dmar_domain *domain, int guest_width)
5e98c4b1
WH
3834{
3835 int adjust_width;
3836
3837 init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
5e98c4b1
WH
3838 domain_reserve_special_ranges(domain);
3839
3840 /* calculate AGAW */
3841 domain->gaw = guest_width;
3842 adjust_width = guestwidth_to_adjustwidth(guest_width);
3843 domain->agaw = width_to_agaw(adjust_width);
3844
5e98c4b1 3845 domain->iommu_coherency = 0;
c5b15255 3846 domain->iommu_snooping = 0;
6dd9a7c7 3847 domain->iommu_superpage = 0;
fe40f1e0 3848 domain->max_addr = 0;
4c923d47 3849 domain->nid = -1;
5e98c4b1
WH
3850
3851 /* always allocate the top pgd */
4c923d47 3852 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
5e98c4b1
WH
3853 if (!domain->pgd)
3854 return -ENOMEM;
3855 domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
3856 return 0;
3857}
3858
5d450806 3859static int intel_iommu_domain_init(struct iommu_domain *domain)
38717946 3860{
5d450806 3861 struct dmar_domain *dmar_domain;
38717946 3862
92d03cc8 3863 dmar_domain = alloc_domain(true);
5d450806 3864 if (!dmar_domain) {
38717946 3865 printk(KERN_ERR
5d450806
JR
3866 "intel_iommu_domain_init: dmar_domain == NULL\n");
3867 return -ENOMEM;
38717946 3868 }
2c2e2c38 3869 if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
38717946 3870 printk(KERN_ERR
5d450806 3871 "intel_iommu_domain_init() failed\n");
92d03cc8 3872 domain_exit(dmar_domain);
5d450806 3873 return -ENOMEM;
38717946 3874 }
8140a95d 3875 domain_update_iommu_cap(dmar_domain);
5d450806 3876 domain->priv = dmar_domain;
faa3d6f5 3877
8a0e715b
JR
3878 domain->geometry.aperture_start = 0;
3879 domain->geometry.aperture_end = __DOMAIN_MAX_ADDR(dmar_domain->gaw);
3880 domain->geometry.force_aperture = true;
3881
5d450806 3882 return 0;
38717946 3883}
38717946 3884
5d450806 3885static void intel_iommu_domain_destroy(struct iommu_domain *domain)
38717946 3886{
5d450806
JR
3887 struct dmar_domain *dmar_domain = domain->priv;
3888
3889 domain->priv = NULL;
92d03cc8 3890 domain_exit(dmar_domain);
38717946 3891}
38717946 3892
4c5478c9
JR
3893static int intel_iommu_attach_device(struct iommu_domain *domain,
3894 struct device *dev)
38717946 3895{
4c5478c9
JR
3896 struct dmar_domain *dmar_domain = domain->priv;
3897 struct pci_dev *pdev = to_pci_dev(dev);
fe40f1e0
WH
3898 struct intel_iommu *iommu;
3899 int addr_width;
faa3d6f5
WH
3900
3901 /* normally pdev is not mapped */
3902 if (unlikely(domain_context_mapped(pdev))) {
3903 struct dmar_domain *old_domain;
3904
3905 old_domain = find_domain(pdev);
3906 if (old_domain) {
2c2e2c38
FY
3907 if (dmar_domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE ||
3908 dmar_domain->flags & DOMAIN_FLAG_STATIC_IDENTITY)
3909 domain_remove_one_dev_info(old_domain, pdev);
faa3d6f5
WH
3910 else
3911 domain_remove_dev_info(old_domain);
3912 }
3913 }
3914
276dbf99
DW
3915 iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
3916 pdev->devfn);
fe40f1e0
WH
3917 if (!iommu)
3918 return -ENODEV;
3919
3920 /* check if this iommu agaw is sufficient for max mapped address */
3921 addr_width = agaw_to_width(iommu->agaw);
a99c47a2
TL
3922 if (addr_width > cap_mgaw(iommu->cap))
3923 addr_width = cap_mgaw(iommu->cap);
3924
3925 if (dmar_domain->max_addr > (1LL << addr_width)) {
3926 printk(KERN_ERR "%s: iommu width (%d) is not "
fe40f1e0 3927 "sufficient for the mapped address (%llx)\n",
a99c47a2 3928 __func__, addr_width, dmar_domain->max_addr);
fe40f1e0
WH
3929 return -EFAULT;
3930 }
a99c47a2
TL
3931 dmar_domain->gaw = addr_width;
3932
3933 /*
3934 * Knock out extra levels of page tables if necessary
3935 */
3936 while (iommu->agaw < dmar_domain->agaw) {
3937 struct dma_pte *pte;
3938
3939 pte = dmar_domain->pgd;
3940 if (dma_pte_present(pte)) {
25cbff16
SY
3941 dmar_domain->pgd = (struct dma_pte *)
3942 phys_to_virt(dma_pte_addr(pte));
7a661013 3943 free_pgtable_page(pte);
a99c47a2
TL
3944 }
3945 dmar_domain->agaw--;
3946 }
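	/*
	 * The domain's page-table depth now matches what this IOMMU can
	 * walk: each pass above dropped the top level and promoted the
	 * table referenced by its first entry to be the new pgd.
	 */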
fe40f1e0 3947
5fe60f4e 3948 return domain_add_dev_info(dmar_domain, pdev, CONTEXT_TT_MULTI_LEVEL);
38717946 3949}
38717946 3950
4c5478c9
JR
3951static void intel_iommu_detach_device(struct iommu_domain *domain,
3952 struct device *dev)
38717946 3953{
4c5478c9
JR
3954 struct dmar_domain *dmar_domain = domain->priv;
3955 struct pci_dev *pdev = to_pci_dev(dev);
3956
2c2e2c38 3957 domain_remove_one_dev_info(dmar_domain, pdev);
faa3d6f5 3958}
c7151a8d 3959
b146a1c9
JR
3960static int intel_iommu_map(struct iommu_domain *domain,
3961 unsigned long iova, phys_addr_t hpa,
5009065d 3962 size_t size, int iommu_prot)
faa3d6f5 3963{
dde57a21 3964 struct dmar_domain *dmar_domain = domain->priv;
fe40f1e0 3965 u64 max_addr;
dde57a21 3966 int prot = 0;
faa3d6f5 3967 int ret;
fe40f1e0 3968
dde57a21
JR
3969 if (iommu_prot & IOMMU_READ)
3970 prot |= DMA_PTE_READ;
3971 if (iommu_prot & IOMMU_WRITE)
3972 prot |= DMA_PTE_WRITE;
9cf06697
SY
3973 if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
3974 prot |= DMA_PTE_SNP;
dde57a21 3975
163cc52c 3976 max_addr = iova + size;
dde57a21 3977 if (dmar_domain->max_addr < max_addr) {
fe40f1e0
WH
3978 u64 end;
3979
3980 /* check if minimum agaw is sufficient for mapped address */
8954da1f 3981 end = __DOMAIN_MAX_ADDR(dmar_domain->gaw) + 1;
fe40f1e0 3982 if (end < max_addr) {
8954da1f 3983 printk(KERN_ERR "%s: iommu width (%d) is not "
fe40f1e0 3984 "sufficient for the mapped address (%llx)\n",
8954da1f 3985 __func__, dmar_domain->gaw, max_addr);
fe40f1e0
WH
3986 return -EFAULT;
3987 }
dde57a21 3988 dmar_domain->max_addr = max_addr;
fe40f1e0 3989 }
ad051221
DW
3990 /* Round up size to the next multiple of PAGE_SIZE if it and
3991 the low bits of hpa would take us onto the next page */
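	/* e.g. hpa = 0x1ffa with size = 0x10 straddles a page boundary,
	   so two 4KiB pages are mapped even though size < PAGE_SIZE. */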
88cb6a74 3992 size = aligned_nrpages(hpa, size);
ad051221
DW
3993 ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
3994 hpa >> VTD_PAGE_SHIFT, size, prot);
faa3d6f5 3995 return ret;
38717946 3996}
38717946 3997
5009065d
OBC
3998static size_t intel_iommu_unmap(struct iommu_domain *domain,
3999 unsigned long iova, size_t size)
38717946 4000{
dde57a21 4001 struct dmar_domain *dmar_domain = domain->priv;
292827cb 4002 int order;
4b99d352 4003
292827cb 4004 order = dma_pte_clear_range(dmar_domain, iova >> VTD_PAGE_SHIFT,
163cc52c 4005 (iova + size - 1) >> VTD_PAGE_SHIFT);
fe40f1e0 4006
163cc52c
DW
4007 if (dmar_domain->max_addr == iova + size)
4008 dmar_domain->max_addr = iova;
b146a1c9 4009
5009065d 4010 return PAGE_SIZE << order;
38717946 4011}
38717946 4012
d14d6577 4013static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
bb5547ac 4014 dma_addr_t iova)
38717946 4015{
d14d6577 4016 struct dmar_domain *dmar_domain = domain->priv;
38717946 4017 struct dma_pte *pte;
faa3d6f5 4018 u64 phys = 0;
38717946 4019
6dd9a7c7 4020 pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, 0);
38717946 4021 if (pte)
faa3d6f5 4022 phys = dma_pte_addr(pte);
38717946 4023
faa3d6f5 4024 return phys;
38717946 4025}
a8bcbb0d 4026
dbb9fd86
SY
4027static int intel_iommu_domain_has_cap(struct iommu_domain *domain,
4028 unsigned long cap)
4029{
4030 struct dmar_domain *dmar_domain = domain->priv;
4031
4032 if (cap == IOMMU_CAP_CACHE_COHERENCY)
4033 return dmar_domain->iommu_snooping;
323f99cb 4034 if (cap == IOMMU_CAP_INTR_REMAP)
95a02e97 4035 return irq_remapping_enabled;
dbb9fd86
SY
4036
4037 return 0;
4038}
4039
783f157b 4040#define REQ_ACS_FLAGS (PCI_ACS_SV | PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_UF)
70ae6f0d 4041
abdfdde2
AW
4042static int intel_iommu_add_device(struct device *dev)
4043{
4044 struct pci_dev *pdev = to_pci_dev(dev);
3da4af0a 4045 struct pci_dev *bridge, *dma_pdev = NULL;
abdfdde2
AW
4046 struct iommu_group *group;
4047 int ret;
70ae6f0d 4048
abdfdde2
AW
4049 if (!device_to_iommu(pci_domain_nr(pdev->bus),
4050 pdev->bus->number, pdev->devfn))
70ae6f0d
AW
4051 return -ENODEV;
4052
4053 bridge = pci_find_upstream_pcie_bridge(pdev);
4054 if (bridge) {
abdfdde2
AW
4055 if (pci_is_pcie(bridge))
4056 dma_pdev = pci_get_domain_bus_and_slot(
4057 pci_domain_nr(pdev->bus),
4058 bridge->subordinate->number, 0);
3da4af0a 4059 if (!dma_pdev)
abdfdde2
AW
4060 dma_pdev = pci_dev_get(bridge);
4061 } else
4062 dma_pdev = pci_dev_get(pdev);
4063
a4ff1fc2 4064 /* Account for quirked devices */
783f157b
AW
4065 swap_pci_ref(&dma_pdev, pci_get_dma_source(dma_pdev));
4066
a4ff1fc2
AW
4067 /*
4068 * If it's a multifunction device that does not support our
c14d2690
AW
4069 * required ACS flags, add it to the same group as the lowest-numbered
4070 * function that also does not support the required ACS flags.
a4ff1fc2 4071 */
783f157b 4072 if (dma_pdev->multifunction &&
c14d2690
AW
4073 !pci_acs_enabled(dma_pdev, REQ_ACS_FLAGS)) {
4074 u8 i, slot = PCI_SLOT(dma_pdev->devfn);
4075
4076 for (i = 0; i < 8; i++) {
4077 struct pci_dev *tmp;
4078
4079 tmp = pci_get_slot(dma_pdev->bus, PCI_DEVFN(slot, i));
4080 if (!tmp)
4081 continue;
4082
4083 if (!pci_acs_enabled(tmp, REQ_ACS_FLAGS)) {
4084 swap_pci_ref(&dma_pdev, tmp);
4085 break;
4086 }
4087 pci_dev_put(tmp);
4088 }
4089 }
783f157b 4090
a4ff1fc2
AW
4091 /*
4092 * Devices on the root bus go through the iommu. If that's not us,
4093 * find the next upstream device and test ACS up to the root bus.
4094 * Finding the next device may require skipping virtual buses.
4095 */
783f157b 4096 while (!pci_is_root_bus(dma_pdev->bus)) {
a4ff1fc2
AW
4097 struct pci_bus *bus = dma_pdev->bus;
4098
4099 while (!bus->self) {
4100 if (!pci_is_root_bus(bus))
4101 bus = bus->parent;
4102 else
4103 goto root_bus;
4104 }
4105
4106 if (pci_acs_path_enabled(bus->self, NULL, REQ_ACS_FLAGS))
783f157b
AW
4107 break;
4108
a4ff1fc2 4109 swap_pci_ref(&dma_pdev, pci_dev_get(bus->self));
783f157b
AW
4110 }
4111
a4ff1fc2 4112root_bus:
abdfdde2
AW
4113 group = iommu_group_get(&dma_pdev->dev);
4114 pci_dev_put(dma_pdev);
4115 if (!group) {
4116 group = iommu_group_alloc();
4117 if (IS_ERR(group))
4118 return PTR_ERR(group);
70ae6f0d
AW
4119 }
4120
abdfdde2 4121 ret = iommu_group_add_device(group, dev);
bcb71abe 4122
abdfdde2
AW
4123 iommu_group_put(group);
4124 return ret;
4125}
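/*
 * Net effect: a device lands in the same iommu_group as whatever upstream
 * device its DMA is actually attributed to, together with any peers it
 * cannot be isolated from (no ACS); the group is the unit of isolation
 * exposed through the IOMMU API.
 */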
70ae6f0d 4126
abdfdde2
AW
4127static void intel_iommu_remove_device(struct device *dev)
4128{
4129 iommu_group_remove_device(dev);
70ae6f0d
AW
4130}
4131
a8bcbb0d
JR
4132static struct iommu_ops intel_iommu_ops = {
4133 .domain_init = intel_iommu_domain_init,
4134 .domain_destroy = intel_iommu_domain_destroy,
4135 .attach_dev = intel_iommu_attach_device,
4136 .detach_dev = intel_iommu_detach_device,
b146a1c9
JR
4137 .map = intel_iommu_map,
4138 .unmap = intel_iommu_unmap,
a8bcbb0d 4139 .iova_to_phys = intel_iommu_iova_to_phys,
dbb9fd86 4140 .domain_has_cap = intel_iommu_domain_has_cap,
abdfdde2
AW
4141 .add_device = intel_iommu_add_device,
4142 .remove_device = intel_iommu_remove_device,
6d1c56a9 4143 .pgsize_bitmap = INTEL_IOMMU_PGSIZES,
a8bcbb0d 4144};
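/*
 * Illustrative sketch only (not part of this driver): roughly how a
 * caller such as a device-assignment framework exercises the ops above
 * through the generic IOMMU API.  The iova value is an arbitrary example
 * and the snippet is compiled out.
 */
#if 0
static int example_map_one_page(struct device *dev, phys_addr_t paddr)
{
	struct iommu_domain *domain;
	int ret;

	/* Dispatches to intel_iommu_domain_init() via intel_iommu_ops. */
	domain = iommu_domain_alloc(&pci_bus_type);
	if (!domain)
		return -ENOMEM;

	/* intel_iommu_attach_device(): hook dev up to the DMAR domain. */
	ret = iommu_attach_device(domain, dev);
	if (ret)
		goto out_free;

	/* intel_iommu_map(): one 4KiB read/write mapping at IOVA 1MiB. */
	ret = iommu_map(domain, 0x100000, paddr, PAGE_SIZE,
			IOMMU_READ | IOMMU_WRITE);
	if (!ret)
		/* intel_iommu_unmap(): and tear it down again. */
		iommu_unmap(domain, 0x100000, PAGE_SIZE);

	iommu_detach_device(domain, dev);
out_free:
	iommu_domain_free(domain);
	return ret;
}
#endif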
9af88143 4145
9452618e
DV
4146static void quirk_iommu_g4x_gfx(struct pci_dev *dev)
4147{
4148 /* G4x/GM45 integrated gfx dmar support is totally busted. */
4149 printk(KERN_INFO "DMAR: Disabling IOMMU for graphics on this chipset\n");
4150 dmar_map_gfx = 0;
4151}
4152
4153DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_g4x_gfx);
4154DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_g4x_gfx);
4155DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_g4x_gfx);
4156DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_g4x_gfx);
4157DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_g4x_gfx);
4158DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_g4x_gfx);
4159DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_g4x_gfx);
4160
d34d6517 4161static void quirk_iommu_rwbf(struct pci_dev *dev)
9af88143
DW
4162{
4163 /*
4164 * The Mobile 4 Series Chipset neglects to set the RWBF capability
210561ff 4165 * but needs it. The same seems to hold for the desktop versions.
9af88143
DW
4166 */
4167 printk(KERN_INFO "DMAR: Forcing write-buffer flush capability\n");
4168 rwbf_quirk = 1;
4169}
4170
4171DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
210561ff
DV
4172DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_rwbf);
4173DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_rwbf);
4174DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_rwbf);
4175DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_rwbf);
4176DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_rwbf);
4177DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_rwbf);
e0fc7e0b 4178
eecfd57f
AJ
4179#define GGC 0x52
4180#define GGC_MEMORY_SIZE_MASK (0xf << 8)
4181#define GGC_MEMORY_SIZE_NONE (0x0 << 8)
4182#define GGC_MEMORY_SIZE_1M (0x1 << 8)
4183#define GGC_MEMORY_SIZE_2M (0x3 << 8)
4184#define GGC_MEMORY_VT_ENABLED (0x8 << 8)
4185#define GGC_MEMORY_SIZE_2M_VT (0x9 << 8)
4186#define GGC_MEMORY_SIZE_3M_VT (0xa << 8)
4187#define GGC_MEMORY_SIZE_4M_VT (0xb << 8)
4188
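/*
 * GGC appears to be the GMCH graphics control register in the host
 * bridge's config space; the quirk below reads it to check whether the
 * BIOS carved out a VT-d-capable allocation for the integrated graphics
 * (the GGC_MEMORY_*_VT values above) and, if not, disables graphics DMAR.
 */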
d34d6517 4189static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
9eecabcb
DW
4190{
4191 unsigned short ggc;
4192
eecfd57f 4193 if (pci_read_config_word(dev, GGC, &ggc))
9eecabcb
DW
4194 return;
4195
eecfd57f 4196 if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
9eecabcb
DW
4197 printk(KERN_INFO "DMAR: BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
4198 dmar_map_gfx = 0;
6fbcfb3e
DW
4199 } else if (dmar_map_gfx) {
4200 /* we have to ensure the gfx device is idle before we flush */
4201 printk(KERN_INFO "DMAR: Disabling batched IOTLB flush on Ironlake\n");
4202 intel_iommu_strict = 1;
4203 }
9eecabcb
DW
4204}
4205DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
4206DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt);
4207DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
4208DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);
4209
e0fc7e0b
DW
4210/* On Tylersburg chipsets, some BIOSes have been known to enable the
4211 ISOCH DMAR unit for the Azalia sound device, but not give it any
4212 TLB entries, which causes it to deadlock. Check for that. We do
4213 this in a function called from init_dmars(), instead of in a PCI
4214 quirk, because we don't want to print the obnoxious "BIOS broken"
4215 message if VT-d is actually disabled.
4216*/
4217static void __init check_tylersburg_isoch(void)
4218{
4219 struct pci_dev *pdev;
4220 uint32_t vtisochctrl;
4221
4222 /* If there's no Azalia in the system anyway, forget it. */
4223 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
4224 if (!pdev)
4225 return;
4226 pci_dev_put(pdev);
4227
4228 /* System Management Registers. Might be hidden, in which case
4229 we can't do the sanity check. But that's OK, because the
4230 known-broken BIOSes _don't_ actually hide it, so far. */
4231 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
4232 if (!pdev)
4233 return;
4234
4235 if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
4236 pci_dev_put(pdev);
4237 return;
4238 }
4239
4240 pci_dev_put(pdev);
4241
4242 /* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */
4243 if (vtisochctrl & 1)
4244 return;
4245
4246 /* Drop all bits other than the number of TLB entries */
4247 vtisochctrl &= 0x1c;
4248
4249 /* If we have the recommended number of TLB entries (16), fine. */
4250 if (vtisochctrl == 0x10)
4251 return;
4252
4253 /* Zero TLB entries? You get to ride the short bus to school. */
4254 if (!vtisochctrl) {
4255 WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
4256 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
4257 dmi_get_system_info(DMI_BIOS_VENDOR),
4258 dmi_get_system_info(DMI_BIOS_VERSION),
4259 dmi_get_system_info(DMI_PRODUCT_VERSION));
4260 iommu_identity_mapping |= IDENTMAP_AZALIA;
4261 return;
4262 }
4263
4264 printk(KERN_WARNING "DMAR: Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n",
4265 vtisochctrl);
4266}
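/*
 * Summary of the outcomes above: Azalia routed to the non-isoch unit, or
 * the recommended 16 TLB entries -> nothing to do; zero TLB entries ->
 * warn and force an identity map for Azalia; anything else -> just warn
 * about the non-recommended allocation.
 */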