]> git.ipfire.org Git - people/arne_f/kernel.git/blame - drivers/pci/intel-iommu.c
iommu coherency
[people/arne_f/kernel.git] / drivers / pci / intel-iommu.c
CommitLineData
ba395927
KA
1/*
2 * Copyright (c) 2006, Intel Corporation.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License along with
14 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15 * Place - Suite 330, Boston, MA 02111-1307 USA.
16 *
98bcef56 17 * Copyright (C) 2006-2008 Intel Corporation
18 * Author: Ashok Raj <ashok.raj@intel.com>
19 * Author: Shaohua Li <shaohua.li@intel.com>
20 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
5b6985ce 21 * Author: Fenghua Yu <fenghua.yu@intel.com>
ba395927
KA
22 */
23
24#include <linux/init.h>
25#include <linux/bitmap.h>
5e0d2a6f 26#include <linux/debugfs.h>
ba395927
KA
27#include <linux/slab.h>
28#include <linux/irq.h>
29#include <linux/interrupt.h>
ba395927
KA
30#include <linux/spinlock.h>
31#include <linux/pci.h>
32#include <linux/dmar.h>
33#include <linux/dma-mapping.h>
34#include <linux/mempool.h>
5e0d2a6f 35#include <linux/timer.h>
38717946
KA
36#include <linux/iova.h>
37#include <linux/intel-iommu.h>
ba395927 38#include <asm/cacheflush.h>
46a7fa27 39#include <asm/iommu.h>
ba395927
KA
40#include "pci.h"
41
5b6985ce
FY
42#define ROOT_SIZE VTD_PAGE_SIZE
43#define CONTEXT_SIZE VTD_PAGE_SIZE
44
ba395927
KA
45#define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
46#define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
47
48#define IOAPIC_RANGE_START (0xfee00000)
49#define IOAPIC_RANGE_END (0xfeefffff)
50#define IOVA_START_ADDR (0x1000)
51
52#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
53
ba395927
KA
54#define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1)
55
f27be03b
MM
56#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
57#define DMA_32BIT_PFN IOVA_PFN(DMA_32BIT_MASK)
58#define DMA_64BIT_PFN IOVA_PFN(DMA_64BIT_MASK)
5e0d2a6f 59
d9630fe9
WH
60/* global iommu list, set NULL for ignored DMAR units */
61static struct intel_iommu **g_iommus;
62
46b08e1a
MM
63/*
64 * 0: Present
65 * 1-11: Reserved
66 * 12-63: Context Ptr (12 - (haw-1))
67 * 64-127: Reserved
68 */
69struct root_entry {
70 u64 val;
71 u64 rsvd1;
72};
73#define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
74static inline bool root_present(struct root_entry *root)
75{
76 return (root->val & 1);
77}
78static inline void set_root_present(struct root_entry *root)
79{
80 root->val |= 1;
81}
82static inline void set_root_value(struct root_entry *root, unsigned long value)
83{
84 root->val |= value & VTD_PAGE_MASK;
85}
86
87static inline struct context_entry *
88get_context_addr_from_root(struct root_entry *root)
89{
90 return (struct context_entry *)
91 (root_present(root)?phys_to_virt(
92 root->val & VTD_PAGE_MASK) :
93 NULL);
94}
95
7a8fc25e
MM
96/*
97 * low 64 bits:
98 * 0: present
99 * 1: fault processing disable
100 * 2-3: translation type
101 * 12-63: address space root
102 * high 64 bits:
103 * 0-2: address width
104 * 3-6: aval
105 * 8-23: domain id
106 */
107struct context_entry {
108 u64 lo;
109 u64 hi;
110};
c07e7d21
MM
111
112static inline bool context_present(struct context_entry *context)
113{
114 return (context->lo & 1);
115}
116static inline void context_set_present(struct context_entry *context)
117{
118 context->lo |= 1;
119}
120
121static inline void context_set_fault_enable(struct context_entry *context)
122{
123 context->lo &= (((u64)-1) << 2) | 1;
124}
125
7a8fc25e 126#define CONTEXT_TT_MULTI_LEVEL 0
c07e7d21
MM
127
128static inline void context_set_translation_type(struct context_entry *context,
129 unsigned long value)
130{
131 context->lo &= (((u64)-1) << 4) | 3;
132 context->lo |= (value & 3) << 2;
133}
134
135static inline void context_set_address_root(struct context_entry *context,
136 unsigned long value)
137{
138 context->lo |= value & VTD_PAGE_MASK;
139}
140
141static inline void context_set_address_width(struct context_entry *context,
142 unsigned long value)
143{
144 context->hi |= value & 7;
145}
146
147static inline void context_set_domain_id(struct context_entry *context,
148 unsigned long value)
149{
150 context->hi |= (value & ((1 << 16) - 1)) << 8;
151}
152
153static inline void context_clear_entry(struct context_entry *context)
154{
155 context->lo = 0;
156 context->hi = 0;
157}
7a8fc25e 158
622ba12a
MM
159/*
160 * 0: readable
161 * 1: writable
162 * 2-6: reserved
163 * 7: super page
164 * 8-11: available
165 * 12-63: Host physcial address
166 */
167struct dma_pte {
168 u64 val;
169};
622ba12a 170
19c239ce
MM
171static inline void dma_clear_pte(struct dma_pte *pte)
172{
173 pte->val = 0;
174}
175
176static inline void dma_set_pte_readable(struct dma_pte *pte)
177{
178 pte->val |= DMA_PTE_READ;
179}
180
181static inline void dma_set_pte_writable(struct dma_pte *pte)
182{
183 pte->val |= DMA_PTE_WRITE;
184}
185
186static inline void dma_set_pte_prot(struct dma_pte *pte, unsigned long prot)
187{
188 pte->val = (pte->val & ~3) | (prot & 3);
189}
190
191static inline u64 dma_pte_addr(struct dma_pte *pte)
192{
193 return (pte->val & VTD_PAGE_MASK);
194}
195
196static inline void dma_set_pte_addr(struct dma_pte *pte, u64 addr)
197{
198 pte->val |= (addr & VTD_PAGE_MASK);
199}
200
201static inline bool dma_pte_present(struct dma_pte *pte)
202{
203 return (pte->val & 3) != 0;
204}
622ba12a 205
3b5410e7
WH
206/* devices under the same p2p bridge are owned in one domain */
207#define DOMAIN_FLAG_P2P_MULTIPLE_DEVICES (1 < 0)
208
99126f7c
MM
209struct dmar_domain {
210 int id; /* domain id */
8c11e798 211 unsigned long iommu_bmp; /* bitmap of iommus this domain uses*/
99126f7c
MM
212
213 struct list_head devices; /* all devices' list */
214 struct iova_domain iovad; /* iova's that belong to this domain */
215
216 struct dma_pte *pgd; /* virtual address */
217 spinlock_t mapping_lock; /* page table lock */
218 int gaw; /* max guest address width */
219
220 /* adjusted guest address width, 0 is level 2 30-bit */
221 int agaw;
222
3b5410e7 223 int flags; /* flags to find out type of domain */
8e604097
WH
224
225 int iommu_coherency;/* indicate coherency of iommu access */
99126f7c
MM
226};
227
a647dacb
MM
228/* PCI domain-device relationship */
229struct device_domain_info {
230 struct list_head link; /* link to domain siblings */
231 struct list_head global; /* link to global list */
232 u8 bus; /* PCI bus numer */
233 u8 devfn; /* PCI devfn number */
234 struct pci_dev *dev; /* it's NULL for PCIE-to-PCI bridge */
235 struct dmar_domain *domain; /* pointer to domain */
236};
237
5e0d2a6f 238static void flush_unmaps_timeout(unsigned long data);
239
240DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0);
241
80b20dd8 242#define HIGH_WATER_MARK 250
243struct deferred_flush_tables {
244 int next;
245 struct iova *iova[HIGH_WATER_MARK];
246 struct dmar_domain *domain[HIGH_WATER_MARK];
247};
248
249static struct deferred_flush_tables *deferred_flush;
250
5e0d2a6f 251/* bitmap for indexing intel_iommus */
5e0d2a6f 252static int g_num_of_iommus;
253
254static DEFINE_SPINLOCK(async_umap_flush_lock);
255static LIST_HEAD(unmaps_to_do);
256
257static int timer_on;
258static long list_size;
5e0d2a6f 259
ba395927
KA
260static void domain_remove_dev_info(struct dmar_domain *domain);
261
2ae21010 262int dmar_disabled;
ba395927 263static int __initdata dmar_map_gfx = 1;
7d3b03ce 264static int dmar_forcedac;
5e0d2a6f 265static int intel_iommu_strict;
ba395927
KA
266
267#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
268static DEFINE_SPINLOCK(device_domain_lock);
269static LIST_HEAD(device_domain_list);
270
271static int __init intel_iommu_setup(char *str)
272{
273 if (!str)
274 return -EINVAL;
275 while (*str) {
276 if (!strncmp(str, "off", 3)) {
277 dmar_disabled = 1;
278 printk(KERN_INFO"Intel-IOMMU: disabled\n");
279 } else if (!strncmp(str, "igfx_off", 8)) {
280 dmar_map_gfx = 0;
281 printk(KERN_INFO
282 "Intel-IOMMU: disable GFX device mapping\n");
7d3b03ce 283 } else if (!strncmp(str, "forcedac", 8)) {
5e0d2a6f 284 printk(KERN_INFO
7d3b03ce
KA
285 "Intel-IOMMU: Forcing DAC for PCI devices\n");
286 dmar_forcedac = 1;
5e0d2a6f 287 } else if (!strncmp(str, "strict", 6)) {
288 printk(KERN_INFO
289 "Intel-IOMMU: disable batched IOTLB flush\n");
290 intel_iommu_strict = 1;
ba395927
KA
291 }
292
293 str += strcspn(str, ",");
294 while (*str == ',')
295 str++;
296 }
297 return 0;
298}
299__setup("intel_iommu=", intel_iommu_setup);
300
301static struct kmem_cache *iommu_domain_cache;
302static struct kmem_cache *iommu_devinfo_cache;
303static struct kmem_cache *iommu_iova_cache;
304
eb3fa7cb
KA
305static inline void *iommu_kmem_cache_alloc(struct kmem_cache *cachep)
306{
307 unsigned int flags;
308 void *vaddr;
309
310 /* trying to avoid low memory issues */
311 flags = current->flags & PF_MEMALLOC;
312 current->flags |= PF_MEMALLOC;
313 vaddr = kmem_cache_alloc(cachep, GFP_ATOMIC);
314 current->flags &= (~PF_MEMALLOC | flags);
315 return vaddr;
316}
317
318
ba395927
KA
319static inline void *alloc_pgtable_page(void)
320{
eb3fa7cb
KA
321 unsigned int flags;
322 void *vaddr;
323
324 /* trying to avoid low memory issues */
325 flags = current->flags & PF_MEMALLOC;
326 current->flags |= PF_MEMALLOC;
327 vaddr = (void *)get_zeroed_page(GFP_ATOMIC);
328 current->flags &= (~PF_MEMALLOC | flags);
329 return vaddr;
ba395927
KA
330}
331
332static inline void free_pgtable_page(void *vaddr)
333{
334 free_page((unsigned long)vaddr);
335}
336
337static inline void *alloc_domain_mem(void)
338{
eb3fa7cb 339 return iommu_kmem_cache_alloc(iommu_domain_cache);
ba395927
KA
340}
341
38717946 342static void free_domain_mem(void *vaddr)
ba395927
KA
343{
344 kmem_cache_free(iommu_domain_cache, vaddr);
345}
346
347static inline void * alloc_devinfo_mem(void)
348{
eb3fa7cb 349 return iommu_kmem_cache_alloc(iommu_devinfo_cache);
ba395927
KA
350}
351
352static inline void free_devinfo_mem(void *vaddr)
353{
354 kmem_cache_free(iommu_devinfo_cache, vaddr);
355}
356
357struct iova *alloc_iova_mem(void)
358{
eb3fa7cb 359 return iommu_kmem_cache_alloc(iommu_iova_cache);
ba395927
KA
360}
361
362void free_iova_mem(struct iova *iova)
363{
364 kmem_cache_free(iommu_iova_cache, iova);
365}
366
1b573683
WH
367
368static inline int width_to_agaw(int width);
369
370/* calculate agaw for each iommu.
371 * "SAGAW" may be different across iommus, use a default agaw, and
372 * get a supported less agaw for iommus that don't support the default agaw.
373 */
374int iommu_calculate_agaw(struct intel_iommu *iommu)
375{
376 unsigned long sagaw;
377 int agaw = -1;
378
379 sagaw = cap_sagaw(iommu->cap);
380 for (agaw = width_to_agaw(DEFAULT_DOMAIN_ADDRESS_WIDTH);
381 agaw >= 0; agaw--) {
382 if (test_bit(agaw, &sagaw))
383 break;
384 }
385
386 return agaw;
387}
388
8c11e798
WH
389/* in native case, each domain is related to only one iommu */
390static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
391{
392 int iommu_id;
393
394 iommu_id = find_first_bit(&domain->iommu_bmp, g_num_of_iommus);
395 if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
396 return NULL;
397
398 return g_iommus[iommu_id];
399}
400
8e604097
WH
401/* "Coherency" capability may be different across iommus */
402static void domain_update_iommu_coherency(struct dmar_domain *domain)
403{
404 int i;
405
406 domain->iommu_coherency = 1;
407
408 i = find_first_bit(&domain->iommu_bmp, g_num_of_iommus);
409 for (; i < g_num_of_iommus; ) {
410 if (!ecap_coherent(g_iommus[i]->ecap)) {
411 domain->iommu_coherency = 0;
412 break;
413 }
414 i = find_next_bit(&domain->iommu_bmp, g_num_of_iommus, i+1);
415 }
416}
417
ba395927
KA
418/* Gets context entry for a given bus and devfn */
419static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
420 u8 bus, u8 devfn)
421{
422 struct root_entry *root;
423 struct context_entry *context;
424 unsigned long phy_addr;
425 unsigned long flags;
426
427 spin_lock_irqsave(&iommu->lock, flags);
428 root = &iommu->root_entry[bus];
429 context = get_context_addr_from_root(root);
430 if (!context) {
431 context = (struct context_entry *)alloc_pgtable_page();
432 if (!context) {
433 spin_unlock_irqrestore(&iommu->lock, flags);
434 return NULL;
435 }
5b6985ce 436 __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
ba395927
KA
437 phy_addr = virt_to_phys((void *)context);
438 set_root_value(root, phy_addr);
439 set_root_present(root);
440 __iommu_flush_cache(iommu, root, sizeof(*root));
441 }
442 spin_unlock_irqrestore(&iommu->lock, flags);
443 return &context[devfn];
444}
445
446static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
447{
448 struct root_entry *root;
449 struct context_entry *context;
450 int ret;
451 unsigned long flags;
452
453 spin_lock_irqsave(&iommu->lock, flags);
454 root = &iommu->root_entry[bus];
455 context = get_context_addr_from_root(root);
456 if (!context) {
457 ret = 0;
458 goto out;
459 }
c07e7d21 460 ret = context_present(&context[devfn]);
ba395927
KA
461out:
462 spin_unlock_irqrestore(&iommu->lock, flags);
463 return ret;
464}
465
466static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
467{
468 struct root_entry *root;
469 struct context_entry *context;
470 unsigned long flags;
471
472 spin_lock_irqsave(&iommu->lock, flags);
473 root = &iommu->root_entry[bus];
474 context = get_context_addr_from_root(root);
475 if (context) {
c07e7d21 476 context_clear_entry(&context[devfn]);
ba395927
KA
477 __iommu_flush_cache(iommu, &context[devfn], \
478 sizeof(*context));
479 }
480 spin_unlock_irqrestore(&iommu->lock, flags);
481}
482
483static void free_context_table(struct intel_iommu *iommu)
484{
485 struct root_entry *root;
486 int i;
487 unsigned long flags;
488 struct context_entry *context;
489
490 spin_lock_irqsave(&iommu->lock, flags);
491 if (!iommu->root_entry) {
492 goto out;
493 }
494 for (i = 0; i < ROOT_ENTRY_NR; i++) {
495 root = &iommu->root_entry[i];
496 context = get_context_addr_from_root(root);
497 if (context)
498 free_pgtable_page(context);
499 }
500 free_pgtable_page(iommu->root_entry);
501 iommu->root_entry = NULL;
502out:
503 spin_unlock_irqrestore(&iommu->lock, flags);
504}
505
506/* page table handling */
507#define LEVEL_STRIDE (9)
508#define LEVEL_MASK (((u64)1 << LEVEL_STRIDE) - 1)
509
510static inline int agaw_to_level(int agaw)
511{
512 return agaw + 2;
513}
514
515static inline int agaw_to_width(int agaw)
516{
517 return 30 + agaw * LEVEL_STRIDE;
518
519}
520
521static inline int width_to_agaw(int width)
522{
523 return (width - 30) / LEVEL_STRIDE;
524}
525
526static inline unsigned int level_to_offset_bits(int level)
527{
528 return (12 + (level - 1) * LEVEL_STRIDE);
529}
530
531static inline int address_level_offset(u64 addr, int level)
532{
533 return ((addr >> level_to_offset_bits(level)) & LEVEL_MASK);
534}
535
536static inline u64 level_mask(int level)
537{
538 return ((u64)-1 << level_to_offset_bits(level));
539}
540
541static inline u64 level_size(int level)
542{
543 return ((u64)1 << level_to_offset_bits(level));
544}
545
546static inline u64 align_to_level(u64 addr, int level)
547{
548 return ((addr + level_size(level) - 1) & level_mask(level));
549}
550
551static struct dma_pte * addr_to_dma_pte(struct dmar_domain *domain, u64 addr)
552{
553 int addr_width = agaw_to_width(domain->agaw);
554 struct dma_pte *parent, *pte = NULL;
555 int level = agaw_to_level(domain->agaw);
556 int offset;
557 unsigned long flags;
8c11e798 558 struct intel_iommu *iommu = domain_get_iommu(domain);
ba395927
KA
559
560 BUG_ON(!domain->pgd);
561
562 addr &= (((u64)1) << addr_width) - 1;
563 parent = domain->pgd;
564
565 spin_lock_irqsave(&domain->mapping_lock, flags);
566 while (level > 0) {
567 void *tmp_page;
568
569 offset = address_level_offset(addr, level);
570 pte = &parent[offset];
571 if (level == 1)
572 break;
573
19c239ce 574 if (!dma_pte_present(pte)) {
ba395927
KA
575 tmp_page = alloc_pgtable_page();
576
577 if (!tmp_page) {
578 spin_unlock_irqrestore(&domain->mapping_lock,
579 flags);
580 return NULL;
581 }
8c11e798 582 __iommu_flush_cache(iommu, tmp_page,
5b6985ce 583 PAGE_SIZE);
19c239ce 584 dma_set_pte_addr(pte, virt_to_phys(tmp_page));
ba395927
KA
585 /*
586 * high level table always sets r/w, last level page
587 * table control read/write
588 */
19c239ce
MM
589 dma_set_pte_readable(pte);
590 dma_set_pte_writable(pte);
8c11e798 591 __iommu_flush_cache(iommu, pte, sizeof(*pte));
ba395927 592 }
19c239ce 593 parent = phys_to_virt(dma_pte_addr(pte));
ba395927
KA
594 level--;
595 }
596
597 spin_unlock_irqrestore(&domain->mapping_lock, flags);
598 return pte;
599}
600
601/* return address's pte at specific level */
602static struct dma_pte *dma_addr_level_pte(struct dmar_domain *domain, u64 addr,
603 int level)
604{
605 struct dma_pte *parent, *pte = NULL;
606 int total = agaw_to_level(domain->agaw);
607 int offset;
608
609 parent = domain->pgd;
610 while (level <= total) {
611 offset = address_level_offset(addr, total);
612 pte = &parent[offset];
613 if (level == total)
614 return pte;
615
19c239ce 616 if (!dma_pte_present(pte))
ba395927 617 break;
19c239ce 618 parent = phys_to_virt(dma_pte_addr(pte));
ba395927
KA
619 total--;
620 }
621 return NULL;
622}
623
624/* clear one page's page table */
625static void dma_pte_clear_one(struct dmar_domain *domain, u64 addr)
626{
627 struct dma_pte *pte = NULL;
8c11e798 628 struct intel_iommu *iommu = domain_get_iommu(domain);
ba395927
KA
629
630 /* get last level pte */
631 pte = dma_addr_level_pte(domain, addr, 1);
632
633 if (pte) {
19c239ce 634 dma_clear_pte(pte);
8c11e798 635 __iommu_flush_cache(iommu, pte, sizeof(*pte));
ba395927
KA
636 }
637}
638
639/* clear last level pte, a tlb flush should be followed */
640static void dma_pte_clear_range(struct dmar_domain *domain, u64 start, u64 end)
641{
642 int addr_width = agaw_to_width(domain->agaw);
643
644 start &= (((u64)1) << addr_width) - 1;
645 end &= (((u64)1) << addr_width) - 1;
646 /* in case it's partial page */
5b6985ce
FY
647 start = PAGE_ALIGN(start);
648 end &= PAGE_MASK;
ba395927
KA
649
650 /* we don't need lock here, nobody else touches the iova range */
651 while (start < end) {
652 dma_pte_clear_one(domain, start);
5b6985ce 653 start += VTD_PAGE_SIZE;
ba395927
KA
654 }
655}
656
657/* free page table pages. last level pte should already be cleared */
658static void dma_pte_free_pagetable(struct dmar_domain *domain,
659 u64 start, u64 end)
660{
661 int addr_width = agaw_to_width(domain->agaw);
662 struct dma_pte *pte;
663 int total = agaw_to_level(domain->agaw);
664 int level;
665 u64 tmp;
8c11e798 666 struct intel_iommu *iommu = domain_get_iommu(domain);
ba395927
KA
667
668 start &= (((u64)1) << addr_width) - 1;
669 end &= (((u64)1) << addr_width) - 1;
670
671 /* we don't need lock here, nobody else touches the iova range */
672 level = 2;
673 while (level <= total) {
674 tmp = align_to_level(start, level);
675 if (tmp >= end || (tmp + level_size(level) > end))
676 return;
677
678 while (tmp < end) {
679 pte = dma_addr_level_pte(domain, tmp, level);
680 if (pte) {
681 free_pgtable_page(
19c239ce
MM
682 phys_to_virt(dma_pte_addr(pte)));
683 dma_clear_pte(pte);
8c11e798 684 __iommu_flush_cache(iommu,
ba395927
KA
685 pte, sizeof(*pte));
686 }
687 tmp += level_size(level);
688 }
689 level++;
690 }
691 /* free pgd */
692 if (start == 0 && end >= ((((u64)1) << addr_width) - 1)) {
693 free_pgtable_page(domain->pgd);
694 domain->pgd = NULL;
695 }
696}
697
698/* iommu handling */
699static int iommu_alloc_root_entry(struct intel_iommu *iommu)
700{
701 struct root_entry *root;
702 unsigned long flags;
703
704 root = (struct root_entry *)alloc_pgtable_page();
705 if (!root)
706 return -ENOMEM;
707
5b6985ce 708 __iommu_flush_cache(iommu, root, ROOT_SIZE);
ba395927
KA
709
710 spin_lock_irqsave(&iommu->lock, flags);
711 iommu->root_entry = root;
712 spin_unlock_irqrestore(&iommu->lock, flags);
713
714 return 0;
715}
716
ba395927
KA
717static void iommu_set_root_entry(struct intel_iommu *iommu)
718{
719 void *addr;
720 u32 cmd, sts;
721 unsigned long flag;
722
723 addr = iommu->root_entry;
724
725 spin_lock_irqsave(&iommu->register_lock, flag);
726 dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));
727
728 cmd = iommu->gcmd | DMA_GCMD_SRTP;
729 writel(cmd, iommu->reg + DMAR_GCMD_REG);
730
731 /* Make sure hardware complete it */
732 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
733 readl, (sts & DMA_GSTS_RTPS), sts);
734
735 spin_unlock_irqrestore(&iommu->register_lock, flag);
736}
737
738static void iommu_flush_write_buffer(struct intel_iommu *iommu)
739{
740 u32 val;
741 unsigned long flag;
742
743 if (!cap_rwbf(iommu->cap))
744 return;
745 val = iommu->gcmd | DMA_GCMD_WBF;
746
747 spin_lock_irqsave(&iommu->register_lock, flag);
748 writel(val, iommu->reg + DMAR_GCMD_REG);
749
750 /* Make sure hardware complete it */
751 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
752 readl, (!(val & DMA_GSTS_WBFS)), val);
753
754 spin_unlock_irqrestore(&iommu->register_lock, flag);
755}
756
757/* return value determine if we need a write buffer flush */
758static int __iommu_flush_context(struct intel_iommu *iommu,
759 u16 did, u16 source_id, u8 function_mask, u64 type,
760 int non_present_entry_flush)
761{
762 u64 val = 0;
763 unsigned long flag;
764
765 /*
766 * In the non-present entry flush case, if hardware doesn't cache
767 * non-present entry we do nothing and if hardware cache non-present
768 * entry, we flush entries of domain 0 (the domain id is used to cache
769 * any non-present entries)
770 */
771 if (non_present_entry_flush) {
772 if (!cap_caching_mode(iommu->cap))
773 return 1;
774 else
775 did = 0;
776 }
777
778 switch (type) {
779 case DMA_CCMD_GLOBAL_INVL:
780 val = DMA_CCMD_GLOBAL_INVL;
781 break;
782 case DMA_CCMD_DOMAIN_INVL:
783 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
784 break;
785 case DMA_CCMD_DEVICE_INVL:
786 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
787 | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
788 break;
789 default:
790 BUG();
791 }
792 val |= DMA_CCMD_ICC;
793
794 spin_lock_irqsave(&iommu->register_lock, flag);
795 dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
796
797 /* Make sure hardware complete it */
798 IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
799 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
800
801 spin_unlock_irqrestore(&iommu->register_lock, flag);
802
4d235ba6 803 /* flush context entry will implicitly flush write buffer */
ba395927
KA
804 return 0;
805}
806
ba395927
KA
807/* return value determine if we need a write buffer flush */
808static int __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
809 u64 addr, unsigned int size_order, u64 type,
810 int non_present_entry_flush)
811{
812 int tlb_offset = ecap_iotlb_offset(iommu->ecap);
813 u64 val = 0, val_iva = 0;
814 unsigned long flag;
815
816 /*
817 * In the non-present entry flush case, if hardware doesn't cache
818 * non-present entry we do nothing and if hardware cache non-present
819 * entry, we flush entries of domain 0 (the domain id is used to cache
820 * any non-present entries)
821 */
822 if (non_present_entry_flush) {
823 if (!cap_caching_mode(iommu->cap))
824 return 1;
825 else
826 did = 0;
827 }
828
829 switch (type) {
830 case DMA_TLB_GLOBAL_FLUSH:
831 /* global flush doesn't need set IVA_REG */
832 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
833 break;
834 case DMA_TLB_DSI_FLUSH:
835 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
836 break;
837 case DMA_TLB_PSI_FLUSH:
838 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
839 /* Note: always flush non-leaf currently */
840 val_iva = size_order | addr;
841 break;
842 default:
843 BUG();
844 }
845 /* Note: set drain read/write */
846#if 0
847 /*
848 * This is probably to be super secure.. Looks like we can
849 * ignore it without any impact.
850 */
851 if (cap_read_drain(iommu->cap))
852 val |= DMA_TLB_READ_DRAIN;
853#endif
854 if (cap_write_drain(iommu->cap))
855 val |= DMA_TLB_WRITE_DRAIN;
856
857 spin_lock_irqsave(&iommu->register_lock, flag);
858 /* Note: Only uses first TLB reg currently */
859 if (val_iva)
860 dmar_writeq(iommu->reg + tlb_offset, val_iva);
861 dmar_writeq(iommu->reg + tlb_offset + 8, val);
862
863 /* Make sure hardware complete it */
864 IOMMU_WAIT_OP(iommu, tlb_offset + 8,
865 dmar_readq, (!(val & DMA_TLB_IVT)), val);
866
867 spin_unlock_irqrestore(&iommu->register_lock, flag);
868
869 /* check IOTLB invalidation granularity */
870 if (DMA_TLB_IAIG(val) == 0)
871 printk(KERN_ERR"IOMMU: flush IOTLB failed\n");
872 if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
873 pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
5b6985ce
FY
874 (unsigned long long)DMA_TLB_IIRG(type),
875 (unsigned long long)DMA_TLB_IAIG(val));
4d235ba6 876 /* flush iotlb entry will implicitly flush write buffer */
ba395927
KA
877 return 0;
878}
879
ba395927
KA
880static int iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
881 u64 addr, unsigned int pages, int non_present_entry_flush)
882{
f76aec76 883 unsigned int mask;
ba395927 884
5b6985ce 885 BUG_ON(addr & (~VTD_PAGE_MASK));
ba395927
KA
886 BUG_ON(pages == 0);
887
888 /* Fallback to domain selective flush if no PSI support */
889 if (!cap_pgsel_inv(iommu->cap))
a77b67d4
YS
890 return iommu->flush.flush_iotlb(iommu, did, 0, 0,
891 DMA_TLB_DSI_FLUSH,
892 non_present_entry_flush);
ba395927
KA
893
894 /*
895 * PSI requires page size to be 2 ^ x, and the base address is naturally
896 * aligned to the size
897 */
f76aec76 898 mask = ilog2(__roundup_pow_of_two(pages));
ba395927 899 /* Fallback to domain selective flush if size is too big */
f76aec76 900 if (mask > cap_max_amask_val(iommu->cap))
a77b67d4
YS
901 return iommu->flush.flush_iotlb(iommu, did, 0, 0,
902 DMA_TLB_DSI_FLUSH, non_present_entry_flush);
ba395927 903
a77b67d4
YS
904 return iommu->flush.flush_iotlb(iommu, did, addr, mask,
905 DMA_TLB_PSI_FLUSH,
906 non_present_entry_flush);
ba395927
KA
907}
908
f8bab735 909static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
910{
911 u32 pmen;
912 unsigned long flags;
913
914 spin_lock_irqsave(&iommu->register_lock, flags);
915 pmen = readl(iommu->reg + DMAR_PMEN_REG);
916 pmen &= ~DMA_PMEN_EPM;
917 writel(pmen, iommu->reg + DMAR_PMEN_REG);
918
919 /* wait for the protected region status bit to clear */
920 IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
921 readl, !(pmen & DMA_PMEN_PRS), pmen);
922
923 spin_unlock_irqrestore(&iommu->register_lock, flags);
924}
925
ba395927
KA
926static int iommu_enable_translation(struct intel_iommu *iommu)
927{
928 u32 sts;
929 unsigned long flags;
930
931 spin_lock_irqsave(&iommu->register_lock, flags);
932 writel(iommu->gcmd|DMA_GCMD_TE, iommu->reg + DMAR_GCMD_REG);
933
934 /* Make sure hardware complete it */
935 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
936 readl, (sts & DMA_GSTS_TES), sts);
937
938 iommu->gcmd |= DMA_GCMD_TE;
939 spin_unlock_irqrestore(&iommu->register_lock, flags);
940 return 0;
941}
942
943static int iommu_disable_translation(struct intel_iommu *iommu)
944{
945 u32 sts;
946 unsigned long flag;
947
948 spin_lock_irqsave(&iommu->register_lock, flag);
949 iommu->gcmd &= ~DMA_GCMD_TE;
950 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
951
952 /* Make sure hardware complete it */
953 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
954 readl, (!(sts & DMA_GSTS_TES)), sts);
955
956 spin_unlock_irqrestore(&iommu->register_lock, flag);
957 return 0;
958}
959
3460a6d9
KA
960/* iommu interrupt handling. Most stuff are MSI-like. */
961
d94afc6c 962static const char *fault_reason_strings[] =
3460a6d9
KA
963{
964 "Software",
965 "Present bit in root entry is clear",
966 "Present bit in context entry is clear",
967 "Invalid context entry",
968 "Access beyond MGAW",
969 "PTE Write access is not set",
970 "PTE Read access is not set",
971 "Next page table ptr is invalid",
972 "Root table address invalid",
973 "Context table ptr is invalid",
974 "non-zero reserved fields in RTP",
975 "non-zero reserved fields in CTP",
976 "non-zero reserved fields in PTE",
3460a6d9 977};
f8bab735 978#define MAX_FAULT_REASON_IDX (ARRAY_SIZE(fault_reason_strings) - 1)
3460a6d9 979
d94afc6c 980const char *dmar_get_fault_reason(u8 fault_reason)
3460a6d9 981{
d94afc6c 982 if (fault_reason > MAX_FAULT_REASON_IDX)
983 return "Unknown";
3460a6d9
KA
984 else
985 return fault_reason_strings[fault_reason];
986}
987
988void dmar_msi_unmask(unsigned int irq)
989{
990 struct intel_iommu *iommu = get_irq_data(irq);
991 unsigned long flag;
992
993 /* unmask it */
994 spin_lock_irqsave(&iommu->register_lock, flag);
995 writel(0, iommu->reg + DMAR_FECTL_REG);
996 /* Read a reg to force flush the post write */
997 readl(iommu->reg + DMAR_FECTL_REG);
998 spin_unlock_irqrestore(&iommu->register_lock, flag);
999}
1000
1001void dmar_msi_mask(unsigned int irq)
1002{
1003 unsigned long flag;
1004 struct intel_iommu *iommu = get_irq_data(irq);
1005
1006 /* mask it */
1007 spin_lock_irqsave(&iommu->register_lock, flag);
1008 writel(DMA_FECTL_IM, iommu->reg + DMAR_FECTL_REG);
1009 /* Read a reg to force flush the post write */
1010 readl(iommu->reg + DMAR_FECTL_REG);
1011 spin_unlock_irqrestore(&iommu->register_lock, flag);
1012}
1013
1014void dmar_msi_write(int irq, struct msi_msg *msg)
1015{
1016 struct intel_iommu *iommu = get_irq_data(irq);
1017 unsigned long flag;
1018
1019 spin_lock_irqsave(&iommu->register_lock, flag);
1020 writel(msg->data, iommu->reg + DMAR_FEDATA_REG);
1021 writel(msg->address_lo, iommu->reg + DMAR_FEADDR_REG);
1022 writel(msg->address_hi, iommu->reg + DMAR_FEUADDR_REG);
1023 spin_unlock_irqrestore(&iommu->register_lock, flag);
1024}
1025
1026void dmar_msi_read(int irq, struct msi_msg *msg)
1027{
1028 struct intel_iommu *iommu = get_irq_data(irq);
1029 unsigned long flag;
1030
1031 spin_lock_irqsave(&iommu->register_lock, flag);
1032 msg->data = readl(iommu->reg + DMAR_FEDATA_REG);
1033 msg->address_lo = readl(iommu->reg + DMAR_FEADDR_REG);
1034 msg->address_hi = readl(iommu->reg + DMAR_FEUADDR_REG);
1035 spin_unlock_irqrestore(&iommu->register_lock, flag);
1036}
1037
1038static int iommu_page_fault_do_one(struct intel_iommu *iommu, int type,
5b6985ce 1039 u8 fault_reason, u16 source_id, unsigned long long addr)
3460a6d9 1040{
d94afc6c 1041 const char *reason;
3460a6d9
KA
1042
1043 reason = dmar_get_fault_reason(fault_reason);
1044
1045 printk(KERN_ERR
1046 "DMAR:[%s] Request device [%02x:%02x.%d] "
1047 "fault addr %llx \n"
1048 "DMAR:[fault reason %02d] %s\n",
1049 (type ? "DMA Read" : "DMA Write"),
1050 (source_id >> 8), PCI_SLOT(source_id & 0xFF),
1051 PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason);
1052 return 0;
1053}
1054
1055#define PRIMARY_FAULT_REG_LEN (16)
1056static irqreturn_t iommu_page_fault(int irq, void *dev_id)
1057{
1058 struct intel_iommu *iommu = dev_id;
1059 int reg, fault_index;
1060 u32 fault_status;
1061 unsigned long flag;
1062
1063 spin_lock_irqsave(&iommu->register_lock, flag);
1064 fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1065
1066 /* TBD: ignore advanced fault log currently */
1067 if (!(fault_status & DMA_FSTS_PPF))
1068 goto clear_overflow;
1069
1070 fault_index = dma_fsts_fault_record_index(fault_status);
1071 reg = cap_fault_reg_offset(iommu->cap);
1072 while (1) {
1073 u8 fault_reason;
1074 u16 source_id;
1075 u64 guest_addr;
1076 int type;
1077 u32 data;
1078
1079 /* highest 32 bits */
1080 data = readl(iommu->reg + reg +
1081 fault_index * PRIMARY_FAULT_REG_LEN + 12);
1082 if (!(data & DMA_FRCD_F))
1083 break;
1084
1085 fault_reason = dma_frcd_fault_reason(data);
1086 type = dma_frcd_type(data);
1087
1088 data = readl(iommu->reg + reg +
1089 fault_index * PRIMARY_FAULT_REG_LEN + 8);
1090 source_id = dma_frcd_source_id(data);
1091
1092 guest_addr = dmar_readq(iommu->reg + reg +
1093 fault_index * PRIMARY_FAULT_REG_LEN);
1094 guest_addr = dma_frcd_page_addr(guest_addr);
1095 /* clear the fault */
1096 writel(DMA_FRCD_F, iommu->reg + reg +
1097 fault_index * PRIMARY_FAULT_REG_LEN + 12);
1098
1099 spin_unlock_irqrestore(&iommu->register_lock, flag);
1100
1101 iommu_page_fault_do_one(iommu, type, fault_reason,
1102 source_id, guest_addr);
1103
1104 fault_index++;
1105 if (fault_index > cap_num_fault_regs(iommu->cap))
1106 fault_index = 0;
1107 spin_lock_irqsave(&iommu->register_lock, flag);
1108 }
1109clear_overflow:
1110 /* clear primary fault overflow */
1111 fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1112 if (fault_status & DMA_FSTS_PFO)
1113 writel(DMA_FSTS_PFO, iommu->reg + DMAR_FSTS_REG);
1114
1115 spin_unlock_irqrestore(&iommu->register_lock, flag);
1116 return IRQ_HANDLED;
1117}
1118
1119int dmar_set_interrupt(struct intel_iommu *iommu)
1120{
1121 int irq, ret;
1122
1123 irq = create_irq();
1124 if (!irq) {
1125 printk(KERN_ERR "IOMMU: no free vectors\n");
1126 return -EINVAL;
1127 }
1128
1129 set_irq_data(irq, iommu);
1130 iommu->irq = irq;
1131
1132 ret = arch_setup_dmar_msi(irq);
1133 if (ret) {
1134 set_irq_data(irq, NULL);
1135 iommu->irq = 0;
1136 destroy_irq(irq);
1137 return 0;
1138 }
1139
1140 /* Force fault register is cleared */
1141 iommu_page_fault(irq, iommu);
1142
1143 ret = request_irq(irq, iommu_page_fault, 0, iommu->name, iommu);
1144 if (ret)
1145 printk(KERN_ERR "IOMMU: can't request irq\n");
1146 return ret;
1147}
1148
ba395927
KA
1149static int iommu_init_domains(struct intel_iommu *iommu)
1150{
1151 unsigned long ndomains;
1152 unsigned long nlongs;
1153
1154 ndomains = cap_ndoms(iommu->cap);
1155 pr_debug("Number of Domains supportd <%ld>\n", ndomains);
1156 nlongs = BITS_TO_LONGS(ndomains);
1157
1158 /* TBD: there might be 64K domains,
1159 * consider other allocation for future chip
1160 */
1161 iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1162 if (!iommu->domain_ids) {
1163 printk(KERN_ERR "Allocating domain id array failed\n");
1164 return -ENOMEM;
1165 }
1166 iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *),
1167 GFP_KERNEL);
1168 if (!iommu->domains) {
1169 printk(KERN_ERR "Allocating domain array failed\n");
1170 kfree(iommu->domain_ids);
1171 return -ENOMEM;
1172 }
1173
e61d98d8
SS
1174 spin_lock_init(&iommu->lock);
1175
ba395927
KA
1176 /*
1177 * if Caching mode is set, then invalid translations are tagged
1178 * with domainid 0. Hence we need to pre-allocate it.
1179 */
1180 if (cap_caching_mode(iommu->cap))
1181 set_bit(0, iommu->domain_ids);
1182 return 0;
1183}
ba395927 1184
ba395927
KA
1185
1186static void domain_exit(struct dmar_domain *domain);
e61d98d8
SS
1187
1188void free_dmar_iommu(struct intel_iommu *iommu)
ba395927
KA
1189{
1190 struct dmar_domain *domain;
1191 int i;
1192
ba395927
KA
1193 i = find_first_bit(iommu->domain_ids, cap_ndoms(iommu->cap));
1194 for (; i < cap_ndoms(iommu->cap); ) {
1195 domain = iommu->domains[i];
1196 clear_bit(i, iommu->domain_ids);
1197 domain_exit(domain);
1198 i = find_next_bit(iommu->domain_ids,
1199 cap_ndoms(iommu->cap), i+1);
1200 }
1201
1202 if (iommu->gcmd & DMA_GCMD_TE)
1203 iommu_disable_translation(iommu);
1204
1205 if (iommu->irq) {
1206 set_irq_data(iommu->irq, NULL);
1207 /* This will mask the irq */
1208 free_irq(iommu->irq, iommu);
1209 destroy_irq(iommu->irq);
1210 }
1211
1212 kfree(iommu->domains);
1213 kfree(iommu->domain_ids);
1214
d9630fe9
WH
1215 g_iommus[iommu->seq_id] = NULL;
1216
1217 /* if all iommus are freed, free g_iommus */
1218 for (i = 0; i < g_num_of_iommus; i++) {
1219 if (g_iommus[i])
1220 break;
1221 }
1222
1223 if (i == g_num_of_iommus)
1224 kfree(g_iommus);
1225
ba395927
KA
1226 /* free context mapping */
1227 free_context_table(iommu);
ba395927
KA
1228}
1229
1230static struct dmar_domain * iommu_alloc_domain(struct intel_iommu *iommu)
1231{
1232 unsigned long num;
1233 unsigned long ndomains;
1234 struct dmar_domain *domain;
1235 unsigned long flags;
1236
1237 domain = alloc_domain_mem();
1238 if (!domain)
1239 return NULL;
1240
1241 ndomains = cap_ndoms(iommu->cap);
1242
1243 spin_lock_irqsave(&iommu->lock, flags);
1244 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1245 if (num >= ndomains) {
1246 spin_unlock_irqrestore(&iommu->lock, flags);
1247 free_domain_mem(domain);
1248 printk(KERN_ERR "IOMMU: no free domain ids\n");
1249 return NULL;
1250 }
1251
1252 set_bit(num, iommu->domain_ids);
1253 domain->id = num;
8c11e798
WH
1254 memset(&domain->iommu_bmp, 0, sizeof(unsigned long));
1255 set_bit(iommu->seq_id, &domain->iommu_bmp);
d71a2f33 1256 domain->flags = 0;
ba395927
KA
1257 iommu->domains[num] = domain;
1258 spin_unlock_irqrestore(&iommu->lock, flags);
1259
1260 return domain;
1261}
1262
1263static void iommu_free_domain(struct dmar_domain *domain)
1264{
1265 unsigned long flags;
8c11e798
WH
1266 struct intel_iommu *iommu;
1267
1268 iommu = domain_get_iommu(domain);
ba395927 1269
8c11e798
WH
1270 spin_lock_irqsave(&iommu->lock, flags);
1271 clear_bit(domain->id, iommu->domain_ids);
1272 spin_unlock_irqrestore(&iommu->lock, flags);
ba395927
KA
1273}
1274
1275static struct iova_domain reserved_iova_list;
8a443df4
MG
1276static struct lock_class_key reserved_alloc_key;
1277static struct lock_class_key reserved_rbtree_key;
ba395927
KA
1278
1279static void dmar_init_reserved_ranges(void)
1280{
1281 struct pci_dev *pdev = NULL;
1282 struct iova *iova;
1283 int i;
1284 u64 addr, size;
1285
f661197e 1286 init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN);
ba395927 1287
8a443df4
MG
1288 lockdep_set_class(&reserved_iova_list.iova_alloc_lock,
1289 &reserved_alloc_key);
1290 lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1291 &reserved_rbtree_key);
1292
ba395927
KA
1293 /* IOAPIC ranges shouldn't be accessed by DMA */
1294 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1295 IOVA_PFN(IOAPIC_RANGE_END));
1296 if (!iova)
1297 printk(KERN_ERR "Reserve IOAPIC range failed\n");
1298
1299 /* Reserve all PCI MMIO to avoid peer-to-peer access */
1300 for_each_pci_dev(pdev) {
1301 struct resource *r;
1302
1303 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1304 r = &pdev->resource[i];
1305 if (!r->flags || !(r->flags & IORESOURCE_MEM))
1306 continue;
1307 addr = r->start;
5b6985ce 1308 addr &= PAGE_MASK;
ba395927 1309 size = r->end - addr;
5b6985ce 1310 size = PAGE_ALIGN(size);
ba395927
KA
1311 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(addr),
1312 IOVA_PFN(size + addr) - 1);
1313 if (!iova)
1314 printk(KERN_ERR "Reserve iova failed\n");
1315 }
1316 }
1317
1318}
1319
1320static void domain_reserve_special_ranges(struct dmar_domain *domain)
1321{
1322 copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1323}
1324
1325static inline int guestwidth_to_adjustwidth(int gaw)
1326{
1327 int agaw;
1328 int r = (gaw - 12) % 9;
1329
1330 if (r == 0)
1331 agaw = gaw;
1332 else
1333 agaw = gaw + 9 - r;
1334 if (agaw > 64)
1335 agaw = 64;
1336 return agaw;
1337}
1338
1339static int domain_init(struct dmar_domain *domain, int guest_width)
1340{
1341 struct intel_iommu *iommu;
1342 int adjust_width, agaw;
1343 unsigned long sagaw;
1344
f661197e 1345 init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
ba395927
KA
1346 spin_lock_init(&domain->mapping_lock);
1347
1348 domain_reserve_special_ranges(domain);
1349
1350 /* calculate AGAW */
8c11e798 1351 iommu = domain_get_iommu(domain);
ba395927
KA
1352 if (guest_width > cap_mgaw(iommu->cap))
1353 guest_width = cap_mgaw(iommu->cap);
1354 domain->gaw = guest_width;
1355 adjust_width = guestwidth_to_adjustwidth(guest_width);
1356 agaw = width_to_agaw(adjust_width);
1357 sagaw = cap_sagaw(iommu->cap);
1358 if (!test_bit(agaw, &sagaw)) {
1359 /* hardware doesn't support it, choose a bigger one */
1360 pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw);
1361 agaw = find_next_bit(&sagaw, 5, agaw);
1362 if (agaw >= 5)
1363 return -ENODEV;
1364 }
1365 domain->agaw = agaw;
1366 INIT_LIST_HEAD(&domain->devices);
1367
8e604097
WH
1368 if (ecap_coherent(iommu->ecap))
1369 domain->iommu_coherency = 1;
1370 else
1371 domain->iommu_coherency = 0;
1372
ba395927
KA
1373 /* always allocate the top pgd */
1374 domain->pgd = (struct dma_pte *)alloc_pgtable_page();
1375 if (!domain->pgd)
1376 return -ENOMEM;
5b6985ce 1377 __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
ba395927
KA
1378 return 0;
1379}
1380
1381static void domain_exit(struct dmar_domain *domain)
1382{
1383 u64 end;
1384
1385 /* Domain 0 is reserved, so dont process it */
1386 if (!domain)
1387 return;
1388
1389 domain_remove_dev_info(domain);
1390 /* destroy iovas */
1391 put_iova_domain(&domain->iovad);
1392 end = DOMAIN_MAX_ADDR(domain->gaw);
5b6985ce 1393 end = end & (~PAGE_MASK);
ba395927
KA
1394
1395 /* clear ptes */
1396 dma_pte_clear_range(domain, 0, end);
1397
1398 /* free page tables */
1399 dma_pte_free_pagetable(domain, 0, end);
1400
1401 iommu_free_domain(domain);
1402 free_domain_mem(domain);
1403}
1404
1405static int domain_context_mapping_one(struct dmar_domain *domain,
1406 u8 bus, u8 devfn)
1407{
1408 struct context_entry *context;
8c11e798 1409 struct intel_iommu *iommu = domain_get_iommu(domain);
ba395927
KA
1410 unsigned long flags;
1411
1412 pr_debug("Set context mapping for %02x:%02x.%d\n",
1413 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1414 BUG_ON(!domain->pgd);
1415 context = device_to_context_entry(iommu, bus, devfn);
1416 if (!context)
1417 return -ENOMEM;
1418 spin_lock_irqsave(&iommu->lock, flags);
c07e7d21 1419 if (context_present(context)) {
ba395927
KA
1420 spin_unlock_irqrestore(&iommu->lock, flags);
1421 return 0;
1422 }
1423
c07e7d21
MM
1424 context_set_domain_id(context, domain->id);
1425 context_set_address_width(context, domain->agaw);
1426 context_set_address_root(context, virt_to_phys(domain->pgd));
1427 context_set_translation_type(context, CONTEXT_TT_MULTI_LEVEL);
1428 context_set_fault_enable(context);
1429 context_set_present(context);
ba395927
KA
1430 __iommu_flush_cache(iommu, context, sizeof(*context));
1431
1432 /* it's a non-present to present mapping */
a77b67d4
YS
1433 if (iommu->flush.flush_context(iommu, domain->id,
1434 (((u16)bus) << 8) | devfn, DMA_CCMD_MASK_NOBIT,
1435 DMA_CCMD_DEVICE_INVL, 1))
ba395927
KA
1436 iommu_flush_write_buffer(iommu);
1437 else
a77b67d4
YS
1438 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_DSI_FLUSH, 0);
1439
ba395927
KA
1440 spin_unlock_irqrestore(&iommu->lock, flags);
1441 return 0;
1442}
1443
1444static int
1445domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev)
1446{
1447 int ret;
1448 struct pci_dev *tmp, *parent;
1449
1450 ret = domain_context_mapping_one(domain, pdev->bus->number,
1451 pdev->devfn);
1452 if (ret)
1453 return ret;
1454
1455 /* dependent device mapping */
1456 tmp = pci_find_upstream_pcie_bridge(pdev);
1457 if (!tmp)
1458 return 0;
1459 /* Secondary interface's bus number and devfn 0 */
1460 parent = pdev->bus->self;
1461 while (parent != tmp) {
1462 ret = domain_context_mapping_one(domain, parent->bus->number,
1463 parent->devfn);
1464 if (ret)
1465 return ret;
1466 parent = parent->bus->self;
1467 }
1468 if (tmp->is_pcie) /* this is a PCIE-to-PCI bridge */
1469 return domain_context_mapping_one(domain,
1470 tmp->subordinate->number, 0);
1471 else /* this is a legacy PCI bridge */
1472 return domain_context_mapping_one(domain,
1473 tmp->bus->number, tmp->devfn);
1474}
1475
1476static int domain_context_mapped(struct dmar_domain *domain,
1477 struct pci_dev *pdev)
1478{
1479 int ret;
1480 struct pci_dev *tmp, *parent;
8c11e798 1481 struct intel_iommu *iommu = domain_get_iommu(domain);
ba395927 1482
8c11e798 1483 ret = device_context_mapped(iommu,
ba395927
KA
1484 pdev->bus->number, pdev->devfn);
1485 if (!ret)
1486 return ret;
1487 /* dependent device mapping */
1488 tmp = pci_find_upstream_pcie_bridge(pdev);
1489 if (!tmp)
1490 return ret;
1491 /* Secondary interface's bus number and devfn 0 */
1492 parent = pdev->bus->self;
1493 while (parent != tmp) {
8c11e798 1494 ret = device_context_mapped(iommu, parent->bus->number,
ba395927
KA
1495 parent->devfn);
1496 if (!ret)
1497 return ret;
1498 parent = parent->bus->self;
1499 }
1500 if (tmp->is_pcie)
8c11e798 1501 return device_context_mapped(iommu,
ba395927
KA
1502 tmp->subordinate->number, 0);
1503 else
8c11e798 1504 return device_context_mapped(iommu,
ba395927
KA
1505 tmp->bus->number, tmp->devfn);
1506}
1507
1508static int
1509domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova,
1510 u64 hpa, size_t size, int prot)
1511{
1512 u64 start_pfn, end_pfn;
1513 struct dma_pte *pte;
1514 int index;
5b6985ce 1515 int addr_width = agaw_to_width(domain->agaw);
8c11e798 1516 struct intel_iommu *iommu = domain_get_iommu(domain);
5b6985ce
FY
1517
1518 hpa &= (((u64)1) << addr_width) - 1;
ba395927
KA
1519
1520 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
1521 return -EINVAL;
5b6985ce
FY
1522 iova &= PAGE_MASK;
1523 start_pfn = ((u64)hpa) >> VTD_PAGE_SHIFT;
1524 end_pfn = (VTD_PAGE_ALIGN(((u64)hpa) + size)) >> VTD_PAGE_SHIFT;
ba395927
KA
1525 index = 0;
1526 while (start_pfn < end_pfn) {
5b6985ce 1527 pte = addr_to_dma_pte(domain, iova + VTD_PAGE_SIZE * index);
ba395927
KA
1528 if (!pte)
1529 return -ENOMEM;
1530 /* We don't need lock here, nobody else
1531 * touches the iova range
1532 */
19c239ce
MM
1533 BUG_ON(dma_pte_addr(pte));
1534 dma_set_pte_addr(pte, start_pfn << VTD_PAGE_SHIFT);
1535 dma_set_pte_prot(pte, prot);
8c11e798 1536 __iommu_flush_cache(iommu, pte, sizeof(*pte));
ba395927
KA
1537 start_pfn++;
1538 index++;
1539 }
1540 return 0;
1541}
1542
1543static void detach_domain_for_dev(struct dmar_domain *domain, u8 bus, u8 devfn)
1544{
8c11e798
WH
1545 struct intel_iommu *iommu = domain_get_iommu(domain);
1546
1547 clear_context_table(iommu, bus, devfn);
1548 iommu->flush.flush_context(iommu, 0, 0, 0,
a77b67d4 1549 DMA_CCMD_GLOBAL_INVL, 0);
8c11e798 1550 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
a77b67d4 1551 DMA_TLB_GLOBAL_FLUSH, 0);
ba395927
KA
1552}
1553
1554static void domain_remove_dev_info(struct dmar_domain *domain)
1555{
1556 struct device_domain_info *info;
1557 unsigned long flags;
1558
1559 spin_lock_irqsave(&device_domain_lock, flags);
1560 while (!list_empty(&domain->devices)) {
1561 info = list_entry(domain->devices.next,
1562 struct device_domain_info, link);
1563 list_del(&info->link);
1564 list_del(&info->global);
1565 if (info->dev)
358dd8ac 1566 info->dev->dev.archdata.iommu = NULL;
ba395927
KA
1567 spin_unlock_irqrestore(&device_domain_lock, flags);
1568
1569 detach_domain_for_dev(info->domain, info->bus, info->devfn);
1570 free_devinfo_mem(info);
1571
1572 spin_lock_irqsave(&device_domain_lock, flags);
1573 }
1574 spin_unlock_irqrestore(&device_domain_lock, flags);
1575}
1576
1577/*
1578 * find_domain
358dd8ac 1579 * Note: we use struct pci_dev->dev.archdata.iommu stores the info
ba395927 1580 */
38717946 1581static struct dmar_domain *
ba395927
KA
1582find_domain(struct pci_dev *pdev)
1583{
1584 struct device_domain_info *info;
1585
1586 /* No lock here, assumes no domain exit in normal case */
358dd8ac 1587 info = pdev->dev.archdata.iommu;
ba395927
KA
1588 if (info)
1589 return info->domain;
1590 return NULL;
1591}
1592
ba395927
KA
1593/* domain is initialized */
1594static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
1595{
1596 struct dmar_domain *domain, *found = NULL;
1597 struct intel_iommu *iommu;
1598 struct dmar_drhd_unit *drhd;
1599 struct device_domain_info *info, *tmp;
1600 struct pci_dev *dev_tmp;
1601 unsigned long flags;
1602 int bus = 0, devfn = 0;
1603
1604 domain = find_domain(pdev);
1605 if (domain)
1606 return domain;
1607
1608 dev_tmp = pci_find_upstream_pcie_bridge(pdev);
1609 if (dev_tmp) {
1610 if (dev_tmp->is_pcie) {
1611 bus = dev_tmp->subordinate->number;
1612 devfn = 0;
1613 } else {
1614 bus = dev_tmp->bus->number;
1615 devfn = dev_tmp->devfn;
1616 }
1617 spin_lock_irqsave(&device_domain_lock, flags);
1618 list_for_each_entry(info, &device_domain_list, global) {
1619 if (info->bus == bus && info->devfn == devfn) {
1620 found = info->domain;
1621 break;
1622 }
1623 }
1624 spin_unlock_irqrestore(&device_domain_lock, flags);
1625 /* pcie-pci bridge already has a domain, uses it */
1626 if (found) {
1627 domain = found;
1628 goto found_domain;
1629 }
1630 }
1631
1632 /* Allocate new domain for the device */
1633 drhd = dmar_find_matched_drhd_unit(pdev);
1634 if (!drhd) {
1635 printk(KERN_ERR "IOMMU: can't find DMAR for device %s\n",
1636 pci_name(pdev));
1637 return NULL;
1638 }
1639 iommu = drhd->iommu;
1640
1641 domain = iommu_alloc_domain(iommu);
1642 if (!domain)
1643 goto error;
1644
1645 if (domain_init(domain, gaw)) {
1646 domain_exit(domain);
1647 goto error;
1648 }
1649
1650 /* register pcie-to-pci device */
1651 if (dev_tmp) {
1652 info = alloc_devinfo_mem();
1653 if (!info) {
1654 domain_exit(domain);
1655 goto error;
1656 }
1657 info->bus = bus;
1658 info->devfn = devfn;
1659 info->dev = NULL;
1660 info->domain = domain;
1661 /* This domain is shared by devices under p2p bridge */
3b5410e7 1662 domain->flags |= DOMAIN_FLAG_P2P_MULTIPLE_DEVICES;
ba395927
KA
1663
1664 /* pcie-to-pci bridge already has a domain, uses it */
1665 found = NULL;
1666 spin_lock_irqsave(&device_domain_lock, flags);
1667 list_for_each_entry(tmp, &device_domain_list, global) {
1668 if (tmp->bus == bus && tmp->devfn == devfn) {
1669 found = tmp->domain;
1670 break;
1671 }
1672 }
1673 if (found) {
1674 free_devinfo_mem(info);
1675 domain_exit(domain);
1676 domain = found;
1677 } else {
1678 list_add(&info->link, &domain->devices);
1679 list_add(&info->global, &device_domain_list);
1680 }
1681 spin_unlock_irqrestore(&device_domain_lock, flags);
1682 }
1683
1684found_domain:
1685 info = alloc_devinfo_mem();
1686 if (!info)
1687 goto error;
1688 info->bus = pdev->bus->number;
1689 info->devfn = pdev->devfn;
1690 info->dev = pdev;
1691 info->domain = domain;
1692 spin_lock_irqsave(&device_domain_lock, flags);
1693 /* somebody is fast */
1694 found = find_domain(pdev);
1695 if (found != NULL) {
1696 spin_unlock_irqrestore(&device_domain_lock, flags);
1697 if (found != domain) {
1698 domain_exit(domain);
1699 domain = found;
1700 }
1701 free_devinfo_mem(info);
1702 return domain;
1703 }
1704 list_add(&info->link, &domain->devices);
1705 list_add(&info->global, &device_domain_list);
358dd8ac 1706 pdev->dev.archdata.iommu = info;
ba395927
KA
1707 spin_unlock_irqrestore(&device_domain_lock, flags);
1708 return domain;
1709error:
1710 /* recheck it here, maybe others set it */
1711 return find_domain(pdev);
1712}
1713
5b6985ce
FY
1714static int iommu_prepare_identity_map(struct pci_dev *pdev,
1715 unsigned long long start,
1716 unsigned long long end)
ba395927
KA
1717{
1718 struct dmar_domain *domain;
1719 unsigned long size;
5b6985ce 1720 unsigned long long base;
ba395927
KA
1721 int ret;
1722
1723 printk(KERN_INFO
1724 "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
1725 pci_name(pdev), start, end);
1726 /* page table init */
1727 domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
1728 if (!domain)
1729 return -ENOMEM;
1730
1731 /* The address might not be aligned */
5b6985ce 1732 base = start & PAGE_MASK;
ba395927 1733 size = end - base;
5b6985ce 1734 size = PAGE_ALIGN(size);
ba395927
KA
1735 if (!reserve_iova(&domain->iovad, IOVA_PFN(base),
1736 IOVA_PFN(base + size) - 1)) {
1737 printk(KERN_ERR "IOMMU: reserve iova failed\n");
1738 ret = -ENOMEM;
1739 goto error;
1740 }
1741
1742 pr_debug("Mapping reserved region %lx@%llx for %s\n",
1743 size, base, pci_name(pdev));
1744 /*
1745 * RMRR range might have overlap with physical memory range,
1746 * clear it first
1747 */
1748 dma_pte_clear_range(domain, base, base + size);
1749
1750 ret = domain_page_mapping(domain, base, base, size,
1751 DMA_PTE_READ|DMA_PTE_WRITE);
1752 if (ret)
1753 goto error;
1754
1755 /* context entry init */
1756 ret = domain_context_mapping(domain, pdev);
1757 if (!ret)
1758 return 0;
1759error:
1760 domain_exit(domain);
1761 return ret;
1762
1763}
1764
1765static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
1766 struct pci_dev *pdev)
1767{
358dd8ac 1768 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
ba395927
KA
1769 return 0;
1770 return iommu_prepare_identity_map(pdev, rmrr->base_address,
1771 rmrr->end_address + 1);
1772}
1773
e820482c 1774#ifdef CONFIG_DMAR_GFX_WA
d52d53b8
YL
1775struct iommu_prepare_data {
1776 struct pci_dev *pdev;
1777 int ret;
1778};
1779
1780static int __init iommu_prepare_work_fn(unsigned long start_pfn,
1781 unsigned long end_pfn, void *datax)
1782{
1783 struct iommu_prepare_data *data;
1784
1785 data = (struct iommu_prepare_data *)datax;
1786
1787 data->ret = iommu_prepare_identity_map(data->pdev,
1788 start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
1789 return data->ret;
1790
1791}
1792
1793static int __init iommu_prepare_with_active_regions(struct pci_dev *pdev)
1794{
1795 int nid;
1796 struct iommu_prepare_data data;
1797
1798 data.pdev = pdev;
1799 data.ret = 0;
1800
1801 for_each_online_node(nid) {
1802 work_with_active_regions(nid, iommu_prepare_work_fn, &data);
1803 if (data.ret)
1804 return data.ret;
1805 }
1806 return data.ret;
1807}
1808
e820482c
KA
1809static void __init iommu_prepare_gfx_mapping(void)
1810{
1811 struct pci_dev *pdev = NULL;
e820482c
KA
1812 int ret;
1813
1814 for_each_pci_dev(pdev) {
358dd8ac 1815 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO ||
e820482c
KA
1816 !IS_GFX_DEVICE(pdev))
1817 continue;
1818 printk(KERN_INFO "IOMMU: gfx device %s 1-1 mapping\n",
1819 pci_name(pdev));
d52d53b8
YL
1820 ret = iommu_prepare_with_active_regions(pdev);
1821 if (ret)
1822 printk(KERN_ERR "IOMMU: mapping reserved region failed\n");
e820482c
KA
1823 }
1824}
2abd7e16
MM
1825#else /* !CONFIG_DMAR_GFX_WA */
1826static inline void iommu_prepare_gfx_mapping(void)
1827{
1828 return;
1829}
e820482c
KA
1830#endif
1831
49a0429e
KA
1832#ifdef CONFIG_DMAR_FLOPPY_WA
1833static inline void iommu_prepare_isa(void)
1834{
1835 struct pci_dev *pdev;
1836 int ret;
1837
1838 pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
1839 if (!pdev)
1840 return;
1841
1842 printk(KERN_INFO "IOMMU: Prepare 0-16M unity mapping for LPC\n");
1843 ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024);
1844
1845 if (ret)
1846 printk("IOMMU: Failed to create 0-64M identity map, "
1847 "floppy might not work\n");
1848
1849}
1850#else
1851static inline void iommu_prepare_isa(void)
1852{
1853 return;
1854}
1855#endif /* !CONFIG_DMAR_FLPY_WA */
1856
519a0549 1857static int __init init_dmars(void)
ba395927
KA
1858{
1859 struct dmar_drhd_unit *drhd;
1860 struct dmar_rmrr_unit *rmrr;
1861 struct pci_dev *pdev;
1862 struct intel_iommu *iommu;
80b20dd8 1863 int i, ret, unit = 0;
ba395927
KA
1864
1865 /*
1866 * for each drhd
1867 * allocate root
1868 * initialize and program root entry to not present
1869 * endfor
1870 */
1871 for_each_drhd_unit(drhd) {
5e0d2a6f 1872 g_num_of_iommus++;
1873 /*
1874 * lock not needed as this is only incremented in the single
1875 * threaded kernel __init code path all other access are read
1876 * only
1877 */
1878 }
1879
d9630fe9
WH
1880 g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
1881 GFP_KERNEL);
1882 if (!g_iommus) {
1883 printk(KERN_ERR "Allocating global iommu array failed\n");
1884 ret = -ENOMEM;
1885 goto error;
1886 }
1887
80b20dd8 1888 deferred_flush = kzalloc(g_num_of_iommus *
1889 sizeof(struct deferred_flush_tables), GFP_KERNEL);
1890 if (!deferred_flush) {
d9630fe9 1891 kfree(g_iommus);
5e0d2a6f 1892 ret = -ENOMEM;
1893 goto error;
1894 }
1895
5e0d2a6f 1896 for_each_drhd_unit(drhd) {
1897 if (drhd->ignored)
1898 continue;
1886e8a9
SS
1899
1900 iommu = drhd->iommu;
d9630fe9 1901 g_iommus[iommu->seq_id] = iommu;
ba395927 1902
e61d98d8
SS
1903 ret = iommu_init_domains(iommu);
1904 if (ret)
1905 goto error;
1906
ba395927
KA
1907 /*
1908 * TBD:
1909 * we could share the same root & context tables
1910 * amoung all IOMMU's. Need to Split it later.
1911 */
1912 ret = iommu_alloc_root_entry(iommu);
1913 if (ret) {
1914 printk(KERN_ERR "IOMMU: allocate root entry failed\n");
1915 goto error;
1916 }
1917 }
1918
a77b67d4
YS
1919 for_each_drhd_unit(drhd) {
1920 if (drhd->ignored)
1921 continue;
1922
1923 iommu = drhd->iommu;
1924 if (dmar_enable_qi(iommu)) {
1925 /*
1926 * Queued Invalidate not enabled, use Register Based
1927 * Invalidate
1928 */
1929 iommu->flush.flush_context = __iommu_flush_context;
1930 iommu->flush.flush_iotlb = __iommu_flush_iotlb;
1931 printk(KERN_INFO "IOMMU 0x%Lx: using Register based "
b4e0f9eb
FT
1932 "invalidation\n",
1933 (unsigned long long)drhd->reg_base_addr);
a77b67d4
YS
1934 } else {
1935 iommu->flush.flush_context = qi_flush_context;
1936 iommu->flush.flush_iotlb = qi_flush_iotlb;
1937 printk(KERN_INFO "IOMMU 0x%Lx: using Queued "
b4e0f9eb
FT
1938 "invalidation\n",
1939 (unsigned long long)drhd->reg_base_addr);
a77b67d4
YS
1940 }
1941 }
1942
ba395927
KA
1943 /*
1944 * For each rmrr
1945 * for each dev attached to rmrr
1946 * do
1947 * locate drhd for dev, alloc domain for dev
1948 * allocate free domain
1949 * allocate page table entries for rmrr
1950 * if context not allocated for bus
1951 * allocate and init context
1952 * set present in root table for this bus
1953 * init context with domain, translation etc
1954 * endfor
1955 * endfor
1956 */
1957 for_each_rmrr_units(rmrr) {
ba395927
KA
1958 for (i = 0; i < rmrr->devices_cnt; i++) {
1959 pdev = rmrr->devices[i];
1960 /* some BIOS lists non-exist devices in DMAR table */
1961 if (!pdev)
1962 continue;
1963 ret = iommu_prepare_rmrr_dev(rmrr, pdev);
1964 if (ret)
1965 printk(KERN_ERR
1966 "IOMMU: mapping reserved region failed\n");
1967 }
1968 }
1969
e820482c
KA
1970 iommu_prepare_gfx_mapping();
1971
49a0429e
KA
1972 iommu_prepare_isa();
1973
ba395927
KA
1974 /*
1975 * for each drhd
1976 * enable fault log
1977 * global invalidate context cache
1978 * global invalidate iotlb
1979 * enable translation
1980 */
1981 for_each_drhd_unit(drhd) {
1982 if (drhd->ignored)
1983 continue;
1984 iommu = drhd->iommu;
1985 sprintf (iommu->name, "dmar%d", unit++);
1986
1987 iommu_flush_write_buffer(iommu);
1988
3460a6d9
KA
1989 ret = dmar_set_interrupt(iommu);
1990 if (ret)
1991 goto error;
1992
ba395927
KA
1993 iommu_set_root_entry(iommu);
1994
a77b67d4
YS
1995 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL,
1996 0);
1997 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH,
1998 0);
f8bab735 1999 iommu_disable_protect_mem_regions(iommu);
2000
ba395927
KA
2001 ret = iommu_enable_translation(iommu);
2002 if (ret)
2003 goto error;
2004 }
2005
2006 return 0;
2007error:
2008 for_each_drhd_unit(drhd) {
2009 if (drhd->ignored)
2010 continue;
2011 iommu = drhd->iommu;
2012 free_iommu(iommu);
2013 }
d9630fe9 2014 kfree(g_iommus);
ba395927
KA
2015 return ret;
2016}
2017
2018static inline u64 aligned_size(u64 host_addr, size_t size)
2019{
2020 u64 addr;
5b6985ce
FY
2021 addr = (host_addr & (~PAGE_MASK)) + size;
2022 return PAGE_ALIGN(addr);
ba395927
KA
2023}
2024
2025struct iova *
f76aec76 2026iommu_alloc_iova(struct dmar_domain *domain, size_t size, u64 end)
ba395927 2027{
ba395927
KA
2028 struct iova *piova;
2029
2030 /* Make sure it's in range */
ba395927 2031 end = min_t(u64, DOMAIN_MAX_ADDR(domain->gaw), end);
f76aec76 2032 if (!size || (IOVA_START_ADDR + size > end))
ba395927
KA
2033 return NULL;
2034
2035 piova = alloc_iova(&domain->iovad,
5b6985ce 2036 size >> PAGE_SHIFT, IOVA_PFN(end), 1);
ba395927
KA
2037 return piova;
2038}
2039
f76aec76
KA
2040static struct iova *
2041__intel_alloc_iova(struct device *dev, struct dmar_domain *domain,
bb9e6d65 2042 size_t size, u64 dma_mask)
ba395927 2043{
ba395927 2044 struct pci_dev *pdev = to_pci_dev(dev);
ba395927 2045 struct iova *iova = NULL;
ba395927 2046
bb9e6d65
FT
2047 if (dma_mask <= DMA_32BIT_MASK || dmar_forcedac)
2048 iova = iommu_alloc_iova(domain, size, dma_mask);
2049 else {
ba395927
KA
2050 /*
2051 * First try to allocate an io virtual address in
2052 * DMA_32BIT_MASK and if that fails then try allocating
3609801e 2053 * from higher range
ba395927 2054 */
f76aec76 2055 iova = iommu_alloc_iova(domain, size, DMA_32BIT_MASK);
ba395927 2056 if (!iova)
bb9e6d65 2057 iova = iommu_alloc_iova(domain, size, dma_mask);
ba395927
KA
2058 }
2059
2060 if (!iova) {
2061 printk(KERN_ERR"Allocating iova for %s failed", pci_name(pdev));
f76aec76
KA
2062 return NULL;
2063 }
2064
2065 return iova;
2066}
2067
2068static struct dmar_domain *
2069get_valid_domain_for_dev(struct pci_dev *pdev)
2070{
2071 struct dmar_domain *domain;
2072 int ret;
2073
2074 domain = get_domain_for_dev(pdev,
2075 DEFAULT_DOMAIN_ADDRESS_WIDTH);
2076 if (!domain) {
2077 printk(KERN_ERR
2078 "Allocating domain for %s failed", pci_name(pdev));
4fe05bbc 2079 return NULL;
ba395927
KA
2080 }
2081
2082 /* make sure context mapping is ok */
2083 if (unlikely(!domain_context_mapped(domain, pdev))) {
2084 ret = domain_context_mapping(domain, pdev);
f76aec76
KA
2085 if (ret) {
2086 printk(KERN_ERR
2087 "Domain context map for %s failed",
2088 pci_name(pdev));
4fe05bbc 2089 return NULL;
f76aec76 2090 }
ba395927
KA
2091 }
2092
f76aec76
KA
2093 return domain;
2094}
2095
bb9e6d65
FT
2096static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
2097 size_t size, int dir, u64 dma_mask)
f76aec76
KA
2098{
2099 struct pci_dev *pdev = to_pci_dev(hwdev);
f76aec76 2100 struct dmar_domain *domain;
5b6985ce 2101 phys_addr_t start_paddr;
f76aec76
KA
2102 struct iova *iova;
2103 int prot = 0;
6865f0d1 2104 int ret;
8c11e798 2105 struct intel_iommu *iommu;
f76aec76
KA
2106
2107 BUG_ON(dir == DMA_NONE);
358dd8ac 2108 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
6865f0d1 2109 return paddr;
f76aec76
KA
2110
2111 domain = get_valid_domain_for_dev(pdev);
2112 if (!domain)
2113 return 0;
2114
8c11e798 2115 iommu = domain_get_iommu(domain);
6865f0d1 2116 size = aligned_size((u64)paddr, size);
f76aec76 2117
bb9e6d65 2118 iova = __intel_alloc_iova(hwdev, domain, size, pdev->dma_mask);
f76aec76
KA
2119 if (!iova)
2120 goto error;
2121
5b6985ce 2122 start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
f76aec76 2123
ba395927
KA
2124 /*
2125 * Check if DMAR supports zero-length reads on write only
2126 * mappings..
2127 */
2128 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
8c11e798 2129 !cap_zlr(iommu->cap))
ba395927
KA
2130 prot |= DMA_PTE_READ;
2131 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
2132 prot |= DMA_PTE_WRITE;
2133 /*
6865f0d1 2134 * paddr - (paddr + size) might be partial page, we should map the whole
ba395927 2135 * page. Note: if two part of one page are separately mapped, we
6865f0d1 2136 * might have two guest_addr mapping to the same host paddr, but this
ba395927
KA
2137 * is not a big problem
2138 */
6865f0d1 2139 ret = domain_page_mapping(domain, start_paddr,
5b6985ce 2140 ((u64)paddr) & PAGE_MASK, size, prot);
ba395927
KA
2141 if (ret)
2142 goto error;
2143
f76aec76 2144 /* it's a non-present to present mapping */
8c11e798 2145 ret = iommu_flush_iotlb_psi(iommu, domain->id,
5b6985ce 2146 start_paddr, size >> VTD_PAGE_SHIFT, 1);
f76aec76 2147 if (ret)
8c11e798 2148 iommu_flush_write_buffer(iommu);
f76aec76 2149
5b6985ce 2150 return start_paddr + ((u64)paddr & (~PAGE_MASK));
ba395927 2151
ba395927 2152error:
f76aec76
KA
2153 if (iova)
2154 __free_iova(&domain->iovad, iova);
ba395927 2155 printk(KERN_ERR"Device %s request: %lx@%llx dir %d --- failed\n",
5b6985ce 2156 pci_name(pdev), size, (unsigned long long)paddr, dir);
ba395927
KA
2157 return 0;
2158}
2159
bb9e6d65
FT
2160dma_addr_t intel_map_single(struct device *hwdev, phys_addr_t paddr,
2161 size_t size, int dir)
2162{
2163 return __intel_map_single(hwdev, paddr, size, dir,
2164 to_pci_dev(hwdev)->dma_mask);
2165}
2166
5e0d2a6f 2167static void flush_unmaps(void)
2168{
80b20dd8 2169 int i, j;
5e0d2a6f 2170
5e0d2a6f 2171 timer_on = 0;
2172
2173 /* just flush them all */
2174 for (i = 0; i < g_num_of_iommus; i++) {
a2bb8459
WH
2175 struct intel_iommu *iommu = g_iommus[i];
2176 if (!iommu)
2177 continue;
c42d9f32 2178
a2bb8459 2179 if (deferred_flush[i].next) {
a77b67d4
YS
2180 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
2181 DMA_TLB_GLOBAL_FLUSH, 0);
80b20dd8 2182 for (j = 0; j < deferred_flush[i].next; j++) {
2183 __free_iova(&deferred_flush[i].domain[j]->iovad,
2184 deferred_flush[i].iova[j]);
2185 }
2186 deferred_flush[i].next = 0;
2187 }
5e0d2a6f 2188 }
2189
5e0d2a6f 2190 list_size = 0;
5e0d2a6f 2191}
2192
2193static void flush_unmaps_timeout(unsigned long data)
2194{
80b20dd8 2195 unsigned long flags;
2196
2197 spin_lock_irqsave(&async_umap_flush_lock, flags);
5e0d2a6f 2198 flush_unmaps();
80b20dd8 2199 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
5e0d2a6f 2200}
2201
2202static void add_unmap(struct dmar_domain *dom, struct iova *iova)
2203{
2204 unsigned long flags;
80b20dd8 2205 int next, iommu_id;
8c11e798 2206 struct intel_iommu *iommu;
5e0d2a6f 2207
2208 spin_lock_irqsave(&async_umap_flush_lock, flags);
80b20dd8 2209 if (list_size == HIGH_WATER_MARK)
2210 flush_unmaps();
2211
8c11e798
WH
2212 iommu = domain_get_iommu(dom);
2213 iommu_id = iommu->seq_id;
c42d9f32 2214
80b20dd8 2215 next = deferred_flush[iommu_id].next;
2216 deferred_flush[iommu_id].domain[next] = dom;
2217 deferred_flush[iommu_id].iova[next] = iova;
2218 deferred_flush[iommu_id].next++;
5e0d2a6f 2219
2220 if (!timer_on) {
2221 mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10));
2222 timer_on = 1;
2223 }
2224 list_size++;
2225 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
2226}
2227
5b6985ce
FY
2228void intel_unmap_single(struct device *dev, dma_addr_t dev_addr, size_t size,
2229 int dir)
ba395927 2230{
ba395927 2231 struct pci_dev *pdev = to_pci_dev(dev);
f76aec76
KA
2232 struct dmar_domain *domain;
2233 unsigned long start_addr;
ba395927 2234 struct iova *iova;
8c11e798 2235 struct intel_iommu *iommu;
ba395927 2236
358dd8ac 2237 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
f76aec76 2238 return;
ba395927
KA
2239 domain = find_domain(pdev);
2240 BUG_ON(!domain);
2241
8c11e798
WH
2242 iommu = domain_get_iommu(domain);
2243
ba395927 2244 iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
f76aec76 2245 if (!iova)
ba395927 2246 return;
ba395927 2247
5b6985ce 2248 start_addr = iova->pfn_lo << PAGE_SHIFT;
f76aec76 2249 size = aligned_size((u64)dev_addr, size);
ba395927 2250
f76aec76 2251 pr_debug("Device %s unmapping: %lx@%llx\n",
5b6985ce 2252 pci_name(pdev), size, (unsigned long long)start_addr);
ba395927 2253
f76aec76
KA
2254 /* clear the whole page */
2255 dma_pte_clear_range(domain, start_addr, start_addr + size);
2256 /* free page tables */
2257 dma_pte_free_pagetable(domain, start_addr, start_addr + size);
5e0d2a6f 2258 if (intel_iommu_strict) {
8c11e798 2259 if (iommu_flush_iotlb_psi(iommu,
5b6985ce 2260 domain->id, start_addr, size >> VTD_PAGE_SHIFT, 0))
8c11e798 2261 iommu_flush_write_buffer(iommu);
5e0d2a6f 2262 /* free iova */
2263 __free_iova(&domain->iovad, iova);
2264 } else {
2265 add_unmap(domain, iova);
2266 /*
2267 * queue up the release of the unmap to save the 1/6th of the
2268 * cpu used up by the iotlb flush operation...
2269 */
5e0d2a6f 2270 }
ba395927
KA
2271}
2272
5b6985ce
FY
2273void *intel_alloc_coherent(struct device *hwdev, size_t size,
2274 dma_addr_t *dma_handle, gfp_t flags)
ba395927
KA
2275{
2276 void *vaddr;
2277 int order;
2278
5b6985ce 2279 size = PAGE_ALIGN(size);
ba395927
KA
2280 order = get_order(size);
2281 flags &= ~(GFP_DMA | GFP_DMA32);
2282
2283 vaddr = (void *)__get_free_pages(flags, order);
2284 if (!vaddr)
2285 return NULL;
2286 memset(vaddr, 0, size);
2287
bb9e6d65
FT
2288 *dma_handle = __intel_map_single(hwdev, virt_to_bus(vaddr), size,
2289 DMA_BIDIRECTIONAL,
2290 hwdev->coherent_dma_mask);
ba395927
KA
2291 if (*dma_handle)
2292 return vaddr;
2293 free_pages((unsigned long)vaddr, order);
2294 return NULL;
2295}
2296
5b6985ce
FY
2297void intel_free_coherent(struct device *hwdev, size_t size, void *vaddr,
2298 dma_addr_t dma_handle)
ba395927
KA
2299{
2300 int order;
2301
5b6985ce 2302 size = PAGE_ALIGN(size);
ba395927
KA
2303 order = get_order(size);
2304
2305 intel_unmap_single(hwdev, dma_handle, size, DMA_BIDIRECTIONAL);
2306 free_pages((unsigned long)vaddr, order);
2307}
2308
12d4d40e 2309#define SG_ENT_VIRT_ADDRESS(sg) (sg_virt((sg)))
5b6985ce
FY
2310
2311void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
2312 int nelems, int dir)
ba395927
KA
2313{
2314 int i;
2315 struct pci_dev *pdev = to_pci_dev(hwdev);
2316 struct dmar_domain *domain;
f76aec76
KA
2317 unsigned long start_addr;
2318 struct iova *iova;
2319 size_t size = 0;
2320 void *addr;
c03ab37c 2321 struct scatterlist *sg;
8c11e798 2322 struct intel_iommu *iommu;
ba395927 2323
358dd8ac 2324 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
ba395927
KA
2325 return;
2326
2327 domain = find_domain(pdev);
8c11e798
WH
2328 BUG_ON(!domain);
2329
2330 iommu = domain_get_iommu(domain);
ba395927 2331
c03ab37c 2332 iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address));
f76aec76
KA
2333 if (!iova)
2334 return;
c03ab37c 2335 for_each_sg(sglist, sg, nelems, i) {
f76aec76
KA
2336 addr = SG_ENT_VIRT_ADDRESS(sg);
2337 size += aligned_size((u64)addr, sg->length);
2338 }
2339
5b6985ce 2340 start_addr = iova->pfn_lo << PAGE_SHIFT;
f76aec76
KA
2341
2342 /* clear the whole page */
2343 dma_pte_clear_range(domain, start_addr, start_addr + size);
2344 /* free page tables */
2345 dma_pte_free_pagetable(domain, start_addr, start_addr + size);
2346
8c11e798 2347 if (iommu_flush_iotlb_psi(iommu, domain->id, start_addr,
5b6985ce 2348 size >> VTD_PAGE_SHIFT, 0))
8c11e798 2349 iommu_flush_write_buffer(iommu);
f76aec76
KA
2350
2351 /* free iova */
2352 __free_iova(&domain->iovad, iova);
ba395927
KA
2353}
2354
ba395927 2355static int intel_nontranslate_map_sg(struct device *hddev,
c03ab37c 2356 struct scatterlist *sglist, int nelems, int dir)
ba395927
KA
2357{
2358 int i;
c03ab37c 2359 struct scatterlist *sg;
ba395927 2360
c03ab37c 2361 for_each_sg(sglist, sg, nelems, i) {
12d4d40e 2362 BUG_ON(!sg_page(sg));
c03ab37c
FT
2363 sg->dma_address = virt_to_bus(SG_ENT_VIRT_ADDRESS(sg));
2364 sg->dma_length = sg->length;
ba395927
KA
2365 }
2366 return nelems;
2367}
2368
5b6985ce
FY
2369int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
2370 int dir)
ba395927
KA
2371{
2372 void *addr;
2373 int i;
ba395927
KA
2374 struct pci_dev *pdev = to_pci_dev(hwdev);
2375 struct dmar_domain *domain;
f76aec76
KA
2376 size_t size = 0;
2377 int prot = 0;
2378 size_t offset = 0;
2379 struct iova *iova = NULL;
2380 int ret;
c03ab37c 2381 struct scatterlist *sg;
f76aec76 2382 unsigned long start_addr;
8c11e798 2383 struct intel_iommu *iommu;
ba395927
KA
2384
2385 BUG_ON(dir == DMA_NONE);
358dd8ac 2386 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
c03ab37c 2387 return intel_nontranslate_map_sg(hwdev, sglist, nelems, dir);
ba395927 2388
f76aec76
KA
2389 domain = get_valid_domain_for_dev(pdev);
2390 if (!domain)
2391 return 0;
2392
8c11e798
WH
2393 iommu = domain_get_iommu(domain);
2394
c03ab37c 2395 for_each_sg(sglist, sg, nelems, i) {
ba395927 2396 addr = SG_ENT_VIRT_ADDRESS(sg);
f76aec76
KA
2397 addr = (void *)virt_to_phys(addr);
2398 size += aligned_size((u64)addr, sg->length);
2399 }
2400
bb9e6d65 2401 iova = __intel_alloc_iova(hwdev, domain, size, pdev->dma_mask);
f76aec76 2402 if (!iova) {
c03ab37c 2403 sglist->dma_length = 0;
f76aec76
KA
2404 return 0;
2405 }
2406
2407 /*
2408 * Check if DMAR supports zero-length reads on write only
2409 * mappings..
2410 */
2411 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
8c11e798 2412 !cap_zlr(iommu->cap))
f76aec76
KA
2413 prot |= DMA_PTE_READ;
2414 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
2415 prot |= DMA_PTE_WRITE;
2416
5b6985ce 2417 start_addr = iova->pfn_lo << PAGE_SHIFT;
f76aec76 2418 offset = 0;
c03ab37c 2419 for_each_sg(sglist, sg, nelems, i) {
f76aec76
KA
2420 addr = SG_ENT_VIRT_ADDRESS(sg);
2421 addr = (void *)virt_to_phys(addr);
2422 size = aligned_size((u64)addr, sg->length);
2423 ret = domain_page_mapping(domain, start_addr + offset,
5b6985ce 2424 ((u64)addr) & PAGE_MASK,
f76aec76
KA
2425 size, prot);
2426 if (ret) {
2427 /* clear the page */
2428 dma_pte_clear_range(domain, start_addr,
2429 start_addr + offset);
2430 /* free page tables */
2431 dma_pte_free_pagetable(domain, start_addr,
2432 start_addr + offset);
2433 /* free iova */
2434 __free_iova(&domain->iovad, iova);
ba395927
KA
2435 return 0;
2436 }
f76aec76 2437 sg->dma_address = start_addr + offset +
5b6985ce 2438 ((u64)addr & (~PAGE_MASK));
ba395927 2439 sg->dma_length = sg->length;
f76aec76 2440 offset += size;
ba395927
KA
2441 }
2442
ba395927 2443 /* it's a non-present to present mapping */
8c11e798 2444 if (iommu_flush_iotlb_psi(iommu, domain->id,
5b6985ce 2445 start_addr, offset >> VTD_PAGE_SHIFT, 1))
8c11e798 2446 iommu_flush_write_buffer(iommu);
ba395927
KA
2447 return nelems;
2448}
2449
2450static struct dma_mapping_ops intel_dma_ops = {
2451 .alloc_coherent = intel_alloc_coherent,
2452 .free_coherent = intel_free_coherent,
2453 .map_single = intel_map_single,
2454 .unmap_single = intel_unmap_single,
2455 .map_sg = intel_map_sg,
2456 .unmap_sg = intel_unmap_sg,
2457};
2458
2459static inline int iommu_domain_cache_init(void)
2460{
2461 int ret = 0;
2462
2463 iommu_domain_cache = kmem_cache_create("iommu_domain",
2464 sizeof(struct dmar_domain),
2465 0,
2466 SLAB_HWCACHE_ALIGN,
2467
2468 NULL);
2469 if (!iommu_domain_cache) {
2470 printk(KERN_ERR "Couldn't create iommu_domain cache\n");
2471 ret = -ENOMEM;
2472 }
2473
2474 return ret;
2475}
2476
2477static inline int iommu_devinfo_cache_init(void)
2478{
2479 int ret = 0;
2480
2481 iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
2482 sizeof(struct device_domain_info),
2483 0,
2484 SLAB_HWCACHE_ALIGN,
ba395927
KA
2485 NULL);
2486 if (!iommu_devinfo_cache) {
2487 printk(KERN_ERR "Couldn't create devinfo cache\n");
2488 ret = -ENOMEM;
2489 }
2490
2491 return ret;
2492}
2493
2494static inline int iommu_iova_cache_init(void)
2495{
2496 int ret = 0;
2497
2498 iommu_iova_cache = kmem_cache_create("iommu_iova",
2499 sizeof(struct iova),
2500 0,
2501 SLAB_HWCACHE_ALIGN,
ba395927
KA
2502 NULL);
2503 if (!iommu_iova_cache) {
2504 printk(KERN_ERR "Couldn't create iova cache\n");
2505 ret = -ENOMEM;
2506 }
2507
2508 return ret;
2509}
2510
2511static int __init iommu_init_mempool(void)
2512{
2513 int ret;
2514 ret = iommu_iova_cache_init();
2515 if (ret)
2516 return ret;
2517
2518 ret = iommu_domain_cache_init();
2519 if (ret)
2520 goto domain_error;
2521
2522 ret = iommu_devinfo_cache_init();
2523 if (!ret)
2524 return ret;
2525
2526 kmem_cache_destroy(iommu_domain_cache);
2527domain_error:
2528 kmem_cache_destroy(iommu_iova_cache);
2529
2530 return -ENOMEM;
2531}
2532
2533static void __init iommu_exit_mempool(void)
2534{
2535 kmem_cache_destroy(iommu_devinfo_cache);
2536 kmem_cache_destroy(iommu_domain_cache);
2537 kmem_cache_destroy(iommu_iova_cache);
2538
2539}
2540
ba395927
KA
2541static void __init init_no_remapping_devices(void)
2542{
2543 struct dmar_drhd_unit *drhd;
2544
2545 for_each_drhd_unit(drhd) {
2546 if (!drhd->include_all) {
2547 int i;
2548 for (i = 0; i < drhd->devices_cnt; i++)
2549 if (drhd->devices[i] != NULL)
2550 break;
2551 /* ignore DMAR unit if no pci devices exist */
2552 if (i == drhd->devices_cnt)
2553 drhd->ignored = 1;
2554 }
2555 }
2556
2557 if (dmar_map_gfx)
2558 return;
2559
2560 for_each_drhd_unit(drhd) {
2561 int i;
2562 if (drhd->ignored || drhd->include_all)
2563 continue;
2564
2565 for (i = 0; i < drhd->devices_cnt; i++)
2566 if (drhd->devices[i] &&
2567 !IS_GFX_DEVICE(drhd->devices[i]))
2568 break;
2569
2570 if (i < drhd->devices_cnt)
2571 continue;
2572
2573 /* bypass IOMMU if it is just for gfx devices */
2574 drhd->ignored = 1;
2575 for (i = 0; i < drhd->devices_cnt; i++) {
2576 if (!drhd->devices[i])
2577 continue;
358dd8ac 2578 drhd->devices[i]->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
ba395927
KA
2579 }
2580 }
2581}
2582
2583int __init intel_iommu_init(void)
2584{
2585 int ret = 0;
2586
ba395927
KA
2587 if (dmar_table_init())
2588 return -ENODEV;
2589
1886e8a9
SS
2590 if (dmar_dev_scope_init())
2591 return -ENODEV;
2592
2ae21010
SS
2593 /*
2594 * Check the need for DMA-remapping initialization now.
2595 * Above initialization will also be used by Interrupt-remapping.
2596 */
2597 if (no_iommu || swiotlb || dmar_disabled)
2598 return -ENODEV;
2599
ba395927
KA
2600 iommu_init_mempool();
2601 dmar_init_reserved_ranges();
2602
2603 init_no_remapping_devices();
2604
2605 ret = init_dmars();
2606 if (ret) {
2607 printk(KERN_ERR "IOMMU: dmar init failed\n");
2608 put_iova_domain(&reserved_iova_list);
2609 iommu_exit_mempool();
2610 return ret;
2611 }
2612 printk(KERN_INFO
2613 "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");
2614
5e0d2a6f 2615 init_timer(&unmap_timer);
ba395927
KA
2616 force_iommu = 1;
2617 dma_ops = &intel_dma_ops;
2618 return 0;
2619}
e820482c 2620
38717946
KA
2621void intel_iommu_domain_exit(struct dmar_domain *domain)
2622{
2623 u64 end;
2624
2625 /* Domain 0 is reserved, so dont process it */
2626 if (!domain)
2627 return;
2628
2629 end = DOMAIN_MAX_ADDR(domain->gaw);
5b6985ce 2630 end = end & (~VTD_PAGE_MASK);
38717946
KA
2631
2632 /* clear ptes */
2633 dma_pte_clear_range(domain, 0, end);
2634
2635 /* free page tables */
2636 dma_pte_free_pagetable(domain, 0, end);
2637
2638 iommu_free_domain(domain);
2639 free_domain_mem(domain);
2640}
2641EXPORT_SYMBOL_GPL(intel_iommu_domain_exit);
2642
2643struct dmar_domain *intel_iommu_domain_alloc(struct pci_dev *pdev)
2644{
2645 struct dmar_drhd_unit *drhd;
2646 struct dmar_domain *domain;
2647 struct intel_iommu *iommu;
2648
2649 drhd = dmar_find_matched_drhd_unit(pdev);
2650 if (!drhd) {
2651 printk(KERN_ERR "intel_iommu_domain_alloc: drhd == NULL\n");
2652 return NULL;
2653 }
2654
2655 iommu = drhd->iommu;
2656 if (!iommu) {
2657 printk(KERN_ERR
2658 "intel_iommu_domain_alloc: iommu == NULL\n");
2659 return NULL;
2660 }
2661 domain = iommu_alloc_domain(iommu);
2662 if (!domain) {
2663 printk(KERN_ERR
2664 "intel_iommu_domain_alloc: domain == NULL\n");
2665 return NULL;
2666 }
2667 if (domain_init(domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2668 printk(KERN_ERR
2669 "intel_iommu_domain_alloc: domain_init() failed\n");
2670 intel_iommu_domain_exit(domain);
2671 return NULL;
2672 }
2673 return domain;
2674}
2675EXPORT_SYMBOL_GPL(intel_iommu_domain_alloc);
2676
2677int intel_iommu_context_mapping(
2678 struct dmar_domain *domain, struct pci_dev *pdev)
2679{
2680 int rc;
2681 rc = domain_context_mapping(domain, pdev);
2682 return rc;
2683}
2684EXPORT_SYMBOL_GPL(intel_iommu_context_mapping);
2685
2686int intel_iommu_page_mapping(
2687 struct dmar_domain *domain, dma_addr_t iova,
2688 u64 hpa, size_t size, int prot)
2689{
2690 int rc;
2691 rc = domain_page_mapping(domain, iova, hpa, size, prot);
2692 return rc;
2693}
2694EXPORT_SYMBOL_GPL(intel_iommu_page_mapping);
2695
2696void intel_iommu_detach_dev(struct dmar_domain *domain, u8 bus, u8 devfn)
2697{
2698 detach_domain_for_dev(domain, bus, devfn);
2699}
2700EXPORT_SYMBOL_GPL(intel_iommu_detach_dev);
2701
2702struct dmar_domain *
2703intel_iommu_find_domain(struct pci_dev *pdev)
2704{
2705 return find_domain(pdev);
2706}
2707EXPORT_SYMBOL_GPL(intel_iommu_find_domain);
2708
2709int intel_iommu_found(void)
2710{
2711 return g_num_of_iommus;
2712}
2713EXPORT_SYMBOL_GPL(intel_iommu_found);
2714
2715u64 intel_iommu_iova_to_pfn(struct dmar_domain *domain, u64 iova)
2716{
2717 struct dma_pte *pte;
2718 u64 pfn;
2719
2720 pfn = 0;
2721 pte = addr_to_dma_pte(domain, iova);
2722
2723 if (pte)
19c239ce 2724 pfn = dma_pte_addr(pte);
38717946 2725
5b6985ce 2726 return pfn >> VTD_PAGE_SHIFT;
38717946
KA
2727}
2728EXPORT_SYMBOL_GPL(intel_iommu_iova_to_pfn);