ba395927
KA
1/*
2 * Copyright (c) 2006, Intel Corporation.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License along with
14 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15 * Place - Suite 330, Boston, MA 02111-1307 USA.
16 *
98bcef56 17 * Copyright (C) 2006-2008 Intel Corporation
18 * Author: Ashok Raj <ashok.raj@intel.com>
19 * Author: Shaohua Li <shaohua.li@intel.com>
20 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
5b6985ce 21 * Author: Fenghua Yu <fenghua.yu@intel.com>
ba395927
KA
22 */
23
24#include <linux/init.h>
25#include <linux/bitmap.h>
5e0d2a6f 26#include <linux/debugfs.h>
ba395927
KA
27#include <linux/slab.h>
28#include <linux/irq.h>
29#include <linux/interrupt.h>
ba395927
KA
30#include <linux/spinlock.h>
31#include <linux/pci.h>
32#include <linux/dmar.h>
33#include <linux/dma-mapping.h>
34#include <linux/mempool.h>
5e0d2a6f 35#include <linux/timer.h>
38717946
KA
36#include <linux/iova.h>
37#include <linux/intel-iommu.h>
ba395927 38#include <asm/cacheflush.h>
46a7fa27 39#include <asm/iommu.h>
ba395927
KA
40#include "pci.h"
41
5b6985ce
FY
42#define ROOT_SIZE VTD_PAGE_SIZE
43#define CONTEXT_SIZE VTD_PAGE_SIZE
44
ba395927
KA
45#define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
46#define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
47
48#define IOAPIC_RANGE_START (0xfee00000)
49#define IOAPIC_RANGE_END (0xfeefffff)
50#define IOVA_START_ADDR (0x1000)
51
52#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
53
ba395927
KA
54#define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1)
55
f27be03b
MM
56#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
57#define DMA_32BIT_PFN IOVA_PFN(DMA_32BIT_MASK)
58#define DMA_64BIT_PFN IOVA_PFN(DMA_64BIT_MASK)
5e0d2a6f 59
d9630fe9
WH
60/* global iommu list, set NULL for ignored DMAR units */
61static struct intel_iommu **g_iommus;
62
46b08e1a
MM
63/*
64 * 0: Present
65 * 1-11: Reserved
66 * 12-63: Context Ptr (12 - (haw-1))
67 * 64-127: Reserved
68 */
69struct root_entry {
70 u64 val;
71 u64 rsvd1;
72};
73#define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
74static inline bool root_present(struct root_entry *root)
75{
76 return (root->val & 1);
77}
78static inline void set_root_present(struct root_entry *root)
79{
80 root->val |= 1;
81}
82static inline void set_root_value(struct root_entry *root, unsigned long value)
83{
84 root->val |= value & VTD_PAGE_MASK;
85}
86
87static inline struct context_entry *
88get_context_addr_from_root(struct root_entry *root)
89{
90 return (struct context_entry *)
91 (root_present(root)?phys_to_virt(
92 root->val & VTD_PAGE_MASK) :
93 NULL);
94}
95
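/*
 * Illustrative sketch, not part of the driver: how a DMA request's
 * source is resolved through the root table.  The root table is a
 * single 4KB page holding ROOT_ENTRY_NR (4096 / 16 = 256) root
 * entries indexed by PCI bus number; a present entry points to the
 * 4KB context table for that bus, which is then indexed by devfn.
 * The example_* helper below is hypothetical and only mirrors what
 * device_to_context_entry() further down actually does.
 */
#if 0
static struct context_entry *example_lookup_context(struct intel_iommu *iommu,
						    u8 bus, u8 devfn)
{
	struct root_entry *root = &iommu->root_entry[bus];

	if (!root_present(root))
		return NULL;	/* no context table allocated for this bus */
	return get_context_addr_from_root(root) + devfn;
}
#endif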
7a8fc25e
MM
96/*
97 * low 64 bits:
98 * 0: present
99 * 1: fault processing disable
100 * 2-3: translation type
101 * 12-63: address space root
102 * high 64 bits:
103 * 0-2: address width
104 * 3-6: aval
105 * 8-23: domain id
106 */
107struct context_entry {
108 u64 lo;
109 u64 hi;
110};
c07e7d21
MM
111
112static inline bool context_present(struct context_entry *context)
113{
114 return (context->lo & 1);
115}
116static inline void context_set_present(struct context_entry *context)
117{
118 context->lo |= 1;
119}
120
121static inline void context_set_fault_enable(struct context_entry *context)
122{
123 context->lo &= (((u64)-1) << 2) | 1;
124}
125
7a8fc25e 126#define CONTEXT_TT_MULTI_LEVEL 0
c07e7d21
MM
127
128static inline void context_set_translation_type(struct context_entry *context,
129 unsigned long value)
130{
131 context->lo &= (((u64)-1) << 4) | 3;
132 context->lo |= (value & 3) << 2;
133}
134
135static inline void context_set_address_root(struct context_entry *context,
136 unsigned long value)
137{
138 context->lo |= value & VTD_PAGE_MASK;
139}
140
141static inline void context_set_address_width(struct context_entry *context,
142 unsigned long value)
143{
144 context->hi |= value & 7;
145}
146
147static inline void context_set_domain_id(struct context_entry *context,
148 unsigned long value)
149{
150 context->hi |= (value & ((1 << 16) - 1)) << 8;
151}
152
153static inline void context_clear_entry(struct context_entry *context)
154{
155 context->lo = 0;
156 context->hi = 0;
157}
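/*
 * Illustrative sketch, not part of the driver: composing a context
 * entry with the helpers above, mirroring what
 * domain_context_mapping_one() does later in this file.  Low u64:
 * bit 0 present, bit 1 fault-processing disable, bits 2-3 translation
 * type, bits 12-63 page-table root.  High u64: bits 0-2 address
 * width, bits 8-23 domain id.  The example_* helper is hypothetical.
 */
#if 0
static void example_fill_context(struct context_entry *ce,
				 struct dma_pte *pgd, int agaw, int did)
{
	context_clear_entry(ce);
	context_set_domain_id(ce, did);
	context_set_address_width(ce, agaw);
	context_set_address_root(ce, virt_to_phys(pgd));
	context_set_translation_type(ce, CONTEXT_TT_MULTI_LEVEL);
	context_set_fault_enable(ce);
	context_set_present(ce);
}
#endif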
7a8fc25e 158
622ba12a
MM
159/*
160 * 0: readable
161 * 1: writable
162 * 2-6: reserved
163 * 7: super page
164 * 8-11: available
 165 * 12-63: Host physical address
166 */
167struct dma_pte {
168 u64 val;
169};
622ba12a 170
19c239ce
MM
171static inline void dma_clear_pte(struct dma_pte *pte)
172{
173 pte->val = 0;
174}
175
176static inline void dma_set_pte_readable(struct dma_pte *pte)
177{
178 pte->val |= DMA_PTE_READ;
179}
180
181static inline void dma_set_pte_writable(struct dma_pte *pte)
182{
183 pte->val |= DMA_PTE_WRITE;
184}
185
186static inline void dma_set_pte_prot(struct dma_pte *pte, unsigned long prot)
187{
188 pte->val = (pte->val & ~3) | (prot & 3);
189}
190
191static inline u64 dma_pte_addr(struct dma_pte *pte)
192{
193 return (pte->val & VTD_PAGE_MASK);
194}
195
196static inline void dma_set_pte_addr(struct dma_pte *pte, u64 addr)
197{
198 pte->val |= (addr & VTD_PAGE_MASK);
199}
200
201static inline bool dma_pte_present(struct dma_pte *pte)
202{
203 return (pte->val & 3) != 0;
204}
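/*
 * Illustrative sketch, not part of the driver: building a leaf PTE
 * with the helpers above.  Bits 0/1 are the read/write permission
 * bits and bits 12-63 hold the 4KB-aligned host physical address, so
 * a read+write PTE for HPA 0x12345000 ends up as 0x12345003.  The
 * example_* helper is hypothetical.
 */
#if 0
static void example_fill_pte(struct dma_pte *pte, u64 hpa, int prot)
{
	dma_clear_pte(pte);
	dma_set_pte_addr(pte, hpa & VTD_PAGE_MASK);
	dma_set_pte_prot(pte, prot);	/* DMA_PTE_READ | DMA_PTE_WRITE */
}
#endif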
622ba12a 205
3b5410e7
WH
206/* devices under the same p2p bridge are owned in one domain */
 207#define DOMAIN_FLAG_P2P_MULTIPLE_DEVICES (1 << 0)
208
1ce28feb
WH
 209/* domain represents a virtual machine; more than one device
 210 * across iommus may be owned by one domain, e.g. a kvm guest.
211 */
212#define DOMAIN_FLAG_VIRTUAL_MACHINE (1 << 1)
213
99126f7c
MM
214struct dmar_domain {
215 int id; /* domain id */
8c11e798 216 unsigned long iommu_bmp; /* bitmap of iommus this domain uses*/
99126f7c
MM
217
218 struct list_head devices; /* all devices' list */
219 struct iova_domain iovad; /* iova's that belong to this domain */
220
221 struct dma_pte *pgd; /* virtual address */
222 spinlock_t mapping_lock; /* page table lock */
223 int gaw; /* max guest address width */
224
225 /* adjusted guest address width, 0 is level 2 30-bit */
226 int agaw;
227
3b5410e7 228 int flags; /* flags to find out type of domain */
8e604097
WH
229
230 int iommu_coherency;/* indicate coherency of iommu access */
99126f7c
MM
231};
232
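/*
 * Illustrative note and sketch, not part of the driver: a domain
 * without DOMAIN_FLAG_VIRTUAL_MACHINE is a native DMA-API domain and
 * has exactly one bit set in iommu_bmp, while a virtual-machine
 * domain may have several bits set (one per iommu it spans).  The
 * hypothetical example_* helper shows how the bitmap is meant to be
 * read; domain_get_iommu() below relies on the native case.
 */
#if 0
static bool example_domain_uses_iommu(struct dmar_domain *domain,
				      struct intel_iommu *iommu)
{
	return test_bit(iommu->seq_id, &domain->iommu_bmp);
}
#endif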
a647dacb
MM
233/* PCI domain-device relationship */
234struct device_domain_info {
235 struct list_head link; /* link to domain siblings */
236 struct list_head global; /* link to global list */
 237 u8 bus; /* PCI bus number */
238 u8 devfn; /* PCI devfn number */
239 struct pci_dev *dev; /* it's NULL for PCIE-to-PCI bridge */
240 struct dmar_domain *domain; /* pointer to domain */
241};
242
5e0d2a6f 243static void flush_unmaps_timeout(unsigned long data);
244
245DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0);
246
80b20dd8 247#define HIGH_WATER_MARK 250
248struct deferred_flush_tables {
249 int next;
250 struct iova *iova[HIGH_WATER_MARK];
251 struct dmar_domain *domain[HIGH_WATER_MARK];
252};
253
254static struct deferred_flush_tables *deferred_flush;
255
5e0d2a6f 256/* number of registered intel_iommus, used to size the per-iommu arrays */
5e0d2a6f 257static int g_num_of_iommus;
258
259static DEFINE_SPINLOCK(async_umap_flush_lock);
260static LIST_HEAD(unmaps_to_do);
261
262static int timer_on;
263static long list_size;
5e0d2a6f 264
ba395927
KA
265static void domain_remove_dev_info(struct dmar_domain *domain);
266
2ae21010 267int dmar_disabled;
ba395927 268static int __initdata dmar_map_gfx = 1;
7d3b03ce 269static int dmar_forcedac;
5e0d2a6f 270static int intel_iommu_strict;
ba395927
KA
271
272#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
273static DEFINE_SPINLOCK(device_domain_lock);
274static LIST_HEAD(device_domain_list);
275
276static int __init intel_iommu_setup(char *str)
277{
278 if (!str)
279 return -EINVAL;
280 while (*str) {
281 if (!strncmp(str, "off", 3)) {
282 dmar_disabled = 1;
283 printk(KERN_INFO"Intel-IOMMU: disabled\n");
284 } else if (!strncmp(str, "igfx_off", 8)) {
285 dmar_map_gfx = 0;
286 printk(KERN_INFO
287 "Intel-IOMMU: disable GFX device mapping\n");
7d3b03ce 288 } else if (!strncmp(str, "forcedac", 8)) {
5e0d2a6f 289 printk(KERN_INFO
7d3b03ce
KA
290 "Intel-IOMMU: Forcing DAC for PCI devices\n");
291 dmar_forcedac = 1;
5e0d2a6f 292 } else if (!strncmp(str, "strict", 6)) {
293 printk(KERN_INFO
294 "Intel-IOMMU: disable batched IOTLB flush\n");
295 intel_iommu_strict = 1;
ba395927
KA
296 }
297
298 str += strcspn(str, ",");
299 while (*str == ',')
300 str++;
301 }
302 return 0;
303}
304__setup("intel_iommu=", intel_iommu_setup);
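/*
 * Usage note (illustrative, not from the original source): the
 * options parsed above are comma-separated values of the
 * "intel_iommu=" kernel command-line parameter, e.g.
 *
 *	intel_iommu=off
 *	intel_iommu=igfx_off,strict
 *	intel_iommu=forcedac
 *
 * "strict" makes unmaps flush the IOTLB immediately instead of using
 * the deferred_flush batching declared earlier in this file.
 */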
305
306static struct kmem_cache *iommu_domain_cache;
307static struct kmem_cache *iommu_devinfo_cache;
308static struct kmem_cache *iommu_iova_cache;
309
eb3fa7cb
KA
310static inline void *iommu_kmem_cache_alloc(struct kmem_cache *cachep)
311{
312 unsigned int flags;
313 void *vaddr;
314
315 /* trying to avoid low memory issues */
316 flags = current->flags & PF_MEMALLOC;
317 current->flags |= PF_MEMALLOC;
318 vaddr = kmem_cache_alloc(cachep, GFP_ATOMIC);
319 current->flags &= (~PF_MEMALLOC | flags);
320 return vaddr;
321}
322
323
ba395927
KA
324static inline void *alloc_pgtable_page(void)
325{
eb3fa7cb
KA
326 unsigned int flags;
327 void *vaddr;
328
329 /* trying to avoid low memory issues */
330 flags = current->flags & PF_MEMALLOC;
331 current->flags |= PF_MEMALLOC;
332 vaddr = (void *)get_zeroed_page(GFP_ATOMIC);
333 current->flags &= (~PF_MEMALLOC | flags);
334 return vaddr;
ba395927
KA
335}
336
337static inline void free_pgtable_page(void *vaddr)
338{
339 free_page((unsigned long)vaddr);
340}
341
342static inline void *alloc_domain_mem(void)
343{
eb3fa7cb 344 return iommu_kmem_cache_alloc(iommu_domain_cache);
ba395927
KA
345}
346
38717946 347static void free_domain_mem(void *vaddr)
ba395927
KA
348{
349 kmem_cache_free(iommu_domain_cache, vaddr);
350}
351
 352static inline void *alloc_devinfo_mem(void)
353{
eb3fa7cb 354 return iommu_kmem_cache_alloc(iommu_devinfo_cache);
ba395927
KA
355}
356
357static inline void free_devinfo_mem(void *vaddr)
358{
359 kmem_cache_free(iommu_devinfo_cache, vaddr);
360}
361
362struct iova *alloc_iova_mem(void)
363{
eb3fa7cb 364 return iommu_kmem_cache_alloc(iommu_iova_cache);
ba395927
KA
365}
366
367void free_iova_mem(struct iova *iova)
368{
369 kmem_cache_free(iommu_iova_cache, iova);
370}
371
1b573683
WH
372
373static inline int width_to_agaw(int width);
374
375/* calculate agaw for each iommu.
 376 * "SAGAW" may be different across iommus, so use a default agaw and
 377 * fall back to a smaller supported agaw for iommus that don't support the default.
378 */
379int iommu_calculate_agaw(struct intel_iommu *iommu)
380{
381 unsigned long sagaw;
382 int agaw = -1;
383
384 sagaw = cap_sagaw(iommu->cap);
385 for (agaw = width_to_agaw(DEFAULT_DOMAIN_ADDRESS_WIDTH);
386 agaw >= 0; agaw--) {
387 if (test_bit(agaw, &sagaw))
388 break;
389 }
390
391 return agaw;
392}
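/*
 * Worked example (illustrative, not from the original source):
 * DEFAULT_DOMAIN_ADDRESS_WIDTH is 48, so the loop above starts at
 * agaw = width_to_agaw(48) = (48 - 30) / 9 = 2, i.e. a 4-level page
 * table.  If this iommu's SAGAW field only advertises the 39-bit,
 * 3-level format, the loop falls back to agaw 1; if no agaw <= 2 is
 * supported at all, -1 is returned.
 */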
393
8c11e798
WH
 394/* in the native (non-virtual-machine) case, each domain is attached to only one iommu */
395static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
396{
397 int iommu_id;
398
1ce28feb
WH
399 BUG_ON(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE);
400
8c11e798
WH
401 iommu_id = find_first_bit(&domain->iommu_bmp, g_num_of_iommus);
402 if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
403 return NULL;
404
405 return g_iommus[iommu_id];
406}
407
8e604097
WH
408/* "Coherency" capability may be different across iommus */
409static void domain_update_iommu_coherency(struct dmar_domain *domain)
410{
411 int i;
412
413 domain->iommu_coherency = 1;
414
415 i = find_first_bit(&domain->iommu_bmp, g_num_of_iommus);
416 for (; i < g_num_of_iommus; ) {
417 if (!ecap_coherent(g_iommus[i]->ecap)) {
418 domain->iommu_coherency = 0;
419 break;
420 }
421 i = find_next_bit(&domain->iommu_bmp, g_num_of_iommus, i+1);
422 }
423}
424
ba395927
KA
425/* Gets context entry for a given bus and devfn */
426static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
427 u8 bus, u8 devfn)
428{
429 struct root_entry *root;
430 struct context_entry *context;
431 unsigned long phy_addr;
432 unsigned long flags;
433
434 spin_lock_irqsave(&iommu->lock, flags);
435 root = &iommu->root_entry[bus];
436 context = get_context_addr_from_root(root);
437 if (!context) {
438 context = (struct context_entry *)alloc_pgtable_page();
439 if (!context) {
440 spin_unlock_irqrestore(&iommu->lock, flags);
441 return NULL;
442 }
5b6985ce 443 __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
ba395927
KA
444 phy_addr = virt_to_phys((void *)context);
445 set_root_value(root, phy_addr);
446 set_root_present(root);
447 __iommu_flush_cache(iommu, root, sizeof(*root));
448 }
449 spin_unlock_irqrestore(&iommu->lock, flags);
450 return &context[devfn];
451}
452
453static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
454{
455 struct root_entry *root;
456 struct context_entry *context;
457 int ret;
458 unsigned long flags;
459
460 spin_lock_irqsave(&iommu->lock, flags);
461 root = &iommu->root_entry[bus];
462 context = get_context_addr_from_root(root);
463 if (!context) {
464 ret = 0;
465 goto out;
466 }
c07e7d21 467 ret = context_present(&context[devfn]);
ba395927
KA
468out:
469 spin_unlock_irqrestore(&iommu->lock, flags);
470 return ret;
471}
472
473static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
474{
475 struct root_entry *root;
476 struct context_entry *context;
477 unsigned long flags;
478
479 spin_lock_irqsave(&iommu->lock, flags);
480 root = &iommu->root_entry[bus];
481 context = get_context_addr_from_root(root);
482 if (context) {
c07e7d21 483 context_clear_entry(&context[devfn]);
ba395927
KA
 484 __iommu_flush_cache(iommu, &context[devfn],
485 sizeof(*context));
486 }
487 spin_unlock_irqrestore(&iommu->lock, flags);
488}
489
490static void free_context_table(struct intel_iommu *iommu)
491{
492 struct root_entry *root;
493 int i;
494 unsigned long flags;
495 struct context_entry *context;
496
497 spin_lock_irqsave(&iommu->lock, flags);
498 if (!iommu->root_entry) {
499 goto out;
500 }
501 for (i = 0; i < ROOT_ENTRY_NR; i++) {
502 root = &iommu->root_entry[i];
503 context = get_context_addr_from_root(root);
504 if (context)
505 free_pgtable_page(context);
506 }
507 free_pgtable_page(iommu->root_entry);
508 iommu->root_entry = NULL;
509out:
510 spin_unlock_irqrestore(&iommu->lock, flags);
511}
512
513/* page table handling */
514#define LEVEL_STRIDE (9)
515#define LEVEL_MASK (((u64)1 << LEVEL_STRIDE) - 1)
516
517static inline int agaw_to_level(int agaw)
518{
519 return agaw + 2;
520}
521
522static inline int agaw_to_width(int agaw)
523{
524 return 30 + agaw * LEVEL_STRIDE;
525
526}
527
528static inline int width_to_agaw(int width)
529{
530 return (width - 30) / LEVEL_STRIDE;
531}
532
533static inline unsigned int level_to_offset_bits(int level)
534{
535 return (12 + (level - 1) * LEVEL_STRIDE);
536}
537
538static inline int address_level_offset(u64 addr, int level)
539{
540 return ((addr >> level_to_offset_bits(level)) & LEVEL_MASK);
541}
542
543static inline u64 level_mask(int level)
544{
545 return ((u64)-1 << level_to_offset_bits(level));
546}
547
548static inline u64 level_size(int level)
549{
550 return ((u64)1 << level_to_offset_bits(level));
551}
552
553static inline u64 align_to_level(u64 addr, int level)
554{
555 return ((addr + level_size(level) - 1) & level_mask(level));
556}
557
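/*
 * Worked example (illustrative, not from the original source) of the
 * level arithmetic above for a 48-bit domain (agaw 2, 4 levels).
 * Each level decodes LEVEL_STRIDE = 9 address bits:
 *
 *	level 4: bits 47-39	level_to_offset_bits(4) = 39
 *	level 3: bits 38-30	level_to_offset_bits(3) = 30
 *	level 2: bits 29-21	level_to_offset_bits(2) = 21
 *	level 1: bits 20-12	level_to_offset_bits(1) = 12
 *
 * e.g. address_level_offset(0x123456789000ULL, 1)
 *	= (0x123456789000 >> 12) & LEVEL_MASK = 0x189.
 */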
558static struct dma_pte * addr_to_dma_pte(struct dmar_domain *domain, u64 addr)
559{
560 int addr_width = agaw_to_width(domain->agaw);
561 struct dma_pte *parent, *pte = NULL;
562 int level = agaw_to_level(domain->agaw);
563 int offset;
564 unsigned long flags;
8c11e798 565 struct intel_iommu *iommu = domain_get_iommu(domain);
ba395927
KA
566
567 BUG_ON(!domain->pgd);
568
569 addr &= (((u64)1) << addr_width) - 1;
570 parent = domain->pgd;
571
572 spin_lock_irqsave(&domain->mapping_lock, flags);
573 while (level > 0) {
574 void *tmp_page;
575
576 offset = address_level_offset(addr, level);
577 pte = &parent[offset];
578 if (level == 1)
579 break;
580
19c239ce 581 if (!dma_pte_present(pte)) {
ba395927
KA
582 tmp_page = alloc_pgtable_page();
583
584 if (!tmp_page) {
585 spin_unlock_irqrestore(&domain->mapping_lock,
586 flags);
587 return NULL;
588 }
8c11e798 589 __iommu_flush_cache(iommu, tmp_page,
5b6985ce 590 PAGE_SIZE);
19c239ce 591 dma_set_pte_addr(pte, virt_to_phys(tmp_page));
ba395927
KA
592 /*
 593 * higher-level tables always set r/w; the last-level page
 594 * table controls the actual read/write permissions
595 */
19c239ce
MM
596 dma_set_pte_readable(pte);
597 dma_set_pte_writable(pte);
8c11e798 598 __iommu_flush_cache(iommu, pte, sizeof(*pte));
ba395927 599 }
19c239ce 600 parent = phys_to_virt(dma_pte_addr(pte));
ba395927
KA
601 level--;
602 }
603
604 spin_unlock_irqrestore(&domain->mapping_lock, flags);
605 return pte;
606}
607
608/* return address's pte at specific level */
609static struct dma_pte *dma_addr_level_pte(struct dmar_domain *domain, u64 addr,
610 int level)
611{
612 struct dma_pte *parent, *pte = NULL;
613 int total = agaw_to_level(domain->agaw);
614 int offset;
615
616 parent = domain->pgd;
617 while (level <= total) {
618 offset = address_level_offset(addr, total);
619 pte = &parent[offset];
620 if (level == total)
621 return pte;
622
19c239ce 623 if (!dma_pte_present(pte))
ba395927 624 break;
19c239ce 625 parent = phys_to_virt(dma_pte_addr(pte));
ba395927
KA
626 total--;
627 }
628 return NULL;
629}
630
631/* clear one page's page table */
632static void dma_pte_clear_one(struct dmar_domain *domain, u64 addr)
633{
634 struct dma_pte *pte = NULL;
8c11e798 635 struct intel_iommu *iommu = domain_get_iommu(domain);
ba395927
KA
636
637 /* get last level pte */
638 pte = dma_addr_level_pte(domain, addr, 1);
639
640 if (pte) {
19c239ce 641 dma_clear_pte(pte);
8c11e798 642 __iommu_flush_cache(iommu, pte, sizeof(*pte));
ba395927
KA
643 }
644}
645
 646/* clear last level pte; a tlb flush should follow */
647static void dma_pte_clear_range(struct dmar_domain *domain, u64 start, u64 end)
648{
649 int addr_width = agaw_to_width(domain->agaw);
650
651 start &= (((u64)1) << addr_width) - 1;
652 end &= (((u64)1) << addr_width) - 1;
 653 /* in case it's a partial page */
5b6985ce
FY
654 start = PAGE_ALIGN(start);
655 end &= PAGE_MASK;
ba395927
KA
656
657 /* we don't need lock here, nobody else touches the iova range */
658 while (start < end) {
659 dma_pte_clear_one(domain, start);
5b6985ce 660 start += VTD_PAGE_SIZE;
ba395927
KA
661 }
662}
663
664/* free page table pages. last level pte should already be cleared */
665static void dma_pte_free_pagetable(struct dmar_domain *domain,
666 u64 start, u64 end)
667{
668 int addr_width = agaw_to_width(domain->agaw);
669 struct dma_pte *pte;
670 int total = agaw_to_level(domain->agaw);
671 int level;
672 u64 tmp;
8c11e798 673 struct intel_iommu *iommu = domain_get_iommu(domain);
ba395927
KA
674
675 start &= (((u64)1) << addr_width) - 1;
676 end &= (((u64)1) << addr_width) - 1;
677
678 /* we don't need lock here, nobody else touches the iova range */
679 level = 2;
680 while (level <= total) {
681 tmp = align_to_level(start, level);
682 if (tmp >= end || (tmp + level_size(level) > end))
683 return;
684
685 while (tmp < end) {
686 pte = dma_addr_level_pte(domain, tmp, level);
687 if (pte) {
688 free_pgtable_page(
19c239ce
MM
689 phys_to_virt(dma_pte_addr(pte)));
690 dma_clear_pte(pte);
8c11e798 691 __iommu_flush_cache(iommu,
ba395927
KA
692 pte, sizeof(*pte));
693 }
694 tmp += level_size(level);
695 }
696 level++;
697 }
698 /* free pgd */
699 if (start == 0 && end >= ((((u64)1) << addr_width) - 1)) {
700 free_pgtable_page(domain->pgd);
701 domain->pgd = NULL;
702 }
703}
704
705/* iommu handling */
706static int iommu_alloc_root_entry(struct intel_iommu *iommu)
707{
708 struct root_entry *root;
709 unsigned long flags;
710
711 root = (struct root_entry *)alloc_pgtable_page();
712 if (!root)
713 return -ENOMEM;
714
5b6985ce 715 __iommu_flush_cache(iommu, root, ROOT_SIZE);
ba395927
KA
716
717 spin_lock_irqsave(&iommu->lock, flags);
718 iommu->root_entry = root;
719 spin_unlock_irqrestore(&iommu->lock, flags);
720
721 return 0;
722}
723
ba395927
KA
724static void iommu_set_root_entry(struct intel_iommu *iommu)
725{
726 void *addr;
727 u32 cmd, sts;
728 unsigned long flag;
729
730 addr = iommu->root_entry;
731
732 spin_lock_irqsave(&iommu->register_lock, flag);
733 dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));
734
735 cmd = iommu->gcmd | DMA_GCMD_SRTP;
736 writel(cmd, iommu->reg + DMAR_GCMD_REG);
737
738 /* Make sure hardware complete it */
739 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
740 readl, (sts & DMA_GSTS_RTPS), sts);
741
742 spin_unlock_irqrestore(&iommu->register_lock, flag);
743}
744
745static void iommu_flush_write_buffer(struct intel_iommu *iommu)
746{
747 u32 val;
748 unsigned long flag;
749
750 if (!cap_rwbf(iommu->cap))
751 return;
752 val = iommu->gcmd | DMA_GCMD_WBF;
753
754 spin_lock_irqsave(&iommu->register_lock, flag);
755 writel(val, iommu->reg + DMAR_GCMD_REG);
756
757 /* Make sure hardware complete it */
758 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
759 readl, (!(val & DMA_GSTS_WBFS)), val);
760
761 spin_unlock_irqrestore(&iommu->register_lock, flag);
762}
763
 764/* return value determines whether we need a write buffer flush */
765static int __iommu_flush_context(struct intel_iommu *iommu,
766 u16 did, u16 source_id, u8 function_mask, u64 type,
767 int non_present_entry_flush)
768{
769 u64 val = 0;
770 unsigned long flag;
771
772 /*
 773 * In the non-present entry flush case, if the hardware doesn't cache
 774 * non-present entries we do nothing; if it does cache them, we flush
 775 * the entries of domain 0 (the domain id used to tag any cached
 776 * non-present entries)
777 */
778 if (non_present_entry_flush) {
779 if (!cap_caching_mode(iommu->cap))
780 return 1;
781 else
782 did = 0;
783 }
784
785 switch (type) {
786 case DMA_CCMD_GLOBAL_INVL:
787 val = DMA_CCMD_GLOBAL_INVL;
788 break;
789 case DMA_CCMD_DOMAIN_INVL:
790 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
791 break;
792 case DMA_CCMD_DEVICE_INVL:
793 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
794 | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
795 break;
796 default:
797 BUG();
798 }
799 val |= DMA_CCMD_ICC;
800
801 spin_lock_irqsave(&iommu->register_lock, flag);
802 dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
803
804 /* Make sure hardware complete it */
805 IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
806 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
807
808 spin_unlock_irqrestore(&iommu->register_lock, flag);
809
4d235ba6 810 /* flush context entry will implicitly flush write buffer */
ba395927
KA
811 return 0;
812}
813
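/*
 * Illustrative sketch, not part of the driver: a device-selective
 * context-cache invalidation, as issued from
 * domain_context_mapping_one() later in this file.  source_id is
 * (bus << 8) | devfn and DMA_CCMD_MASK_NOBIT means "match the
 * function number exactly"; a non-zero return tells the caller that
 * a write buffer flush is still needed.  The example_* helper is
 * hypothetical.
 */
#if 0
static int example_flush_dev_context(struct intel_iommu *iommu, u16 did,
				     u8 bus, u8 devfn)
{
	return __iommu_flush_context(iommu, did, ((u16)bus << 8) | devfn,
				     DMA_CCMD_MASK_NOBIT,
				     DMA_CCMD_DEVICE_INVL, 0);
}
#endif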
ba395927
KA
 814/* return value determines whether we need a write buffer flush */
815static int __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
816 u64 addr, unsigned int size_order, u64 type,
817 int non_present_entry_flush)
818{
819 int tlb_offset = ecap_iotlb_offset(iommu->ecap);
820 u64 val = 0, val_iva = 0;
821 unsigned long flag;
822
823 /*
 824 * In the non-present entry flush case, if the hardware doesn't cache
 825 * non-present entries we do nothing; if it does cache them, we flush
 826 * the entries of domain 0 (the domain id used to tag any cached
 827 * non-present entries)
828 */
829 if (non_present_entry_flush) {
830 if (!cap_caching_mode(iommu->cap))
831 return 1;
832 else
833 did = 0;
834 }
835
836 switch (type) {
837 case DMA_TLB_GLOBAL_FLUSH:
838 /* global flush doesn't need set IVA_REG */
839 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
840 break;
841 case DMA_TLB_DSI_FLUSH:
842 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
843 break;
844 case DMA_TLB_PSI_FLUSH:
845 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
846 /* Note: always flush non-leaf currently */
847 val_iva = size_order | addr;
848 break;
849 default:
850 BUG();
851 }
852 /* Note: set drain read/write */
853#if 0
854 /*
 855 * This is probably only here to be extra safe; it looks like we
 856 * can ignore it without any impact.
857 */
858 if (cap_read_drain(iommu->cap))
859 val |= DMA_TLB_READ_DRAIN;
860#endif
861 if (cap_write_drain(iommu->cap))
862 val |= DMA_TLB_WRITE_DRAIN;
863
864 spin_lock_irqsave(&iommu->register_lock, flag);
865 /* Note: Only uses first TLB reg currently */
866 if (val_iva)
867 dmar_writeq(iommu->reg + tlb_offset, val_iva);
868 dmar_writeq(iommu->reg + tlb_offset + 8, val);
869
870 /* Make sure hardware complete it */
871 IOMMU_WAIT_OP(iommu, tlb_offset + 8,
872 dmar_readq, (!(val & DMA_TLB_IVT)), val);
873
874 spin_unlock_irqrestore(&iommu->register_lock, flag);
875
876 /* check IOTLB invalidation granularity */
877 if (DMA_TLB_IAIG(val) == 0)
878 printk(KERN_ERR"IOMMU: flush IOTLB failed\n");
879 if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
880 pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
5b6985ce
FY
881 (unsigned long long)DMA_TLB_IIRG(type),
882 (unsigned long long)DMA_TLB_IAIG(val));
4d235ba6 883 /* flush iotlb entry will implicitly flush write buffer */
ba395927
KA
884 return 0;
885}
886
ba395927
KA
887static int iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
888 u64 addr, unsigned int pages, int non_present_entry_flush)
889{
f76aec76 890 unsigned int mask;
ba395927 891
5b6985ce 892 BUG_ON(addr & (~VTD_PAGE_MASK));
ba395927
KA
893 BUG_ON(pages == 0);
894
895 /* Fallback to domain selective flush if no PSI support */
896 if (!cap_pgsel_inv(iommu->cap))
a77b67d4
YS
897 return iommu->flush.flush_iotlb(iommu, did, 0, 0,
898 DMA_TLB_DSI_FLUSH,
899 non_present_entry_flush);
ba395927
KA
900
901 /*
 902 * PSI requires the number of pages to be a power of two, with the
 903 * base address naturally aligned to that size
904 */
f76aec76 905 mask = ilog2(__roundup_pow_of_two(pages));
ba395927 906 /* Fallback to domain selective flush if size is too big */
f76aec76 907 if (mask > cap_max_amask_val(iommu->cap))
a77b67d4
YS
908 return iommu->flush.flush_iotlb(iommu, did, 0, 0,
909 DMA_TLB_DSI_FLUSH, non_present_entry_flush);
ba395927 910
a77b67d4
YS
911 return iommu->flush.flush_iotlb(iommu, did, addr, mask,
912 DMA_TLB_PSI_FLUSH,
913 non_present_entry_flush);
ba395927
KA
914}
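/*
 * Worked example (illustrative, not from the original source): for a
 * 5-page unmap, mask = ilog2(__roundup_pow_of_two(5)) = ilog2(8) = 3,
 * so the hardware invalidates 2^3 = 8 pages starting at addr rounded
 * down to an 8-page boundary.  If mask exceeded
 * cap_max_amask_val(iommu->cap), the code above would instead fall
 * back to a domain-selective flush.
 */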
915
f8bab735 916static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
917{
918 u32 pmen;
919 unsigned long flags;
920
921 spin_lock_irqsave(&iommu->register_lock, flags);
922 pmen = readl(iommu->reg + DMAR_PMEN_REG);
923 pmen &= ~DMA_PMEN_EPM;
924 writel(pmen, iommu->reg + DMAR_PMEN_REG);
925
926 /* wait for the protected region status bit to clear */
927 IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
928 readl, !(pmen & DMA_PMEN_PRS), pmen);
929
930 spin_unlock_irqrestore(&iommu->register_lock, flags);
931}
932
ba395927
KA
933static int iommu_enable_translation(struct intel_iommu *iommu)
934{
935 u32 sts;
936 unsigned long flags;
937
938 spin_lock_irqsave(&iommu->register_lock, flags);
939 writel(iommu->gcmd|DMA_GCMD_TE, iommu->reg + DMAR_GCMD_REG);
940
941 /* Make sure hardware complete it */
942 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
943 readl, (sts & DMA_GSTS_TES), sts);
944
945 iommu->gcmd |= DMA_GCMD_TE;
946 spin_unlock_irqrestore(&iommu->register_lock, flags);
947 return 0;
948}
949
950static int iommu_disable_translation(struct intel_iommu *iommu)
951{
952 u32 sts;
953 unsigned long flag;
954
955 spin_lock_irqsave(&iommu->register_lock, flag);
956 iommu->gcmd &= ~DMA_GCMD_TE;
957 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
958
959 /* Make sure hardware complete it */
960 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
961 readl, (!(sts & DMA_GSTS_TES)), sts);
962
963 spin_unlock_irqrestore(&iommu->register_lock, flag);
964 return 0;
965}
966
3460a6d9
KA
 967/* iommu interrupt handling. Most of it is MSI-like. */
968
d94afc6c 969static const char *fault_reason_strings[] =
3460a6d9
KA
970{
971 "Software",
972 "Present bit in root entry is clear",
973 "Present bit in context entry is clear",
974 "Invalid context entry",
975 "Access beyond MGAW",
976 "PTE Write access is not set",
977 "PTE Read access is not set",
978 "Next page table ptr is invalid",
979 "Root table address invalid",
980 "Context table ptr is invalid",
981 "non-zero reserved fields in RTP",
982 "non-zero reserved fields in CTP",
983 "non-zero reserved fields in PTE",
3460a6d9 984};
f8bab735 985#define MAX_FAULT_REASON_IDX (ARRAY_SIZE(fault_reason_strings) - 1)
3460a6d9 986
d94afc6c 987const char *dmar_get_fault_reason(u8 fault_reason)
3460a6d9 988{
d94afc6c 989 if (fault_reason > MAX_FAULT_REASON_IDX)
990 return "Unknown";
3460a6d9
KA
991 else
992 return fault_reason_strings[fault_reason];
993}
994
995void dmar_msi_unmask(unsigned int irq)
996{
997 struct intel_iommu *iommu = get_irq_data(irq);
998 unsigned long flag;
999
1000 /* unmask it */
1001 spin_lock_irqsave(&iommu->register_lock, flag);
1002 writel(0, iommu->reg + DMAR_FECTL_REG);
1003 /* Read a reg to force flush the post write */
1004 readl(iommu->reg + DMAR_FECTL_REG);
1005 spin_unlock_irqrestore(&iommu->register_lock, flag);
1006}
1007
1008void dmar_msi_mask(unsigned int irq)
1009{
1010 unsigned long flag;
1011 struct intel_iommu *iommu = get_irq_data(irq);
1012
1013 /* mask it */
1014 spin_lock_irqsave(&iommu->register_lock, flag);
1015 writel(DMA_FECTL_IM, iommu->reg + DMAR_FECTL_REG);
1016 /* Read a reg to force flush the post write */
1017 readl(iommu->reg + DMAR_FECTL_REG);
1018 spin_unlock_irqrestore(&iommu->register_lock, flag);
1019}
1020
1021void dmar_msi_write(int irq, struct msi_msg *msg)
1022{
1023 struct intel_iommu *iommu = get_irq_data(irq);
1024 unsigned long flag;
1025
1026 spin_lock_irqsave(&iommu->register_lock, flag);
1027 writel(msg->data, iommu->reg + DMAR_FEDATA_REG);
1028 writel(msg->address_lo, iommu->reg + DMAR_FEADDR_REG);
1029 writel(msg->address_hi, iommu->reg + DMAR_FEUADDR_REG);
1030 spin_unlock_irqrestore(&iommu->register_lock, flag);
1031}
1032
1033void dmar_msi_read(int irq, struct msi_msg *msg)
1034{
1035 struct intel_iommu *iommu = get_irq_data(irq);
1036 unsigned long flag;
1037
1038 spin_lock_irqsave(&iommu->register_lock, flag);
1039 msg->data = readl(iommu->reg + DMAR_FEDATA_REG);
1040 msg->address_lo = readl(iommu->reg + DMAR_FEADDR_REG);
1041 msg->address_hi = readl(iommu->reg + DMAR_FEUADDR_REG);
1042 spin_unlock_irqrestore(&iommu->register_lock, flag);
1043}
1044
1045static int iommu_page_fault_do_one(struct intel_iommu *iommu, int type,
5b6985ce 1046 u8 fault_reason, u16 source_id, unsigned long long addr)
3460a6d9 1047{
d94afc6c 1048 const char *reason;
3460a6d9
KA
1049
1050 reason = dmar_get_fault_reason(fault_reason);
1051
1052 printk(KERN_ERR
1053 "DMAR:[%s] Request device [%02x:%02x.%d] "
1054 "fault addr %llx \n"
1055 "DMAR:[fault reason %02d] %s\n",
1056 (type ? "DMA Read" : "DMA Write"),
1057 (source_id >> 8), PCI_SLOT(source_id & 0xFF),
1058 PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason);
1059 return 0;
1060}
1061
1062#define PRIMARY_FAULT_REG_LEN (16)
1063static irqreturn_t iommu_page_fault(int irq, void *dev_id)
1064{
1065 struct intel_iommu *iommu = dev_id;
1066 int reg, fault_index;
1067 u32 fault_status;
1068 unsigned long flag;
1069
1070 spin_lock_irqsave(&iommu->register_lock, flag);
1071 fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1072
1073 /* TBD: ignore advanced fault log currently */
1074 if (!(fault_status & DMA_FSTS_PPF))
1075 goto clear_overflow;
1076
1077 fault_index = dma_fsts_fault_record_index(fault_status);
1078 reg = cap_fault_reg_offset(iommu->cap);
1079 while (1) {
1080 u8 fault_reason;
1081 u16 source_id;
1082 u64 guest_addr;
1083 int type;
1084 u32 data;
1085
1086 /* highest 32 bits */
1087 data = readl(iommu->reg + reg +
1088 fault_index * PRIMARY_FAULT_REG_LEN + 12);
1089 if (!(data & DMA_FRCD_F))
1090 break;
1091
1092 fault_reason = dma_frcd_fault_reason(data);
1093 type = dma_frcd_type(data);
1094
1095 data = readl(iommu->reg + reg +
1096 fault_index * PRIMARY_FAULT_REG_LEN + 8);
1097 source_id = dma_frcd_source_id(data);
1098
1099 guest_addr = dmar_readq(iommu->reg + reg +
1100 fault_index * PRIMARY_FAULT_REG_LEN);
1101 guest_addr = dma_frcd_page_addr(guest_addr);
1102 /* clear the fault */
1103 writel(DMA_FRCD_F, iommu->reg + reg +
1104 fault_index * PRIMARY_FAULT_REG_LEN + 12);
1105
1106 spin_unlock_irqrestore(&iommu->register_lock, flag);
1107
1108 iommu_page_fault_do_one(iommu, type, fault_reason,
1109 source_id, guest_addr);
1110
1111 fault_index++;
1112 if (fault_index > cap_num_fault_regs(iommu->cap))
1113 fault_index = 0;
1114 spin_lock_irqsave(&iommu->register_lock, flag);
1115 }
1116clear_overflow:
1117 /* clear primary fault overflow */
1118 fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1119 if (fault_status & DMA_FSTS_PFO)
1120 writel(DMA_FSTS_PFO, iommu->reg + DMAR_FSTS_REG);
1121
1122 spin_unlock_irqrestore(&iommu->register_lock, flag);
1123 return IRQ_HANDLED;
1124}
1125
1126int dmar_set_interrupt(struct intel_iommu *iommu)
1127{
1128 int irq, ret;
1129
1130 irq = create_irq();
1131 if (!irq) {
1132 printk(KERN_ERR "IOMMU: no free vectors\n");
1133 return -EINVAL;
1134 }
1135
1136 set_irq_data(irq, iommu);
1137 iommu->irq = irq;
1138
1139 ret = arch_setup_dmar_msi(irq);
1140 if (ret) {
1141 set_irq_data(irq, NULL);
1142 iommu->irq = 0;
1143 destroy_irq(irq);
1144 return 0;
1145 }
1146
 1147 /* Force the fault register to be cleared */
1148 iommu_page_fault(irq, iommu);
1149
1150 ret = request_irq(irq, iommu_page_fault, 0, iommu->name, iommu);
1151 if (ret)
1152 printk(KERN_ERR "IOMMU: can't request irq\n");
1153 return ret;
1154}
1155
ba395927
KA
1156static int iommu_init_domains(struct intel_iommu *iommu)
1157{
1158 unsigned long ndomains;
1159 unsigned long nlongs;
1160
1161 ndomains = cap_ndoms(iommu->cap);
 1162 pr_debug("Number of Domains supported <%ld>\n", ndomains);
1163 nlongs = BITS_TO_LONGS(ndomains);
1164
1165 /* TBD: there might be 64K domains,
1166 * consider other allocation for future chip
1167 */
1168 iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1169 if (!iommu->domain_ids) {
1170 printk(KERN_ERR "Allocating domain id array failed\n");
1171 return -ENOMEM;
1172 }
1173 iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *),
1174 GFP_KERNEL);
1175 if (!iommu->domains) {
1176 printk(KERN_ERR "Allocating domain array failed\n");
1177 kfree(iommu->domain_ids);
1178 return -ENOMEM;
1179 }
1180
e61d98d8
SS
1181 spin_lock_init(&iommu->lock);
1182
ba395927
KA
1183 /*
1184 * if Caching mode is set, then invalid translations are tagged
1185 * with domainid 0. Hence we need to pre-allocate it.
1186 */
1187 if (cap_caching_mode(iommu->cap))
1188 set_bit(0, iommu->domain_ids);
1189 return 0;
1190}
ba395927 1191
ba395927
KA
1192
1193static void domain_exit(struct dmar_domain *domain);
e61d98d8
SS
1194
1195void free_dmar_iommu(struct intel_iommu *iommu)
ba395927
KA
1196{
1197 struct dmar_domain *domain;
1198 int i;
1199
ba395927
KA
1200 i = find_first_bit(iommu->domain_ids, cap_ndoms(iommu->cap));
1201 for (; i < cap_ndoms(iommu->cap); ) {
1202 domain = iommu->domains[i];
1203 clear_bit(i, iommu->domain_ids);
1204 domain_exit(domain);
1205 i = find_next_bit(iommu->domain_ids,
1206 cap_ndoms(iommu->cap), i+1);
1207 }
1208
1209 if (iommu->gcmd & DMA_GCMD_TE)
1210 iommu_disable_translation(iommu);
1211
1212 if (iommu->irq) {
1213 set_irq_data(iommu->irq, NULL);
1214 /* This will mask the irq */
1215 free_irq(iommu->irq, iommu);
1216 destroy_irq(iommu->irq);
1217 }
1218
1219 kfree(iommu->domains);
1220 kfree(iommu->domain_ids);
1221
d9630fe9
WH
1222 g_iommus[iommu->seq_id] = NULL;
1223
1224 /* if all iommus are freed, free g_iommus */
1225 for (i = 0; i < g_num_of_iommus; i++) {
1226 if (g_iommus[i])
1227 break;
1228 }
1229
1230 if (i == g_num_of_iommus)
1231 kfree(g_iommus);
1232
ba395927
KA
1233 /* free context mapping */
1234 free_context_table(iommu);
ba395927
KA
1235}
1236
1237static struct dmar_domain * iommu_alloc_domain(struct intel_iommu *iommu)
1238{
1239 unsigned long num;
1240 unsigned long ndomains;
1241 struct dmar_domain *domain;
1242 unsigned long flags;
1243
1244 domain = alloc_domain_mem();
1245 if (!domain)
1246 return NULL;
1247
1248 ndomains = cap_ndoms(iommu->cap);
1249
1250 spin_lock_irqsave(&iommu->lock, flags);
1251 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1252 if (num >= ndomains) {
1253 spin_unlock_irqrestore(&iommu->lock, flags);
1254 free_domain_mem(domain);
1255 printk(KERN_ERR "IOMMU: no free domain ids\n");
1256 return NULL;
1257 }
1258
1259 set_bit(num, iommu->domain_ids);
1260 domain->id = num;
8c11e798
WH
1261 memset(&domain->iommu_bmp, 0, sizeof(unsigned long));
1262 set_bit(iommu->seq_id, &domain->iommu_bmp);
d71a2f33 1263 domain->flags = 0;
ba395927
KA
1264 iommu->domains[num] = domain;
1265 spin_unlock_irqrestore(&iommu->lock, flags);
1266
1267 return domain;
1268}
1269
1270static void iommu_free_domain(struct dmar_domain *domain)
1271{
1272 unsigned long flags;
8c11e798
WH
1273 struct intel_iommu *iommu;
1274
1275 iommu = domain_get_iommu(domain);
ba395927 1276
8c11e798
WH
1277 spin_lock_irqsave(&iommu->lock, flags);
1278 clear_bit(domain->id, iommu->domain_ids);
1279 spin_unlock_irqrestore(&iommu->lock, flags);
ba395927
KA
1280}
1281
1282static struct iova_domain reserved_iova_list;
8a443df4
MG
1283static struct lock_class_key reserved_alloc_key;
1284static struct lock_class_key reserved_rbtree_key;
ba395927
KA
1285
1286static void dmar_init_reserved_ranges(void)
1287{
1288 struct pci_dev *pdev = NULL;
1289 struct iova *iova;
1290 int i;
1291 u64 addr, size;
1292
f661197e 1293 init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN);
ba395927 1294
8a443df4
MG
1295 lockdep_set_class(&reserved_iova_list.iova_alloc_lock,
1296 &reserved_alloc_key);
1297 lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1298 &reserved_rbtree_key);
1299
ba395927
KA
1300 /* IOAPIC ranges shouldn't be accessed by DMA */
1301 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1302 IOVA_PFN(IOAPIC_RANGE_END));
1303 if (!iova)
1304 printk(KERN_ERR "Reserve IOAPIC range failed\n");
1305
1306 /* Reserve all PCI MMIO to avoid peer-to-peer access */
1307 for_each_pci_dev(pdev) {
1308 struct resource *r;
1309
1310 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1311 r = &pdev->resource[i];
1312 if (!r->flags || !(r->flags & IORESOURCE_MEM))
1313 continue;
1314 addr = r->start;
5b6985ce 1315 addr &= PAGE_MASK;
ba395927 1316 size = r->end - addr;
5b6985ce 1317 size = PAGE_ALIGN(size);
ba395927
KA
1318 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(addr),
1319 IOVA_PFN(size + addr) - 1);
1320 if (!iova)
1321 printk(KERN_ERR "Reserve iova failed\n");
1322 }
1323 }
1324
1325}
1326
1327static void domain_reserve_special_ranges(struct dmar_domain *domain)
1328{
1329 copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1330}
1331
1332static inline int guestwidth_to_adjustwidth(int gaw)
1333{
1334 int agaw;
1335 int r = (gaw - 12) % 9;
1336
1337 if (r == 0)
1338 agaw = gaw;
1339 else
1340 agaw = gaw + 9 - r;
1341 if (agaw > 64)
1342 agaw = 64;
1343 return agaw;
1344}
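/*
 * Worked examples (illustrative, not from the original source): the
 * adjusted width is the guest width rounded up so that (agaw - 12)
 * is a multiple of 9, matching the 9-bit-per-level page tables:
 *
 *	gaw 39: r = (39 - 12) % 9 = 0, agaw = 39 (3 levels)
 *	gaw 48: r = 0,                 agaw = 48 (4 levels)
 *	gaw 40: r = 1,                 agaw = 40 + 9 - 1 = 48
 */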
1345
1346static int domain_init(struct dmar_domain *domain, int guest_width)
1347{
1348 struct intel_iommu *iommu;
1349 int adjust_width, agaw;
1350 unsigned long sagaw;
1351
f661197e 1352 init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
ba395927
KA
1353 spin_lock_init(&domain->mapping_lock);
1354
1355 domain_reserve_special_ranges(domain);
1356
1357 /* calculate AGAW */
8c11e798 1358 iommu = domain_get_iommu(domain);
ba395927
KA
1359 if (guest_width > cap_mgaw(iommu->cap))
1360 guest_width = cap_mgaw(iommu->cap);
1361 domain->gaw = guest_width;
1362 adjust_width = guestwidth_to_adjustwidth(guest_width);
1363 agaw = width_to_agaw(adjust_width);
1364 sagaw = cap_sagaw(iommu->cap);
1365 if (!test_bit(agaw, &sagaw)) {
1366 /* hardware doesn't support it, choose a bigger one */
1367 pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw);
1368 agaw = find_next_bit(&sagaw, 5, agaw);
1369 if (agaw >= 5)
1370 return -ENODEV;
1371 }
1372 domain->agaw = agaw;
1373 INIT_LIST_HEAD(&domain->devices);
1374
8e604097
WH
1375 if (ecap_coherent(iommu->ecap))
1376 domain->iommu_coherency = 1;
1377 else
1378 domain->iommu_coherency = 0;
1379
ba395927
KA
1380 /* always allocate the top pgd */
1381 domain->pgd = (struct dma_pte *)alloc_pgtable_page();
1382 if (!domain->pgd)
1383 return -ENOMEM;
5b6985ce 1384 __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
ba395927
KA
1385 return 0;
1386}
1387
1388static void domain_exit(struct dmar_domain *domain)
1389{
1390 u64 end;
1391
 1392 /* Domain 0 is reserved, so don't process it */
1393 if (!domain)
1394 return;
1395
1396 domain_remove_dev_info(domain);
1397 /* destroy iovas */
1398 put_iova_domain(&domain->iovad);
1399 end = DOMAIN_MAX_ADDR(domain->gaw);
5b6985ce 1400 end = end & (~PAGE_MASK);
ba395927
KA
1401
1402 /* clear ptes */
1403 dma_pte_clear_range(domain, 0, end);
1404
1405 /* free page tables */
1406 dma_pte_free_pagetable(domain, 0, end);
1407
1408 iommu_free_domain(domain);
1409 free_domain_mem(domain);
1410}
1411
1412static int domain_context_mapping_one(struct dmar_domain *domain,
1413 u8 bus, u8 devfn)
1414{
1415 struct context_entry *context;
8c11e798 1416 struct intel_iommu *iommu = domain_get_iommu(domain);
ba395927
KA
1417 unsigned long flags;
1418
1419 pr_debug("Set context mapping for %02x:%02x.%d\n",
1420 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1421 BUG_ON(!domain->pgd);
1422 context = device_to_context_entry(iommu, bus, devfn);
1423 if (!context)
1424 return -ENOMEM;
1425 spin_lock_irqsave(&iommu->lock, flags);
c07e7d21 1426 if (context_present(context)) {
ba395927
KA
1427 spin_unlock_irqrestore(&iommu->lock, flags);
1428 return 0;
1429 }
1430
c07e7d21
MM
1431 context_set_domain_id(context, domain->id);
1432 context_set_address_width(context, domain->agaw);
1433 context_set_address_root(context, virt_to_phys(domain->pgd));
1434 context_set_translation_type(context, CONTEXT_TT_MULTI_LEVEL);
1435 context_set_fault_enable(context);
1436 context_set_present(context);
ba395927
KA
1437 __iommu_flush_cache(iommu, context, sizeof(*context));
1438
1439 /* it's a non-present to present mapping */
a77b67d4
YS
1440 if (iommu->flush.flush_context(iommu, domain->id,
1441 (((u16)bus) << 8) | devfn, DMA_CCMD_MASK_NOBIT,
1442 DMA_CCMD_DEVICE_INVL, 1))
ba395927
KA
1443 iommu_flush_write_buffer(iommu);
1444 else
a77b67d4
YS
1445 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_DSI_FLUSH, 0);
1446
ba395927
KA
1447 spin_unlock_irqrestore(&iommu->lock, flags);
1448 return 0;
1449}
1450
1451static int
1452domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev)
1453{
1454 int ret;
1455 struct pci_dev *tmp, *parent;
1456
1457 ret = domain_context_mapping_one(domain, pdev->bus->number,
1458 pdev->devfn);
1459 if (ret)
1460 return ret;
1461
1462 /* dependent device mapping */
1463 tmp = pci_find_upstream_pcie_bridge(pdev);
1464 if (!tmp)
1465 return 0;
1466 /* Secondary interface's bus number and devfn 0 */
1467 parent = pdev->bus->self;
1468 while (parent != tmp) {
1469 ret = domain_context_mapping_one(domain, parent->bus->number,
1470 parent->devfn);
1471 if (ret)
1472 return ret;
1473 parent = parent->bus->self;
1474 }
1475 if (tmp->is_pcie) /* this is a PCIE-to-PCI bridge */
1476 return domain_context_mapping_one(domain,
1477 tmp->subordinate->number, 0);
1478 else /* this is a legacy PCI bridge */
1479 return domain_context_mapping_one(domain,
1480 tmp->bus->number, tmp->devfn);
1481}
1482
1483static int domain_context_mapped(struct dmar_domain *domain,
1484 struct pci_dev *pdev)
1485{
1486 int ret;
1487 struct pci_dev *tmp, *parent;
8c11e798 1488 struct intel_iommu *iommu = domain_get_iommu(domain);
ba395927 1489
8c11e798 1490 ret = device_context_mapped(iommu,
ba395927
KA
1491 pdev->bus->number, pdev->devfn);
1492 if (!ret)
1493 return ret;
1494 /* dependent device mapping */
1495 tmp = pci_find_upstream_pcie_bridge(pdev);
1496 if (!tmp)
1497 return ret;
1498 /* Secondary interface's bus number and devfn 0 */
1499 parent = pdev->bus->self;
1500 while (parent != tmp) {
8c11e798 1501 ret = device_context_mapped(iommu, parent->bus->number,
ba395927
KA
1502 parent->devfn);
1503 if (!ret)
1504 return ret;
1505 parent = parent->bus->self;
1506 }
1507 if (tmp->is_pcie)
8c11e798 1508 return device_context_mapped(iommu,
ba395927
KA
1509 tmp->subordinate->number, 0);
1510 else
8c11e798 1511 return device_context_mapped(iommu,
ba395927
KA
1512 tmp->bus->number, tmp->devfn);
1513}
1514
1515static int
1516domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova,
1517 u64 hpa, size_t size, int prot)
1518{
1519 u64 start_pfn, end_pfn;
1520 struct dma_pte *pte;
1521 int index;
5b6985ce 1522 int addr_width = agaw_to_width(domain->agaw);
8c11e798 1523 struct intel_iommu *iommu = domain_get_iommu(domain);
5b6985ce
FY
1524
1525 hpa &= (((u64)1) << addr_width) - 1;
ba395927
KA
1526
1527 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
1528 return -EINVAL;
5b6985ce
FY
1529 iova &= PAGE_MASK;
1530 start_pfn = ((u64)hpa) >> VTD_PAGE_SHIFT;
1531 end_pfn = (VTD_PAGE_ALIGN(((u64)hpa) + size)) >> VTD_PAGE_SHIFT;
ba395927
KA
1532 index = 0;
1533 while (start_pfn < end_pfn) {
5b6985ce 1534 pte = addr_to_dma_pte(domain, iova + VTD_PAGE_SIZE * index);
ba395927
KA
1535 if (!pte)
1536 return -ENOMEM;
1537 /* We don't need lock here, nobody else
1538 * touches the iova range
1539 */
19c239ce
MM
1540 BUG_ON(dma_pte_addr(pte));
1541 dma_set_pte_addr(pte, start_pfn << VTD_PAGE_SHIFT);
1542 dma_set_pte_prot(pte, prot);
8c11e798 1543 __iommu_flush_cache(iommu, pte, sizeof(*pte));
ba395927
KA
1544 start_pfn++;
1545 index++;
1546 }
1547 return 0;
1548}
1549
1550static void detach_domain_for_dev(struct dmar_domain *domain, u8 bus, u8 devfn)
1551{
8c11e798
WH
1552 struct intel_iommu *iommu = domain_get_iommu(domain);
1553
1554 clear_context_table(iommu, bus, devfn);
1555 iommu->flush.flush_context(iommu, 0, 0, 0,
a77b67d4 1556 DMA_CCMD_GLOBAL_INVL, 0);
8c11e798 1557 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
a77b67d4 1558 DMA_TLB_GLOBAL_FLUSH, 0);
ba395927
KA
1559}
1560
1561static void domain_remove_dev_info(struct dmar_domain *domain)
1562{
1563 struct device_domain_info *info;
1564 unsigned long flags;
1565
1566 spin_lock_irqsave(&device_domain_lock, flags);
1567 while (!list_empty(&domain->devices)) {
1568 info = list_entry(domain->devices.next,
1569 struct device_domain_info, link);
1570 list_del(&info->link);
1571 list_del(&info->global);
1572 if (info->dev)
358dd8ac 1573 info->dev->dev.archdata.iommu = NULL;
ba395927
KA
1574 spin_unlock_irqrestore(&device_domain_lock, flags);
1575
1576 detach_domain_for_dev(info->domain, info->bus, info->devfn);
1577 free_devinfo_mem(info);
1578
1579 spin_lock_irqsave(&device_domain_lock, flags);
1580 }
1581 spin_unlock_irqrestore(&device_domain_lock, flags);
1582}
1583
1584/*
1585 * find_domain
358dd8ac 1586 * Note: we use struct pci_dev->dev.archdata.iommu to store the domain info
ba395927 1587 */
38717946 1588static struct dmar_domain *
ba395927
KA
1589find_domain(struct pci_dev *pdev)
1590{
1591 struct device_domain_info *info;
1592
1593 /* No lock here, assumes no domain exit in normal case */
358dd8ac 1594 info = pdev->dev.archdata.iommu;
ba395927
KA
1595 if (info)
1596 return info->domain;
1597 return NULL;
1598}
1599
ba395927
KA
1600/* domain is initialized */
1601static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
1602{
1603 struct dmar_domain *domain, *found = NULL;
1604 struct intel_iommu *iommu;
1605 struct dmar_drhd_unit *drhd;
1606 struct device_domain_info *info, *tmp;
1607 struct pci_dev *dev_tmp;
1608 unsigned long flags;
1609 int bus = 0, devfn = 0;
1610
1611 domain = find_domain(pdev);
1612 if (domain)
1613 return domain;
1614
1615 dev_tmp = pci_find_upstream_pcie_bridge(pdev);
1616 if (dev_tmp) {
1617 if (dev_tmp->is_pcie) {
1618 bus = dev_tmp->subordinate->number;
1619 devfn = 0;
1620 } else {
1621 bus = dev_tmp->bus->number;
1622 devfn = dev_tmp->devfn;
1623 }
1624 spin_lock_irqsave(&device_domain_lock, flags);
1625 list_for_each_entry(info, &device_domain_list, global) {
1626 if (info->bus == bus && info->devfn == devfn) {
1627 found = info->domain;
1628 break;
1629 }
1630 }
1631 spin_unlock_irqrestore(&device_domain_lock, flags);
 1632 /* pcie-pci bridge already has a domain, use it */
1633 if (found) {
1634 domain = found;
1635 goto found_domain;
1636 }
1637 }
1638
1639 /* Allocate new domain for the device */
1640 drhd = dmar_find_matched_drhd_unit(pdev);
1641 if (!drhd) {
1642 printk(KERN_ERR "IOMMU: can't find DMAR for device %s\n",
1643 pci_name(pdev));
1644 return NULL;
1645 }
1646 iommu = drhd->iommu;
1647
1648 domain = iommu_alloc_domain(iommu);
1649 if (!domain)
1650 goto error;
1651
1652 if (domain_init(domain, gaw)) {
1653 domain_exit(domain);
1654 goto error;
1655 }
1656
1657 /* register pcie-to-pci device */
1658 if (dev_tmp) {
1659 info = alloc_devinfo_mem();
1660 if (!info) {
1661 domain_exit(domain);
1662 goto error;
1663 }
1664 info->bus = bus;
1665 info->devfn = devfn;
1666 info->dev = NULL;
1667 info->domain = domain;
1668 /* This domain is shared by devices under p2p bridge */
3b5410e7 1669 domain->flags |= DOMAIN_FLAG_P2P_MULTIPLE_DEVICES;
ba395927
KA
1670
 1671 /* pcie-to-pci bridge already has a domain, use it */
1672 found = NULL;
1673 spin_lock_irqsave(&device_domain_lock, flags);
1674 list_for_each_entry(tmp, &device_domain_list, global) {
1675 if (tmp->bus == bus && tmp->devfn == devfn) {
1676 found = tmp->domain;
1677 break;
1678 }
1679 }
1680 if (found) {
1681 free_devinfo_mem(info);
1682 domain_exit(domain);
1683 domain = found;
1684 } else {
1685 list_add(&info->link, &domain->devices);
1686 list_add(&info->global, &device_domain_list);
1687 }
1688 spin_unlock_irqrestore(&device_domain_lock, flags);
1689 }
1690
1691found_domain:
1692 info = alloc_devinfo_mem();
1693 if (!info)
1694 goto error;
1695 info->bus = pdev->bus->number;
1696 info->devfn = pdev->devfn;
1697 info->dev = pdev;
1698 info->domain = domain;
1699 spin_lock_irqsave(&device_domain_lock, flags);
 1700 /* somebody else beat us to it */
1701 found = find_domain(pdev);
1702 if (found != NULL) {
1703 spin_unlock_irqrestore(&device_domain_lock, flags);
1704 if (found != domain) {
1705 domain_exit(domain);
1706 domain = found;
1707 }
1708 free_devinfo_mem(info);
1709 return domain;
1710 }
1711 list_add(&info->link, &domain->devices);
1712 list_add(&info->global, &device_domain_list);
358dd8ac 1713 pdev->dev.archdata.iommu = info;
ba395927
KA
1714 spin_unlock_irqrestore(&device_domain_lock, flags);
1715 return domain;
1716error:
1717 /* recheck it here, maybe others set it */
1718 return find_domain(pdev);
1719}
1720
5b6985ce
FY
1721static int iommu_prepare_identity_map(struct pci_dev *pdev,
1722 unsigned long long start,
1723 unsigned long long end)
ba395927
KA
1724{
1725 struct dmar_domain *domain;
1726 unsigned long size;
5b6985ce 1727 unsigned long long base;
ba395927
KA
1728 int ret;
1729
1730 printk(KERN_INFO
1731 "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
1732 pci_name(pdev), start, end);
1733 /* page table init */
1734 domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
1735 if (!domain)
1736 return -ENOMEM;
1737
1738 /* The address might not be aligned */
5b6985ce 1739 base = start & PAGE_MASK;
ba395927 1740 size = end - base;
5b6985ce 1741 size = PAGE_ALIGN(size);
ba395927
KA
1742 if (!reserve_iova(&domain->iovad, IOVA_PFN(base),
1743 IOVA_PFN(base + size) - 1)) {
1744 printk(KERN_ERR "IOMMU: reserve iova failed\n");
1745 ret = -ENOMEM;
1746 goto error;
1747 }
1748
1749 pr_debug("Mapping reserved region %lx@%llx for %s\n",
1750 size, base, pci_name(pdev));
1751 /*
1752 * RMRR range might have overlap with physical memory range,
1753 * clear it first
1754 */
1755 dma_pte_clear_range(domain, base, base + size);
1756
1757 ret = domain_page_mapping(domain, base, base, size,
1758 DMA_PTE_READ|DMA_PTE_WRITE);
1759 if (ret)
1760 goto error;
1761
1762 /* context entry init */
1763 ret = domain_context_mapping(domain, pdev);
1764 if (!ret)
1765 return 0;
1766error:
1767 domain_exit(domain);
1768 return ret;
1769
1770}
1771
1772static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
1773 struct pci_dev *pdev)
1774{
358dd8ac 1775 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
ba395927
KA
1776 return 0;
1777 return iommu_prepare_identity_map(pdev, rmrr->base_address,
1778 rmrr->end_address + 1);
1779}
1780
e820482c 1781#ifdef CONFIG_DMAR_GFX_WA
d52d53b8
YL
1782struct iommu_prepare_data {
1783 struct pci_dev *pdev;
1784 int ret;
1785};
1786
1787static int __init iommu_prepare_work_fn(unsigned long start_pfn,
1788 unsigned long end_pfn, void *datax)
1789{
1790 struct iommu_prepare_data *data;
1791
1792 data = (struct iommu_prepare_data *)datax;
1793
1794 data->ret = iommu_prepare_identity_map(data->pdev,
1795 start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
1796 return data->ret;
1797
1798}
1799
1800static int __init iommu_prepare_with_active_regions(struct pci_dev *pdev)
1801{
1802 int nid;
1803 struct iommu_prepare_data data;
1804
1805 data.pdev = pdev;
1806 data.ret = 0;
1807
1808 for_each_online_node(nid) {
1809 work_with_active_regions(nid, iommu_prepare_work_fn, &data);
1810 if (data.ret)
1811 return data.ret;
1812 }
1813 return data.ret;
1814}
1815
e820482c
KA
1816static void __init iommu_prepare_gfx_mapping(void)
1817{
1818 struct pci_dev *pdev = NULL;
e820482c
KA
1819 int ret;
1820
1821 for_each_pci_dev(pdev) {
358dd8ac 1822 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO ||
e820482c
KA
1823 !IS_GFX_DEVICE(pdev))
1824 continue;
1825 printk(KERN_INFO "IOMMU: gfx device %s 1-1 mapping\n",
1826 pci_name(pdev));
d52d53b8
YL
1827 ret = iommu_prepare_with_active_regions(pdev);
1828 if (ret)
1829 printk(KERN_ERR "IOMMU: mapping reserved region failed\n");
e820482c
KA
1830 }
1831}
2abd7e16
MM
1832#else /* !CONFIG_DMAR_GFX_WA */
1833static inline void iommu_prepare_gfx_mapping(void)
1834{
1835 return;
1836}
e820482c
KA
1837#endif
1838
49a0429e
KA
1839#ifdef CONFIG_DMAR_FLOPPY_WA
1840static inline void iommu_prepare_isa(void)
1841{
1842 struct pci_dev *pdev;
1843 int ret;
1844
1845 pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
1846 if (!pdev)
1847 return;
1848
1849 printk(KERN_INFO "IOMMU: Prepare 0-16M unity mapping for LPC\n");
1850 ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024);
1851
1852 if (ret)
 1853 printk("IOMMU: Failed to create 0-16M identity map, "
1854 "floppy might not work\n");
1855
1856}
1857#else
1858static inline void iommu_prepare_isa(void)
1859{
1860 return;
1861}
 1862#endif /* !CONFIG_DMAR_FLOPPY_WA */
1863
519a0549 1864static int __init init_dmars(void)
ba395927
KA
1865{
1866 struct dmar_drhd_unit *drhd;
1867 struct dmar_rmrr_unit *rmrr;
1868 struct pci_dev *pdev;
1869 struct intel_iommu *iommu;
80b20dd8 1870 int i, ret, unit = 0;
ba395927
KA
1871
1872 /*
1873 * for each drhd
1874 * allocate root
1875 * initialize and program root entry to not present
1876 * endfor
1877 */
1878 for_each_drhd_unit(drhd) {
5e0d2a6f 1879 g_num_of_iommus++;
1880 /*
 1881 * lock not needed as this is only incremented in the
 1882 * single-threaded kernel __init code path; all other accesses
 1883 * are read-only
1884 */
1885 }
1886
d9630fe9
WH
1887 g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
1888 GFP_KERNEL);
1889 if (!g_iommus) {
1890 printk(KERN_ERR "Allocating global iommu array failed\n");
1891 ret = -ENOMEM;
1892 goto error;
1893 }
1894
80b20dd8 1895 deferred_flush = kzalloc(g_num_of_iommus *
1896 sizeof(struct deferred_flush_tables), GFP_KERNEL);
1897 if (!deferred_flush) {
d9630fe9 1898 kfree(g_iommus);
5e0d2a6f 1899 ret = -ENOMEM;
1900 goto error;
1901 }
1902
5e0d2a6f 1903 for_each_drhd_unit(drhd) {
1904 if (drhd->ignored)
1905 continue;
1886e8a9
SS
1906
1907 iommu = drhd->iommu;
d9630fe9 1908 g_iommus[iommu->seq_id] = iommu;
ba395927 1909
e61d98d8
SS
1910 ret = iommu_init_domains(iommu);
1911 if (ret)
1912 goto error;
1913
ba395927
KA
1914 /*
1915 * TBD:
1916 * we could share the same root & context tables
 1917 * among all IOMMUs. Need to split it later.
1918 */
1919 ret = iommu_alloc_root_entry(iommu);
1920 if (ret) {
1921 printk(KERN_ERR "IOMMU: allocate root entry failed\n");
1922 goto error;
1923 }
1924 }
1925
a77b67d4
YS
1926 for_each_drhd_unit(drhd) {
1927 if (drhd->ignored)
1928 continue;
1929
1930 iommu = drhd->iommu;
1931 if (dmar_enable_qi(iommu)) {
1932 /*
1933 * Queued Invalidate not enabled, use Register Based
1934 * Invalidate
1935 */
1936 iommu->flush.flush_context = __iommu_flush_context;
1937 iommu->flush.flush_iotlb = __iommu_flush_iotlb;
1938 printk(KERN_INFO "IOMMU 0x%Lx: using Register based "
b4e0f9eb
FT
1939 "invalidation\n",
1940 (unsigned long long)drhd->reg_base_addr);
a77b67d4
YS
1941 } else {
1942 iommu->flush.flush_context = qi_flush_context;
1943 iommu->flush.flush_iotlb = qi_flush_iotlb;
1944 printk(KERN_INFO "IOMMU 0x%Lx: using Queued "
b4e0f9eb
FT
1945 "invalidation\n",
1946 (unsigned long long)drhd->reg_base_addr);
a77b67d4
YS
1947 }
1948 }
1949
ba395927
KA
1950 /*
1951 * For each rmrr
1952 * for each dev attached to rmrr
1953 * do
1954 * locate drhd for dev, alloc domain for dev
1955 * allocate free domain
1956 * allocate page table entries for rmrr
1957 * if context not allocated for bus
1958 * allocate and init context
1959 * set present in root table for this bus
1960 * init context with domain, translation etc
1961 * endfor
1962 * endfor
1963 */
1964 for_each_rmrr_units(rmrr) {
ba395927
KA
1965 for (i = 0; i < rmrr->devices_cnt; i++) {
1966 pdev = rmrr->devices[i];
1967 			/* some BIOSes list non-existent devices in the DMAR table */
1968 if (!pdev)
1969 continue;
1970 ret = iommu_prepare_rmrr_dev(rmrr, pdev);
1971 if (ret)
1972 printk(KERN_ERR
1973 "IOMMU: mapping reserved region failed\n");
1974 }
1975 }
1976
e820482c
KA
1977 iommu_prepare_gfx_mapping();
1978
49a0429e
KA
1979 iommu_prepare_isa();
1980
ba395927
KA
1981 /*
1982 * for each drhd
1983 * enable fault log
1984 * global invalidate context cache
1985 * global invalidate iotlb
1986 * enable translation
1987 */
1988 for_each_drhd_unit(drhd) {
1989 if (drhd->ignored)
1990 continue;
1991 iommu = drhd->iommu;
1992 		sprintf(iommu->name, "dmar%d", unit++);
1993
1994 iommu_flush_write_buffer(iommu);
1995
3460a6d9
KA
1996 ret = dmar_set_interrupt(iommu);
1997 if (ret)
1998 goto error;
1999
ba395927
KA
2000 iommu_set_root_entry(iommu);
2001
a77b67d4
YS
2002 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL,
2003 0);
2004 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH,
2005 0);
f8bab735 2006 iommu_disable_protect_mem_regions(iommu);
2007
ba395927
KA
2008 ret = iommu_enable_translation(iommu);
2009 if (ret)
2010 goto error;
2011 }
2012
2013 return 0;
2014error:
2015 for_each_drhd_unit(drhd) {
2016 if (drhd->ignored)
2017 continue;
2018 iommu = drhd->iommu;
2019 free_iommu(iommu);
2020 }
d9630fe9 2021 kfree(g_iommus);
ba395927
KA
2022 return ret;
2023}
2024
2025static inline u64 aligned_size(u64 host_addr, size_t size)
2026{
2027 u64 addr;
5b6985ce
FY
2028 addr = (host_addr & (~PAGE_MASK)) + size;
2029 return PAGE_ALIGN(addr);
ba395927
KA
2030}
2031
2032struct iova *
f76aec76 2033iommu_alloc_iova(struct dmar_domain *domain, size_t size, u64 end)
ba395927 2034{
ba395927
KA
2035 struct iova *piova;
2036
2037 /* Make sure it's in range */
ba395927 2038 end = min_t(u64, DOMAIN_MAX_ADDR(domain->gaw), end);
f76aec76 2039 if (!size || (IOVA_START_ADDR + size > end))
ba395927
KA
2040 return NULL;
2041
2042 piova = alloc_iova(&domain->iovad,
5b6985ce 2043 size >> PAGE_SHIFT, IOVA_PFN(end), 1);
ba395927
KA
2044 return piova;
2045}
2046
f76aec76
KA
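/*
 * Allocate an IOVA range for the device.  Devices with a 64-bit DMA
 * mask first try the space below 4GB (unless dmar_forcedac is set) and
 * fall back to the full mask only if that fails.
 */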
2047static struct iova *
2048__intel_alloc_iova(struct device *dev, struct dmar_domain *domain,
bb9e6d65 2049 size_t size, u64 dma_mask)
ba395927 2050{
ba395927 2051 struct pci_dev *pdev = to_pci_dev(dev);
ba395927 2052 struct iova *iova = NULL;
ba395927 2053
bb9e6d65
FT
2054 if (dma_mask <= DMA_32BIT_MASK || dmar_forcedac)
2055 iova = iommu_alloc_iova(domain, size, dma_mask);
2056 else {
ba395927
KA
2057 /*
2058 		 * First try to allocate an I/O virtual address within
2059 		 * DMA_32BIT_MASK and, if that fails, try allocating
3609801e 2060 		 * from the higher range
ba395927 2061 */
f76aec76 2062 iova = iommu_alloc_iova(domain, size, DMA_32BIT_MASK);
ba395927 2063 if (!iova)
bb9e6d65 2064 iova = iommu_alloc_iova(domain, size, dma_mask);
ba395927
KA
2065 }
2066
2067 if (!iova) {
2068 		printk(KERN_ERR "Allocating iova for %s failed\n", pci_name(pdev));
f76aec76
KA
2069 return NULL;
2070 }
2071
2072 return iova;
2073}
2074
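/*
 * Find (or create) the DMA remapping domain for a device and make sure
 * its context entry is programmed before any mapping is attempted.
 */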
2075static struct dmar_domain *
2076get_valid_domain_for_dev(struct pci_dev *pdev)
2077{
2078 struct dmar_domain *domain;
2079 int ret;
2080
2081 domain = get_domain_for_dev(pdev,
2082 DEFAULT_DOMAIN_ADDRESS_WIDTH);
2083 if (!domain) {
2084 printk(KERN_ERR
2085 			"Allocating domain for %s failed\n", pci_name(pdev));
4fe05bbc 2086 return NULL;
ba395927
KA
2087 }
2088
2089 /* make sure context mapping is ok */
2090 if (unlikely(!domain_context_mapped(domain, pdev))) {
2091 ret = domain_context_mapping(domain, pdev);
f76aec76
KA
2092 if (ret) {
2093 printk(KERN_ERR
2094 				"Domain context map for %s failed\n",
2095 pci_name(pdev));
4fe05bbc 2096 return NULL;
f76aec76 2097 }
ba395927
KA
2098 }
2099
f76aec76
KA
2100 return domain;
2101}
2102
bb9e6d65
FT
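/*
 * Core single-mapping path: allocate a page-aligned IOVA range covering
 * [paddr, paddr + size), map it with permissions derived from the DMA
 * direction, and flush the IOTLB for the new mapping.
 */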
2103static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
2104 size_t size, int dir, u64 dma_mask)
f76aec76
KA
2105{
2106 struct pci_dev *pdev = to_pci_dev(hwdev);
f76aec76 2107 struct dmar_domain *domain;
5b6985ce 2108 phys_addr_t start_paddr;
f76aec76
KA
2109 struct iova *iova;
2110 int prot = 0;
6865f0d1 2111 int ret;
8c11e798 2112 struct intel_iommu *iommu;
f76aec76
KA
2113
2114 BUG_ON(dir == DMA_NONE);
358dd8ac 2115 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
6865f0d1 2116 return paddr;
f76aec76
KA
2117
2118 domain = get_valid_domain_for_dev(pdev);
2119 if (!domain)
2120 return 0;
2121
8c11e798 2122 iommu = domain_get_iommu(domain);
6865f0d1 2123 size = aligned_size((u64)paddr, size);
f76aec76 2124
bb9e6d65 2125 iova = __intel_alloc_iova(hwdev, domain, size, pdev->dma_mask);
f76aec76
KA
2126 if (!iova)
2127 goto error;
2128
5b6985ce 2129 start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
f76aec76 2130
ba395927
KA
2131 /*
2132 	 * Check if DMAR supports zero-length reads on write-only
2133 	 * mappings.
2134 */
2135 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
8c11e798 2136 !cap_zlr(iommu->cap))
ba395927
KA
2137 prot |= DMA_PTE_READ;
2138 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
2139 prot |= DMA_PTE_WRITE;
2140 /*
6865f0d1 2141 	 * paddr to (paddr + size) might span a partial page, so map the whole
ba395927 2142 	 * page.  Note: if two parts of one page are separately mapped, we
6865f0d1 2143 	 * might have two guest_addrs mapping to the same host paddr, but this
ba395927
KA
2144 * is not a big problem
2145 */
6865f0d1 2146 ret = domain_page_mapping(domain, start_paddr,
5b6985ce 2147 ((u64)paddr) & PAGE_MASK, size, prot);
ba395927
KA
2148 if (ret)
2149 goto error;
2150
f76aec76 2151 /* it's a non-present to present mapping */
8c11e798 2152 ret = iommu_flush_iotlb_psi(iommu, domain->id,
5b6985ce 2153 start_paddr, size >> VTD_PAGE_SHIFT, 1);
f76aec76 2154 if (ret)
8c11e798 2155 iommu_flush_write_buffer(iommu);
f76aec76 2156
5b6985ce 2157 return start_paddr + ((u64)paddr & (~PAGE_MASK));
ba395927 2158
ba395927 2159error:
f76aec76
KA
2160 if (iova)
2161 __free_iova(&domain->iovad, iova);
ba395927 2162 	printk(KERN_ERR "Device %s request: %lx@%llx dir %d --- failed\n",
5b6985ce 2163 pci_name(pdev), size, (unsigned long long)paddr, dir);
ba395927
KA
2164 return 0;
2165}
2166
bb9e6d65
FT
2167dma_addr_t intel_map_single(struct device *hwdev, phys_addr_t paddr,
2168 size_t size, int dir)
2169{
2170 return __intel_map_single(hwdev, paddr, size, dir,
2171 to_pci_dev(hwdev)->dma_mask);
2172}
2173
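/*
 * Drain the deferred-unmap queues: one global IOTLB flush per IOMMU,
 * then free all of the IOVAs queued against it.  Called with
 * async_umap_flush_lock held.
 */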
5e0d2a6f 2174static void flush_unmaps(void)
2175{
80b20dd8 2176 int i, j;
5e0d2a6f 2177
5e0d2a6f 2178 timer_on = 0;
2179
2180 /* just flush them all */
2181 for (i = 0; i < g_num_of_iommus; i++) {
a2bb8459
WH
2182 struct intel_iommu *iommu = g_iommus[i];
2183 if (!iommu)
2184 continue;
c42d9f32 2185
a2bb8459 2186 if (deferred_flush[i].next) {
a77b67d4
YS
2187 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
2188 DMA_TLB_GLOBAL_FLUSH, 0);
80b20dd8 2189 for (j = 0; j < deferred_flush[i].next; j++) {
2190 __free_iova(&deferred_flush[i].domain[j]->iovad,
2191 deferred_flush[i].iova[j]);
2192 }
2193 deferred_flush[i].next = 0;
2194 }
5e0d2a6f 2195 }
2196
5e0d2a6f 2197 list_size = 0;
5e0d2a6f 2198}
2199
2200static void flush_unmaps_timeout(unsigned long data)
2201{
80b20dd8 2202 unsigned long flags;
2203
2204 spin_lock_irqsave(&async_umap_flush_lock, flags);
5e0d2a6f 2205 flush_unmaps();
80b20dd8 2206 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
5e0d2a6f 2207}
2208
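/*
 * Queue an IOVA for deferred freeing.  The queue is drained either from
 * the 10ms timer or immediately once HIGH_WATER_MARK entries pile up.
 */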
2209static void add_unmap(struct dmar_domain *dom, struct iova *iova)
2210{
2211 unsigned long flags;
80b20dd8 2212 int next, iommu_id;
8c11e798 2213 struct intel_iommu *iommu;
5e0d2a6f 2214
2215 spin_lock_irqsave(&async_umap_flush_lock, flags);
80b20dd8 2216 if (list_size == HIGH_WATER_MARK)
2217 flush_unmaps();
2218
8c11e798
WH
2219 iommu = domain_get_iommu(dom);
2220 iommu_id = iommu->seq_id;
c42d9f32 2221
80b20dd8 2222 next = deferred_flush[iommu_id].next;
2223 deferred_flush[iommu_id].domain[next] = dom;
2224 deferred_flush[iommu_id].iova[next] = iova;
2225 deferred_flush[iommu_id].next++;
5e0d2a6f 2226
2227 if (!timer_on) {
2228 mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10));
2229 timer_on = 1;
2230 }
2231 list_size++;
2232 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
2233}
2234
5b6985ce
FY
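/*
 * Tear down a single mapping: clear the PTEs, free the page tables and
 * either flush the IOTLB synchronously (intel_iommu_strict) or queue
 * the IOVA for deferred release.
 */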
2235void intel_unmap_single(struct device *dev, dma_addr_t dev_addr, size_t size,
2236 int dir)
ba395927 2237{
ba395927 2238 struct pci_dev *pdev = to_pci_dev(dev);
f76aec76
KA
2239 struct dmar_domain *domain;
2240 unsigned long start_addr;
ba395927 2241 struct iova *iova;
8c11e798 2242 struct intel_iommu *iommu;
ba395927 2243
358dd8ac 2244 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
f76aec76 2245 return;
ba395927
KA
2246 domain = find_domain(pdev);
2247 BUG_ON(!domain);
2248
8c11e798
WH
2249 iommu = domain_get_iommu(domain);
2250
ba395927 2251 iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
f76aec76 2252 if (!iova)
ba395927 2253 return;
ba395927 2254
5b6985ce 2255 start_addr = iova->pfn_lo << PAGE_SHIFT;
f76aec76 2256 size = aligned_size((u64)dev_addr, size);
ba395927 2257
f76aec76 2258 pr_debug("Device %s unmapping: %lx@%llx\n",
5b6985ce 2259 pci_name(pdev), size, (unsigned long long)start_addr);
ba395927 2260
f76aec76
KA
2261 /* clear the whole page */
2262 dma_pte_clear_range(domain, start_addr, start_addr + size);
2263 /* free page tables */
2264 dma_pte_free_pagetable(domain, start_addr, start_addr + size);
5e0d2a6f 2265 if (intel_iommu_strict) {
8c11e798 2266 if (iommu_flush_iotlb_psi(iommu,
5b6985ce 2267 domain->id, start_addr, size >> VTD_PAGE_SHIFT, 0))
8c11e798 2268 iommu_flush_write_buffer(iommu);
5e0d2a6f 2269 /* free iova */
2270 __free_iova(&domain->iovad, iova);
2271 } else {
2272 add_unmap(domain, iova);
2273 /*
2274 		 * queue up the release of the unmap to save roughly 1/6th of the
2275 		 * CPU time used up by the iotlb flush operation...
2276 */
5e0d2a6f 2277 }
ba395927
KA
2278}
2279
5b6985ce
FY
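/*
 * Allocate zeroed pages for a coherent buffer and map them
 * bidirectionally within the device's coherent DMA mask.
 */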
2280void *intel_alloc_coherent(struct device *hwdev, size_t size,
2281 dma_addr_t *dma_handle, gfp_t flags)
ba395927
KA
2282{
2283 void *vaddr;
2284 int order;
2285
5b6985ce 2286 size = PAGE_ALIGN(size);
ba395927
KA
2287 order = get_order(size);
2288 flags &= ~(GFP_DMA | GFP_DMA32);
2289
2290 vaddr = (void *)__get_free_pages(flags, order);
2291 if (!vaddr)
2292 return NULL;
2293 memset(vaddr, 0, size);
2294
bb9e6d65
FT
2295 *dma_handle = __intel_map_single(hwdev, virt_to_bus(vaddr), size,
2296 DMA_BIDIRECTIONAL,
2297 hwdev->coherent_dma_mask);
ba395927
KA
2298 if (*dma_handle)
2299 return vaddr;
2300 free_pages((unsigned long)vaddr, order);
2301 return NULL;
2302}
2303
5b6985ce
FY
2304void intel_free_coherent(struct device *hwdev, size_t size, void *vaddr,
2305 dma_addr_t dma_handle)
ba395927
KA
2306{
2307 int order;
2308
5b6985ce 2309 size = PAGE_ALIGN(size);
ba395927
KA
2310 order = get_order(size);
2311
2312 intel_unmap_single(hwdev, dma_handle, size, DMA_BIDIRECTIONAL);
2313 free_pages((unsigned long)vaddr, order);
2314}
2315
12d4d40e 2316#define SG_ENT_VIRT_ADDRESS(sg) (sg_virt((sg)))
5b6985ce
FY
2317
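/*
 * Unmap a scatterlist: recompute the total aligned length of the
 * segments, clear and free that IOVA range, then flush the IOTLB.
 */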
2318void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
2319 int nelems, int dir)
ba395927
KA
2320{
2321 int i;
2322 struct pci_dev *pdev = to_pci_dev(hwdev);
2323 struct dmar_domain *domain;
f76aec76
KA
2324 unsigned long start_addr;
2325 struct iova *iova;
2326 size_t size = 0;
2327 void *addr;
c03ab37c 2328 struct scatterlist *sg;
8c11e798 2329 struct intel_iommu *iommu;
ba395927 2330
358dd8ac 2331 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
ba395927
KA
2332 return;
2333
2334 domain = find_domain(pdev);
8c11e798
WH
2335 BUG_ON(!domain);
2336
2337 iommu = domain_get_iommu(domain);
ba395927 2338
c03ab37c 2339 iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address));
f76aec76
KA
2340 if (!iova)
2341 return;
c03ab37c 2342 for_each_sg(sglist, sg, nelems, i) {
f76aec76
KA
2343 addr = SG_ENT_VIRT_ADDRESS(sg);
2344 size += aligned_size((u64)addr, sg->length);
2345 }
2346
5b6985ce 2347 start_addr = iova->pfn_lo << PAGE_SHIFT;
f76aec76
KA
2348
2349 /* clear the whole page */
2350 dma_pte_clear_range(domain, start_addr, start_addr + size);
2351 /* free page tables */
2352 dma_pte_free_pagetable(domain, start_addr, start_addr + size);
2353
8c11e798 2354 if (iommu_flush_iotlb_psi(iommu, domain->id, start_addr,
5b6985ce 2355 size >> VTD_PAGE_SHIFT, 0))
8c11e798 2356 iommu_flush_write_buffer(iommu);
f76aec76
KA
2357
2358 /* free iova */
2359 __free_iova(&domain->iovad, iova);
ba395927
KA
2360}
2361
ba395927 2362static int intel_nontranslate_map_sg(struct device *hddev,
c03ab37c 2363 struct scatterlist *sglist, int nelems, int dir)
ba395927
KA
2364{
2365 int i;
c03ab37c 2366 struct scatterlist *sg;
ba395927 2367
c03ab37c 2368 for_each_sg(sglist, sg, nelems, i) {
12d4d40e 2369 BUG_ON(!sg_page(sg));
c03ab37c
FT
2370 sg->dma_address = virt_to_bus(SG_ENT_VIRT_ADDRESS(sg));
2371 sg->dma_length = sg->length;
ba395927
KA
2372 }
2373 return nelems;
2374}
2375
5b6985ce
FY
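/*
 * Map a scatterlist through the IOMMU: reserve one contiguous IOVA
 * range covering the total aligned length, then map each segment at
 * its running offset within that range.
 */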
2376int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
2377 int dir)
ba395927
KA
2378{
2379 void *addr;
2380 int i;
ba395927
KA
2381 struct pci_dev *pdev = to_pci_dev(hwdev);
2382 struct dmar_domain *domain;
f76aec76
KA
2383 size_t size = 0;
2384 int prot = 0;
2385 size_t offset = 0;
2386 struct iova *iova = NULL;
2387 int ret;
c03ab37c 2388 struct scatterlist *sg;
f76aec76 2389 unsigned long start_addr;
8c11e798 2390 struct intel_iommu *iommu;
ba395927
KA
2391
2392 BUG_ON(dir == DMA_NONE);
358dd8ac 2393 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
c03ab37c 2394 return intel_nontranslate_map_sg(hwdev, sglist, nelems, dir);
ba395927 2395
f76aec76
KA
2396 domain = get_valid_domain_for_dev(pdev);
2397 if (!domain)
2398 return 0;
2399
8c11e798
WH
2400 iommu = domain_get_iommu(domain);
2401
c03ab37c 2402 for_each_sg(sglist, sg, nelems, i) {
ba395927 2403 addr = SG_ENT_VIRT_ADDRESS(sg);
f76aec76
KA
2404 addr = (void *)virt_to_phys(addr);
2405 size += aligned_size((u64)addr, sg->length);
2406 }
2407
bb9e6d65 2408 iova = __intel_alloc_iova(hwdev, domain, size, pdev->dma_mask);
f76aec76 2409 if (!iova) {
c03ab37c 2410 sglist->dma_length = 0;
f76aec76
KA
2411 return 0;
2412 }
2413
2414 /*
2415 	 * Check if DMAR supports zero-length reads on write-only
2416 	 * mappings.
2417 */
2418 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
8c11e798 2419 !cap_zlr(iommu->cap))
f76aec76
KA
2420 prot |= DMA_PTE_READ;
2421 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
2422 prot |= DMA_PTE_WRITE;
2423
5b6985ce 2424 start_addr = iova->pfn_lo << PAGE_SHIFT;
f76aec76 2425 offset = 0;
c03ab37c 2426 for_each_sg(sglist, sg, nelems, i) {
f76aec76
KA
2427 addr = SG_ENT_VIRT_ADDRESS(sg);
2428 addr = (void *)virt_to_phys(addr);
2429 size = aligned_size((u64)addr, sg->length);
2430 ret = domain_page_mapping(domain, start_addr + offset,
5b6985ce 2431 ((u64)addr) & PAGE_MASK,
f76aec76
KA
2432 size, prot);
2433 if (ret) {
2434 /* clear the page */
2435 dma_pte_clear_range(domain, start_addr,
2436 start_addr + offset);
2437 /* free page tables */
2438 dma_pte_free_pagetable(domain, start_addr,
2439 start_addr + offset);
2440 /* free iova */
2441 __free_iova(&domain->iovad, iova);
ba395927
KA
2442 return 0;
2443 }
f76aec76 2444 sg->dma_address = start_addr + offset +
5b6985ce 2445 ((u64)addr & (~PAGE_MASK));
ba395927 2446 sg->dma_length = sg->length;
f76aec76 2447 offset += size;
ba395927
KA
2448 }
2449
ba395927 2450 /* it's a non-present to present mapping */
8c11e798 2451 if (iommu_flush_iotlb_psi(iommu, domain->id,
5b6985ce 2452 start_addr, offset >> VTD_PAGE_SHIFT, 1))
8c11e798 2453 iommu_flush_write_buffer(iommu);
ba395927
KA
2454 return nelems;
2455}
2456
2457static struct dma_mapping_ops intel_dma_ops = {
2458 .alloc_coherent = intel_alloc_coherent,
2459 .free_coherent = intel_free_coherent,
2460 .map_single = intel_map_single,
2461 .unmap_single = intel_unmap_single,
2462 .map_sg = intel_map_sg,
2463 .unmap_sg = intel_unmap_sg,
2464};
2465
2466static inline int iommu_domain_cache_init(void)
2467{
2468 int ret = 0;
2469
2470 iommu_domain_cache = kmem_cache_create("iommu_domain",
2471 sizeof(struct dmar_domain),
2472 0,
2473 SLAB_HWCACHE_ALIGN,
2474
2475 NULL);
2476 if (!iommu_domain_cache) {
2477 printk(KERN_ERR "Couldn't create iommu_domain cache\n");
2478 ret = -ENOMEM;
2479 }
2480
2481 return ret;
2482}
2483
2484static inline int iommu_devinfo_cache_init(void)
2485{
2486 int ret = 0;
2487
2488 iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
2489 sizeof(struct device_domain_info),
2490 0,
2491 SLAB_HWCACHE_ALIGN,
ba395927
KA
2492 NULL);
2493 if (!iommu_devinfo_cache) {
2494 printk(KERN_ERR "Couldn't create devinfo cache\n");
2495 ret = -ENOMEM;
2496 }
2497
2498 return ret;
2499}
2500
2501static inline int iommu_iova_cache_init(void)
2502{
2503 int ret = 0;
2504
2505 iommu_iova_cache = kmem_cache_create("iommu_iova",
2506 sizeof(struct iova),
2507 0,
2508 SLAB_HWCACHE_ALIGN,
ba395927
KA
2509 NULL);
2510 if (!iommu_iova_cache) {
2511 printk(KERN_ERR "Couldn't create iova cache\n");
2512 ret = -ENOMEM;
2513 }
2514
2515 return ret;
2516}
2517
2518static int __init iommu_init_mempool(void)
2519{
2520 int ret;
2521 ret = iommu_iova_cache_init();
2522 if (ret)
2523 return ret;
2524
2525 ret = iommu_domain_cache_init();
2526 if (ret)
2527 goto domain_error;
2528
2529 ret = iommu_devinfo_cache_init();
2530 if (!ret)
2531 return ret;
2532
2533 kmem_cache_destroy(iommu_domain_cache);
2534domain_error:
2535 kmem_cache_destroy(iommu_iova_cache);
2536
2537 return -ENOMEM;
2538}
2539
2540static void __init iommu_exit_mempool(void)
2541{
2542 kmem_cache_destroy(iommu_devinfo_cache);
2543 kmem_cache_destroy(iommu_domain_cache);
2544 kmem_cache_destroy(iommu_iova_cache);
2545
2546}
2547
ba395927
KA
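/*
 * Mark DRHD units that cover no PCI devices, or only graphics devices
 * when dmar_map_gfx is disabled, as ignored so no translation is set
 * up for them.
 */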
2548static void __init init_no_remapping_devices(void)
2549{
2550 struct dmar_drhd_unit *drhd;
2551
2552 for_each_drhd_unit(drhd) {
2553 if (!drhd->include_all) {
2554 int i;
2555 for (i = 0; i < drhd->devices_cnt; i++)
2556 if (drhd->devices[i] != NULL)
2557 break;
2558 /* ignore DMAR unit if no pci devices exist */
2559 if (i == drhd->devices_cnt)
2560 drhd->ignored = 1;
2561 }
2562 }
2563
2564 if (dmar_map_gfx)
2565 return;
2566
2567 for_each_drhd_unit(drhd) {
2568 int i;
2569 if (drhd->ignored || drhd->include_all)
2570 continue;
2571
2572 for (i = 0; i < drhd->devices_cnt; i++)
2573 if (drhd->devices[i] &&
2574 !IS_GFX_DEVICE(drhd->devices[i]))
2575 break;
2576
2577 if (i < drhd->devices_cnt)
2578 continue;
2579
2580 /* bypass IOMMU if it is just for gfx devices */
2581 drhd->ignored = 1;
2582 for (i = 0; i < drhd->devices_cnt; i++) {
2583 if (!drhd->devices[i])
2584 continue;
358dd8ac 2585 drhd->devices[i]->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
ba395927
KA
2586 }
2587 }
2588}
2589
2590int __init intel_iommu_init(void)
2591{
2592 int ret = 0;
2593
ba395927
KA
2594 if (dmar_table_init())
2595 return -ENODEV;
2596
1886e8a9
SS
2597 if (dmar_dev_scope_init())
2598 return -ENODEV;
2599
2ae21010
SS
2600 /*
2601 * Check the need for DMA-remapping initialization now.
2602 * Above initialization will also be used by Interrupt-remapping.
2603 */
2604 if (no_iommu || swiotlb || dmar_disabled)
2605 return -ENODEV;
2606
ba395927
KA
2607 iommu_init_mempool();
2608 dmar_init_reserved_ranges();
2609
2610 init_no_remapping_devices();
2611
2612 ret = init_dmars();
2613 if (ret) {
2614 printk(KERN_ERR "IOMMU: dmar init failed\n");
2615 put_iova_domain(&reserved_iova_list);
2616 iommu_exit_mempool();
2617 return ret;
2618 }
2619 printk(KERN_INFO
2620 "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");
2621
5e0d2a6f 2622 init_timer(&unmap_timer);
ba395927
KA
2623 force_iommu = 1;
2624 dma_ops = &intel_dma_ops;
2625 return 0;
2626}
e820482c 2627
38717946
KA
2628void intel_iommu_domain_exit(struct dmar_domain *domain)
2629{
2630 u64 end;
2631
2632 	/* Domain 0 is reserved, so don't process it */
2633 if (!domain)
2634 return;
2635
2636 end = DOMAIN_MAX_ADDR(domain->gaw);
5b6985ce 2637 end = end & (~VTD_PAGE_MASK);
38717946
KA
2638
2639 /* clear ptes */
2640 dma_pte_clear_range(domain, 0, end);
2641
2642 /* free page tables */
2643 dma_pte_free_pagetable(domain, 0, end);
2644
2645 iommu_free_domain(domain);
2646 free_domain_mem(domain);
2647}
2648EXPORT_SYMBOL_GPL(intel_iommu_domain_exit);
2649
2650struct dmar_domain *intel_iommu_domain_alloc(struct pci_dev *pdev)
2651{
2652 struct dmar_drhd_unit *drhd;
2653 struct dmar_domain *domain;
2654 struct intel_iommu *iommu;
2655
2656 drhd = dmar_find_matched_drhd_unit(pdev);
2657 if (!drhd) {
2658 printk(KERN_ERR "intel_iommu_domain_alloc: drhd == NULL\n");
2659 return NULL;
2660 }
2661
2662 iommu = drhd->iommu;
2663 if (!iommu) {
2664 printk(KERN_ERR
2665 "intel_iommu_domain_alloc: iommu == NULL\n");
2666 return NULL;
2667 }
2668 domain = iommu_alloc_domain(iommu);
2669 if (!domain) {
2670 printk(KERN_ERR
2671 "intel_iommu_domain_alloc: domain == NULL\n");
2672 return NULL;
2673 }
2674 if (domain_init(domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2675 printk(KERN_ERR
2676 "intel_iommu_domain_alloc: domain_init() failed\n");
2677 intel_iommu_domain_exit(domain);
2678 return NULL;
2679 }
2680 return domain;
2681}
2682EXPORT_SYMBOL_GPL(intel_iommu_domain_alloc);
2683
2684int intel_iommu_context_mapping(
2685 struct dmar_domain *domain, struct pci_dev *pdev)
2686{
2687 int rc;
2688 rc = domain_context_mapping(domain, pdev);
2689 return rc;
2690}
2691EXPORT_SYMBOL_GPL(intel_iommu_context_mapping);
2692
2693int intel_iommu_page_mapping(
2694 struct dmar_domain *domain, dma_addr_t iova,
2695 u64 hpa, size_t size, int prot)
2696{
2697 int rc;
2698 rc = domain_page_mapping(domain, iova, hpa, size, prot);
2699 return rc;
2700}
2701EXPORT_SYMBOL_GPL(intel_iommu_page_mapping);
2702
2703void intel_iommu_detach_dev(struct dmar_domain *domain, u8 bus, u8 devfn)
2704{
2705 detach_domain_for_dev(domain, bus, devfn);
2706}
2707EXPORT_SYMBOL_GPL(intel_iommu_detach_dev);
2708
2709struct dmar_domain *
2710intel_iommu_find_domain(struct pci_dev *pdev)
2711{
2712 return find_domain(pdev);
2713}
2714EXPORT_SYMBOL_GPL(intel_iommu_find_domain);
2715
2716int intel_iommu_found(void)
2717{
2718 return g_num_of_iommus;
2719}
2720EXPORT_SYMBOL_GPL(intel_iommu_found);
2721
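/*
 * Translate an IOVA to the host page frame number it is mapped to by
 * walking the domain's page table; returns 0 if no PTE exists.
 */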
2722u64 intel_iommu_iova_to_pfn(struct dmar_domain *domain, u64 iova)
2723{
2724 struct dma_pte *pte;
2725 u64 pfn;
2726
2727 pfn = 0;
2728 pte = addr_to_dma_pte(domain, iova);
2729
2730 if (pte)
19c239ce 2731 pfn = dma_pte_addr(pte);
38717946 2732
5b6985ce 2733 return pfn >> VTD_PAGE_SHIFT;
38717946
KA
2734}
2735EXPORT_SYMBOL_GPL(intel_iommu_iova_to_pfn);