1/*
2 * Copyright (c) 2006, Intel Corporation.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License along with
14 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15 * Place - Suite 330, Boston, MA 02111-1307 USA.
16 *
17 * Copyright (C) 2006-2008 Intel Corporation
18 * Author: Ashok Raj <ashok.raj@intel.com>
19 * Author: Shaohua Li <shaohua.li@intel.com>
20 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
21 * Author: Fenghua Yu <fenghua.yu@intel.com>
22 */
23
24#include <linux/init.h>
25#include <linux/bitmap.h>
26#include <linux/debugfs.h>
27#include <linux/slab.h>
28#include <linux/irq.h>
29#include <linux/interrupt.h>
30#include <linux/spinlock.h>
31#include <linux/pci.h>
32#include <linux/dmar.h>
33#include <linux/dma-mapping.h>
34#include <linux/mempool.h>
35#include <linux/timer.h>
36#include <linux/iova.h>
37#include <linux/intel-iommu.h>
38#include <asm/cacheflush.h>
39#include <asm/iommu.h>
40#include "pci.h"
41
42#define ROOT_SIZE VTD_PAGE_SIZE
43#define CONTEXT_SIZE VTD_PAGE_SIZE
44
45#define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
46#define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
47
48#define IOAPIC_RANGE_START (0xfee00000)
49#define IOAPIC_RANGE_END (0xfeefffff)
50#define IOVA_START_ADDR (0x1000)
51
52#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
53
54#define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1)
55
56#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
57#define DMA_32BIT_PFN IOVA_PFN(DMA_32BIT_MASK)
58#define DMA_64BIT_PFN IOVA_PFN(DMA_64BIT_MASK)
59
60/*
61 * 0: Present
62 * 1-11: Reserved
63 * 12-63: Context Ptr (12 - (haw-1))
64 * 64-127: Reserved
65 */
66struct root_entry {
67 u64 val;
68 u64 rsvd1;
69};
70#define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
71static inline bool root_present(struct root_entry *root)
72{
73 return (root->val & 1);
74}
75static inline void set_root_present(struct root_entry *root)
76{
77 root->val |= 1;
78}
79static inline void set_root_value(struct root_entry *root, unsigned long value)
80{
81 root->val |= value & VTD_PAGE_MASK;
82}
83
84static inline struct context_entry *
85get_context_addr_from_root(struct root_entry *root)
86{
87 return (struct context_entry *)
88 (root_present(root)?phys_to_virt(
89 root->val & VTD_PAGE_MASK) :
90 NULL);
91}
92
93/*
94 * low 64 bits:
95 * 0: present
96 * 1: fault processing disable
97 * 2-3: translation type
98 * 12-63: address space root
99 * high 64 bits:
100 * 0-2: address width
101 * 3-6: aval
102 * 8-23: domain id
103 */
104struct context_entry {
105 u64 lo;
106 u64 hi;
107};
108#define context_present(c) ((c).lo & 1)
109#define context_fault_disable(c) (((c).lo >> 1) & 1)
110#define context_translation_type(c) (((c).lo >> 2) & 3)
111#define context_address_root(c) ((c).lo & VTD_PAGE_MASK)
112#define context_address_width(c) ((c).hi & 7)
113#define context_domain_id(c) (((c).hi >> 8) & ((1 << 16) - 1))
114
115#define context_set_present(c) do {(c).lo |= 1;} while (0)
116#define context_set_fault_enable(c) \
117 do {(c).lo &= (((u64)-1) << 2) | 1;} while (0)
118#define context_set_translation_type(c, val) \
119 do { \
120 (c).lo &= (((u64)-1) << 4) | 3; \
121 (c).lo |= ((val) & 3) << 2; \
122 } while (0)
123#define CONTEXT_TT_MULTI_LEVEL 0
124#define context_set_address_root(c, val) \
125 do {(c).lo |= (val) & VTD_PAGE_MASK; } while (0)
126#define context_set_address_width(c, val) do {(c).hi |= (val) & 7;} while (0)
127#define context_set_domain_id(c, val) \
128 do {(c).hi |= ((val) & ((1 << 16) - 1)) << 8;} while (0)
129#define context_clear_entry(c) do {(c).lo = 0; (c).hi = 0;} while (0)
130
131/*
132 * 0: readable
133 * 1: writable
134 * 2-6: reserved
135 * 7: super page
136 * 8-11: available
137 * 12-63: Host physical address
138 */
139struct dma_pte {
140 u64 val;
141};
142#define dma_clear_pte(p) do {(p).val = 0;} while (0)
143
144#define dma_set_pte_readable(p) do {(p).val |= DMA_PTE_READ;} while (0)
145#define dma_set_pte_writable(p) do {(p).val |= DMA_PTE_WRITE;} while (0)
146#define dma_set_pte_prot(p, prot) \
147 do {(p).val = ((p).val & ~3) | ((prot) & 3); } while (0)
148#define dma_pte_addr(p) ((p).val & VTD_PAGE_MASK)
149#define dma_set_pte_addr(p, addr) do {\
150 (p).val |= ((addr) & VTD_PAGE_MASK); } while (0)
151#define dma_pte_present(p) (((p).val & 3) != 0)
152
153static void flush_unmaps_timeout(unsigned long data);
154
155DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0);
156
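/*
 * Deferred IOTLB flushing: unmapped IOVAs are queued per IOMMU in
 * deferred_flush[] and released in one batch after a global IOTLB flush,
 * either when HIGH_WATER_MARK entries have accumulated or when unmap_timer
 * fires (see add_unmap()/flush_unmaps() below). Booting with
 * intel_iommu=strict disables this batching.
 */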
157#define HIGH_WATER_MARK 250
158struct deferred_flush_tables {
159 int next;
160 struct iova *iova[HIGH_WATER_MARK];
161 struct dmar_domain *domain[HIGH_WATER_MARK];
162};
163
164static struct deferred_flush_tables *deferred_flush;
165
166/* number of IOMMUs in the system */
167static int g_num_of_iommus;
168
169static DEFINE_SPINLOCK(async_umap_flush_lock);
170static LIST_HEAD(unmaps_to_do);
171
172static int timer_on;
173static long list_size;
174
175static void domain_remove_dev_info(struct dmar_domain *domain);
176
177int dmar_disabled;
178static int __initdata dmar_map_gfx = 1;
179static int dmar_forcedac;
180static int intel_iommu_strict;
181
182#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
183static DEFINE_SPINLOCK(device_domain_lock);
184static LIST_HEAD(device_domain_list);
185
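/*
 * Parse the intel_iommu= boot parameter. Recognized comma-separated
 * options: "off", "igfx_off", "forcedac", "strict".
 */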
186static int __init intel_iommu_setup(char *str)
187{
188 if (!str)
189 return -EINVAL;
190 while (*str) {
191 if (!strncmp(str, "off", 3)) {
192 dmar_disabled = 1;
193 printk(KERN_INFO"Intel-IOMMU: disabled\n");
194 } else if (!strncmp(str, "igfx_off", 8)) {
195 dmar_map_gfx = 0;
196 printk(KERN_INFO
197 "Intel-IOMMU: disable GFX device mapping\n");
198 } else if (!strncmp(str, "forcedac", 8)) {
199 printk(KERN_INFO
200 "Intel-IOMMU: Forcing DAC for PCI devices\n");
201 dmar_forcedac = 1;
202 } else if (!strncmp(str, "strict", 6)) {
203 printk(KERN_INFO
204 "Intel-IOMMU: disable batched IOTLB flush\n");
205 intel_iommu_strict = 1;
206 }
207
208 str += strcspn(str, ",");
209 while (*str == ',')
210 str++;
211 }
212 return 0;
213}
214__setup("intel_iommu=", intel_iommu_setup);
215
216static struct kmem_cache *iommu_domain_cache;
217static struct kmem_cache *iommu_devinfo_cache;
218static struct kmem_cache *iommu_iova_cache;
219
220static inline void *iommu_kmem_cache_alloc(struct kmem_cache *cachep)
221{
222 unsigned int flags;
223 void *vaddr;
224
225 /* trying to avoid low memory issues */
226 flags = current->flags & PF_MEMALLOC;
227 current->flags |= PF_MEMALLOC;
228 vaddr = kmem_cache_alloc(cachep, GFP_ATOMIC);
229 current->flags &= (~PF_MEMALLOC | flags);
230 return vaddr;
231}
232
233
234static inline void *alloc_pgtable_page(void)
235{
236 unsigned int flags;
237 void *vaddr;
238
239 /* trying to avoid low memory issues */
240 flags = current->flags & PF_MEMALLOC;
241 current->flags |= PF_MEMALLOC;
242 vaddr = (void *)get_zeroed_page(GFP_ATOMIC);
243 current->flags &= (~PF_MEMALLOC | flags);
244 return vaddr;
245}
246
247static inline void free_pgtable_page(void *vaddr)
248{
249 free_page((unsigned long)vaddr);
250}
251
252static inline void *alloc_domain_mem(void)
253{
254 return iommu_kmem_cache_alloc(iommu_domain_cache);
255}
256
257static void free_domain_mem(void *vaddr)
258{
259 kmem_cache_free(iommu_domain_cache, vaddr);
260}
261
262static inline void * alloc_devinfo_mem(void)
263{
264 return iommu_kmem_cache_alloc(iommu_devinfo_cache);
265}
266
267static inline void free_devinfo_mem(void *vaddr)
268{
269 kmem_cache_free(iommu_devinfo_cache, vaddr);
270}
271
272struct iova *alloc_iova_mem(void)
273{
274 return iommu_kmem_cache_alloc(iommu_iova_cache);
275}
276
277void free_iova_mem(struct iova *iova)
278{
279 kmem_cache_free(iommu_iova_cache, iova);
280}
281
282/* Gets context entry for a given bus and devfn */
283static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
284 u8 bus, u8 devfn)
285{
286 struct root_entry *root;
287 struct context_entry *context;
288 unsigned long phy_addr;
289 unsigned long flags;
290
291 spin_lock_irqsave(&iommu->lock, flags);
292 root = &iommu->root_entry[bus];
293 context = get_context_addr_from_root(root);
294 if (!context) {
295 context = (struct context_entry *)alloc_pgtable_page();
296 if (!context) {
297 spin_unlock_irqrestore(&iommu->lock, flags);
298 return NULL;
299 }
300 __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
301 phy_addr = virt_to_phys((void *)context);
302 set_root_value(root, phy_addr);
303 set_root_present(root);
304 __iommu_flush_cache(iommu, root, sizeof(*root));
305 }
306 spin_unlock_irqrestore(&iommu->lock, flags);
307 return &context[devfn];
308}
309
310static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
311{
312 struct root_entry *root;
313 struct context_entry *context;
314 int ret;
315 unsigned long flags;
316
317 spin_lock_irqsave(&iommu->lock, flags);
318 root = &iommu->root_entry[bus];
319 context = get_context_addr_from_root(root);
320 if (!context) {
321 ret = 0;
322 goto out;
323 }
324 ret = context_present(context[devfn]);
325out:
326 spin_unlock_irqrestore(&iommu->lock, flags);
327 return ret;
328}
329
330static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
331{
332 struct root_entry *root;
333 struct context_entry *context;
334 unsigned long flags;
335
336 spin_lock_irqsave(&iommu->lock, flags);
337 root = &iommu->root_entry[bus];
338 context = get_context_addr_from_root(root);
339 if (context) {
340 context_clear_entry(context[devfn]);
341 __iommu_flush_cache(iommu, &context[devfn], \
342 sizeof(*context));
343 }
344 spin_unlock_irqrestore(&iommu->lock, flags);
345}
346
347static void free_context_table(struct intel_iommu *iommu)
348{
349 struct root_entry *root;
350 int i;
351 unsigned long flags;
352 struct context_entry *context;
353
354 spin_lock_irqsave(&iommu->lock, flags);
355 if (!iommu->root_entry) {
356 goto out;
357 }
358 for (i = 0; i < ROOT_ENTRY_NR; i++) {
359 root = &iommu->root_entry[i];
360 context = get_context_addr_from_root(root);
361 if (context)
362 free_pgtable_page(context);
363 }
364 free_pgtable_page(iommu->root_entry);
365 iommu->root_entry = NULL;
366out:
367 spin_unlock_irqrestore(&iommu->lock, flags);
368}
369
370/* page table handling */
371#define LEVEL_STRIDE (9)
372#define LEVEL_MASK (((u64)1 << LEVEL_STRIDE) - 1)
373
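/*
 * AGAW (adjusted guest address width) encodes how many 9-bit page-table
 * levels sit above the base 30-bit/2-level layout: agaw 1 = 39-bit/3-level,
 * agaw 2 = 48-bit/4-level, agaw 3 = 57-bit/5-level.
 */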
374static inline int agaw_to_level(int agaw)
375{
376 return agaw + 2;
377}
378
379static inline int agaw_to_width(int agaw)
380{
381 return 30 + agaw * LEVEL_STRIDE;
382
383}
384
385static inline int width_to_agaw(int width)
386{
387 return (width - 30) / LEVEL_STRIDE;
388}
389
390static inline unsigned int level_to_offset_bits(int level)
391{
392 return (12 + (level - 1) * LEVEL_STRIDE);
393}
394
395static inline int address_level_offset(u64 addr, int level)
396{
397 return ((addr >> level_to_offset_bits(level)) & LEVEL_MASK);
398}
399
400static inline u64 level_mask(int level)
401{
402 return ((u64)-1 << level_to_offset_bits(level));
403}
404
405static inline u64 level_size(int level)
406{
407 return ((u64)1 << level_to_offset_bits(level));
408}
409
410static inline u64 align_to_level(u64 addr, int level)
411{
412 return ((addr + level_size(level) - 1) & level_mask(level));
413}
414
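/*
 * Walk the domain's page table from the top level down to level 1 for
 * @addr, allocating intermediate page-table pages on demand. Returns the
 * last-level PTE for the address, or NULL if an allocation fails.
 */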
415static struct dma_pte * addr_to_dma_pte(struct dmar_domain *domain, u64 addr)
416{
417 int addr_width = agaw_to_width(domain->agaw);
418 struct dma_pte *parent, *pte = NULL;
419 int level = agaw_to_level(domain->agaw);
420 int offset;
421 unsigned long flags;
422
423 BUG_ON(!domain->pgd);
424
425 addr &= (((u64)1) << addr_width) - 1;
426 parent = domain->pgd;
427
428 spin_lock_irqsave(&domain->mapping_lock, flags);
429 while (level > 0) {
430 void *tmp_page;
431
432 offset = address_level_offset(addr, level);
433 pte = &parent[offset];
434 if (level == 1)
435 break;
436
437 if (!dma_pte_present(*pte)) {
438 tmp_page = alloc_pgtable_page();
439
440 if (!tmp_page) {
441 spin_unlock_irqrestore(&domain->mapping_lock,
442 flags);
443 return NULL;
444 }
445 __iommu_flush_cache(domain->iommu, tmp_page,
446 PAGE_SIZE);
447 dma_set_pte_addr(*pte, virt_to_phys(tmp_page));
448 /*
449 * high level table always sets r/w, last level page
450 * table control read/write
451 */
452 dma_set_pte_readable(*pte);
453 dma_set_pte_writable(*pte);
454 __iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
455 }
456 parent = phys_to_virt(dma_pte_addr(*pte));
457 level--;
458 }
459
460 spin_unlock_irqrestore(&domain->mapping_lock, flags);
461 return pte;
462}
463
464/* return address's pte at specific level */
465static struct dma_pte *dma_addr_level_pte(struct dmar_domain *domain, u64 addr,
466 int level)
467{
468 struct dma_pte *parent, *pte = NULL;
469 int total = agaw_to_level(domain->agaw);
470 int offset;
471
472 parent = domain->pgd;
473 while (level <= total) {
474 offset = address_level_offset(addr, total);
475 pte = &parent[offset];
476 if (level == total)
477 return pte;
478
479 if (!dma_pte_present(*pte))
480 break;
481 parent = phys_to_virt(dma_pte_addr(*pte));
482 total--;
483 }
484 return NULL;
485}
486
487/* clear one page's page table */
488static void dma_pte_clear_one(struct dmar_domain *domain, u64 addr)
489{
490 struct dma_pte *pte = NULL;
491
492 /* get last level pte */
493 pte = dma_addr_level_pte(domain, addr, 1);
494
495 if (pte) {
496 dma_clear_pte(*pte);
497 __iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
498 }
499}
500
501/* clear last level pte, a tlb flush should be followed */
502static void dma_pte_clear_range(struct dmar_domain *domain, u64 start, u64 end)
503{
504 int addr_width = agaw_to_width(domain->agaw);
505
506 start &= (((u64)1) << addr_width) - 1;
507 end &= (((u64)1) << addr_width) - 1;
508 /* in case it's partial page */
509 start = PAGE_ALIGN(start);
510 end &= PAGE_MASK;
511
512 /* we don't need lock here, nobody else touches the iova range */
513 while (start < end) {
514 dma_pte_clear_one(domain, start);
515 start += VTD_PAGE_SIZE;
516 }
517}
518
519/* free page table pages. last level pte should already be cleared */
520static void dma_pte_free_pagetable(struct dmar_domain *domain,
521 u64 start, u64 end)
522{
523 int addr_width = agaw_to_width(domain->agaw);
524 struct dma_pte *pte;
525 int total = agaw_to_level(domain->agaw);
526 int level;
527 u64 tmp;
528
529 start &= (((u64)1) << addr_width) - 1;
530 end &= (((u64)1) << addr_width) - 1;
531
532 /* we don't need lock here, nobody else touches the iova range */
533 level = 2;
534 while (level <= total) {
535 tmp = align_to_level(start, level);
536 if (tmp >= end || (tmp + level_size(level) > end))
537 return;
538
539 while (tmp < end) {
540 pte = dma_addr_level_pte(domain, tmp, level);
541 if (pte) {
542 free_pgtable_page(
543 phys_to_virt(dma_pte_addr(*pte)));
544 dma_clear_pte(*pte);
545 __iommu_flush_cache(domain->iommu,
546 pte, sizeof(*pte));
547 }
548 tmp += level_size(level);
549 }
550 level++;
551 }
552 /* free pgd */
553 if (start == 0 && end >= ((((u64)1) << addr_width) - 1)) {
554 free_pgtable_page(domain->pgd);
555 domain->pgd = NULL;
556 }
557}
558
559/* iommu handling */
560static int iommu_alloc_root_entry(struct intel_iommu *iommu)
561{
562 struct root_entry *root;
563 unsigned long flags;
564
565 root = (struct root_entry *)alloc_pgtable_page();
566 if (!root)
567 return -ENOMEM;
568
569 __iommu_flush_cache(iommu, root, ROOT_SIZE);
570
571 spin_lock_irqsave(&iommu->lock, flags);
572 iommu->root_entry = root;
573 spin_unlock_irqrestore(&iommu->lock, flags);
574
575 return 0;
576}
577
578static void iommu_set_root_entry(struct intel_iommu *iommu)
579{
580 void *addr;
581 u32 cmd, sts;
582 unsigned long flag;
583
584 addr = iommu->root_entry;
585
586 spin_lock_irqsave(&iommu->register_lock, flag);
587 dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));
588
589 cmd = iommu->gcmd | DMA_GCMD_SRTP;
590 writel(cmd, iommu->reg + DMAR_GCMD_REG);
591
592 /* Make sure hardware complete it */
593 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
594 readl, (sts & DMA_GSTS_RTPS), sts);
595
596 spin_unlock_irqrestore(&iommu->register_lock, flag);
597}
598
599static void iommu_flush_write_buffer(struct intel_iommu *iommu)
600{
601 u32 val;
602 unsigned long flag;
603
604 if (!cap_rwbf(iommu->cap))
605 return;
606 val = iommu->gcmd | DMA_GCMD_WBF;
607
608 spin_lock_irqsave(&iommu->register_lock, flag);
609 writel(val, iommu->reg + DMAR_GCMD_REG);
610
611 /* Make sure hardware complete it */
612 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
613 readl, (!(val & DMA_GSTS_WBFS)), val);
614
615 spin_unlock_irqrestore(&iommu->register_lock, flag);
616}
617
618/* return value determines whether we need a write buffer flush */
619static int __iommu_flush_context(struct intel_iommu *iommu,
620 u16 did, u16 source_id, u8 function_mask, u64 type,
621 int non_present_entry_flush)
622{
623 u64 val = 0;
624 unsigned long flag;
625
626 /*
627 * In the non-present entry flush case, if hardware doesn't cache
628 * non-present entry we do nothing and if hardware cache non-present
629 * entry, we flush entries of domain 0 (the domain id is used to cache
630 * any non-present entries)
631 */
632 if (non_present_entry_flush) {
633 if (!cap_caching_mode(iommu->cap))
634 return 1;
635 else
636 did = 0;
637 }
638
639 switch (type) {
640 case DMA_CCMD_GLOBAL_INVL:
641 val = DMA_CCMD_GLOBAL_INVL;
642 break;
643 case DMA_CCMD_DOMAIN_INVL:
644 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
645 break;
646 case DMA_CCMD_DEVICE_INVL:
647 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
648 | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
649 break;
650 default:
651 BUG();
652 }
653 val |= DMA_CCMD_ICC;
654
655 spin_lock_irqsave(&iommu->register_lock, flag);
656 dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
657
658 /* Make sure hardware complete it */
659 IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
660 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
661
662 spin_unlock_irqrestore(&iommu->register_lock, flag);
663
664 /* flush context entry will implicitly flush write buffer */
665 return 0;
666}
667
668/* return value determines whether we need a write buffer flush */
669static int __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
670 u64 addr, unsigned int size_order, u64 type,
671 int non_present_entry_flush)
672{
673 int tlb_offset = ecap_iotlb_offset(iommu->ecap);
674 u64 val = 0, val_iva = 0;
675 unsigned long flag;
676
677 /*
678 * In the non-present entry flush case, if hardware doesn't cache
679 * non-present entry we do nothing and if hardware cache non-present
680 * entry, we flush entries of domain 0 (the domain id is used to cache
681 * any non-present entries)
682 */
683 if (non_present_entry_flush) {
684 if (!cap_caching_mode(iommu->cap))
685 return 1;
686 else
687 did = 0;
688 }
689
690 switch (type) {
691 case DMA_TLB_GLOBAL_FLUSH:
692 /* global flush doesn't need set IVA_REG */
693 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
694 break;
695 case DMA_TLB_DSI_FLUSH:
696 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
697 break;
698 case DMA_TLB_PSI_FLUSH:
699 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
700 /* Note: always flush non-leaf currently */
701 val_iva = size_order | addr;
702 break;
703 default:
704 BUG();
705 }
706 /* Note: set drain read/write */
707#if 0
708 /*
709 * This is probably to be super secure.. Looks like we can
710 * ignore it without any impact.
711 */
712 if (cap_read_drain(iommu->cap))
713 val |= DMA_TLB_READ_DRAIN;
714#endif
715 if (cap_write_drain(iommu->cap))
716 val |= DMA_TLB_WRITE_DRAIN;
717
718 spin_lock_irqsave(&iommu->register_lock, flag);
719 /* Note: Only uses first TLB reg currently */
720 if (val_iva)
721 dmar_writeq(iommu->reg + tlb_offset, val_iva);
722 dmar_writeq(iommu->reg + tlb_offset + 8, val);
723
724 /* Make sure hardware complete it */
725 IOMMU_WAIT_OP(iommu, tlb_offset + 8,
726 dmar_readq, (!(val & DMA_TLB_IVT)), val);
727
728 spin_unlock_irqrestore(&iommu->register_lock, flag);
729
730 /* check IOTLB invalidation granularity */
731 if (DMA_TLB_IAIG(val) == 0)
732 printk(KERN_ERR"IOMMU: flush IOTLB failed\n");
733 if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
734 pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
735 (unsigned long long)DMA_TLB_IIRG(type),
736 (unsigned long long)DMA_TLB_IAIG(val));
737 /* flush iotlb entry will implicitly flush write buffer */
738 return 0;
739}
740
741static int iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
742 u64 addr, unsigned int pages, int non_present_entry_flush)
743{
744 unsigned int mask;
745
746 BUG_ON(addr & (~VTD_PAGE_MASK));
747 BUG_ON(pages == 0);
748
749 /* Fallback to domain selective flush if no PSI support */
750 if (!cap_pgsel_inv(iommu->cap))
751 return iommu->flush.flush_iotlb(iommu, did, 0, 0,
752 DMA_TLB_DSI_FLUSH,
753 non_present_entry_flush);
754
755 /*
756 * PSI requires page size to be 2 ^ x, and the base address is naturally
757 * aligned to the size
758 */
759 mask = ilog2(__roundup_pow_of_two(pages));
760 /* Fallback to domain selective flush if size is too big */
761 if (mask > cap_max_amask_val(iommu->cap))
762 return iommu->flush.flush_iotlb(iommu, did, 0, 0,
763 DMA_TLB_DSI_FLUSH, non_present_entry_flush);
764
765 return iommu->flush.flush_iotlb(iommu, did, addr, mask,
766 DMA_TLB_PSI_FLUSH,
767 non_present_entry_flush);
768}
769
770static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
771{
772 u32 pmen;
773 unsigned long flags;
774
775 spin_lock_irqsave(&iommu->register_lock, flags);
776 pmen = readl(iommu->reg + DMAR_PMEN_REG);
777 pmen &= ~DMA_PMEN_EPM;
778 writel(pmen, iommu->reg + DMAR_PMEN_REG);
779
780 /* wait for the protected region status bit to clear */
781 IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
782 readl, !(pmen & DMA_PMEN_PRS), pmen);
783
784 spin_unlock_irqrestore(&iommu->register_lock, flags);
785}
786
787static int iommu_enable_translation(struct intel_iommu *iommu)
788{
789 u32 sts;
790 unsigned long flags;
791
792 spin_lock_irqsave(&iommu->register_lock, flags);
793 writel(iommu->gcmd|DMA_GCMD_TE, iommu->reg + DMAR_GCMD_REG);
794
795 /* Make sure hardware complete it */
796 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
797 readl, (sts & DMA_GSTS_TES), sts);
798
799 iommu->gcmd |= DMA_GCMD_TE;
800 spin_unlock_irqrestore(&iommu->register_lock, flags);
801 return 0;
802}
803
804static int iommu_disable_translation(struct intel_iommu *iommu)
805{
806 u32 sts;
807 unsigned long flag;
808
809 spin_lock_irqsave(&iommu->register_lock, flag);
810 iommu->gcmd &= ~DMA_GCMD_TE;
811 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
812
813 /* Make sure hardware complete it */
814 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
815 readl, (!(sts & DMA_GSTS_TES)), sts);
816
817 spin_unlock_irqrestore(&iommu->register_lock, flag);
818 return 0;
819}
820
821/* iommu interrupt handling. Most stuff are MSI-like. */
822
823static const char *fault_reason_strings[] =
824{
825 "Software",
826 "Present bit in root entry is clear",
827 "Present bit in context entry is clear",
828 "Invalid context entry",
829 "Access beyond MGAW",
830 "PTE Write access is not set",
831 "PTE Read access is not set",
832 "Next page table ptr is invalid",
833 "Root table address invalid",
834 "Context table ptr is invalid",
835 "non-zero reserved fields in RTP",
836 "non-zero reserved fields in CTP",
837 "non-zero reserved fields in PTE",
838};
839#define MAX_FAULT_REASON_IDX (ARRAY_SIZE(fault_reason_strings) - 1)
840
841const char *dmar_get_fault_reason(u8 fault_reason)
842{
843 if (fault_reason > MAX_FAULT_REASON_IDX)
844 return "Unknown";
845 else
846 return fault_reason_strings[fault_reason];
847}
848
849void dmar_msi_unmask(unsigned int irq)
850{
851 struct intel_iommu *iommu = get_irq_data(irq);
852 unsigned long flag;
853
854 /* unmask it */
855 spin_lock_irqsave(&iommu->register_lock, flag);
856 writel(0, iommu->reg + DMAR_FECTL_REG);
857 /* Read a reg to force flush the post write */
858 readl(iommu->reg + DMAR_FECTL_REG);
859 spin_unlock_irqrestore(&iommu->register_lock, flag);
860}
861
862void dmar_msi_mask(unsigned int irq)
863{
864 unsigned long flag;
865 struct intel_iommu *iommu = get_irq_data(irq);
866
867 /* mask it */
868 spin_lock_irqsave(&iommu->register_lock, flag);
869 writel(DMA_FECTL_IM, iommu->reg + DMAR_FECTL_REG);
870 /* Read a reg to force flush the post write */
871 readl(iommu->reg + DMAR_FECTL_REG);
872 spin_unlock_irqrestore(&iommu->register_lock, flag);
873}
874
875void dmar_msi_write(int irq, struct msi_msg *msg)
876{
877 struct intel_iommu *iommu = get_irq_data(irq);
878 unsigned long flag;
879
880 spin_lock_irqsave(&iommu->register_lock, flag);
881 writel(msg->data, iommu->reg + DMAR_FEDATA_REG);
882 writel(msg->address_lo, iommu->reg + DMAR_FEADDR_REG);
883 writel(msg->address_hi, iommu->reg + DMAR_FEUADDR_REG);
884 spin_unlock_irqrestore(&iommu->register_lock, flag);
885}
886
887void dmar_msi_read(int irq, struct msi_msg *msg)
888{
889 struct intel_iommu *iommu = get_irq_data(irq);
890 unsigned long flag;
891
892 spin_lock_irqsave(&iommu->register_lock, flag);
893 msg->data = readl(iommu->reg + DMAR_FEDATA_REG);
894 msg->address_lo = readl(iommu->reg + DMAR_FEADDR_REG);
895 msg->address_hi = readl(iommu->reg + DMAR_FEUADDR_REG);
896 spin_unlock_irqrestore(&iommu->register_lock, flag);
897}
898
899static int iommu_page_fault_do_one(struct intel_iommu *iommu, int type,
900 u8 fault_reason, u16 source_id, unsigned long long addr)
901{
902 const char *reason;
903
904 reason = dmar_get_fault_reason(fault_reason);
905
906 printk(KERN_ERR
907 "DMAR:[%s] Request device [%02x:%02x.%d] "
908 "fault addr %llx \n"
909 "DMAR:[fault reason %02d] %s\n",
910 (type ? "DMA Read" : "DMA Write"),
911 (source_id >> 8), PCI_SLOT(source_id & 0xFF),
912 PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason);
913 return 0;
914}
915
916#define PRIMARY_FAULT_REG_LEN (16)
917static irqreturn_t iommu_page_fault(int irq, void *dev_id)
918{
919 struct intel_iommu *iommu = dev_id;
920 int reg, fault_index;
921 u32 fault_status;
922 unsigned long flag;
923
924 spin_lock_irqsave(&iommu->register_lock, flag);
925 fault_status = readl(iommu->reg + DMAR_FSTS_REG);
926
927 /* TBD: ignore advanced fault log currently */
928 if (!(fault_status & DMA_FSTS_PPF))
929 goto clear_overflow;
930
931 fault_index = dma_fsts_fault_record_index(fault_status);
932 reg = cap_fault_reg_offset(iommu->cap);
933 while (1) {
934 u8 fault_reason;
935 u16 source_id;
936 u64 guest_addr;
937 int type;
938 u32 data;
939
940 /* highest 32 bits */
941 data = readl(iommu->reg + reg +
942 fault_index * PRIMARY_FAULT_REG_LEN + 12);
943 if (!(data & DMA_FRCD_F))
944 break;
945
946 fault_reason = dma_frcd_fault_reason(data);
947 type = dma_frcd_type(data);
948
949 data = readl(iommu->reg + reg +
950 fault_index * PRIMARY_FAULT_REG_LEN + 8);
951 source_id = dma_frcd_source_id(data);
952
953 guest_addr = dmar_readq(iommu->reg + reg +
954 fault_index * PRIMARY_FAULT_REG_LEN);
955 guest_addr = dma_frcd_page_addr(guest_addr);
956 /* clear the fault */
957 writel(DMA_FRCD_F, iommu->reg + reg +
958 fault_index * PRIMARY_FAULT_REG_LEN + 12);
959
960 spin_unlock_irqrestore(&iommu->register_lock, flag);
961
962 iommu_page_fault_do_one(iommu, type, fault_reason,
963 source_id, guest_addr);
964
965 fault_index++;
966 if (fault_index > cap_num_fault_regs(iommu->cap))
967 fault_index = 0;
968 spin_lock_irqsave(&iommu->register_lock, flag);
969 }
970clear_overflow:
971 /* clear primary fault overflow */
972 fault_status = readl(iommu->reg + DMAR_FSTS_REG);
973 if (fault_status & DMA_FSTS_PFO)
974 writel(DMA_FSTS_PFO, iommu->reg + DMAR_FSTS_REG);
975
976 spin_unlock_irqrestore(&iommu->register_lock, flag);
977 return IRQ_HANDLED;
978}
979
980int dmar_set_interrupt(struct intel_iommu *iommu)
981{
982 int irq, ret;
983
984 irq = create_irq();
985 if (!irq) {
986 printk(KERN_ERR "IOMMU: no free vectors\n");
987 return -EINVAL;
988 }
989
990 set_irq_data(irq, iommu);
991 iommu->irq = irq;
992
993 ret = arch_setup_dmar_msi(irq);
994 if (ret) {
995 set_irq_data(irq, NULL);
996 iommu->irq = 0;
997 destroy_irq(irq);
998 return 0;
999 }
1000
1001 /* Force fault register is cleared */
1002 iommu_page_fault(irq, iommu);
1003
1004 ret = request_irq(irq, iommu_page_fault, 0, iommu->name, iommu);
1005 if (ret)
1006 printk(KERN_ERR "IOMMU: can't request irq\n");
1007 return ret;
1008}
1009
1010static int iommu_init_domains(struct intel_iommu *iommu)
1011{
1012 unsigned long ndomains;
1013 unsigned long nlongs;
1014
1015 ndomains = cap_ndoms(iommu->cap);
1016 pr_debug("Number of Domains supported <%ld>\n", ndomains);
1017 nlongs = BITS_TO_LONGS(ndomains);
1018
1019 /* TBD: there might be 64K domains,
1020 * consider other allocation for future chip
1021 */
1022 iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1023 if (!iommu->domain_ids) {
1024 printk(KERN_ERR "Allocating domain id array failed\n");
1025 return -ENOMEM;
1026 }
1027 iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *),
1028 GFP_KERNEL);
1029 if (!iommu->domains) {
1030 printk(KERN_ERR "Allocating domain array failed\n");
1031 kfree(iommu->domain_ids);
1032 return -ENOMEM;
1033 }
1034
1035 spin_lock_init(&iommu->lock);
1036
1037 /*
1038 * if Caching mode is set, then invalid translations are tagged
1039 * with domainid 0. Hence we need to pre-allocate it.
1040 */
1041 if (cap_caching_mode(iommu->cap))
1042 set_bit(0, iommu->domain_ids);
1043 return 0;
1044}
1045
1046
1047static void domain_exit(struct dmar_domain *domain);
1048
1049void free_dmar_iommu(struct intel_iommu *iommu)
1050{
1051 struct dmar_domain *domain;
1052 int i;
1053
1054 i = find_first_bit(iommu->domain_ids, cap_ndoms(iommu->cap));
1055 for (; i < cap_ndoms(iommu->cap); ) {
1056 domain = iommu->domains[i];
1057 clear_bit(i, iommu->domain_ids);
1058 domain_exit(domain);
1059 i = find_next_bit(iommu->domain_ids,
1060 cap_ndoms(iommu->cap), i+1);
1061 }
1062
1063 if (iommu->gcmd & DMA_GCMD_TE)
1064 iommu_disable_translation(iommu);
1065
1066 if (iommu->irq) {
1067 set_irq_data(iommu->irq, NULL);
1068 /* This will mask the irq */
1069 free_irq(iommu->irq, iommu);
1070 destroy_irq(iommu->irq);
1071 }
1072
1073 kfree(iommu->domains);
1074 kfree(iommu->domain_ids);
1075
1076 /* free context mapping */
1077 free_context_table(iommu);
1078}
1079
1080static struct dmar_domain * iommu_alloc_domain(struct intel_iommu *iommu)
1081{
1082 unsigned long num;
1083 unsigned long ndomains;
1084 struct dmar_domain *domain;
1085 unsigned long flags;
1086
1087 domain = alloc_domain_mem();
1088 if (!domain)
1089 return NULL;
1090
1091 ndomains = cap_ndoms(iommu->cap);
1092
1093 spin_lock_irqsave(&iommu->lock, flags);
1094 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1095 if (num >= ndomains) {
1096 spin_unlock_irqrestore(&iommu->lock, flags);
1097 free_domain_mem(domain);
1098 printk(KERN_ERR "IOMMU: no free domain ids\n");
1099 return NULL;
1100 }
1101
1102 set_bit(num, iommu->domain_ids);
1103 domain->id = num;
1104 domain->iommu = iommu;
1105 iommu->domains[num] = domain;
1106 spin_unlock_irqrestore(&iommu->lock, flags);
1107
1108 return domain;
1109}
1110
1111static void iommu_free_domain(struct dmar_domain *domain)
1112{
1113 unsigned long flags;
1114
1115 spin_lock_irqsave(&domain->iommu->lock, flags);
1116 clear_bit(domain->id, domain->iommu->domain_ids);
1117 spin_unlock_irqrestore(&domain->iommu->lock, flags);
1118}
1119
1120static struct iova_domain reserved_iova_list;
1121static struct lock_class_key reserved_alloc_key;
1122static struct lock_class_key reserved_rbtree_key;
1123
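/*
 * Reserve IOVA ranges that DMA must never use: the IOAPIC MMIO window and
 * every PCI memory BAR (to avoid peer-to-peer hits on device MMIO). Each
 * new domain copies this list via domain_reserve_special_ranges().
 */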
1124static void dmar_init_reserved_ranges(void)
1125{
1126 struct pci_dev *pdev = NULL;
1127 struct iova *iova;
1128 int i;
1129 u64 addr, size;
1130
1131 init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN);
1132
1133 lockdep_set_class(&reserved_iova_list.iova_alloc_lock,
1134 &reserved_alloc_key);
1135 lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1136 &reserved_rbtree_key);
1137
1138 /* IOAPIC ranges shouldn't be accessed by DMA */
1139 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1140 IOVA_PFN(IOAPIC_RANGE_END));
1141 if (!iova)
1142 printk(KERN_ERR "Reserve IOAPIC range failed\n");
1143
1144 /* Reserve all PCI MMIO to avoid peer-to-peer access */
1145 for_each_pci_dev(pdev) {
1146 struct resource *r;
1147
1148 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1149 r = &pdev->resource[i];
1150 if (!r->flags || !(r->flags & IORESOURCE_MEM))
1151 continue;
1152 addr = r->start;
1153 addr &= PAGE_MASK;
1154 size = r->end - addr;
1155 size = PAGE_ALIGN(size);
1156 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(addr),
1157 IOVA_PFN(size + addr) - 1);
1158 if (!iova)
1159 printk(KERN_ERR "Reserve iova failed\n");
1160 }
1161 }
1162
1163}
1164
1165static void domain_reserve_special_ranges(struct dmar_domain *domain)
1166{
1167 copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1168}
1169
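/*
 * Round a guest address width up to the next value the page-table format
 * can represent, i.e. so that (agaw - 12) is a multiple of the 9-bit
 * level stride; e.g. a 32-bit guest width is adjusted to 39.
 */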
1170static inline int guestwidth_to_adjustwidth(int gaw)
1171{
1172 int agaw;
1173 int r = (gaw - 12) % 9;
1174
1175 if (r == 0)
1176 agaw = gaw;
1177 else
1178 agaw = gaw + 9 - r;
1179 if (agaw > 64)
1180 agaw = 64;
1181 return agaw;
1182}
1183
1184static int domain_init(struct dmar_domain *domain, int guest_width)
1185{
1186 struct intel_iommu *iommu;
1187 int adjust_width, agaw;
1188 unsigned long sagaw;
1189
1190 init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
1191 spin_lock_init(&domain->mapping_lock);
1192
1193 domain_reserve_special_ranges(domain);
1194
1195 /* calculate AGAW */
1196 iommu = domain->iommu;
1197 if (guest_width > cap_mgaw(iommu->cap))
1198 guest_width = cap_mgaw(iommu->cap);
1199 domain->gaw = guest_width;
1200 adjust_width = guestwidth_to_adjustwidth(guest_width);
1201 agaw = width_to_agaw(adjust_width);
1202 sagaw = cap_sagaw(iommu->cap);
1203 if (!test_bit(agaw, &sagaw)) {
1204 /* hardware doesn't support it, choose a bigger one */
1205 pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw);
1206 agaw = find_next_bit(&sagaw, 5, agaw);
1207 if (agaw >= 5)
1208 return -ENODEV;
1209 }
1210 domain->agaw = agaw;
1211 INIT_LIST_HEAD(&domain->devices);
1212
1213 /* always allocate the top pgd */
1214 domain->pgd = (struct dma_pte *)alloc_pgtable_page();
1215 if (!domain->pgd)
1216 return -ENOMEM;
1217 __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
1218 return 0;
1219}
1220
1221static void domain_exit(struct dmar_domain *domain)
1222{
1223 u64 end;
1224
1225 /* Domain 0 is reserved, so dont process it */
1226 if (!domain)
1227 return;
1228
1229 domain_remove_dev_info(domain);
1230 /* destroy iovas */
1231 put_iova_domain(&domain->iovad);
1232 end = DOMAIN_MAX_ADDR(domain->gaw);
1233 end = end & (~PAGE_MASK);
1234
1235 /* clear ptes */
1236 dma_pte_clear_range(domain, 0, end);
1237
1238 /* free page tables */
1239 dma_pte_free_pagetable(domain, 0, end);
1240
1241 iommu_free_domain(domain);
1242 free_domain_mem(domain);
1243}
1244
1245static int domain_context_mapping_one(struct dmar_domain *domain,
1246 u8 bus, u8 devfn)
1247{
1248 struct context_entry *context;
1249 struct intel_iommu *iommu = domain->iommu;
1250 unsigned long flags;
1251
1252 pr_debug("Set context mapping for %02x:%02x.%d\n",
1253 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1254 BUG_ON(!domain->pgd);
1255 context = device_to_context_entry(iommu, bus, devfn);
1256 if (!context)
1257 return -ENOMEM;
1258 spin_lock_irqsave(&iommu->lock, flags);
1259 if (context_present(*context)) {
1260 spin_unlock_irqrestore(&iommu->lock, flags);
1261 return 0;
1262 }
1263
1264 context_set_domain_id(*context, domain->id);
1265 context_set_address_width(*context, domain->agaw);
1266 context_set_address_root(*context, virt_to_phys(domain->pgd));
1267 context_set_translation_type(*context, CONTEXT_TT_MULTI_LEVEL);
1268 context_set_fault_enable(*context);
1269 context_set_present(*context);
1270 __iommu_flush_cache(iommu, context, sizeof(*context));
1271
1272 /* it's a non-present to present mapping */
1273 if (iommu->flush.flush_context(iommu, domain->id,
1274 (((u16)bus) << 8) | devfn, DMA_CCMD_MASK_NOBIT,
1275 DMA_CCMD_DEVICE_INVL, 1))
1276 iommu_flush_write_buffer(iommu);
1277 else
1278 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_DSI_FLUSH, 0);
1279
1280 spin_unlock_irqrestore(&iommu->lock, flags);
1281 return 0;
1282}
1283
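/*
 * Set up the context entry for the device itself and, when it sits behind
 * a PCIe-to-PCI bridge, for every bridge on the path as well: DMA issued
 * from behind such a bridge is tagged with the bridge's source-id
 * (secondary bus/devfn 0 for a PCIe-to-PCI bridge, the bridge's own
 * bus/devfn for legacy PCI), so the whole path must use the same domain.
 */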
1284static int
1285domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev)
1286{
1287 int ret;
1288 struct pci_dev *tmp, *parent;
1289
1290 ret = domain_context_mapping_one(domain, pdev->bus->number,
1291 pdev->devfn);
1292 if (ret)
1293 return ret;
1294
1295 /* dependent device mapping */
1296 tmp = pci_find_upstream_pcie_bridge(pdev);
1297 if (!tmp)
1298 return 0;
1299 /* Secondary interface's bus number and devfn 0 */
1300 parent = pdev->bus->self;
1301 while (parent != tmp) {
1302 ret = domain_context_mapping_one(domain, parent->bus->number,
1303 parent->devfn);
1304 if (ret)
1305 return ret;
1306 parent = parent->bus->self;
1307 }
1308 if (tmp->is_pcie) /* this is a PCIE-to-PCI bridge */
1309 return domain_context_mapping_one(domain,
1310 tmp->subordinate->number, 0);
1311 else /* this is a legacy PCI bridge */
1312 return domain_context_mapping_one(domain,
1313 tmp->bus->number, tmp->devfn);
1314}
1315
1316static int domain_context_mapped(struct dmar_domain *domain,
1317 struct pci_dev *pdev)
1318{
1319 int ret;
1320 struct pci_dev *tmp, *parent;
1321
1322 ret = device_context_mapped(domain->iommu,
1323 pdev->bus->number, pdev->devfn);
1324 if (!ret)
1325 return ret;
1326 /* dependent device mapping */
1327 tmp = pci_find_upstream_pcie_bridge(pdev);
1328 if (!tmp)
1329 return ret;
1330 /* Secondary interface's bus number and devfn 0 */
1331 parent = pdev->bus->self;
1332 while (parent != tmp) {
1333 ret = device_context_mapped(domain->iommu, parent->bus->number,
1334 parent->devfn);
1335 if (!ret)
1336 return ret;
1337 parent = parent->bus->self;
1338 }
1339 if (tmp->is_pcie)
1340 return device_context_mapped(domain->iommu,
1341 tmp->subordinate->number, 0);
1342 else
1343 return device_context_mapped(domain->iommu,
1344 tmp->bus->number, tmp->devfn);
1345}
1346
1347static int
1348domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova,
1349 u64 hpa, size_t size, int prot)
1350{
1351 u64 start_pfn, end_pfn;
1352 struct dma_pte *pte;
1353 int index;
1354 int addr_width = agaw_to_width(domain->agaw);
1355
1356 hpa &= (((u64)1) << addr_width) - 1;
1357
1358 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
1359 return -EINVAL;
1360 iova &= PAGE_MASK;
1361 start_pfn = ((u64)hpa) >> VTD_PAGE_SHIFT;
1362 end_pfn = (VTD_PAGE_ALIGN(((u64)hpa) + size)) >> VTD_PAGE_SHIFT;
1363 index = 0;
1364 while (start_pfn < end_pfn) {
1365 pte = addr_to_dma_pte(domain, iova + VTD_PAGE_SIZE * index);
1366 if (!pte)
1367 return -ENOMEM;
1368 /* We don't need lock here, nobody else
1369 * touches the iova range
1370 */
1371 BUG_ON(dma_pte_addr(*pte));
1372 dma_set_pte_addr(*pte, start_pfn << VTD_PAGE_SHIFT);
1373 dma_set_pte_prot(*pte, prot);
1374 __iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
1375 start_pfn++;
1376 index++;
1377 }
1378 return 0;
1379}
1380
1381static void detach_domain_for_dev(struct dmar_domain *domain, u8 bus, u8 devfn)
1382{
1383 clear_context_table(domain->iommu, bus, devfn);
1384 domain->iommu->flush.flush_context(domain->iommu, 0, 0, 0,
1385 DMA_CCMD_GLOBAL_INVL, 0);
1386 domain->iommu->flush.flush_iotlb(domain->iommu, 0, 0, 0,
1387 DMA_TLB_GLOBAL_FLUSH, 0);
1388}
1389
1390static void domain_remove_dev_info(struct dmar_domain *domain)
1391{
1392 struct device_domain_info *info;
1393 unsigned long flags;
1394
1395 spin_lock_irqsave(&device_domain_lock, flags);
1396 while (!list_empty(&domain->devices)) {
1397 info = list_entry(domain->devices.next,
1398 struct device_domain_info, link);
1399 list_del(&info->link);
1400 list_del(&info->global);
1401 if (info->dev)
1402 info->dev->dev.archdata.iommu = NULL;
1403 spin_unlock_irqrestore(&device_domain_lock, flags);
1404
1405 detach_domain_for_dev(info->domain, info->bus, info->devfn);
1406 free_devinfo_mem(info);
1407
1408 spin_lock_irqsave(&device_domain_lock, flags);
1409 }
1410 spin_unlock_irqrestore(&device_domain_lock, flags);
1411}
1412
1413/*
1414 * find_domain
1415 * Note: struct pci_dev->dev.archdata.iommu stores the per-device info
1416 */
1417static struct dmar_domain *
1418find_domain(struct pci_dev *pdev)
1419{
1420 struct device_domain_info *info;
1421
1422 /* No lock here, assumes no domain exit in normal case */
1423 info = pdev->dev.archdata.iommu;
1424 if (info)
1425 return info->domain;
1426 return NULL;
1427}
1428
1429/* domain is initialized */
1430static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
1431{
1432 struct dmar_domain *domain, *found = NULL;
1433 struct intel_iommu *iommu;
1434 struct dmar_drhd_unit *drhd;
1435 struct device_domain_info *info, *tmp;
1436 struct pci_dev *dev_tmp;
1437 unsigned long flags;
1438 int bus = 0, devfn = 0;
1439
1440 domain = find_domain(pdev);
1441 if (domain)
1442 return domain;
1443
1444 dev_tmp = pci_find_upstream_pcie_bridge(pdev);
1445 if (dev_tmp) {
1446 if (dev_tmp->is_pcie) {
1447 bus = dev_tmp->subordinate->number;
1448 devfn = 0;
1449 } else {
1450 bus = dev_tmp->bus->number;
1451 devfn = dev_tmp->devfn;
1452 }
1453 spin_lock_irqsave(&device_domain_lock, flags);
1454 list_for_each_entry(info, &device_domain_list, global) {
1455 if (info->bus == bus && info->devfn == devfn) {
1456 found = info->domain;
1457 break;
1458 }
1459 }
1460 spin_unlock_irqrestore(&device_domain_lock, flags);
1461 /* pcie-pci bridge already has a domain, uses it */
1462 if (found) {
1463 domain = found;
1464 goto found_domain;
1465 }
1466 }
1467
1468 /* Allocate new domain for the device */
1469 drhd = dmar_find_matched_drhd_unit(pdev);
1470 if (!drhd) {
1471 printk(KERN_ERR "IOMMU: can't find DMAR for device %s\n",
1472 pci_name(pdev));
1473 return NULL;
1474 }
1475 iommu = drhd->iommu;
1476
1477 domain = iommu_alloc_domain(iommu);
1478 if (!domain)
1479 goto error;
1480
1481 if (domain_init(domain, gaw)) {
1482 domain_exit(domain);
1483 goto error;
1484 }
1485
1486 /* register pcie-to-pci device */
1487 if (dev_tmp) {
1488 info = alloc_devinfo_mem();
1489 if (!info) {
1490 domain_exit(domain);
1491 goto error;
1492 }
1493 info->bus = bus;
1494 info->devfn = devfn;
1495 info->dev = NULL;
1496 info->domain = domain;
1497 /* This domain is shared by devices under p2p bridge */
1498 domain->flags |= DOMAIN_FLAG_MULTIPLE_DEVICES;
1499
1500 /* pcie-to-pci bridge already has a domain, uses it */
1501 found = NULL;
1502 spin_lock_irqsave(&device_domain_lock, flags);
1503 list_for_each_entry(tmp, &device_domain_list, global) {
1504 if (tmp->bus == bus && tmp->devfn == devfn) {
1505 found = tmp->domain;
1506 break;
1507 }
1508 }
1509 if (found) {
1510 free_devinfo_mem(info);
1511 domain_exit(domain);
1512 domain = found;
1513 } else {
1514 list_add(&info->link, &domain->devices);
1515 list_add(&info->global, &device_domain_list);
1516 }
1517 spin_unlock_irqrestore(&device_domain_lock, flags);
1518 }
1519
1520found_domain:
1521 info = alloc_devinfo_mem();
1522 if (!info)
1523 goto error;
1524 info->bus = pdev->bus->number;
1525 info->devfn = pdev->devfn;
1526 info->dev = pdev;
1527 info->domain = domain;
1528 spin_lock_irqsave(&device_domain_lock, flags);
1529 /* somebody is fast */
1530 found = find_domain(pdev);
1531 if (found != NULL) {
1532 spin_unlock_irqrestore(&device_domain_lock, flags);
1533 if (found != domain) {
1534 domain_exit(domain);
1535 domain = found;
1536 }
1537 free_devinfo_mem(info);
1538 return domain;
1539 }
1540 list_add(&info->link, &domain->devices);
1541 list_add(&info->global, &device_domain_list);
1542 pdev->dev.archdata.iommu = info;
1543 spin_unlock_irqrestore(&device_domain_lock, flags);
1544 return domain;
1545error:
1546 /* recheck it here, maybe others set it */
1547 return find_domain(pdev);
1548}
1549
1550static int iommu_prepare_identity_map(struct pci_dev *pdev,
1551 unsigned long long start,
1552 unsigned long long end)
1553{
1554 struct dmar_domain *domain;
1555 unsigned long size;
1556 unsigned long long base;
1557 int ret;
1558
1559 printk(KERN_INFO
1560 "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
1561 pci_name(pdev), start, end);
1562 /* page table init */
1563 domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
1564 if (!domain)
1565 return -ENOMEM;
1566
1567 /* The address might not be aligned */
1568 base = start & PAGE_MASK;
1569 size = end - base;
1570 size = PAGE_ALIGN(size);
1571 if (!reserve_iova(&domain->iovad, IOVA_PFN(base),
1572 IOVA_PFN(base + size) - 1)) {
1573 printk(KERN_ERR "IOMMU: reserve iova failed\n");
1574 ret = -ENOMEM;
1575 goto error;
1576 }
1577
1578 pr_debug("Mapping reserved region %lx@%llx for %s\n",
1579 size, base, pci_name(pdev));
1580 /*
1581 * RMRR range might have overlap with physical memory range,
1582 * clear it first
1583 */
1584 dma_pte_clear_range(domain, base, base + size);
1585
1586 ret = domain_page_mapping(domain, base, base, size,
1587 DMA_PTE_READ|DMA_PTE_WRITE);
1588 if (ret)
1589 goto error;
1590
1591 /* context entry init */
1592 ret = domain_context_mapping(domain, pdev);
1593 if (!ret)
1594 return 0;
1595error:
1596 domain_exit(domain);
1597 return ret;
1598
1599}
1600
1601static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
1602 struct pci_dev *pdev)
1603{
1604 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
1605 return 0;
1606 return iommu_prepare_identity_map(pdev, rmrr->base_address,
1607 rmrr->end_address + 1);
1608}
1609
1610#ifdef CONFIG_DMAR_GFX_WA
1611struct iommu_prepare_data {
1612 struct pci_dev *pdev;
1613 int ret;
1614};
1615
1616static int __init iommu_prepare_work_fn(unsigned long start_pfn,
1617 unsigned long end_pfn, void *datax)
1618{
1619 struct iommu_prepare_data *data;
1620
1621 data = (struct iommu_prepare_data *)datax;
1622
1623 data->ret = iommu_prepare_identity_map(data->pdev,
1624 start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
1625 return data->ret;
1626
1627}
1628
1629static int __init iommu_prepare_with_active_regions(struct pci_dev *pdev)
1630{
1631 int nid;
1632 struct iommu_prepare_data data;
1633
1634 data.pdev = pdev;
1635 data.ret = 0;
1636
1637 for_each_online_node(nid) {
1638 work_with_active_regions(nid, iommu_prepare_work_fn, &data);
1639 if (data.ret)
1640 return data.ret;
1641 }
1642 return data.ret;
1643}
1644
1645static void __init iommu_prepare_gfx_mapping(void)
1646{
1647 struct pci_dev *pdev = NULL;
1648 int ret;
1649
1650 for_each_pci_dev(pdev) {
1651 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO ||
1652 !IS_GFX_DEVICE(pdev))
1653 continue;
1654 printk(KERN_INFO "IOMMU: gfx device %s 1-1 mapping\n",
1655 pci_name(pdev));
1656 ret = iommu_prepare_with_active_regions(pdev);
1657 if (ret)
1658 printk(KERN_ERR "IOMMU: mapping reserved region failed\n");
1659 }
1660}
1661#endif
1662
1663#ifdef CONFIG_DMAR_FLOPPY_WA
1664static inline void iommu_prepare_isa(void)
1665{
1666 struct pci_dev *pdev;
1667 int ret;
1668
1669 pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
1670 if (!pdev)
1671 return;
1672
1673 printk(KERN_INFO "IOMMU: Prepare 0-16M unity mapping for LPC\n");
1674 ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024);
1675
1676 if (ret)
1677 printk("IOMMU: Failed to create 0-16M identity map, "
1678 "floppy might not work\n");
1679
1680}
1681#else
1682static inline void iommu_prepare_isa(void)
1683{
1684 return;
1685}
1686#endif /* !CONFIG_DMAR_FLOPPY_WA */
1687
1688static int __init init_dmars(void)
1689{
1690 struct dmar_drhd_unit *drhd;
1691 struct dmar_rmrr_unit *rmrr;
1692 struct pci_dev *pdev;
1693 struct intel_iommu *iommu;
1694 int i, ret, unit = 0;
1695
1696 /*
1697 * for each drhd
1698 * allocate root
1699 * initialize and program root entry to not present
1700 * endfor
1701 */
1702 for_each_drhd_unit(drhd) {
1703 g_num_of_iommus++;
1704 /*
1705 * lock not needed as this is only incremented in the single
1706 * threaded kernel __init code path all other access are read
1707 * only
1708 */
1709 }
1710
1711 deferred_flush = kzalloc(g_num_of_iommus *
1712 sizeof(struct deferred_flush_tables), GFP_KERNEL);
1713 if (!deferred_flush) {
1714 ret = -ENOMEM;
1715 goto error;
1716 }
1717
1718 for_each_drhd_unit(drhd) {
1719 if (drhd->ignored)
1720 continue;
1721
1722 iommu = drhd->iommu;
1723
1724 ret = iommu_init_domains(iommu);
1725 if (ret)
1726 goto error;
1727
1728 /*
1729 * TBD:
1730 * we could share the same root & context tables
1731 * among all IOMMUs. Need to split it later.
1732 */
1733 ret = iommu_alloc_root_entry(iommu);
1734 if (ret) {
1735 printk(KERN_ERR "IOMMU: allocate root entry failed\n");
1736 goto error;
1737 }
1738 }
1739
1740 for_each_drhd_unit(drhd) {
1741 if (drhd->ignored)
1742 continue;
1743
1744 iommu = drhd->iommu;
1745 if (dmar_enable_qi(iommu)) {
1746 /*
1747 * Queued Invalidate not enabled, use Register Based
1748 * Invalidate
1749 */
1750 iommu->flush.flush_context = __iommu_flush_context;
1751 iommu->flush.flush_iotlb = __iommu_flush_iotlb;
1752 printk(KERN_INFO "IOMMU 0x%Lx: using Register based "
1753 "invalidation\n",
1754 (unsigned long long)drhd->reg_base_addr);
1755 } else {
1756 iommu->flush.flush_context = qi_flush_context;
1757 iommu->flush.flush_iotlb = qi_flush_iotlb;
1758 printk(KERN_INFO "IOMMU 0x%Lx: using Queued "
1759 "invalidation\n",
1760 (unsigned long long)drhd->reg_base_addr);
1761 }
1762 }
1763
1764 /*
1765 * For each rmrr
1766 * for each dev attached to rmrr
1767 * do
1768 * locate drhd for dev, alloc domain for dev
1769 * allocate free domain
1770 * allocate page table entries for rmrr
1771 * if context not allocated for bus
1772 * allocate and init context
1773 * set present in root table for this bus
1774 * init context with domain, translation etc
1775 * endfor
1776 * endfor
1777 */
1778 for_each_rmrr_units(rmrr) {
1779 for (i = 0; i < rmrr->devices_cnt; i++) {
1780 pdev = rmrr->devices[i];
1781 /* some BIOS lists non-exist devices in DMAR table */
1782 if (!pdev)
1783 continue;
1784 ret = iommu_prepare_rmrr_dev(rmrr, pdev);
1785 if (ret)
1786 printk(KERN_ERR
1787 "IOMMU: mapping reserved region failed\n");
1788 }
1789 }
1790
1791 iommu_prepare_gfx_mapping();
1792
1793 iommu_prepare_isa();
1794
1795 /*
1796 * for each drhd
1797 * enable fault log
1798 * global invalidate context cache
1799 * global invalidate iotlb
1800 * enable translation
1801 */
1802 for_each_drhd_unit(drhd) {
1803 if (drhd->ignored)
1804 continue;
1805 iommu = drhd->iommu;
1806 sprintf (iommu->name, "dmar%d", unit++);
1807
1808 iommu_flush_write_buffer(iommu);
1809
1810 ret = dmar_set_interrupt(iommu);
1811 if (ret)
1812 goto error;
1813
1814 iommu_set_root_entry(iommu);
1815
1816 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL,
1817 0);
1818 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH,
1819 0);
1820 iommu_disable_protect_mem_regions(iommu);
1821
1822 ret = iommu_enable_translation(iommu);
1823 if (ret)
1824 goto error;
1825 }
1826
1827 return 0;
1828error:
1829 for_each_drhd_unit(drhd) {
1830 if (drhd->ignored)
1831 continue;
1832 iommu = drhd->iommu;
1833 free_iommu(iommu);
1834 }
1835 return ret;
1836}
1837
1838static inline u64 aligned_size(u64 host_addr, size_t size)
1839{
1840 u64 addr;
1841 addr = (host_addr & (~PAGE_MASK)) + size;
1842 return PAGE_ALIGN(addr);
1843}
1844
1845struct iova *
1846iommu_alloc_iova(struct dmar_domain *domain, size_t size, u64 end)
1847{
1848 struct iova *piova;
1849
1850 /* Make sure it's in range */
1851 end = min_t(u64, DOMAIN_MAX_ADDR(domain->gaw), end);
1852 if (!size || (IOVA_START_ADDR + size > end))
1853 return NULL;
1854
1855 piova = alloc_iova(&domain->iovad,
1856 size >> PAGE_SHIFT, IOVA_PFN(end), 1);
1857 return piova;
1858}
1859
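/*
 * IOVA allocation policy: devices limited to 32-bit DMA (or when
 * intel_iommu=forcedac is set) allocate straight from their dma_mask;
 * 64-bit capable devices first try the space below 4GB so they can avoid
 * dual-address-cycle transactions, then fall back to the full mask.
 */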
1860static struct iova *
1861__intel_alloc_iova(struct device *dev, struct dmar_domain *domain,
1862 size_t size, u64 dma_mask)
1863{
1864 struct pci_dev *pdev = to_pci_dev(dev);
1865 struct iova *iova = NULL;
1866
1867 if (dma_mask <= DMA_32BIT_MASK || dmar_forcedac)
1868 iova = iommu_alloc_iova(domain, size, dma_mask);
1869 else {
1870 /*
1871 * First try to allocate an io virtual address in
1872 * DMA_32BIT_MASK and if that fails then try allocating
1873 * from higher range
1874 */
1875 iova = iommu_alloc_iova(domain, size, DMA_32BIT_MASK);
1876 if (!iova)
1877 iova = iommu_alloc_iova(domain, size, dma_mask);
1878 }
1879
1880 if (!iova) {
1881 printk(KERN_ERR"Allocating iova for %s failed", pci_name(pdev));
1882 return NULL;
1883 }
1884
1885 return iova;
1886}
1887
1888static struct dmar_domain *
1889get_valid_domain_for_dev(struct pci_dev *pdev)
1890{
1891 struct dmar_domain *domain;
1892 int ret;
1893
1894 domain = get_domain_for_dev(pdev,
1895 DEFAULT_DOMAIN_ADDRESS_WIDTH);
1896 if (!domain) {
1897 printk(KERN_ERR
1898 "Allocating domain for %s failed", pci_name(pdev));
1899 return NULL;
1900 }
1901
1902 /* make sure context mapping is ok */
1903 if (unlikely(!domain_context_mapped(domain, pdev))) {
1904 ret = domain_context_mapping(domain, pdev);
1905 if (ret) {
1906 printk(KERN_ERR
1907 "Domain context map for %s failed",
1908 pci_name(pdev));
1909 return NULL;
1910 }
1911 }
1912
1913 return domain;
1914}
1915
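/*
 * Map one physically contiguous buffer for DMA: find (or create) the
 * device's domain, allocate an IOVA that fits @dma_mask, fill in the page
 * tables with domain_page_mapping(), then do a page-selective IOTLB
 * invalidation (or just a write-buffer flush when the hardware does not
 * cache not-present entries).
 */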
1916static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
1917 size_t size, int dir, u64 dma_mask)
1918{
1919 struct pci_dev *pdev = to_pci_dev(hwdev);
1920 struct dmar_domain *domain;
1921 phys_addr_t start_paddr;
1922 struct iova *iova;
1923 int prot = 0;
1924 int ret;
1925
1926 BUG_ON(dir == DMA_NONE);
1927 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
1928 return paddr;
1929
1930 domain = get_valid_domain_for_dev(pdev);
1931 if (!domain)
1932 return 0;
1933
1934 size = aligned_size((u64)paddr, size);
1935
1936 iova = __intel_alloc_iova(hwdev, domain, size, pdev->dma_mask);
1937 if (!iova)
1938 goto error;
1939
5b6985ce 1940 start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
f76aec76 1941
ba395927
KA
1942 /*
1943 * Check if DMAR supports zero-length reads on write only
1944 * mappings..
1945 */
 1946	 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL ||
1947 !cap_zlr(domain->iommu->cap))
1948 prot |= DMA_PTE_READ;
1949 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
1950 prot |= DMA_PTE_WRITE;
1951 /*
6865f0d1 1952	 * paddr to (paddr + size) might cover only part of a page, but we map the
ba395927 1953	 * whole page.  Note: if two parts of one page are mapped separately, we
6865f0d1 1954	 * might have two guest addresses mapping to the same host paddr; this
ba395927
KA
 1955	 * is not a big problem.
1956 */
6865f0d1 1957 ret = domain_page_mapping(domain, start_paddr,
5b6985ce 1958 ((u64)paddr) & PAGE_MASK, size, prot);
ba395927
KA
1959 if (ret)
1960 goto error;
1961
f76aec76
KA
1962 /* it's a non-present to present mapping */
1963 ret = iommu_flush_iotlb_psi(domain->iommu, domain->id,
5b6985ce 1964 start_paddr, size >> VTD_PAGE_SHIFT, 1);
f76aec76
KA
1965 if (ret)
1966 iommu_flush_write_buffer(domain->iommu);
1967
5b6985ce 1968 return start_paddr + ((u64)paddr & (~PAGE_MASK));
ba395927 1969
ba395927 1970error:
f76aec76
KA
1971 if (iova)
1972 __free_iova(&domain->iovad, iova);
ba395927 1973	 printk(KERN_ERR "Device %s request: %zx@%llx dir %d --- failed\n",
5b6985ce 1974 pci_name(pdev), size, (unsigned long long)paddr, dir);
ba395927
KA
1975 return 0;
1976}
1977
bb9e6d65
FT
1978dma_addr_t intel_map_single(struct device *hwdev, phys_addr_t paddr,
1979 size_t size, int dir)
1980{
1981 return __intel_map_single(hwdev, paddr, size, dir,
1982 to_pci_dev(hwdev)->dma_mask);
1983}
1984
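/*
 * Drain the deferred-unmap queues: do one global IOTLB flush per IOMMU
 * that has pending entries, then free all the queued IOVAs.  Callers
 * hold async_umap_flush_lock.
 */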
5e0d2a6f 1985static void flush_unmaps(void)
1986{
80b20dd8 1987 int i, j;
5e0d2a6f 1988
5e0d2a6f 1989 timer_on = 0;
1990
1991 /* just flush them all */
1992 for (i = 0; i < g_num_of_iommus; i++) {
80b20dd8 1993 if (deferred_flush[i].next) {
c42d9f32
SS
1994 struct intel_iommu *iommu =
1995 deferred_flush[i].domain[0]->iommu;
1996
a77b67d4
YS
1997 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
1998 DMA_TLB_GLOBAL_FLUSH, 0);
80b20dd8 1999 for (j = 0; j < deferred_flush[i].next; j++) {
2000 __free_iova(&deferred_flush[i].domain[j]->iovad,
2001 deferred_flush[i].iova[j]);
2002 }
2003 deferred_flush[i].next = 0;
2004 }
5e0d2a6f 2005 }
2006
5e0d2a6f 2007 list_size = 0;
5e0d2a6f 2008}
2009
2010static void flush_unmaps_timeout(unsigned long data)
2011{
80b20dd8 2012 unsigned long flags;
2013
2014 spin_lock_irqsave(&async_umap_flush_lock, flags);
5e0d2a6f 2015 flush_unmaps();
80b20dd8 2016 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
5e0d2a6f 2017}
2018
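/*
 * Queue an IOVA for deferred freeing on its IOMMU's per-IOMMU list.
 * If the global list hits HIGH_WATER_MARK the queues are drained
 * immediately; otherwise a 10ms timer guarantees a flush soon.
 */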
2019static void add_unmap(struct dmar_domain *dom, struct iova *iova)
2020{
2021 unsigned long flags;
80b20dd8 2022 int next, iommu_id;
5e0d2a6f 2023
2024 spin_lock_irqsave(&async_umap_flush_lock, flags);
80b20dd8 2025 if (list_size == HIGH_WATER_MARK)
2026 flush_unmaps();
2027
c42d9f32
SS
2028 iommu_id = dom->iommu->seq_id;
2029
80b20dd8 2030 next = deferred_flush[iommu_id].next;
2031 deferred_flush[iommu_id].domain[next] = dom;
2032 deferred_flush[iommu_id].iova[next] = iova;
2033 deferred_flush[iommu_id].next++;
5e0d2a6f 2034
2035 if (!timer_on) {
2036 mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10));
2037 timer_on = 1;
2038 }
2039 list_size++;
2040 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
2041}
2042
5b6985ce
FY
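/*
 * Tear down a single mapping: clear the PTEs and free the page tables
 * for the IOVA range.  In intel_iommu_strict mode the IOTLB is flushed
 * and the IOVA freed synchronously; otherwise the IOVA is queued via
 * add_unmap() and released in a batched flush.
 */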
2043void intel_unmap_single(struct device *dev, dma_addr_t dev_addr, size_t size,
2044 int dir)
ba395927 2045{
ba395927 2046 struct pci_dev *pdev = to_pci_dev(dev);
f76aec76
KA
2047 struct dmar_domain *domain;
2048 unsigned long start_addr;
ba395927
KA
2049 struct iova *iova;
2050
358dd8ac 2051 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
f76aec76 2052 return;
ba395927
KA
2053 domain = find_domain(pdev);
2054 BUG_ON(!domain);
2055
2056 iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
f76aec76 2057 if (!iova)
ba395927 2058 return;
ba395927 2059
5b6985ce 2060 start_addr = iova->pfn_lo << PAGE_SHIFT;
f76aec76 2061 size = aligned_size((u64)dev_addr, size);
ba395927 2062
f76aec76 2063	 pr_debug("Device %s unmapping: %zx@%llx\n",
5b6985ce 2064 pci_name(pdev), size, (unsigned long long)start_addr);
ba395927 2065
f76aec76
KA
2066 /* clear the whole page */
2067 dma_pte_clear_range(domain, start_addr, start_addr + size);
2068 /* free page tables */
2069 dma_pte_free_pagetable(domain, start_addr, start_addr + size);
5e0d2a6f 2070 if (intel_iommu_strict) {
2071 if (iommu_flush_iotlb_psi(domain->iommu,
5b6985ce 2072 domain->id, start_addr, size >> VTD_PAGE_SHIFT, 0))
5e0d2a6f 2073 iommu_flush_write_buffer(domain->iommu);
2074 /* free iova */
2075 __free_iova(&domain->iovad, iova);
2076 } else {
2077 add_unmap(domain, iova);
2078 /*
 2079	 * queue up the release of the unmap to save the roughly 1/6th of
 2080	 * the CPU time otherwise spent on the iotlb flush operation...
2081 */
5e0d2a6f 2082 }
ba395927
KA
2083}
2084
5b6985ce
FY
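/*
 * Allocate page-aligned, zeroed memory for coherent DMA and map it
 * bidirectionally through the IOMMU using the device's coherent DMA
 * mask.  The pages are freed again if the mapping fails.
 */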
2085void *intel_alloc_coherent(struct device *hwdev, size_t size,
2086 dma_addr_t *dma_handle, gfp_t flags)
ba395927
KA
2087{
2088 void *vaddr;
2089 int order;
2090
5b6985ce 2091 size = PAGE_ALIGN(size);
ba395927
KA
2092 order = get_order(size);
2093 flags &= ~(GFP_DMA | GFP_DMA32);
2094
2095 vaddr = (void *)__get_free_pages(flags, order);
2096 if (!vaddr)
2097 return NULL;
2098 memset(vaddr, 0, size);
2099
bb9e6d65
FT
2100 *dma_handle = __intel_map_single(hwdev, virt_to_bus(vaddr), size,
2101 DMA_BIDIRECTIONAL,
2102 hwdev->coherent_dma_mask);
ba395927
KA
2103 if (*dma_handle)
2104 return vaddr;
2105 free_pages((unsigned long)vaddr, order);
2106 return NULL;
2107}
2108
5b6985ce
FY
2109void intel_free_coherent(struct device *hwdev, size_t size, void *vaddr,
2110 dma_addr_t dma_handle)
ba395927
KA
2111{
2112 int order;
2113
5b6985ce 2114 size = PAGE_ALIGN(size);
ba395927
KA
2115 order = get_order(size);
2116
2117 intel_unmap_single(hwdev, dma_handle, size, DMA_BIDIRECTIONAL);
2118 free_pages((unsigned long)vaddr, order);
2119}
2120
12d4d40e 2121#define SG_ENT_VIRT_ADDRESS(sg) (sg_virt((sg)))
5b6985ce
FY
2122
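/*
 * Undo intel_map_sg(): recompute the total aligned length of the list,
 * clear and free the page tables for the contiguous IOVA range, flush
 * the IOTLB and release the IOVA.
 */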
2123void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
2124 int nelems, int dir)
ba395927
KA
2125{
2126 int i;
2127 struct pci_dev *pdev = to_pci_dev(hwdev);
2128 struct dmar_domain *domain;
f76aec76
KA
2129 unsigned long start_addr;
2130 struct iova *iova;
2131 size_t size = 0;
2132 void *addr;
c03ab37c 2133 struct scatterlist *sg;
ba395927 2134
358dd8ac 2135 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
ba395927
KA
2136 return;
2137
2138 domain = find_domain(pdev);
ba395927 2139
c03ab37c 2140 iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address));
f76aec76
KA
2141 if (!iova)
2142 return;
c03ab37c 2143 for_each_sg(sglist, sg, nelems, i) {
f76aec76
KA
2144 addr = SG_ENT_VIRT_ADDRESS(sg);
2145 size += aligned_size((u64)addr, sg->length);
2146 }
2147
5b6985ce 2148 start_addr = iova->pfn_lo << PAGE_SHIFT;
f76aec76
KA
2149
2150 /* clear the whole page */
2151 dma_pte_clear_range(domain, start_addr, start_addr + size);
2152 /* free page tables */
2153 dma_pte_free_pagetable(domain, start_addr, start_addr + size);
2154
2155 if (iommu_flush_iotlb_psi(domain->iommu, domain->id, start_addr,
5b6985ce 2156 size >> VTD_PAGE_SHIFT, 0))
ba395927 2157 iommu_flush_write_buffer(domain->iommu);
f76aec76
KA
2158
2159 /* free iova */
2160 __free_iova(&domain->iovad, iova);
ba395927
KA
2161}
2162
ba395927 2163static int intel_nontranslate_map_sg(struct device *hwdev,
c03ab37c 2164 struct scatterlist *sglist, int nelems, int dir)
ba395927
KA
2165{
2166 int i;
c03ab37c 2167 struct scatterlist *sg;
ba395927 2168
c03ab37c 2169 for_each_sg(sglist, sg, nelems, i) {
12d4d40e 2170 BUG_ON(!sg_page(sg));
c03ab37c
FT
2171 sg->dma_address = virt_to_bus(SG_ENT_VIRT_ADDRESS(sg));
2172 sg->dma_length = sg->length;
ba395927
KA
2173 }
2174 return nelems;
2175}
2176
5b6985ce
FY
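/*
 * Map a scatterlist in two passes: first sum up the aligned length of
 * all elements and grab a single contiguous IOVA range, then map each
 * element back-to-back into that range.  On any mapping failure the
 * whole range is torn down and 0 is returned.
 */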
2177int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
2178 int dir)
ba395927
KA
2179{
2180 void *addr;
2181 int i;
ba395927
KA
2182 struct pci_dev *pdev = to_pci_dev(hwdev);
2183 struct dmar_domain *domain;
f76aec76
KA
2184 size_t size = 0;
2185 int prot = 0;
2186 size_t offset = 0;
2187 struct iova *iova = NULL;
2188 int ret;
c03ab37c 2189 struct scatterlist *sg;
f76aec76 2190 unsigned long start_addr;
ba395927
KA
2191
2192 BUG_ON(dir == DMA_NONE);
358dd8ac 2193 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
c03ab37c 2194 return intel_nontranslate_map_sg(hwdev, sglist, nelems, dir);
ba395927 2195
f76aec76
KA
2196 domain = get_valid_domain_for_dev(pdev);
2197 if (!domain)
2198 return 0;
2199
c03ab37c 2200 for_each_sg(sglist, sg, nelems, i) {
ba395927 2201 addr = SG_ENT_VIRT_ADDRESS(sg);
f76aec76
KA
2202 addr = (void *)virt_to_phys(addr);
2203 size += aligned_size((u64)addr, sg->length);
2204 }
2205
bb9e6d65 2206 iova = __intel_alloc_iova(hwdev, domain, size, pdev->dma_mask);
f76aec76 2207 if (!iova) {
c03ab37c 2208 sglist->dma_length = 0;
f76aec76
KA
2209 return 0;
2210 }
2211
2212 /*
2213 * Check if DMAR supports zero-length reads on write only
2214 * mappings..
2215 */
 2216	 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL ||
2217 !cap_zlr(domain->iommu->cap))
2218 prot |= DMA_PTE_READ;
2219 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
2220 prot |= DMA_PTE_WRITE;
2221
5b6985ce 2222 start_addr = iova->pfn_lo << PAGE_SHIFT;
f76aec76 2223 offset = 0;
c03ab37c 2224 for_each_sg(sglist, sg, nelems, i) {
f76aec76
KA
2225 addr = SG_ENT_VIRT_ADDRESS(sg);
2226 addr = (void *)virt_to_phys(addr);
2227 size = aligned_size((u64)addr, sg->length);
2228 ret = domain_page_mapping(domain, start_addr + offset,
5b6985ce 2229 ((u64)addr) & PAGE_MASK,
f76aec76
KA
2230 size, prot);
2231 if (ret) {
2232 /* clear the page */
2233 dma_pte_clear_range(domain, start_addr,
2234 start_addr + offset);
2235 /* free page tables */
2236 dma_pte_free_pagetable(domain, start_addr,
2237 start_addr + offset);
2238 /* free iova */
2239 __free_iova(&domain->iovad, iova);
ba395927
KA
2240 return 0;
2241 }
f76aec76 2242 sg->dma_address = start_addr + offset +
5b6985ce 2243 ((u64)addr & (~PAGE_MASK));
ba395927 2244 sg->dma_length = sg->length;
f76aec76 2245 offset += size;
ba395927
KA
2246 }
2247
ba395927 2248 /* it's a non-present to present mapping */
f76aec76 2249 if (iommu_flush_iotlb_psi(domain->iommu, domain->id,
5b6985ce 2250 start_addr, offset >> VTD_PAGE_SHIFT, 1))
ba395927
KA
2251 iommu_flush_write_buffer(domain->iommu);
2252 return nelems;
2253}
2254
2255static struct dma_mapping_ops intel_dma_ops = {
2256 .alloc_coherent = intel_alloc_coherent,
2257 .free_coherent = intel_free_coherent,
2258 .map_single = intel_map_single,
2259 .unmap_single = intel_unmap_single,
2260 .map_sg = intel_map_sg,
2261 .unmap_sg = intel_unmap_sg,
2262};
2263
2264static inline int iommu_domain_cache_init(void)
2265{
2266 int ret = 0;
2267
2268 iommu_domain_cache = kmem_cache_create("iommu_domain",
2269 sizeof(struct dmar_domain),
2270 0,
2271 SLAB_HWCACHE_ALIGN,
2272
2273 NULL);
2274 if (!iommu_domain_cache) {
2275 printk(KERN_ERR "Couldn't create iommu_domain cache\n");
2276 ret = -ENOMEM;
2277 }
2278
2279 return ret;
2280}
2281
2282static inline int iommu_devinfo_cache_init(void)
2283{
2284 int ret = 0;
2285
2286 iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
2287 sizeof(struct device_domain_info),
2288 0,
2289 SLAB_HWCACHE_ALIGN,
ba395927
KA
2290 NULL);
2291 if (!iommu_devinfo_cache) {
2292 printk(KERN_ERR "Couldn't create devinfo cache\n");
2293 ret = -ENOMEM;
2294 }
2295
2296 return ret;
2297}
2298
2299static inline int iommu_iova_cache_init(void)
2300{
2301 int ret = 0;
2302
2303 iommu_iova_cache = kmem_cache_create("iommu_iova",
2304 sizeof(struct iova),
2305 0,
2306 SLAB_HWCACHE_ALIGN,
ba395927
KA
2307 NULL);
2308 if (!iommu_iova_cache) {
2309 printk(KERN_ERR "Couldn't create iova cache\n");
2310 ret = -ENOMEM;
2311 }
2312
2313 return ret;
2314}
2315
2316static int __init iommu_init_mempool(void)
2317{
2318 int ret;
2319 ret = iommu_iova_cache_init();
2320 if (ret)
2321 return ret;
2322
2323 ret = iommu_domain_cache_init();
2324 if (ret)
2325 goto domain_error;
2326
2327 ret = iommu_devinfo_cache_init();
2328 if (!ret)
2329 return ret;
2330
2331 kmem_cache_destroy(iommu_domain_cache);
2332domain_error:
2333 kmem_cache_destroy(iommu_iova_cache);
2334
2335 return -ENOMEM;
2336}
2337
2338static void __init iommu_exit_mempool(void)
2339{
2340 kmem_cache_destroy(iommu_devinfo_cache);
2341 kmem_cache_destroy(iommu_domain_cache);
2342 kmem_cache_destroy(iommu_iova_cache);
2343
2344}
2345
ba395927
KA
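/*
 * Mark DRHD units that cover no PCI devices as ignored.  Unless
 * dmar_map_gfx is set, also ignore units that serve only graphics
 * devices and give those devices the dummy domain info so they
 * bypass translation.
 */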
2346static void __init init_no_remapping_devices(void)
2347{
2348 struct dmar_drhd_unit *drhd;
2349
2350 for_each_drhd_unit(drhd) {
2351 if (!drhd->include_all) {
2352 int i;
2353 for (i = 0; i < drhd->devices_cnt; i++)
2354 if (drhd->devices[i] != NULL)
2355 break;
2356 /* ignore DMAR unit if no pci devices exist */
2357 if (i == drhd->devices_cnt)
2358 drhd->ignored = 1;
2359 }
2360 }
2361
2362 if (dmar_map_gfx)
2363 return;
2364
2365 for_each_drhd_unit(drhd) {
2366 int i;
2367 if (drhd->ignored || drhd->include_all)
2368 continue;
2369
2370 for (i = 0; i < drhd->devices_cnt; i++)
2371 if (drhd->devices[i] &&
2372 !IS_GFX_DEVICE(drhd->devices[i]))
2373 break;
2374
2375 if (i < drhd->devices_cnt)
2376 continue;
2377
2378 /* bypass IOMMU if it is just for gfx devices */
2379 drhd->ignored = 1;
2380 for (i = 0; i < drhd->devices_cnt; i++) {
2381 if (!drhd->devices[i])
2382 continue;
358dd8ac 2383 drhd->devices[i]->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
ba395927
KA
2384 }
2385 }
2386}
2387
2388int __init intel_iommu_init(void)
2389{
2390 int ret = 0;
2391
ba395927
KA
2392 if (dmar_table_init())
2393 return -ENODEV;
2394
1886e8a9
SS
2395 if (dmar_dev_scope_init())
2396 return -ENODEV;
2397
2ae21010
SS
2398 /*
2399 * Check the need for DMA-remapping initialization now.
 2400	 * The initialization above is also used by Interrupt-remapping.
2401 */
2402 if (no_iommu || swiotlb || dmar_disabled)
2403 return -ENODEV;
2404
ba395927
KA
2405 iommu_init_mempool();
2406 dmar_init_reserved_ranges();
2407
2408 init_no_remapping_devices();
2409
2410 ret = init_dmars();
2411 if (ret) {
2412 printk(KERN_ERR "IOMMU: dmar init failed\n");
2413 put_iova_domain(&reserved_iova_list);
2414 iommu_exit_mempool();
2415 return ret;
2416 }
2417 printk(KERN_INFO
2418 "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");
2419
5e0d2a6f 2420 init_timer(&unmap_timer);
ba395927
KA
2421 force_iommu = 1;
2422 dma_ops = &intel_dma_ops;
2423 return 0;
2424}
e820482c 2425
38717946
KA
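/*
 * Destroy a domain created by intel_iommu_domain_alloc(): clear all
 * PTEs up to the domain's maximum address, free its page tables, then
 * release the domain back to its IOMMU and free the domain memory.
 */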
2426void intel_iommu_domain_exit(struct dmar_domain *domain)
2427{
2428 u64 end;
2429
 2430	 /* Domain 0 is reserved, so don't process it */
2431 if (!domain)
2432 return;
2433
2434 end = DOMAIN_MAX_ADDR(domain->gaw);
5b6985ce 2435 end = end & (~VTD_PAGE_MASK);
38717946
KA
2436
2437 /* clear ptes */
2438 dma_pte_clear_range(domain, 0, end);
2439
2440 /* free page tables */
2441 dma_pte_free_pagetable(domain, 0, end);
2442
2443 iommu_free_domain(domain);
2444 free_domain_mem(domain);
2445}
2446EXPORT_SYMBOL_GPL(intel_iommu_domain_exit);
2447
2448struct dmar_domain *intel_iommu_domain_alloc(struct pci_dev *pdev)
2449{
2450 struct dmar_drhd_unit *drhd;
2451 struct dmar_domain *domain;
2452 struct intel_iommu *iommu;
2453
2454 drhd = dmar_find_matched_drhd_unit(pdev);
2455 if (!drhd) {
2456 printk(KERN_ERR "intel_iommu_domain_alloc: drhd == NULL\n");
2457 return NULL;
2458 }
2459
2460 iommu = drhd->iommu;
2461 if (!iommu) {
2462 printk(KERN_ERR
2463 "intel_iommu_domain_alloc: iommu == NULL\n");
2464 return NULL;
2465 }
2466 domain = iommu_alloc_domain(iommu);
2467 if (!domain) {
2468 printk(KERN_ERR
2469 "intel_iommu_domain_alloc: domain == NULL\n");
2470 return NULL;
2471 }
2472 if (domain_init(domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2473 printk(KERN_ERR
2474 "intel_iommu_domain_alloc: domain_init() failed\n");
2475 intel_iommu_domain_exit(domain);
2476 return NULL;
2477 }
2478 return domain;
2479}
2480EXPORT_SYMBOL_GPL(intel_iommu_domain_alloc);
2481
2482int intel_iommu_context_mapping(
2483 struct dmar_domain *domain, struct pci_dev *pdev)
2484{
2485 int rc;
2486 rc = domain_context_mapping(domain, pdev);
2487 return rc;
2488}
2489EXPORT_SYMBOL_GPL(intel_iommu_context_mapping);
2490
2491int intel_iommu_page_mapping(
2492 struct dmar_domain *domain, dma_addr_t iova,
2493 u64 hpa, size_t size, int prot)
2494{
2495 int rc;
2496 rc = domain_page_mapping(domain, iova, hpa, size, prot);
2497 return rc;
2498}
2499EXPORT_SYMBOL_GPL(intel_iommu_page_mapping);
2500
2501void intel_iommu_detach_dev(struct dmar_domain *domain, u8 bus, u8 devfn)
2502{
2503 detach_domain_for_dev(domain, bus, devfn);
2504}
2505EXPORT_SYMBOL_GPL(intel_iommu_detach_dev);
2506
2507struct dmar_domain *
2508intel_iommu_find_domain(struct pci_dev *pdev)
2509{
2510 return find_domain(pdev);
2511}
2512EXPORT_SYMBOL_GPL(intel_iommu_find_domain);
2513
2514int intel_iommu_found(void)
2515{
2516 return g_num_of_iommus;
2517}
2518EXPORT_SYMBOL_GPL(intel_iommu_found);
2519
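/*
 * Look up the host page frame backing 'iova' in the domain's page
 * table; returns 0 if no PTE is present.
 */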
2520u64 intel_iommu_iova_to_pfn(struct dmar_domain *domain, u64 iova)
2521{
2522 struct dma_pte *pte;
2523 u64 pfn;
2524
2525 pfn = 0;
2526 pte = addr_to_dma_pte(domain, iova);
2527
2528 if (pte)
2529 pfn = dma_pte_addr(*pte);
2530
5b6985ce 2531 return pfn >> VTD_PAGE_SHIFT;
38717946
KA
2532}
2533EXPORT_SYMBOL_GPL(intel_iommu_iova_to_pfn);