drivers/pci/intel-iommu.c
1/*
2 * Copyright (c) 2006, Intel Corporation.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License along with
14 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15 * Place - Suite 330, Boston, MA 02111-1307 USA.
16 *
 17 * Copyright (C) 2006-2008 Intel Corporation
18 * Author: Ashok Raj <ashok.raj@intel.com>
19 * Author: Shaohua Li <shaohua.li@intel.com>
20 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
21 */
22
23#include <linux/init.h>
24#include <linux/bitmap.h>
 25#include <linux/debugfs.h>
26#include <linux/slab.h>
27#include <linux/irq.h>
28#include <linux/interrupt.h>
29#include <linux/sysdev.h>
30#include <linux/spinlock.h>
31#include <linux/pci.h>
32#include <linux/dmar.h>
33#include <linux/dma-mapping.h>
34#include <linux/mempool.h>
 35#include <linux/timer.h>
36#include "iova.h"
37#include "intel-iommu.h"
38#include <asm/proto.h> /* force_iommu in this header in x86-64*/
39#include <asm/cacheflush.h>
 40#include <asm/gart.h>
41#include "pci.h"
42
43#define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
44#define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
45
46#define IOAPIC_RANGE_START (0xfee00000)
47#define IOAPIC_RANGE_END (0xfeefffff)
48#define IOVA_START_ADDR (0x1000)
49
50#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
51
 52#define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000) /* 10sec */
53
54#define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1)
55
 56
57static void flush_unmaps_timeout(unsigned long data);
58
59DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0);
60
 61#define HIGH_WATER_MARK 250
62struct deferred_flush_tables {
63 int next;
64 struct iova *iova[HIGH_WATER_MARK];
65 struct dmar_domain *domain[HIGH_WATER_MARK];
66};
67
68static struct deferred_flush_tables *deferred_flush;
69
 70/* number of IOMMUs, used to size and index the deferred flush tables */
 71static int g_num_of_iommus;
72
73static DEFINE_SPINLOCK(async_umap_flush_lock);
74static LIST_HEAD(unmaps_to_do);
75
76static int timer_on;
77static long list_size;
 78
79static void domain_remove_dev_info(struct dmar_domain *domain);
80
81static int dmar_disabled;
82static int __initdata dmar_map_gfx = 1;
 83static int dmar_forcedac;
 84static int intel_iommu_strict;
85
86#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
87static DEFINE_SPINLOCK(device_domain_lock);
88static LIST_HEAD(device_domain_list);
89
90static int __init intel_iommu_setup(char *str)
91{
92 if (!str)
93 return -EINVAL;
94 while (*str) {
95 if (!strncmp(str, "off", 3)) {
96 dmar_disabled = 1;
97 printk(KERN_INFO"Intel-IOMMU: disabled\n");
98 } else if (!strncmp(str, "igfx_off", 8)) {
99 dmar_map_gfx = 0;
100 printk(KERN_INFO
101 "Intel-IOMMU: disable GFX device mapping\n");
 102 } else if (!strncmp(str, "forcedac", 8)) {
 103 printk(KERN_INFO
 104 "Intel-IOMMU: Forcing DAC for PCI devices\n");
 105 dmar_forcedac = 1;
 106 } else if (!strncmp(str, "strict", 6)) {
107 printk(KERN_INFO
108 "Intel-IOMMU: disable batched IOTLB flush\n");
109 intel_iommu_strict = 1;
110 }
111
112 str += strcspn(str, ",");
113 while (*str == ',')
114 str++;
115 }
116 return 0;
117}
118__setup("intel_iommu=", intel_iommu_setup);
119
120static struct kmem_cache *iommu_domain_cache;
121static struct kmem_cache *iommu_devinfo_cache;
122static struct kmem_cache *iommu_iova_cache;
123
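/*
 * Allocate from a kmem cache with PF_MEMALLOC temporarily set, so the
 * allocation may dip into the emergency reserves under memory pressure;
 * the caller's original PF_MEMALLOC state is restored before returning.
 */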
124static inline void *iommu_kmem_cache_alloc(struct kmem_cache *cachep)
125{
126 unsigned int flags;
127 void *vaddr;
128
129 /* trying to avoid low memory issues */
130 flags = current->flags & PF_MEMALLOC;
131 current->flags |= PF_MEMALLOC;
132 vaddr = kmem_cache_alloc(cachep, GFP_ATOMIC);
133 current->flags &= (~PF_MEMALLOC | flags);
134 return vaddr;
135}
136
137
138static inline void *alloc_pgtable_page(void)
139{
140 unsigned int flags;
141 void *vaddr;
142
143 /* trying to avoid low memory issues */
144 flags = current->flags & PF_MEMALLOC;
145 current->flags |= PF_MEMALLOC;
146 vaddr = (void *)get_zeroed_page(GFP_ATOMIC);
147 current->flags &= (~PF_MEMALLOC | flags);
148 return vaddr;
149}
150
151static inline void free_pgtable_page(void *vaddr)
152{
153 free_page((unsigned long)vaddr);
154}
155
156static inline void *alloc_domain_mem(void)
157{
 158 return iommu_kmem_cache_alloc(iommu_domain_cache);
159}
160
161static inline void free_domain_mem(void *vaddr)
162{
163 kmem_cache_free(iommu_domain_cache, vaddr);
164}
165
166static inline void * alloc_devinfo_mem(void)
167{
 168 return iommu_kmem_cache_alloc(iommu_devinfo_cache);
169}
170
171static inline void free_devinfo_mem(void *vaddr)
172{
173 kmem_cache_free(iommu_devinfo_cache, vaddr);
174}
175
176struct iova *alloc_iova_mem(void)
177{
 178 return iommu_kmem_cache_alloc(iommu_iova_cache);
179}
180
181void free_iova_mem(struct iova *iova)
182{
183 kmem_cache_free(iommu_iova_cache, iova);
184}
185
186static inline void __iommu_flush_cache(
187 struct intel_iommu *iommu, void *addr, int size)
188{
189 if (!ecap_coherent(iommu->ecap))
190 clflush_cache_range(addr, size);
191}
192
193/* Gets context entry for a given bus and devfn */
194static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
195 u8 bus, u8 devfn)
196{
197 struct root_entry *root;
198 struct context_entry *context;
199 unsigned long phy_addr;
200 unsigned long flags;
201
202 spin_lock_irqsave(&iommu->lock, flags);
203 root = &iommu->root_entry[bus];
204 context = get_context_addr_from_root(root);
205 if (!context) {
206 context = (struct context_entry *)alloc_pgtable_page();
207 if (!context) {
208 spin_unlock_irqrestore(&iommu->lock, flags);
209 return NULL;
210 }
211 __iommu_flush_cache(iommu, (void *)context, PAGE_SIZE_4K);
212 phy_addr = virt_to_phys((void *)context);
213 set_root_value(root, phy_addr);
214 set_root_present(root);
215 __iommu_flush_cache(iommu, root, sizeof(*root));
216 }
217 spin_unlock_irqrestore(&iommu->lock, flags);
218 return &context[devfn];
219}
220
221static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
222{
223 struct root_entry *root;
224 struct context_entry *context;
225 int ret;
226 unsigned long flags;
227
228 spin_lock_irqsave(&iommu->lock, flags);
229 root = &iommu->root_entry[bus];
230 context = get_context_addr_from_root(root);
231 if (!context) {
232 ret = 0;
233 goto out;
234 }
235 ret = context_present(context[devfn]);
236out:
237 spin_unlock_irqrestore(&iommu->lock, flags);
238 return ret;
239}
240
241static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
242{
243 struct root_entry *root;
244 struct context_entry *context;
245 unsigned long flags;
246
247 spin_lock_irqsave(&iommu->lock, flags);
248 root = &iommu->root_entry[bus];
249 context = get_context_addr_from_root(root);
250 if (context) {
251 context_clear_entry(context[devfn]);
252 __iommu_flush_cache(iommu, &context[devfn], \
253 sizeof(*context));
254 }
255 spin_unlock_irqrestore(&iommu->lock, flags);
256}
257
258static void free_context_table(struct intel_iommu *iommu)
259{
260 struct root_entry *root;
261 int i;
262 unsigned long flags;
263 struct context_entry *context;
264
265 spin_lock_irqsave(&iommu->lock, flags);
266 if (!iommu->root_entry) {
267 goto out;
268 }
269 for (i = 0; i < ROOT_ENTRY_NR; i++) {
270 root = &iommu->root_entry[i];
271 context = get_context_addr_from_root(root);
272 if (context)
273 free_pgtable_page(context);
274 }
275 free_pgtable_page(iommu->root_entry);
276 iommu->root_entry = NULL;
277out:
278 spin_unlock_irqrestore(&iommu->lock, flags);
279}
280
281/* page table handling */
282#define LEVEL_STRIDE (9)
283#define LEVEL_MASK (((u64)1 << LEVEL_STRIDE) - 1)
284
285static inline int agaw_to_level(int agaw)
286{
287 return agaw + 2;
288}
289
290static inline int agaw_to_width(int agaw)
291{
292 return 30 + agaw * LEVEL_STRIDE;
293
294}
295
296static inline int width_to_agaw(int width)
297{
298 return (width - 30) / LEVEL_STRIDE;
299}
300
301static inline unsigned int level_to_offset_bits(int level)
302{
303 return (12 + (level - 1) * LEVEL_STRIDE);
304}
305
306static inline int address_level_offset(u64 addr, int level)
307{
308 return ((addr >> level_to_offset_bits(level)) & LEVEL_MASK);
309}
310
311static inline u64 level_mask(int level)
312{
313 return ((u64)-1 << level_to_offset_bits(level));
314}
315
316static inline u64 level_size(int level)
317{
318 return ((u64)1 << level_to_offset_bits(level));
319}
320
321static inline u64 align_to_level(u64 addr, int level)
322{
323 return ((addr + level_size(level) - 1) & level_mask(level));
324}
325
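/*
 * Walk the domain's page table from the top level down to the 4K leaf
 * entry for @addr, allocating (and cache-flushing) intermediate table
 * pages on demand.  Returns the leaf PTE, or NULL on allocation failure.
 */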
326static struct dma_pte * addr_to_dma_pte(struct dmar_domain *domain, u64 addr)
327{
328 int addr_width = agaw_to_width(domain->agaw);
329 struct dma_pte *parent, *pte = NULL;
330 int level = agaw_to_level(domain->agaw);
331 int offset;
332 unsigned long flags;
333
334 BUG_ON(!domain->pgd);
335
336 addr &= (((u64)1) << addr_width) - 1;
337 parent = domain->pgd;
338
339 spin_lock_irqsave(&domain->mapping_lock, flags);
340 while (level > 0) {
341 void *tmp_page;
342
343 offset = address_level_offset(addr, level);
344 pte = &parent[offset];
345 if (level == 1)
346 break;
347
348 if (!dma_pte_present(*pte)) {
349 tmp_page = alloc_pgtable_page();
350
351 if (!tmp_page) {
352 spin_unlock_irqrestore(&domain->mapping_lock,
353 flags);
354 return NULL;
355 }
356 __iommu_flush_cache(domain->iommu, tmp_page,
357 PAGE_SIZE_4K);
358 dma_set_pte_addr(*pte, virt_to_phys(tmp_page));
359 /*
360 * high level table always sets r/w, last level page
361 * table control read/write
362 */
363 dma_set_pte_readable(*pte);
364 dma_set_pte_writable(*pte);
365 __iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
366 }
367 parent = phys_to_virt(dma_pte_addr(*pte));
368 level--;
369 }
370
371 spin_unlock_irqrestore(&domain->mapping_lock, flags);
372 return pte;
373}
374
375/* return address's pte at specific level */
376static struct dma_pte *dma_addr_level_pte(struct dmar_domain *domain, u64 addr,
377 int level)
378{
379 struct dma_pte *parent, *pte = NULL;
380 int total = agaw_to_level(domain->agaw);
381 int offset;
382
383 parent = domain->pgd;
384 while (level <= total) {
385 offset = address_level_offset(addr, total);
386 pte = &parent[offset];
387 if (level == total)
388 return pte;
389
390 if (!dma_pte_present(*pte))
391 break;
392 parent = phys_to_virt(dma_pte_addr(*pte));
393 total--;
394 }
395 return NULL;
396}
397
398/* clear one page's page table */
399static void dma_pte_clear_one(struct dmar_domain *domain, u64 addr)
400{
401 struct dma_pte *pte = NULL;
402
403 /* get last level pte */
404 pte = dma_addr_level_pte(domain, addr, 1);
405
406 if (pte) {
407 dma_clear_pte(*pte);
408 __iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
409 }
410}
411
412/* clear last level pte, a tlb flush should be followed */
413static void dma_pte_clear_range(struct dmar_domain *domain, u64 start, u64 end)
414{
415 int addr_width = agaw_to_width(domain->agaw);
416
417 start &= (((u64)1) << addr_width) - 1;
418 end &= (((u64)1) << addr_width) - 1;
419 /* in case it's partial page */
420 start = PAGE_ALIGN_4K(start);
421 end &= PAGE_MASK_4K;
422
423 /* we don't need lock here, nobody else touches the iova range */
424 while (start < end) {
425 dma_pte_clear_one(domain, start);
426 start += PAGE_SIZE_4K;
427 }
428}
429
430/* free page table pages. last level pte should already be cleared */
431static void dma_pte_free_pagetable(struct dmar_domain *domain,
432 u64 start, u64 end)
433{
434 int addr_width = agaw_to_width(domain->agaw);
435 struct dma_pte *pte;
436 int total = agaw_to_level(domain->agaw);
437 int level;
438 u64 tmp;
439
440 start &= (((u64)1) << addr_width) - 1;
441 end &= (((u64)1) << addr_width) - 1;
442
443 /* we don't need lock here, nobody else touches the iova range */
444 level = 2;
445 while (level <= total) {
446 tmp = align_to_level(start, level);
447 if (tmp >= end || (tmp + level_size(level) > end))
448 return;
449
450 while (tmp < end) {
451 pte = dma_addr_level_pte(domain, tmp, level);
452 if (pte) {
453 free_pgtable_page(
454 phys_to_virt(dma_pte_addr(*pte)));
455 dma_clear_pte(*pte);
456 __iommu_flush_cache(domain->iommu,
457 pte, sizeof(*pte));
458 }
459 tmp += level_size(level);
460 }
461 level++;
462 }
463 /* free pgd */
464 if (start == 0 && end >= ((((u64)1) << addr_width) - 1)) {
465 free_pgtable_page(domain->pgd);
466 domain->pgd = NULL;
467 }
468}
469
470/* iommu handling */
471static int iommu_alloc_root_entry(struct intel_iommu *iommu)
472{
473 struct root_entry *root;
474 unsigned long flags;
475
476 root = (struct root_entry *)alloc_pgtable_page();
477 if (!root)
478 return -ENOMEM;
479
480 __iommu_flush_cache(iommu, root, PAGE_SIZE_4K);
481
482 spin_lock_irqsave(&iommu->lock, flags);
483 iommu->root_entry = root;
484 spin_unlock_irqrestore(&iommu->lock, flags);
485
486 return 0;
487}
488
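/*
 * Poll an IOMMU register with "op" until "cond" becomes true, panicking
 * if the hardware does not respond within DMAR_OPERATION_TIMEOUT.
 */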
489#define IOMMU_WAIT_OP(iommu, offset, op, cond, sts) \
490{\
 491 cycles_t start_time = get_cycles();\
492 while (1) {\
493 sts = op (iommu->reg + offset);\
494 if (cond)\
495 break;\
 496 if (DMAR_OPERATION_TIMEOUT < (get_cycles() - start_time))\
497 panic("DMAR hardware is malfunctioning\n");\
498 cpu_relax();\
499 }\
500}
501
502static void iommu_set_root_entry(struct intel_iommu *iommu)
503{
504 void *addr;
505 u32 cmd, sts;
506 unsigned long flag;
507
508 addr = iommu->root_entry;
509
510 spin_lock_irqsave(&iommu->register_lock, flag);
511 dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));
512
513 cmd = iommu->gcmd | DMA_GCMD_SRTP;
514 writel(cmd, iommu->reg + DMAR_GCMD_REG);
515
516 /* Make sure hardware complete it */
517 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
518 readl, (sts & DMA_GSTS_RTPS), sts);
519
520 spin_unlock_irqrestore(&iommu->register_lock, flag);
521}
522
523static void iommu_flush_write_buffer(struct intel_iommu *iommu)
524{
525 u32 val;
526 unsigned long flag;
527
528 if (!cap_rwbf(iommu->cap))
529 return;
530 val = iommu->gcmd | DMA_GCMD_WBF;
531
532 spin_lock_irqsave(&iommu->register_lock, flag);
533 writel(val, iommu->reg + DMAR_GCMD_REG);
534
535 /* Make sure hardware complete it */
536 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
537 readl, (!(val & DMA_GSTS_WBFS)), val);
538
539 spin_unlock_irqrestore(&iommu->register_lock, flag);
540}
541
 542/* return value determines whether we need a write buffer flush */
543static int __iommu_flush_context(struct intel_iommu *iommu,
544 u16 did, u16 source_id, u8 function_mask, u64 type,
545 int non_present_entry_flush)
546{
547 u64 val = 0;
548 unsigned long flag;
549
550 /*
551 * In the non-present entry flush case, if hardware doesn't cache
552 * non-present entry we do nothing and if hardware cache non-present
553 * entry, we flush entries of domain 0 (the domain id is used to cache
554 * any non-present entries)
555 */
556 if (non_present_entry_flush) {
557 if (!cap_caching_mode(iommu->cap))
558 return 1;
559 else
560 did = 0;
561 }
562
563 switch (type) {
564 case DMA_CCMD_GLOBAL_INVL:
565 val = DMA_CCMD_GLOBAL_INVL;
566 break;
567 case DMA_CCMD_DOMAIN_INVL:
568 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
569 break;
570 case DMA_CCMD_DEVICE_INVL:
571 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
572 | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
573 break;
574 default:
575 BUG();
576 }
577 val |= DMA_CCMD_ICC;
578
579 spin_lock_irqsave(&iommu->register_lock, flag);
580 dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
581
582 /* Make sure hardware complete it */
583 IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
584 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
585
586 spin_unlock_irqrestore(&iommu->register_lock, flag);
587
 588 /* flush context entry will implicitly flush write buffer */
589 return 0;
590}
591
592static int inline iommu_flush_context_global(struct intel_iommu *iommu,
593 int non_present_entry_flush)
594{
595 return __iommu_flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL,
596 non_present_entry_flush);
597}
598
599static int inline iommu_flush_context_domain(struct intel_iommu *iommu, u16 did,
600 int non_present_entry_flush)
601{
602 return __iommu_flush_context(iommu, did, 0, 0, DMA_CCMD_DOMAIN_INVL,
603 non_present_entry_flush);
604}
605
606static int inline iommu_flush_context_device(struct intel_iommu *iommu,
607 u16 did, u16 source_id, u8 function_mask, int non_present_entry_flush)
608{
609 return __iommu_flush_context(iommu, did, source_id, function_mask,
610 DMA_CCMD_DEVICE_INVL, non_present_entry_flush);
611}
612
 613/* return value determines whether we need a write buffer flush */
614static int __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
615 u64 addr, unsigned int size_order, u64 type,
616 int non_present_entry_flush)
617{
618 int tlb_offset = ecap_iotlb_offset(iommu->ecap);
619 u64 val = 0, val_iva = 0;
620 unsigned long flag;
621
622 /*
623 * In the non-present entry flush case, if hardware doesn't cache
624 * non-present entry we do nothing and if hardware cache non-present
625 * entry, we flush entries of domain 0 (the domain id is used to cache
626 * any non-present entries)
627 */
628 if (non_present_entry_flush) {
629 if (!cap_caching_mode(iommu->cap))
630 return 1;
631 else
632 did = 0;
633 }
634
635 switch (type) {
636 case DMA_TLB_GLOBAL_FLUSH:
637 /* global flush doesn't need set IVA_REG */
638 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
639 break;
640 case DMA_TLB_DSI_FLUSH:
641 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
642 break;
643 case DMA_TLB_PSI_FLUSH:
644 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
645 /* Note: always flush non-leaf currently */
646 val_iva = size_order | addr;
647 break;
648 default:
649 BUG();
650 }
651 /* Note: set drain read/write */
652#if 0
653 /*
654 * This is probably to be super secure.. Looks like we can
655 * ignore it without any impact.
656 */
657 if (cap_read_drain(iommu->cap))
658 val |= DMA_TLB_READ_DRAIN;
659#endif
660 if (cap_write_drain(iommu->cap))
661 val |= DMA_TLB_WRITE_DRAIN;
662
663 spin_lock_irqsave(&iommu->register_lock, flag);
664 /* Note: Only uses first TLB reg currently */
665 if (val_iva)
666 dmar_writeq(iommu->reg + tlb_offset, val_iva);
667 dmar_writeq(iommu->reg + tlb_offset + 8, val);
668
669 /* Make sure hardware complete it */
670 IOMMU_WAIT_OP(iommu, tlb_offset + 8,
671 dmar_readq, (!(val & DMA_TLB_IVT)), val);
672
673 spin_unlock_irqrestore(&iommu->register_lock, flag);
674
675 /* check IOTLB invalidation granularity */
676 if (DMA_TLB_IAIG(val) == 0)
677 printk(KERN_ERR"IOMMU: flush IOTLB failed\n");
678 if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
679 pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
680 DMA_TLB_IIRG(type), DMA_TLB_IAIG(val));
 681 /* flushing the IOTLB implicitly flushes the write buffer */
682 return 0;
683}
684
685static int inline iommu_flush_iotlb_global(struct intel_iommu *iommu,
686 int non_present_entry_flush)
687{
688 return __iommu_flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH,
689 non_present_entry_flush);
690}
691
692static int inline iommu_flush_iotlb_dsi(struct intel_iommu *iommu, u16 did,
693 int non_present_entry_flush)
694{
695 return __iommu_flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH,
696 non_present_entry_flush);
697}
698
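/*
 * Page-selective IOTLB invalidation: the page count is rounded up to a
 * power of two to fit the hardware address-mask encoding.  Falls back to
 * a domain-selective flush when PSI is unsupported or the range is too
 * large for the mask.
 */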
699static int iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
700 u64 addr, unsigned int pages, int non_present_entry_flush)
701{
 702 unsigned int mask;
703
704 BUG_ON(addr & (~PAGE_MASK_4K));
705 BUG_ON(pages == 0);
706
707 /* Fallback to domain selective flush if no PSI support */
708 if (!cap_pgsel_inv(iommu->cap))
709 return iommu_flush_iotlb_dsi(iommu, did,
710 non_present_entry_flush);
711
712 /*
713 * PSI requires page size to be 2 ^ x, and the base address is naturally
714 * aligned to the size
715 */
 716 mask = ilog2(__roundup_pow_of_two(pages));
 717 /* Fallback to domain selective flush if size is too big */
 718 if (mask > cap_max_amask_val(iommu->cap))
719 return iommu_flush_iotlb_dsi(iommu, did,
720 non_present_entry_flush);
721
 722 return __iommu_flush_iotlb(iommu, did, addr, mask,
723 DMA_TLB_PSI_FLUSH, non_present_entry_flush);
724}
725
 726static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
727{
728 u32 pmen;
729 unsigned long flags;
730
731 spin_lock_irqsave(&iommu->register_lock, flags);
732 pmen = readl(iommu->reg + DMAR_PMEN_REG);
733 pmen &= ~DMA_PMEN_EPM;
734 writel(pmen, iommu->reg + DMAR_PMEN_REG);
735
736 /* wait for the protected region status bit to clear */
737 IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
738 readl, !(pmen & DMA_PMEN_PRS), pmen);
739
740 spin_unlock_irqrestore(&iommu->register_lock, flags);
741}
742
743static int iommu_enable_translation(struct intel_iommu *iommu)
744{
745 u32 sts;
746 unsigned long flags;
747
748 spin_lock_irqsave(&iommu->register_lock, flags);
749 writel(iommu->gcmd|DMA_GCMD_TE, iommu->reg + DMAR_GCMD_REG);
750
751 /* Make sure hardware complete it */
752 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
753 readl, (sts & DMA_GSTS_TES), sts);
754
755 iommu->gcmd |= DMA_GCMD_TE;
756 spin_unlock_irqrestore(&iommu->register_lock, flags);
757 return 0;
758}
759
760static int iommu_disable_translation(struct intel_iommu *iommu)
761{
762 u32 sts;
763 unsigned long flag;
764
765 spin_lock_irqsave(&iommu->register_lock, flag);
766 iommu->gcmd &= ~DMA_GCMD_TE;
767 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
768
769 /* Make sure hardware complete it */
770 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
771 readl, (!(sts & DMA_GSTS_TES)), sts);
772
773 spin_unlock_irqrestore(&iommu->register_lock, flag);
774 return 0;
775}
776
 777/* iommu interrupt handling. Most of it is MSI-like. */
778
 779static const char *fault_reason_strings[] =
780{
781 "Software",
782 "Present bit in root entry is clear",
783 "Present bit in context entry is clear",
784 "Invalid context entry",
785 "Access beyond MGAW",
786 "PTE Write access is not set",
787 "PTE Read access is not set",
788 "Next page table ptr is invalid",
789 "Root table address invalid",
790 "Context table ptr is invalid",
791 "non-zero reserved fields in RTP",
792 "non-zero reserved fields in CTP",
793 "non-zero reserved fields in PTE",
 794};
 795#define MAX_FAULT_REASON_IDX (ARRAY_SIZE(fault_reason_strings) - 1)
 796
 797const char *dmar_get_fault_reason(u8 fault_reason)
 798{
 799 if (fault_reason > MAX_FAULT_REASON_IDX)
800 return "Unknown";
801 else
802 return fault_reason_strings[fault_reason];
803}
804
805void dmar_msi_unmask(unsigned int irq)
806{
807 struct intel_iommu *iommu = get_irq_data(irq);
808 unsigned long flag;
809
810 /* unmask it */
811 spin_lock_irqsave(&iommu->register_lock, flag);
812 writel(0, iommu->reg + DMAR_FECTL_REG);
813 /* Read a reg to force flush the post write */
814 readl(iommu->reg + DMAR_FECTL_REG);
815 spin_unlock_irqrestore(&iommu->register_lock, flag);
816}
817
818void dmar_msi_mask(unsigned int irq)
819{
820 unsigned long flag;
821 struct intel_iommu *iommu = get_irq_data(irq);
822
823 /* mask it */
824 spin_lock_irqsave(&iommu->register_lock, flag);
825 writel(DMA_FECTL_IM, iommu->reg + DMAR_FECTL_REG);
826 /* Read a reg to force flush the post write */
827 readl(iommu->reg + DMAR_FECTL_REG);
828 spin_unlock_irqrestore(&iommu->register_lock, flag);
829}
830
831void dmar_msi_write(int irq, struct msi_msg *msg)
832{
833 struct intel_iommu *iommu = get_irq_data(irq);
834 unsigned long flag;
835
836 spin_lock_irqsave(&iommu->register_lock, flag);
837 writel(msg->data, iommu->reg + DMAR_FEDATA_REG);
838 writel(msg->address_lo, iommu->reg + DMAR_FEADDR_REG);
839 writel(msg->address_hi, iommu->reg + DMAR_FEUADDR_REG);
840 spin_unlock_irqrestore(&iommu->register_lock, flag);
841}
842
843void dmar_msi_read(int irq, struct msi_msg *msg)
844{
845 struct intel_iommu *iommu = get_irq_data(irq);
846 unsigned long flag;
847
848 spin_lock_irqsave(&iommu->register_lock, flag);
849 msg->data = readl(iommu->reg + DMAR_FEDATA_REG);
850 msg->address_lo = readl(iommu->reg + DMAR_FEADDR_REG);
851 msg->address_hi = readl(iommu->reg + DMAR_FEUADDR_REG);
852 spin_unlock_irqrestore(&iommu->register_lock, flag);
853}
854
855static int iommu_page_fault_do_one(struct intel_iommu *iommu, int type,
856 u8 fault_reason, u16 source_id, u64 addr)
857{
 858 const char *reason;
859
860 reason = dmar_get_fault_reason(fault_reason);
861
862 printk(KERN_ERR
863 "DMAR:[%s] Request device [%02x:%02x.%d] "
864 "fault addr %llx \n"
865 "DMAR:[fault reason %02d] %s\n",
866 (type ? "DMA Read" : "DMA Write"),
867 (source_id >> 8), PCI_SLOT(source_id & 0xFF),
868 PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason);
869 return 0;
870}
871
872#define PRIMARY_FAULT_REG_LEN (16)
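/*
 * Fault interrupt handler: walk the primary fault recording registers,
 * log and clear each pending fault, then clear the primary fault
 * overflow bit if it is set.
 */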
873static irqreturn_t iommu_page_fault(int irq, void *dev_id)
874{
875 struct intel_iommu *iommu = dev_id;
876 int reg, fault_index;
877 u32 fault_status;
878 unsigned long flag;
879
880 spin_lock_irqsave(&iommu->register_lock, flag);
881 fault_status = readl(iommu->reg + DMAR_FSTS_REG);
882
883 /* TBD: ignore advanced fault log currently */
884 if (!(fault_status & DMA_FSTS_PPF))
885 goto clear_overflow;
886
887 fault_index = dma_fsts_fault_record_index(fault_status);
888 reg = cap_fault_reg_offset(iommu->cap);
889 while (1) {
890 u8 fault_reason;
891 u16 source_id;
892 u64 guest_addr;
893 int type;
894 u32 data;
895
896 /* highest 32 bits */
897 data = readl(iommu->reg + reg +
898 fault_index * PRIMARY_FAULT_REG_LEN + 12);
899 if (!(data & DMA_FRCD_F))
900 break;
901
902 fault_reason = dma_frcd_fault_reason(data);
903 type = dma_frcd_type(data);
904
905 data = readl(iommu->reg + reg +
906 fault_index * PRIMARY_FAULT_REG_LEN + 8);
907 source_id = dma_frcd_source_id(data);
908
909 guest_addr = dmar_readq(iommu->reg + reg +
910 fault_index * PRIMARY_FAULT_REG_LEN);
911 guest_addr = dma_frcd_page_addr(guest_addr);
912 /* clear the fault */
913 writel(DMA_FRCD_F, iommu->reg + reg +
914 fault_index * PRIMARY_FAULT_REG_LEN + 12);
915
916 spin_unlock_irqrestore(&iommu->register_lock, flag);
917
918 iommu_page_fault_do_one(iommu, type, fault_reason,
919 source_id, guest_addr);
920
921 fault_index++;
922 if (fault_index > cap_num_fault_regs(iommu->cap))
923 fault_index = 0;
924 spin_lock_irqsave(&iommu->register_lock, flag);
925 }
926clear_overflow:
927 /* clear primary fault overflow */
928 fault_status = readl(iommu->reg + DMAR_FSTS_REG);
929 if (fault_status & DMA_FSTS_PFO)
930 writel(DMA_FSTS_PFO, iommu->reg + DMAR_FSTS_REG);
931
932 spin_unlock_irqrestore(&iommu->register_lock, flag);
933 return IRQ_HANDLED;
934}
935
936int dmar_set_interrupt(struct intel_iommu *iommu)
937{
938 int irq, ret;
939
940 irq = create_irq();
941 if (!irq) {
942 printk(KERN_ERR "IOMMU: no free vectors\n");
943 return -EINVAL;
944 }
945
946 set_irq_data(irq, iommu);
947 iommu->irq = irq;
948
949 ret = arch_setup_dmar_msi(irq);
950 if (ret) {
951 set_irq_data(irq, NULL);
952 iommu->irq = 0;
953 destroy_irq(irq);
954 return 0;
955 }
956
957 /* Force fault register is cleared */
958 iommu_page_fault(irq, iommu);
959
960 ret = request_irq(irq, iommu_page_fault, 0, iommu->name, iommu);
961 if (ret)
962 printk(KERN_ERR "IOMMU: can't request irq\n");
963 return ret;
964}
965
966static int iommu_init_domains(struct intel_iommu *iommu)
967{
968 unsigned long ndomains;
969 unsigned long nlongs;
970
971 ndomains = cap_ndoms(iommu->cap);
 972 pr_debug("Number of Domains supported <%ld>\n", ndomains);
973 nlongs = BITS_TO_LONGS(ndomains);
974
975 /* TBD: there might be 64K domains,
976 * consider other allocation for future chip
977 */
978 iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
979 if (!iommu->domain_ids) {
980 printk(KERN_ERR "Allocating domain id array failed\n");
981 return -ENOMEM;
982 }
983 iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *),
984 GFP_KERNEL);
985 if (!iommu->domains) {
986 printk(KERN_ERR "Allocating domain array failed\n");
987 kfree(iommu->domain_ids);
988 return -ENOMEM;
989 }
990
991 spin_lock_init(&iommu->lock);
992
993 /*
994 * if Caching mode is set, then invalid translations are tagged
995 * with domainid 0. Hence we need to pre-allocate it.
996 */
997 if (cap_caching_mode(iommu->cap))
998 set_bit(0, iommu->domain_ids);
999 return 0;
1000}
 1001
1002
1003static void domain_exit(struct dmar_domain *domain);
1004
1005void free_dmar_iommu(struct intel_iommu *iommu)
1006{
1007 struct dmar_domain *domain;
1008 int i;
1009
1010 i = find_first_bit(iommu->domain_ids, cap_ndoms(iommu->cap));
1011 for (; i < cap_ndoms(iommu->cap); ) {
1012 domain = iommu->domains[i];
1013 clear_bit(i, iommu->domain_ids);
1014 domain_exit(domain);
1015 i = find_next_bit(iommu->domain_ids,
1016 cap_ndoms(iommu->cap), i+1);
1017 }
1018
1019 if (iommu->gcmd & DMA_GCMD_TE)
1020 iommu_disable_translation(iommu);
1021
1022 if (iommu->irq) {
1023 set_irq_data(iommu->irq, NULL);
1024 /* This will mask the irq */
1025 free_irq(iommu->irq, iommu);
1026 destroy_irq(iommu->irq);
1027 }
1028
1029 kfree(iommu->domains);
1030 kfree(iommu->domain_ids);
1031
1032 /* free context mapping */
1033 free_context_table(iommu);
1034}
1035
1036static struct dmar_domain * iommu_alloc_domain(struct intel_iommu *iommu)
1037{
1038 unsigned long num;
1039 unsigned long ndomains;
1040 struct dmar_domain *domain;
1041 unsigned long flags;
1042
1043 domain = alloc_domain_mem();
1044 if (!domain)
1045 return NULL;
1046
1047 ndomains = cap_ndoms(iommu->cap);
1048
1049 spin_lock_irqsave(&iommu->lock, flags);
1050 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1051 if (num >= ndomains) {
1052 spin_unlock_irqrestore(&iommu->lock, flags);
1053 free_domain_mem(domain);
1054 printk(KERN_ERR "IOMMU: no free domain ids\n");
1055 return NULL;
1056 }
1057
1058 set_bit(num, iommu->domain_ids);
1059 domain->id = num;
1060 domain->iommu = iommu;
1061 iommu->domains[num] = domain;
1062 spin_unlock_irqrestore(&iommu->lock, flags);
1063
1064 return domain;
1065}
1066
1067static void iommu_free_domain(struct dmar_domain *domain)
1068{
1069 unsigned long flags;
1070
1071 spin_lock_irqsave(&domain->iommu->lock, flags);
1072 clear_bit(domain->id, domain->iommu->domain_ids);
1073 spin_unlock_irqrestore(&domain->iommu->lock, flags);
1074}
1075
1076static struct iova_domain reserved_iova_list;
1077static struct lock_class_key reserved_alloc_key;
1078static struct lock_class_key reserved_rbtree_key;
1079
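/*
 * Build the global list of IOVA ranges that must never be handed out for
 * DMA: the IOAPIC window and every PCI MMIO resource, so that remapped
 * DMA cannot land on MMIO (peer-to-peer) addresses.
 */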
1080static void dmar_init_reserved_ranges(void)
1081{
1082 struct pci_dev *pdev = NULL;
1083 struct iova *iova;
1084 int i;
1085 u64 addr, size;
1086
 1087 init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN);
 1088
1089 lockdep_set_class(&reserved_iova_list.iova_alloc_lock,
1090 &reserved_alloc_key);
1091 lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1092 &reserved_rbtree_key);
1093
1094 /* IOAPIC ranges shouldn't be accessed by DMA */
1095 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1096 IOVA_PFN(IOAPIC_RANGE_END));
1097 if (!iova)
1098 printk(KERN_ERR "Reserve IOAPIC range failed\n");
1099
1100 /* Reserve all PCI MMIO to avoid peer-to-peer access */
1101 for_each_pci_dev(pdev) {
1102 struct resource *r;
1103
1104 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1105 r = &pdev->resource[i];
1106 if (!r->flags || !(r->flags & IORESOURCE_MEM))
1107 continue;
1108 addr = r->start;
1109 addr &= PAGE_MASK_4K;
1110 size = r->end - addr;
1111 size = PAGE_ALIGN_4K(size);
1112 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(addr),
1113 IOVA_PFN(size + addr) - 1);
1114 if (!iova)
1115 printk(KERN_ERR "Reserve iova failed\n");
1116 }
1117 }
1118
1119}
1120
1121static void domain_reserve_special_ranges(struct dmar_domain *domain)
1122{
1123 copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1124}
1125
1126static inline int guestwidth_to_adjustwidth(int gaw)
1127{
1128 int agaw;
1129 int r = (gaw - 12) % 9;
1130
1131 if (r == 0)
1132 agaw = gaw;
1133 else
1134 agaw = gaw + 9 - r;
1135 if (agaw > 64)
1136 agaw = 64;
1137 return agaw;
1138}
1139
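/*
 * Initialize a freshly allocated domain: set up its IOVA allocator and
 * reserved ranges, pick an adjusted guest address width (AGAW) that the
 * IOMMU supports, and allocate the top-level page directory.
 */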
1140static int domain_init(struct dmar_domain *domain, int guest_width)
1141{
1142 struct intel_iommu *iommu;
1143 int adjust_width, agaw;
1144 unsigned long sagaw;
1145
 1146 init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
1147 spin_lock_init(&domain->mapping_lock);
1148
1149 domain_reserve_special_ranges(domain);
1150
1151 /* calculate AGAW */
1152 iommu = domain->iommu;
1153 if (guest_width > cap_mgaw(iommu->cap))
1154 guest_width = cap_mgaw(iommu->cap);
1155 domain->gaw = guest_width;
1156 adjust_width = guestwidth_to_adjustwidth(guest_width);
1157 agaw = width_to_agaw(adjust_width);
1158 sagaw = cap_sagaw(iommu->cap);
1159 if (!test_bit(agaw, &sagaw)) {
1160 /* hardware doesn't support it, choose a bigger one */
1161 pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw);
1162 agaw = find_next_bit(&sagaw, 5, agaw);
1163 if (agaw >= 5)
1164 return -ENODEV;
1165 }
1166 domain->agaw = agaw;
1167 INIT_LIST_HEAD(&domain->devices);
1168
1169 /* always allocate the top pgd */
1170 domain->pgd = (struct dma_pte *)alloc_pgtable_page();
1171 if (!domain->pgd)
1172 return -ENOMEM;
1173 __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE_4K);
1174 return 0;
1175}
1176
1177static void domain_exit(struct dmar_domain *domain)
1178{
1179 u64 end;
1180
 1181 /* Domain 0 is reserved, so don't process it */
1182 if (!domain)
1183 return;
1184
1185 domain_remove_dev_info(domain);
1186 /* destroy iovas */
1187 put_iova_domain(&domain->iovad);
1188 end = DOMAIN_MAX_ADDR(domain->gaw);
1189 end = end & (~PAGE_MASK_4K);
1190
1191 /* clear ptes */
1192 dma_pte_clear_range(domain, 0, end);
1193
1194 /* free page tables */
1195 dma_pte_free_pagetable(domain, 0, end);
1196
1197 iommu_free_domain(domain);
1198 free_domain_mem(domain);
1199}
1200
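/*
 * Install the context entry for (bus, devfn) so it points at this
 * domain's page table.  As this is a not-present to present change,
 * either the write buffer is flushed or the context/IOTLB caches are
 * invalidated, depending on what the hardware caches.
 */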
1201static int domain_context_mapping_one(struct dmar_domain *domain,
1202 u8 bus, u8 devfn)
1203{
1204 struct context_entry *context;
1205 struct intel_iommu *iommu = domain->iommu;
1206 unsigned long flags;
1207
1208 pr_debug("Set context mapping for %02x:%02x.%d\n",
1209 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1210 BUG_ON(!domain->pgd);
1211 context = device_to_context_entry(iommu, bus, devfn);
1212 if (!context)
1213 return -ENOMEM;
1214 spin_lock_irqsave(&iommu->lock, flags);
1215 if (context_present(*context)) {
1216 spin_unlock_irqrestore(&iommu->lock, flags);
1217 return 0;
1218 }
1219
1220 context_set_domain_id(*context, domain->id);
1221 context_set_address_width(*context, domain->agaw);
1222 context_set_address_root(*context, virt_to_phys(domain->pgd));
1223 context_set_translation_type(*context, CONTEXT_TT_MULTI_LEVEL);
1224 context_set_fault_enable(*context);
1225 context_set_present(*context);
1226 __iommu_flush_cache(iommu, context, sizeof(*context));
1227
1228 /* it's a non-present to present mapping */
1229 if (iommu_flush_context_device(iommu, domain->id,
1230 (((u16)bus) << 8) | devfn, DMA_CCMD_MASK_NOBIT, 1))
1231 iommu_flush_write_buffer(iommu);
1232 else
1233 iommu_flush_iotlb_dsi(iommu, 0, 0);
1234 spin_unlock_irqrestore(&iommu->lock, flags);
1235 return 0;
1236}
1237
1238static int
1239domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev)
1240{
1241 int ret;
1242 struct pci_dev *tmp, *parent;
1243
1244 ret = domain_context_mapping_one(domain, pdev->bus->number,
1245 pdev->devfn);
1246 if (ret)
1247 return ret;
1248
1249 /* dependent device mapping */
1250 tmp = pci_find_upstream_pcie_bridge(pdev);
1251 if (!tmp)
1252 return 0;
1253 /* Secondary interface's bus number and devfn 0 */
1254 parent = pdev->bus->self;
1255 while (parent != tmp) {
1256 ret = domain_context_mapping_one(domain, parent->bus->number,
1257 parent->devfn);
1258 if (ret)
1259 return ret;
1260 parent = parent->bus->self;
1261 }
1262 if (tmp->is_pcie) /* this is a PCIE-to-PCI bridge */
1263 return domain_context_mapping_one(domain,
1264 tmp->subordinate->number, 0);
1265 else /* this is a legacy PCI bridge */
1266 return domain_context_mapping_one(domain,
1267 tmp->bus->number, tmp->devfn);
1268}
1269
1270static int domain_context_mapped(struct dmar_domain *domain,
1271 struct pci_dev *pdev)
1272{
1273 int ret;
1274 struct pci_dev *tmp, *parent;
1275
1276 ret = device_context_mapped(domain->iommu,
1277 pdev->bus->number, pdev->devfn);
1278 if (!ret)
1279 return ret;
1280 /* dependent device mapping */
1281 tmp = pci_find_upstream_pcie_bridge(pdev);
1282 if (!tmp)
1283 return ret;
1284 /* Secondary interface's bus number and devfn 0 */
1285 parent = pdev->bus->self;
1286 while (parent != tmp) {
1287 ret = device_context_mapped(domain->iommu, parent->bus->number,
1288 parent->devfn);
1289 if (!ret)
1290 return ret;
1291 parent = parent->bus->self;
1292 }
1293 if (tmp->is_pcie)
1294 return device_context_mapped(domain->iommu,
1295 tmp->subordinate->number, 0);
1296 else
1297 return device_context_mapped(domain->iommu,
1298 tmp->bus->number, tmp->devfn);
1299}
1300
1301static int
1302domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova,
1303 u64 hpa, size_t size, int prot)
1304{
1305 u64 start_pfn, end_pfn;
1306 struct dma_pte *pte;
1307 int index;
1308
1309 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
1310 return -EINVAL;
1311 iova &= PAGE_MASK_4K;
1312 start_pfn = ((u64)hpa) >> PAGE_SHIFT_4K;
1313 end_pfn = (PAGE_ALIGN_4K(((u64)hpa) + size)) >> PAGE_SHIFT_4K;
1314 index = 0;
1315 while (start_pfn < end_pfn) {
1316 pte = addr_to_dma_pte(domain, iova + PAGE_SIZE_4K * index);
1317 if (!pte)
1318 return -ENOMEM;
1319 /* We don't need lock here, nobody else
1320 * touches the iova range
1321 */
1322 BUG_ON(dma_pte_addr(*pte));
1323 dma_set_pte_addr(*pte, start_pfn << PAGE_SHIFT_4K);
1324 dma_set_pte_prot(*pte, prot);
1325 __iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
1326 start_pfn++;
1327 index++;
1328 }
1329 return 0;
1330}
1331
1332static void detach_domain_for_dev(struct dmar_domain *domain, u8 bus, u8 devfn)
1333{
1334 clear_context_table(domain->iommu, bus, devfn);
1335 iommu_flush_context_global(domain->iommu, 0);
1336 iommu_flush_iotlb_global(domain->iommu, 0);
1337}
1338
1339static void domain_remove_dev_info(struct dmar_domain *domain)
1340{
1341 struct device_domain_info *info;
1342 unsigned long flags;
1343
1344 spin_lock_irqsave(&device_domain_lock, flags);
1345 while (!list_empty(&domain->devices)) {
1346 info = list_entry(domain->devices.next,
1347 struct device_domain_info, link);
1348 list_del(&info->link);
1349 list_del(&info->global);
1350 if (info->dev)
 1351 info->dev->dev.archdata.iommu = NULL;
1352 spin_unlock_irqrestore(&device_domain_lock, flags);
1353
1354 detach_domain_for_dev(info->domain, info->bus, info->devfn);
1355 free_devinfo_mem(info);
1356
1357 spin_lock_irqsave(&device_domain_lock, flags);
1358 }
1359 spin_unlock_irqrestore(&device_domain_lock, flags);
1360}
1361
1362/*
1363 * find_domain
 1364 * Note: we use struct pci_dev->dev.archdata.iommu to store the info
1365 */
1366struct dmar_domain *
1367find_domain(struct pci_dev *pdev)
1368{
1369 struct device_domain_info *info;
1370
1371 /* No lock here, assumes no domain exit in normal case */
 1372 info = pdev->dev.archdata.iommu;
1373 if (info)
1374 return info->domain;
1375 return NULL;
1376}
1377
 1378/* find or allocate a domain for the device; the returned domain is initialized */
1379static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
1380{
1381 struct dmar_domain *domain, *found = NULL;
1382 struct intel_iommu *iommu;
1383 struct dmar_drhd_unit *drhd;
1384 struct device_domain_info *info, *tmp;
1385 struct pci_dev *dev_tmp;
1386 unsigned long flags;
1387 int bus = 0, devfn = 0;
1388
1389 domain = find_domain(pdev);
1390 if (domain)
1391 return domain;
1392
1393 dev_tmp = pci_find_upstream_pcie_bridge(pdev);
1394 if (dev_tmp) {
1395 if (dev_tmp->is_pcie) {
1396 bus = dev_tmp->subordinate->number;
1397 devfn = 0;
1398 } else {
1399 bus = dev_tmp->bus->number;
1400 devfn = dev_tmp->devfn;
1401 }
1402 spin_lock_irqsave(&device_domain_lock, flags);
1403 list_for_each_entry(info, &device_domain_list, global) {
1404 if (info->bus == bus && info->devfn == devfn) {
1405 found = info->domain;
1406 break;
1407 }
1408 }
1409 spin_unlock_irqrestore(&device_domain_lock, flags);
1410 /* pcie-pci bridge already has a domain, uses it */
1411 if (found) {
1412 domain = found;
1413 goto found_domain;
1414 }
1415 }
1416
1417 /* Allocate new domain for the device */
1418 drhd = dmar_find_matched_drhd_unit(pdev);
1419 if (!drhd) {
1420 printk(KERN_ERR "IOMMU: can't find DMAR for device %s\n",
1421 pci_name(pdev));
1422 return NULL;
1423 }
1424 iommu = drhd->iommu;
1425
1426 domain = iommu_alloc_domain(iommu);
1427 if (!domain)
1428 goto error;
1429
1430 if (domain_init(domain, gaw)) {
1431 domain_exit(domain);
1432 goto error;
1433 }
1434
1435 /* register pcie-to-pci device */
1436 if (dev_tmp) {
1437 info = alloc_devinfo_mem();
1438 if (!info) {
1439 domain_exit(domain);
1440 goto error;
1441 }
1442 info->bus = bus;
1443 info->devfn = devfn;
1444 info->dev = NULL;
1445 info->domain = domain;
1446 /* This domain is shared by devices under p2p bridge */
1447 domain->flags |= DOMAIN_FLAG_MULTIPLE_DEVICES;
1448
1449 /* pcie-to-pci bridge already has a domain, uses it */
1450 found = NULL;
1451 spin_lock_irqsave(&device_domain_lock, flags);
1452 list_for_each_entry(tmp, &device_domain_list, global) {
1453 if (tmp->bus == bus && tmp->devfn == devfn) {
1454 found = tmp->domain;
1455 break;
1456 }
1457 }
1458 if (found) {
1459 free_devinfo_mem(info);
1460 domain_exit(domain);
1461 domain = found;
1462 } else {
1463 list_add(&info->link, &domain->devices);
1464 list_add(&info->global, &device_domain_list);
1465 }
1466 spin_unlock_irqrestore(&device_domain_lock, flags);
1467 }
1468
1469found_domain:
1470 info = alloc_devinfo_mem();
1471 if (!info)
1472 goto error;
1473 info->bus = pdev->bus->number;
1474 info->devfn = pdev->devfn;
1475 info->dev = pdev;
1476 info->domain = domain;
1477 spin_lock_irqsave(&device_domain_lock, flags);
1478 /* somebody is fast */
1479 found = find_domain(pdev);
1480 if (found != NULL) {
1481 spin_unlock_irqrestore(&device_domain_lock, flags);
1482 if (found != domain) {
1483 domain_exit(domain);
1484 domain = found;
1485 }
1486 free_devinfo_mem(info);
1487 return domain;
1488 }
1489 list_add(&info->link, &domain->devices);
1490 list_add(&info->global, &device_domain_list);
 1491 pdev->dev.archdata.iommu = info;
1492 spin_unlock_irqrestore(&device_domain_lock, flags);
1493 return domain;
1494error:
1495 /* recheck it here, maybe others set it */
1496 return find_domain(pdev);
1497}
1498
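/*
 * Set up a 1:1 (identity) mapping for [start, end) on behalf of @pdev:
 * reserve the matching IOVA range, install the page table entries, and
 * make sure the device's context entry is present.
 */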
1499static int iommu_prepare_identity_map(struct pci_dev *pdev, u64 start, u64 end)
1500{
1501 struct dmar_domain *domain;
1502 unsigned long size;
1503 u64 base;
1504 int ret;
1505
1506 printk(KERN_INFO
1507 "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
1508 pci_name(pdev), start, end);
1509 /* page table init */
1510 domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
1511 if (!domain)
1512 return -ENOMEM;
1513
1514 /* The address might not be aligned */
1515 base = start & PAGE_MASK_4K;
1516 size = end - base;
1517 size = PAGE_ALIGN_4K(size);
1518 if (!reserve_iova(&domain->iovad, IOVA_PFN(base),
1519 IOVA_PFN(base + size) - 1)) {
1520 printk(KERN_ERR "IOMMU: reserve iova failed\n");
1521 ret = -ENOMEM;
1522 goto error;
1523 }
1524
1525 pr_debug("Mapping reserved region %lx@%llx for %s\n",
1526 size, base, pci_name(pdev));
1527 /*
1528 * RMRR range might have overlap with physical memory range,
1529 * clear it first
1530 */
1531 dma_pte_clear_range(domain, base, base + size);
1532
1533 ret = domain_page_mapping(domain, base, base, size,
1534 DMA_PTE_READ|DMA_PTE_WRITE);
1535 if (ret)
1536 goto error;
1537
1538 /* context entry init */
1539 ret = domain_context_mapping(domain, pdev);
1540 if (!ret)
1541 return 0;
1542error:
1543 domain_exit(domain);
1544 return ret;
1545
1546}
1547
1548static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
1549 struct pci_dev *pdev)
1550{
 1551 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
1552 return 0;
1553 return iommu_prepare_identity_map(pdev, rmrr->base_address,
1554 rmrr->end_address + 1);
1555}
1556
 1557#ifdef CONFIG_DMAR_GFX_WA
1558struct iommu_prepare_data {
1559 struct pci_dev *pdev;
1560 int ret;
1561};
1562
1563static int __init iommu_prepare_work_fn(unsigned long start_pfn,
1564 unsigned long end_pfn, void *datax)
1565{
1566 struct iommu_prepare_data *data;
1567
1568 data = (struct iommu_prepare_data *)datax;
1569
1570 data->ret = iommu_prepare_identity_map(data->pdev,
1571 start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
1572 return data->ret;
1573
1574}
1575
1576static int __init iommu_prepare_with_active_regions(struct pci_dev *pdev)
1577{
1578 int nid;
1579 struct iommu_prepare_data data;
1580
1581 data.pdev = pdev;
1582 data.ret = 0;
1583
1584 for_each_online_node(nid) {
1585 work_with_active_regions(nid, iommu_prepare_work_fn, &data);
1586 if (data.ret)
1587 return data.ret;
1588 }
1589 return data.ret;
1590}
1591
1592static void __init iommu_prepare_gfx_mapping(void)
1593{
1594 struct pci_dev *pdev = NULL;
1595 int ret;
1596
1597 for_each_pci_dev(pdev) {
 1598 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO ||
1599 !IS_GFX_DEVICE(pdev))
1600 continue;
1601 printk(KERN_INFO "IOMMU: gfx device %s 1-1 mapping\n",
1602 pci_name(pdev));
1603 ret = iommu_prepare_with_active_regions(pdev);
1604 if (ret)
1605 printk(KERN_ERR "IOMMU: mapping reserved region failed\n");
1606 }
1607}
1608#endif
1609
1610#ifdef CONFIG_DMAR_FLOPPY_WA
1611static inline void iommu_prepare_isa(void)
1612{
1613 struct pci_dev *pdev;
1614 int ret;
1615
1616 pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
1617 if (!pdev)
1618 return;
1619
1620 printk(KERN_INFO "IOMMU: Prepare 0-16M unity mapping for LPC\n");
1621 ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024);
1622
1623 if (ret)
 1624 printk("IOMMU: Failed to create 0-16M identity map, "
1625 "floppy might not work\n");
1626
1627}
1628#else
1629static inline void iommu_prepare_isa(void)
1630{
1631 return;
1632}
 1633#endif /* !CONFIG_DMAR_FLOPPY_WA */
1634
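/*
 * Bring up DMA remapping: allocate per-IOMMU domain bookkeeping and root
 * entries, establish the RMRR and workaround identity mappings, then
 * enable fault reporting and translation on every non-ignored DRHD unit.
 */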
1635int __init init_dmars(void)
1636{
1637 struct dmar_drhd_unit *drhd;
1638 struct dmar_rmrr_unit *rmrr;
1639 struct pci_dev *pdev;
1640 struct intel_iommu *iommu;
 1641 int i, ret, unit = 0;
1642
1643 /*
1644 * for each drhd
1645 * allocate root
1646 * initialize and program root entry to not present
1647 * endfor
1648 */
1649 for_each_drhd_unit(drhd) {
 1650 g_num_of_iommus++;
1651 /*
1652 * lock not needed as this is only incremented in the single
1653 * threaded kernel __init code path all other access are read
1654 * only
1655 */
1656 }
1657
 1658 deferred_flush = kzalloc(g_num_of_iommus *
1659 sizeof(struct deferred_flush_tables), GFP_KERNEL);
1660 if (!deferred_flush) {
 1661 ret = -ENOMEM;
1662 goto error;
1663 }
1664
 1665 for_each_drhd_unit(drhd) {
1666 if (drhd->ignored)
1667 continue;
 1668 iommu = alloc_iommu(drhd);
1669 if (!iommu) {
1670 ret = -ENOMEM;
1671 goto error;
1672 }
1673
1674 ret = iommu_init_domains(iommu);
1675 if (ret)
1676 goto error;
1677
1678 /*
1679 * TBD:
1680 * we could share the same root & context tables
 1681 * among all IOMMUs. Need to split it later.
1682 */
1683 ret = iommu_alloc_root_entry(iommu);
1684 if (ret) {
1685 printk(KERN_ERR "IOMMU: allocate root entry failed\n");
1686 goto error;
1687 }
1688 }
1689
1690 /*
1691 * For each rmrr
1692 * for each dev attached to rmrr
1693 * do
1694 * locate drhd for dev, alloc domain for dev
1695 * allocate free domain
1696 * allocate page table entries for rmrr
1697 * if context not allocated for bus
1698 * allocate and init context
1699 * set present in root table for this bus
1700 * init context with domain, translation etc
1701 * endfor
1702 * endfor
1703 */
1704 for_each_rmrr_units(rmrr) {
1705 for (i = 0; i < rmrr->devices_cnt; i++) {
1706 pdev = rmrr->devices[i];
 1707 /* some BIOSes list non-existent devices in the DMAR table */
1708 if (!pdev)
1709 continue;
1710 ret = iommu_prepare_rmrr_dev(rmrr, pdev);
1711 if (ret)
1712 printk(KERN_ERR
1713 "IOMMU: mapping reserved region failed\n");
1714 }
1715 }
1716
1717 iommu_prepare_gfx_mapping();
1718
1719 iommu_prepare_isa();
1720
1721 /*
1722 * for each drhd
1723 * enable fault log
1724 * global invalidate context cache
1725 * global invalidate iotlb
1726 * enable translation
1727 */
1728 for_each_drhd_unit(drhd) {
1729 if (drhd->ignored)
1730 continue;
1731 iommu = drhd->iommu;
1732 sprintf (iommu->name, "dmar%d", unit++);
1733
1734 iommu_flush_write_buffer(iommu);
1735
1736 ret = dmar_set_interrupt(iommu);
1737 if (ret)
1738 goto error;
1739
1740 iommu_set_root_entry(iommu);
1741
1742 iommu_flush_context_global(iommu, 0);
1743 iommu_flush_iotlb_global(iommu, 0);
1744
 1745 iommu_disable_protect_mem_regions(iommu);
 1746
1747 ret = iommu_enable_translation(iommu);
1748 if (ret)
1749 goto error;
1750 }
1751
1752 return 0;
1753error:
1754 for_each_drhd_unit(drhd) {
1755 if (drhd->ignored)
1756 continue;
1757 iommu = drhd->iommu;
1758 free_iommu(iommu);
1759 }
1760 return ret;
1761}
1762
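/*
 * Round a host address/size pair up to whole 4K pages, accounting for
 * the offset of the buffer within its first page.
 */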
1763static inline u64 aligned_size(u64 host_addr, size_t size)
1764{
1765 u64 addr;
1766 addr = (host_addr & (~PAGE_MASK_4K)) + size;
1767 return PAGE_ALIGN_4K(addr);
1768}
1769
1770struct iova *
 1771 iommu_alloc_iova(struct dmar_domain *domain, size_t size, u64 end)
 1772 {
 1773 struct iova *piova;
 1774
 1775 /* Make sure it's in range */
 1776 end = min_t(u64, DOMAIN_MAX_ADDR(domain->gaw), end);
 1777 if (!size || (IOVA_START_ADDR + size > end))
1778 return NULL;
1779
1780 piova = alloc_iova(&domain->iovad,
 1781 size >> PAGE_SHIFT_4K, IOVA_PFN(end), 1);
1782 return piova;
1783}
1784
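/*
 * Allocate an IOVA range of @size bytes for the device, preferring the
 * 32-bit space first and only falling back to the full DMA mask if that
 * fails (unless the device mask is 32-bit anyway or forcedac is set).
 */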
1785static struct iova *
1786__intel_alloc_iova(struct device *dev, struct dmar_domain *domain,
1787 size_t size)
 1788 {
 1789 struct pci_dev *pdev = to_pci_dev(dev);
 1790 struct iova *iova = NULL;
 1791
 1792 if ((pdev->dma_mask <= DMA_32BIT_MASK) || (dmar_forcedac)) {
 1793 iova = iommu_alloc_iova(domain, size, pdev->dma_mask);
1794 } else {
1795 /*
1796 * First try to allocate an io virtual address in
1797 * DMA_32BIT_MASK and if that fails then try allocating
 1798 * from higher range
 1799 */
 1800 iova = iommu_alloc_iova(domain, size, DMA_32BIT_MASK);
 1801 if (!iova)
 1802 iova = iommu_alloc_iova(domain, size, pdev->dma_mask);
1803 }
1804
1805 if (!iova) {
1806 printk(KERN_ERR"Allocating iova for %s failed", pci_name(pdev));
1807 return NULL;
1808 }
1809
1810 return iova;
1811}
1812
1813static struct dmar_domain *
1814get_valid_domain_for_dev(struct pci_dev *pdev)
1815{
1816 struct dmar_domain *domain;
1817 int ret;
1818
1819 domain = get_domain_for_dev(pdev,
1820 DEFAULT_DOMAIN_ADDRESS_WIDTH);
1821 if (!domain) {
1822 printk(KERN_ERR
1823 "Allocating domain for %s failed", pci_name(pdev));
 1824 return NULL;
1825 }
1826
1827 /* make sure context mapping is ok */
1828 if (unlikely(!domain_context_mapped(domain, pdev))) {
1829 ret = domain_context_mapping(domain, pdev);
1830 if (ret) {
1831 printk(KERN_ERR
1832 "Domain context map for %s failed",
1833 pci_name(pdev));
 1834 return NULL;
 1835 }
1836 }
1837
1838 return domain;
1839}
1840
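/*
 * Map a single physical buffer for DMA: allocate an IOVA of the aligned
 * size, install PTEs with permissions derived from the DMA direction,
 * and perform a page-selective IOTLB flush (or a write buffer flush)
 * before returning the bus address to the driver.
 */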
1841static dma_addr_t
1842intel_map_single(struct device *hwdev, phys_addr_t paddr, size_t size, int dir)
1843{
1844 struct pci_dev *pdev = to_pci_dev(hwdev);
 1845 struct dmar_domain *domain;
 1846 unsigned long start_paddr;
1847 struct iova *iova;
1848 int prot = 0;
 1849 int ret;
1850
1851 BUG_ON(dir == DMA_NONE);
 1852 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
 1853 return paddr;
1854
1855 domain = get_valid_domain_for_dev(pdev);
1856 if (!domain)
1857 return 0;
1858
 1859 size = aligned_size((u64)paddr, size);
1860
1861 iova = __intel_alloc_iova(hwdev, domain, size);
1862 if (!iova)
1863 goto error;
1864
 1865 start_paddr = iova->pfn_lo << PAGE_SHIFT_4K;
 1866
1867 /*
1868 * Check if DMAR supports zero-length reads on write only
1869 * mappings..
1870 */
1871 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
1872 !cap_zlr(domain->iommu->cap))
1873 prot |= DMA_PTE_READ;
1874 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
1875 prot |= DMA_PTE_WRITE;
1876 /*
 1877 * paddr - (paddr + size) might be a partial page, we should map the whole
 1878 * page. Note: if two parts of one page are separately mapped, we
 1879 * might have two guest_addr mappings to the same host paddr, but this
1880 * is not a big problem
1881 */
1882 ret = domain_page_mapping(domain, start_paddr,
1883 ((u64)paddr) & PAGE_MASK_4K, size, prot);
1884 if (ret)
1885 goto error;
1886
1887 pr_debug("Device %s request: %lx@%llx mapping: %lx@%llx, dir %d\n",
1888 pci_name(pdev), size, (u64)paddr,
1889 size, (u64)start_paddr, dir);
1890
1891 /* it's a non-present to present mapping */
1892 ret = iommu_flush_iotlb_psi(domain->iommu, domain->id,
 1893 start_paddr, size >> PAGE_SHIFT_4K, 1);
1894 if (ret)
1895 iommu_flush_write_buffer(domain->iommu);
1896
 1897 return (start_paddr + ((u64)paddr & (~PAGE_MASK_4K)));
 1898
 1899error:
1900 if (iova)
1901 __free_iova(&domain->iovad, iova);
 1902 printk(KERN_ERR"Device %s request: %lx@%llx dir %d --- failed\n",
 1903 pci_name(pdev), size, (u64)paddr, dir);
1904 return 0;
1905}
1906
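/*
 * Drain the deferred-unmap tables: one global IOTLB flush per IOMMU,
 * then free every IOVA queued against it.  Called with
 * async_umap_flush_lock held.
 */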
 1907static void flush_unmaps(void)
 1908{
 1909 int i, j;
 1910
 1911 timer_on = 0;
1912
1913 /* just flush them all */
1914 for (i = 0; i < g_num_of_iommus; i++) {
 1915 if (deferred_flush[i].next) {
1916 struct intel_iommu *iommu =
1917 deferred_flush[i].domain[0]->iommu;
1918
1919 iommu_flush_iotlb_global(iommu, 0);
 1920 for (j = 0; j < deferred_flush[i].next; j++) {
1921 __free_iova(&deferred_flush[i].domain[j]->iovad,
1922 deferred_flush[i].iova[j]);
1923 }
1924 deferred_flush[i].next = 0;
1925 }
 1926 }
 1927
 1928 list_size = 0;
 1929}
1930
1931static void flush_unmaps_timeout(unsigned long data)
1932{
 1933 unsigned long flags;
 1934
 1935 spin_lock_irqsave(&async_umap_flush_lock, flags);
 1936 flush_unmaps();
 1937 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
 1938}
1939
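/*
 * Queue an IOVA for deferred release instead of flushing the IOTLB
 * synchronously; the batched flush runs from the 10ms unmap_timer or
 * immediately once HIGH_WATER_MARK entries have accumulated.
 */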
1940static void add_unmap(struct dmar_domain *dom, struct iova *iova)
1941{
1942 unsigned long flags;
 1943 int next, iommu_id;
 1944
1945 spin_lock_irqsave(&async_umap_flush_lock, flags);
 1946 if (list_size == HIGH_WATER_MARK)
1947 flush_unmaps();
1948
1949 iommu_id = dom->iommu->seq_id;
1950
 1951 next = deferred_flush[iommu_id].next;
1952 deferred_flush[iommu_id].domain[next] = dom;
1953 deferred_flush[iommu_id].iova[next] = iova;
1954 deferred_flush[iommu_id].next++;
 1955
1956 if (!timer_on) {
1957 mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10));
1958 timer_on = 1;
1959 }
1960 list_size++;
1961 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
1962}
1963
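/*
 * Unmap a single DMA mapping: look up the IOVA that covers dev_addr,
 * clear and free the page-table range backing it, then either flush the
 * IOTLB and free the IOVA immediately (intel_iommu_strict) or defer both
 * via add_unmap().
 */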
static void intel_unmap_single(struct device *dev, dma_addr_t dev_addr,
	size_t size, int dir)
{
	struct pci_dev *pdev = to_pci_dev(dev);
	struct dmar_domain *domain;
	unsigned long start_addr;
	struct iova *iova;

	if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
		return;
	domain = find_domain(pdev);
	BUG_ON(!domain);

	iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
	if (!iova)
		return;

	start_addr = iova->pfn_lo << PAGE_SHIFT_4K;
	size = aligned_size((u64)dev_addr, size);

	pr_debug("Device %s unmapping: %lx@%llx\n",
		pci_name(pdev), size, (u64)start_addr);

	/* clear the whole page */
	dma_pte_clear_range(domain, start_addr, start_addr + size);
	/* free page tables */
	dma_pte_free_pagetable(domain, start_addr, start_addr + size);
	if (intel_iommu_strict) {
		if (iommu_flush_iotlb_psi(domain->iommu,
			domain->id, start_addr, size >> PAGE_SHIFT_4K, 0))
			iommu_flush_write_buffer(domain->iommu);
		/* free iova */
		__free_iova(&domain->iovad, iova);
	} else {
		/*
		 * Queue up the release of the unmap; batching the IOTLB
		 * flushes saves roughly 1/6th of the CPU otherwise spent
		 * on the flush operation.
		 */
		add_unmap(domain, iova);
	}
}

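/*
 * Coherent DMA allocations: grab zeroed pages from the page allocator and
 * run them through intel_map_single() with DMA_BIDIRECTIONAL so the device
 * gets a bus address; intel_free_coherent() undoes both steps.
 */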
static void *intel_alloc_coherent(struct device *hwdev, size_t size,
	dma_addr_t *dma_handle, gfp_t flags)
{
	void *vaddr;
	int order;

	size = PAGE_ALIGN_4K(size);
	order = get_order(size);
	flags &= ~(GFP_DMA | GFP_DMA32);

	vaddr = (void *)__get_free_pages(flags, order);
	if (!vaddr)
		return NULL;
	memset(vaddr, 0, size);

	*dma_handle = intel_map_single(hwdev, virt_to_bus(vaddr), size,
			DMA_BIDIRECTIONAL);
	if (*dma_handle)
		return vaddr;
	free_pages((unsigned long)vaddr, order);
	return NULL;
}

static void intel_free_coherent(struct device *hwdev, size_t size,
	void *vaddr, dma_addr_t dma_handle)
{
	int order;

	size = PAGE_ALIGN_4K(size);
	order = get_order(size);

	intel_unmap_single(hwdev, dma_handle, size, DMA_BIDIRECTIONAL);
	free_pages((unsigned long)vaddr, order);
}

#define SG_ENT_VIRT_ADDRESS(sg)	(sg_virt((sg)))
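/*
 * Tear down a scatter-gather mapping.  The whole list was mapped into one
 * contiguous IOVA range, so recompute the aligned total size, clear and
 * free the page tables for that range, flush the IOTLB and free the IOVA.
 */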
static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
	int nelems, int dir)
{
	int i;
	struct pci_dev *pdev = to_pci_dev(hwdev);
	struct dmar_domain *domain;
	unsigned long start_addr;
	struct iova *iova;
	size_t size = 0;
	void *addr;
	struct scatterlist *sg;

	if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
		return;

	domain = find_domain(pdev);

	iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address));
	if (!iova)
		return;
	for_each_sg(sglist, sg, nelems, i) {
		addr = SG_ENT_VIRT_ADDRESS(sg);
		size += aligned_size((u64)addr, sg->length);
	}

	start_addr = iova->pfn_lo << PAGE_SHIFT_4K;

	/* clear the whole page */
	dma_pte_clear_range(domain, start_addr, start_addr + size);
	/* free page tables */
	dma_pte_free_pagetable(domain, start_addr, start_addr + size);

	if (iommu_flush_iotlb_psi(domain->iommu, domain->id, start_addr,
			size >> PAGE_SHIFT_4K, 0))
		iommu_flush_write_buffer(domain->iommu);

	/* free iova */
	__free_iova(&domain->iovad, iova);
}

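/*
 * Fallback for devices that bypass the IOMMU (DUMMY_DEVICE_DOMAIN_INFO):
 * hand back the physical bus address of each scatterlist entry unchanged.
 */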
static int intel_nontranslate_map_sg(struct device *hddev,
	struct scatterlist *sglist, int nelems, int dir)
{
	int i;
	struct scatterlist *sg;

	for_each_sg(sglist, sg, nelems, i) {
		BUG_ON(!sg_page(sg));
		sg->dma_address = virt_to_bus(SG_ENT_VIRT_ADDRESS(sg));
		sg->dma_length = sg->length;
	}
	return nelems;
}

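/*
 * Map a scatter-gather list in two passes: first sum the page-aligned size
 * of every entry and allocate a single IOVA range large enough for all of
 * them, then map each entry back-to-back inside that range and flush the
 * IOTLB once for the whole mapping.
 */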
static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist,
	int nelems, int dir)
{
	void *addr;
	int i;
	struct pci_dev *pdev = to_pci_dev(hwdev);
	struct dmar_domain *domain;
	size_t size = 0;
	int prot = 0;
	size_t offset = 0;
	struct iova *iova = NULL;
	int ret;
	struct scatterlist *sg;
	unsigned long start_addr;

	BUG_ON(dir == DMA_NONE);
	if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
		return intel_nontranslate_map_sg(hwdev, sglist, nelems, dir);

	domain = get_valid_domain_for_dev(pdev);
	if (!domain)
		return 0;

	for_each_sg(sglist, sg, nelems, i) {
		addr = SG_ENT_VIRT_ADDRESS(sg);
		addr = (void *)virt_to_phys(addr);
		size += aligned_size((u64)addr, sg->length);
	}

	iova = __intel_alloc_iova(hwdev, domain, size);
	if (!iova) {
		sglist->dma_length = 0;
		return 0;
	}

	/*
	 * Check if DMAR supports zero-length reads on write-only
	 * mappings.
	 */
	if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL ||
			!cap_zlr(domain->iommu->cap))
		prot |= DMA_PTE_READ;
	if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
		prot |= DMA_PTE_WRITE;

	start_addr = iova->pfn_lo << PAGE_SHIFT_4K;
	offset = 0;
	for_each_sg(sglist, sg, nelems, i) {
		addr = SG_ENT_VIRT_ADDRESS(sg);
		addr = (void *)virt_to_phys(addr);
		size = aligned_size((u64)addr, sg->length);
		ret = domain_page_mapping(domain, start_addr + offset,
			((u64)addr) & PAGE_MASK_4K,
			size, prot);
		if (ret) {
			/* clear the page */
			dma_pte_clear_range(domain, start_addr,
					start_addr + offset);
			/* free page tables */
			dma_pte_free_pagetable(domain, start_addr,
					start_addr + offset);
			/* free iova */
			__free_iova(&domain->iovad, iova);
			return 0;
		}
		sg->dma_address = start_addr + offset +
				((u64)addr & (~PAGE_MASK_4K));
		sg->dma_length = sg->length;
		offset += size;
	}

	/* it's a non-present to present mapping */
	if (iommu_flush_iotlb_psi(domain->iommu, domain->id,
			start_addr, offset >> PAGE_SHIFT_4K, 1))
		iommu_flush_write_buffer(domain->iommu);
	return nelems;
}

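/* Plug the IOMMU-backed implementations into the generic DMA mapping ops. */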
static struct dma_mapping_ops intel_dma_ops = {
	.alloc_coherent = intel_alloc_coherent,
	.free_coherent = intel_free_coherent,
	.map_single = intel_map_single,
	.unmap_single = intel_unmap_single,
	.map_sg = intel_map_sg,
	.unmap_sg = intel_unmap_sg,
};

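/*
 * Slab caches for the driver's frequently allocated objects: dmar_domain,
 * device_domain_info and iova descriptors.  They are created together in
 * iommu_init_mempool() and torn down in iommu_exit_mempool().
 */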
static inline int iommu_domain_cache_init(void)
{
	int ret = 0;

	iommu_domain_cache = kmem_cache_create("iommu_domain",
					 sizeof(struct dmar_domain),
					 0,
					 SLAB_HWCACHE_ALIGN,
					 NULL);
	if (!iommu_domain_cache) {
		printk(KERN_ERR "Couldn't create iommu_domain cache\n");
		ret = -ENOMEM;
	}

	return ret;
}

static inline int iommu_devinfo_cache_init(void)
{
	int ret = 0;

	iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
					 sizeof(struct device_domain_info),
					 0,
					 SLAB_HWCACHE_ALIGN,
					 NULL);
	if (!iommu_devinfo_cache) {
		printk(KERN_ERR "Couldn't create devinfo cache\n");
		ret = -ENOMEM;
	}

	return ret;
}

static inline int iommu_iova_cache_init(void)
{
	int ret = 0;

	iommu_iova_cache = kmem_cache_create("iommu_iova",
					 sizeof(struct iova),
					 0,
					 SLAB_HWCACHE_ALIGN,
					 NULL);
	if (!iommu_iova_cache) {
		printk(KERN_ERR "Couldn't create iova cache\n");
		ret = -ENOMEM;
	}

	return ret;
}

static int __init iommu_init_mempool(void)
{
	int ret;
	ret = iommu_iova_cache_init();
	if (ret)
		return ret;

	ret = iommu_domain_cache_init();
	if (ret)
		goto domain_error;

	ret = iommu_devinfo_cache_init();
	if (!ret)
		return ret;

	kmem_cache_destroy(iommu_domain_cache);
domain_error:
	kmem_cache_destroy(iommu_iova_cache);

	return -ENOMEM;
}

static void __init iommu_exit_mempool(void)
{
	kmem_cache_destroy(iommu_devinfo_cache);
	kmem_cache_destroy(iommu_domain_cache);
	kmem_cache_destroy(iommu_iova_cache);
}

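/*
 * Early detection hook: claim the IOMMU only when no other IOMMU or
 * swiotlb has already been selected and DMAR has not been disabled.
 */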
void __init detect_intel_iommu(void)
{
	if (swiotlb || no_iommu || iommu_detected || dmar_disabled)
		return;
	if (early_dmar_detect())
		iommu_detected = 1;
}

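/*
 * Mark DRHD units that should not be used: units with no PCI devices
 * behind them, and, when graphics mapping is disabled (dmar_map_gfx == 0),
 * units that only cover graphics devices.  Devices under such units get
 * the dummy domain and bypass translation entirely.
 */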
static void __init init_no_remapping_devices(void)
{
	struct dmar_drhd_unit *drhd;

	for_each_drhd_unit(drhd) {
		if (!drhd->include_all) {
			int i;
			for (i = 0; i < drhd->devices_cnt; i++)
				if (drhd->devices[i] != NULL)
					break;
			/* ignore DMAR unit if no pci devices exist */
			if (i == drhd->devices_cnt)
				drhd->ignored = 1;
		}
	}

	if (dmar_map_gfx)
		return;

	for_each_drhd_unit(drhd) {
		int i;
		if (drhd->ignored || drhd->include_all)
			continue;

		for (i = 0; i < drhd->devices_cnt; i++)
			if (drhd->devices[i] &&
				!IS_GFX_DEVICE(drhd->devices[i]))
				break;

		if (i < drhd->devices_cnt)
			continue;

		/* bypass IOMMU if it is just for gfx devices */
		drhd->ignored = 1;
		for (i = 0; i < drhd->devices_cnt; i++) {
			if (!drhd->devices[i])
				continue;
			drhd->devices[i]->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
		}
	}
}

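/*
 * Main entry point: parse the DMAR table, set up the mempools and reserved
 * IOVA ranges, skip units with nothing to remap, initialize the DMA
 * remapping hardware via init_dmars(), and finally install intel_dma_ops
 * as the platform's DMA mapping operations.
 */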
int __init intel_iommu_init(void)
{
	int ret = 0;

	if (no_iommu || swiotlb || dmar_disabled)
		return -ENODEV;

	if (dmar_table_init())
		return -ENODEV;

	iommu_init_mempool();
	dmar_init_reserved_ranges();

	init_no_remapping_devices();

	ret = init_dmars();
	if (ret) {
		printk(KERN_ERR "IOMMU: dmar init failed\n");
		put_iova_domain(&reserved_iova_list);
		iommu_exit_mempool();
		return ret;
	}
	printk(KERN_INFO
	"PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");

	init_timer(&unmap_timer);
	force_iommu = 1;
	dma_ops = &intel_dma_ops;
	return 0;
}