From: Suresh Siddha Subject: x64, x2apic/intr-remap: Queued invalidation infrastructure (part of VT-d) References: fate #303948 and fate #303984 Patch-Mainline: queued for .28 Commit-ID: fe962e90cb17a8426e144dee970e77ed789d98ee Signed-off-by: Thomas Renninger Queued invalidation (part of Intel Virtualization Technology for Directed I/O architecture) infrastructure. This will be used for invalidating the interrupt entry cache in the case of Interrupt-remapping and IOTLB invalidation in the case of DMA-remapping. Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar --- drivers/pci/dmar.c | 150 ++++++++++++++++++++++++++++++++++++++++++++++ drivers/pci/intel-iommu.c | 7 -- drivers/pci/intel-iommu.h | 61 ++++++++++++++++++ 3 files changed, 211 insertions(+), 7 deletions(-) --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c @@ -28,6 +28,7 @@ #include #include +#include #include "iova.h" #include "intel-iommu.h" @@ -511,3 +512,152 @@ void free_iommu(struct intel_iommu *iomm iounmap(iommu->reg); kfree(iommu); } + +/* + * Reclaim all the submitted descriptors which have completed its work. + */ +static inline void reclaim_free_desc(struct q_inval *qi) +{ + while (qi->desc_status[qi->free_tail] == QI_DONE) { + qi->desc_status[qi->free_tail] = QI_FREE; + qi->free_tail = (qi->free_tail + 1) % QI_LENGTH; + qi->free_cnt++; + } +} + +/* + * Submit the queued invalidation descriptor to the remapping + * hardware unit and wait for its completion. + */ +void qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu) +{ + struct q_inval *qi = iommu->qi; + struct qi_desc *hw, wait_desc; + int wait_index, index; + unsigned long flags; + + if (!qi) + return; + + hw = qi->desc; + + spin_lock(&qi->q_lock); + while (qi->free_cnt < 3) { + spin_unlock(&qi->q_lock); + cpu_relax(); + spin_lock(&qi->q_lock); + } + + index = qi->free_head; + wait_index = (index + 1) % QI_LENGTH; + + qi->desc_status[index] = qi->desc_status[wait_index] = QI_IN_USE; + + hw[index] = *desc; + + wait_desc.low = QI_IWD_STATUS_DATA(2) | QI_IWD_STATUS_WRITE | QI_IWD_TYPE; + wait_desc.high = virt_to_phys(&qi->desc_status[wait_index]); + + hw[wait_index] = wait_desc; + + __iommu_flush_cache(iommu, &hw[index], sizeof(struct qi_desc)); + __iommu_flush_cache(iommu, &hw[wait_index], sizeof(struct qi_desc)); + + qi->free_head = (qi->free_head + 2) % QI_LENGTH; + qi->free_cnt -= 2; + + spin_lock_irqsave(&iommu->register_lock, flags); + /* + * update the HW tail register indicating the presence of + * new descriptors. + */ + writel(qi->free_head << 4, iommu->reg + DMAR_IQT_REG); + spin_unlock_irqrestore(&iommu->register_lock, flags); + + while (qi->desc_status[wait_index] != QI_DONE) { + spin_unlock(&qi->q_lock); + cpu_relax(); + spin_lock(&qi->q_lock); + } + + qi->desc_status[index] = QI_DONE; + + reclaim_free_desc(qi); + spin_unlock(&qi->q_lock); +} + +/* + * Flush the global interrupt entry cache. + */ +void qi_global_iec(struct intel_iommu *iommu) +{ + struct qi_desc desc; + + desc.low = QI_IEC_TYPE; + desc.high = 0; + + qi_submit_sync(&desc, iommu); +} + +/* + * Enable Queued Invalidation interface. This is a must to support + * interrupt-remapping. Also used by DMA-remapping, which replaces + * register based IOTLB invalidation. + */ +int dmar_enable_qi(struct intel_iommu *iommu) +{ + u32 cmd, sts; + unsigned long flags; + struct q_inval *qi; + + if (!ecap_qis(iommu->ecap)) + return -ENOENT; + + /* + * queued invalidation is already setup and enabled. + */ + if (iommu->qi) + return 0; + + iommu->qi = kmalloc(sizeof(*qi), GFP_KERNEL); + if (!iommu->qi) + return -ENOMEM; + + qi = iommu->qi; + + qi->desc = (void *)(get_zeroed_page(GFP_KERNEL)); + if (!qi->desc) { + kfree(qi); + iommu->qi = 0; + return -ENOMEM; + } + + qi->desc_status = kmalloc(QI_LENGTH * sizeof(int), GFP_KERNEL); + if (!qi->desc_status) { + free_page((unsigned long) qi->desc); + kfree(qi); + iommu->qi = 0; + return -ENOMEM; + } + + qi->free_head = qi->free_tail = 0; + qi->free_cnt = QI_LENGTH; + + spin_lock_init(&qi->q_lock); + + spin_lock_irqsave(&iommu->register_lock, flags); + /* write zero to the tail reg */ + writel(0, iommu->reg + DMAR_IQT_REG); + + dmar_writeq(iommu->reg + DMAR_IQA_REG, virt_to_phys(qi->desc)); + + cmd = iommu->gcmd | DMA_GCMD_QIE; + iommu->gcmd |= DMA_GCMD_QIE; + writel(cmd, iommu->reg + DMAR_GCMD_REG); + + /* Make sure hardware complete it */ + IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, (sts & DMA_GSTS_QIES), sts); + spin_unlock_irqrestore(&iommu->register_lock, flags); + + return 0; +} --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -183,13 +183,6 @@ void free_iova_mem(struct iova *iova) kmem_cache_free(iommu_iova_cache, iova); } -static inline void __iommu_flush_cache( - struct intel_iommu *iommu, void *addr, int size) -{ - if (!ecap_coherent(iommu->ecap)) - clflush_cache_range(addr, size); -} - /* Gets context entry for a given bus and devfn */ static struct context_entry * device_to_context_entry(struct intel_iommu *iommu, u8 bus, u8 devfn) --- a/drivers/pci/intel-iommu.h +++ b/drivers/pci/intel-iommu.h @@ -27,6 +27,7 @@ #include #include "iova.h" #include +#include #include "dma_remapping.h" /* @@ -51,6 +52,10 @@ #define DMAR_PLMLIMIT_REG 0x6c /* PMRR low limit */ #define DMAR_PHMBASE_REG 0x70 /* pmrr high base addr */ #define DMAR_PHMLIMIT_REG 0x78 /* pmrr high limit */ +#define DMAR_IQH_REG 0x80 /* Invalidation queue head register */ +#define DMAR_IQT_REG 0x88 /* Invalidation queue tail register */ +#define DMAR_IQA_REG 0x90 /* Invalidation queue addr register */ +#define DMAR_ICS_REG 0x98 /* Invalidation complete status register */ #define OFFSET_STRIDE (9) /* @@ -114,6 +119,7 @@ static inline void dmar_writeq(void __io #define ecap_max_iotlb_offset(e) \ (ecap_iotlb_offset(e) + ecap_niotlb_iunits(e) * 16) #define ecap_coherent(e) ((e) & 0x1) +#define ecap_qis(e) ((e) & 0x2) #define ecap_eim_support(e) ((e >> 4) & 0x1) #define ecap_ir_support(e) ((e >> 3) & 0x1) @@ -131,6 +137,17 @@ static inline void dmar_writeq(void __io #define DMA_TLB_IH_NONLEAF (((u64)1) << 6) #define DMA_TLB_MAX_SIZE (0x3f) +/* INVALID_DESC */ +#define DMA_ID_TLB_GLOBAL_FLUSH (((u64)1) << 3) +#define DMA_ID_TLB_DSI_FLUSH (((u64)2) << 3) +#define DMA_ID_TLB_PSI_FLUSH (((u64)3) << 3) +#define DMA_ID_TLB_READ_DRAIN (((u64)1) << 7) +#define DMA_ID_TLB_WRITE_DRAIN (((u64)1) << 6) +#define DMA_ID_TLB_DID(id) (((u64)((id & 0xffff) << 16))) +#define DMA_ID_TLB_IH_NONLEAF (((u64)1) << 6) +#define DMA_ID_TLB_ADDR(addr) (addr) +#define DMA_ID_TLB_ADDR_MASK(mask) (mask) + /* PMEN_REG */ #define DMA_PMEN_EPM (((u32)1)<<31) #define DMA_PMEN_PRS (((u32)1)<<0) @@ -140,6 +157,7 @@ static inline void dmar_writeq(void __io #define DMA_GCMD_SRTP (((u32)1) << 30) #define DMA_GCMD_SFL (((u32)1) << 29) #define DMA_GCMD_EAFL (((u32)1) << 28) +#define DMA_GCMD_QIE (((u32)1) << 26) #define DMA_GCMD_WBF (((u32)1) << 27) /* GSTS_REG */ @@ -147,6 +165,7 @@ static inline void dmar_writeq(void __io #define DMA_GSTS_RTPS (((u32)1) << 30) #define DMA_GSTS_FLS (((u32)1) << 29) #define DMA_GSTS_AFLS (((u32)1) << 28) +#define DMA_GSTS_QIES (((u32)1) << 26) #define DMA_GSTS_WBFS (((u32)1) << 27) /* CCMD_REG */ @@ -192,6 +211,40 @@ static inline void dmar_writeq(void __io }\ } +#define QI_LENGTH 256 /* queue length */ + +enum { + QI_FREE, + QI_IN_USE, + QI_DONE +}; + +#define QI_CC_TYPE 0x1 +#define QI_IOTLB_TYPE 0x2 +#define QI_DIOTLB_TYPE 0x3 +#define QI_IEC_TYPE 0x4 +#define QI_IWD_TYPE 0x5 + +#define QI_IEC_SELECTIVE (((u64)1) << 4) +#define QI_IEC_IIDEX(idx) (((u64)(idx & 0xffff) << 32)) +#define QI_IEC_IM(m) (((u64)(m & 0x1f) << 27)) + +#define QI_IWD_STATUS_DATA(d) (((u64)d) << 32) +#define QI_IWD_STATUS_WRITE (((u64)1) << 5) + +struct qi_desc { + u64 low, high; +}; + +struct q_inval { + spinlock_t q_lock; + struct qi_desc *desc; /* invalidation queue */ + int *desc_status; /* desc status */ + int free_head; /* first free entry */ + int free_tail; /* last free entry */ + int free_cnt; +}; + struct intel_iommu { void __iomem *reg; /* Pointer to hardware regs, virtual addr */ u64 cap; @@ -212,8 +265,16 @@ struct intel_iommu { struct msi_msg saved_msg; struct sys_device sysdev; #endif + struct q_inval *qi; /* Queued invalidation info */ }; +static inline void __iommu_flush_cache( + struct intel_iommu *iommu, void *addr, int size) +{ + if (!ecap_coherent(iommu->ecap)) + clflush_cache_range(addr, size); +} + extern struct dmar_drhd_unit * dmar_find_matched_drhd_unit(struct pci_dev *dev); extern int alloc_iommu(struct dmar_drhd_unit *drhd);