1 From: Suresh Siddha <suresh.b.siddha@intel.com>
2 Subject: x64, x2apic/intr-remap: Queued invalidation infrastructure (part of VT-d)
3 References: fate #303948 and fate #303984
4 Patch-Mainline: queued for .28
5 Commit-ID: fe962e90cb17a8426e144dee970e77ed789d98ee
7 Signed-off-by: Thomas Renninger <trenn@suse.de>
9 Queued invalidation (part of Intel Virtualization Technology for
10 Directed I/O architecture) infrastructure.
12 This will be used for invalidating the interrupt entry cache in the
13 case of Interrupt-remapping and IOTLB invalidation in the case of DMA-remapping.
16 Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
17 Cc: akpm@linux-foundation.org
18 Cc: arjan@linux.intel.com
19 Cc: andi@firstfloor.org
20 Cc: ebiederm@xmission.com
21 Cc: jbarnes@virtuousgeek.org
23 Signed-off-by: Ingo Molnar <mingo@elte.hu>
26 drivers/pci/dmar.c | 150 ++++++++++++++++++++++++++++++++++++++++++++++
27 drivers/pci/intel-iommu.c | 7 --
28 drivers/pci/intel-iommu.h | 61 ++++++++++++++++++
29 3 files changed, 211 insertions(+), 7 deletions(-)
31 --- a/drivers/pci/dmar.c
32 +++ b/drivers/pci/dmar.c
35 #include <linux/pci.h>
36 #include <linux/dmar.h>
37 +#include <linux/timer.h>
39 #include "intel-iommu.h"
41 @@ -511,3 +512,152 @@ void free_iommu(struct intel_iommu *iomm
47 + * Reclaim all the submitted descriptors which have completed their work.
49 +static inline void reclaim_free_desc(struct q_inval *qi)
51 + while (qi->desc_status[qi->free_tail] == QI_DONE) {
52 + qi->desc_status[qi->free_tail] = QI_FREE;
53 + qi->free_tail = (qi->free_tail + 1) % QI_LENGTH;
59 + * Submit the queued invalidation descriptor to the remapping
60 + * hardware unit and wait for its completion.
62 +void qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu)
64 + struct q_inval *qi = iommu->qi;
65 + struct qi_desc *hw, wait_desc;
66 + int wait_index, index;
67 + unsigned long flags;
74 + spin_lock(&qi->q_lock);
75 + while (qi->free_cnt < 3) {
76 + spin_unlock(&qi->q_lock);
78 + spin_lock(&qi->q_lock);
81 + index = qi->free_head;
82 + wait_index = (index + 1) % QI_LENGTH;
84 + qi->desc_status[index] = qi->desc_status[wait_index] = QI_IN_USE;
88 + wait_desc.low = QI_IWD_STATUS_DATA(2) | QI_IWD_STATUS_WRITE | QI_IWD_TYPE;
89 + wait_desc.high = virt_to_phys(&qi->desc_status[wait_index]);
91 + hw[wait_index] = wait_desc;
93 + __iommu_flush_cache(iommu, &hw[index], sizeof(struct qi_desc));
94 + __iommu_flush_cache(iommu, &hw[wait_index], sizeof(struct qi_desc));
96 + qi->free_head = (qi->free_head + 2) % QI_LENGTH;
99 + spin_lock_irqsave(&iommu->register_lock, flags);
101 + * update the HW tail register indicating the presence of new descriptors.
104 + writel(qi->free_head << 4, iommu->reg + DMAR_IQT_REG);
105 + spin_unlock_irqrestore(&iommu->register_lock, flags);
107 + while (qi->desc_status[wait_index] != QI_DONE) {
108 + spin_unlock(&qi->q_lock);
110 + spin_lock(&qi->q_lock);
113 + qi->desc_status[index] = QI_DONE;
115 + reclaim_free_desc(qi);
116 + spin_unlock(&qi->q_lock);
120 + * Flush the global interrupt entry cache.
122 +void qi_global_iec(struct intel_iommu *iommu)
124 + struct qi_desc desc;
126 + desc.low = QI_IEC_TYPE;
129 + qi_submit_sync(&desc, iommu);
133 + * Enable Queued Invalidation interface. This is a must to support
134 + * interrupt-remapping. Also used by DMA-remapping, which replaces
135 + * register based IOTLB invalidation.
137 +int dmar_enable_qi(struct intel_iommu *iommu)
140 + unsigned long flags;
141 + struct q_inval *qi;
143 + if (!ecap_qis(iommu->ecap))
147 + * queued invalidation is already set up and enabled.
152 + iommu->qi = kmalloc(sizeof(*qi), GFP_KERNEL);
158 + qi->desc = (void *)(get_zeroed_page(GFP_KERNEL));
165 + qi->desc_status = kmalloc(QI_LENGTH * sizeof(int), GFP_KERNEL);
166 + if (!qi->desc_status) {
167 + free_page((unsigned long) qi->desc);
173 + qi->free_head = qi->free_tail = 0;
174 + qi->free_cnt = QI_LENGTH;
176 + spin_lock_init(&qi->q_lock);
178 + spin_lock_irqsave(&iommu->register_lock, flags);
179 + /* write zero to the tail reg */
180 + writel(0, iommu->reg + DMAR_IQT_REG);
182 + dmar_writeq(iommu->reg + DMAR_IQA_REG, virt_to_phys(qi->desc));
184 + cmd = iommu->gcmd | DMA_GCMD_QIE;
185 + iommu->gcmd |= DMA_GCMD_QIE;
186 + writel(cmd, iommu->reg + DMAR_GCMD_REG);
188 + /* Make sure the hardware completes it */
189 + IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, (sts & DMA_GSTS_QIES), sts);
190 + spin_unlock_irqrestore(&iommu->register_lock, flags);
194 --- a/drivers/pci/intel-iommu.c
195 +++ b/drivers/pci/intel-iommu.c
196 @@ -183,13 +183,6 @@ void free_iova_mem(struct iova *iova)
197 kmem_cache_free(iommu_iova_cache, iova);
200 -static inline void __iommu_flush_cache(
201 - struct intel_iommu *iommu, void *addr, int size)
203 - if (!ecap_coherent(iommu->ecap))
204 - clflush_cache_range(addr, size);
207 /* Gets context entry for a given bus and devfn */
208 static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
210 --- a/drivers/pci/intel-iommu.h
211 +++ b/drivers/pci/intel-iommu.h
213 #include <linux/sysdev.h>
215 #include <linux/io.h>
216 +#include <asm/cacheflush.h>
217 #include "dma_remapping.h"
221 #define DMAR_PLMLIMIT_REG 0x6c /* PMRR low limit */
222 #define DMAR_PHMBASE_REG 0x70 /* pmrr high base addr */
223 #define DMAR_PHMLIMIT_REG 0x78 /* pmrr high limit */
224 +#define DMAR_IQH_REG 0x80 /* Invalidation queue head register */
225 +#define DMAR_IQT_REG 0x88 /* Invalidation queue tail register */
226 +#define DMAR_IQA_REG 0x90 /* Invalidation queue addr register */
227 +#define DMAR_ICS_REG 0x98 /* Invalidation complete status register */
229 #define OFFSET_STRIDE (9)
231 @@ -114,6 +119,7 @@ static inline void dmar_writeq(void __io
232 #define ecap_max_iotlb_offset(e) \
233 (ecap_iotlb_offset(e) + ecap_niotlb_iunits(e) * 16)
234 #define ecap_coherent(e) ((e) & 0x1)
235 +#define ecap_qis(e) ((e) & 0x2)
236 #define ecap_eim_support(e) ((e >> 4) & 0x1)
237 #define ecap_ir_support(e) ((e >> 3) & 0x1)
239 @@ -131,6 +137,17 @@ static inline void dmar_writeq(void __io
240 #define DMA_TLB_IH_NONLEAF (((u64)1) << 6)
241 #define DMA_TLB_MAX_SIZE (0x3f)
244 +#define DMA_ID_TLB_GLOBAL_FLUSH (((u64)1) << 3)
245 +#define DMA_ID_TLB_DSI_FLUSH (((u64)2) << 3)
246 +#define DMA_ID_TLB_PSI_FLUSH (((u64)3) << 3)
247 +#define DMA_ID_TLB_READ_DRAIN (((u64)1) << 7)
248 +#define DMA_ID_TLB_WRITE_DRAIN (((u64)1) << 6)
249 +#define DMA_ID_TLB_DID(id) (((u64)((id & 0xffff) << 16)))
250 +#define DMA_ID_TLB_IH_NONLEAF (((u64)1) << 6)
251 +#define DMA_ID_TLB_ADDR(addr) (addr)
252 +#define DMA_ID_TLB_ADDR_MASK(mask) (mask)
255 #define DMA_PMEN_EPM (((u32)1)<<31)
256 #define DMA_PMEN_PRS (((u32)1)<<0)
257 @@ -140,6 +157,7 @@ static inline void dmar_writeq(void __io
258 #define DMA_GCMD_SRTP (((u32)1) << 30)
259 #define DMA_GCMD_SFL (((u32)1) << 29)
260 #define DMA_GCMD_EAFL (((u32)1) << 28)
261 +#define DMA_GCMD_QIE (((u32)1) << 26)
262 #define DMA_GCMD_WBF (((u32)1) << 27)
265 @@ -147,6 +165,7 @@ static inline void dmar_writeq(void __io
266 #define DMA_GSTS_RTPS (((u32)1) << 30)
267 #define DMA_GSTS_FLS (((u32)1) << 29)
268 #define DMA_GSTS_AFLS (((u32)1) << 28)
269 +#define DMA_GSTS_QIES (((u32)1) << 26)
270 #define DMA_GSTS_WBFS (((u32)1) << 27)
273 @@ -192,6 +211,40 @@ static inline void dmar_writeq(void __io
277 +#define QI_LENGTH 256 /* queue length */
285 +#define QI_CC_TYPE 0x1
286 +#define QI_IOTLB_TYPE 0x2
287 +#define QI_DIOTLB_TYPE 0x3
288 +#define QI_IEC_TYPE 0x4
289 +#define QI_IWD_TYPE 0x5
291 +#define QI_IEC_SELECTIVE (((u64)1) << 4)
292 +#define QI_IEC_IIDEX(idx) (((u64)(idx & 0xffff) << 32))
293 +#define QI_IEC_IM(m) (((u64)(m & 0x1f) << 27))
295 +#define QI_IWD_STATUS_DATA(d) (((u64)d) << 32)
296 +#define QI_IWD_STATUS_WRITE (((u64)1) << 5)
304 + struct qi_desc *desc; /* invalidation queue */
305 + int *desc_status; /* desc status */
306 + int free_head; /* first free entry */
307 + int free_tail; /* last free entry */
312 void __iomem *reg; /* Pointer to hardware regs, virtual addr */
314 @@ -212,8 +265,16 @@ struct intel_iommu {
315 struct msi_msg saved_msg;
316 struct sys_device sysdev;
318 + struct q_inval *qi; /* Queued invalidation info */
321 +static inline void __iommu_flush_cache(
322 + struct intel_iommu *iommu, void *addr, int size)
324 + if (!ecap_coherent(iommu->ecap))
325 + clflush_cache_range(addr, size);
328 extern struct dmar_drhd_unit * dmar_find_matched_drhd_unit(struct pci_dev *dev);
330 extern int alloc_iommu(struct dmar_drhd_unit *drhd);