]>
Commit | Line | Data |
---|---|---|
2cb7cef9 BS |
1 | From: Suresh Siddha <suresh.b.siddha@intel.com> |
2 | Subject: x64, x2apic/intr-remap: MSI and MSI-X support for interrupt remapping infrastructure | |
3 | References: fate #303948 and fate #303984 | |
4 | Patch-Mainline: queued for .28 | |
5 | Commit-ID: 75c46fa61bc5b4ccd20a168ff325c58771248fcd | |
6 | ||
7 | Signed-off-by: Thomas Renninger <trenn@suse.de> | |
8 | ||
9 | MSI and MSI-X support for interrupt remapping infrastructure. | |
10 | ||
11 | The MSI address register will be programmed with the interrupt-remapping | |
12 | table entry (IRTE) index, and the IRTE will contain information about the | |
13 | vector, cpu destination, etc. | |
14 | ||
15 | For MSI-X, all the IRTEs will be consecutively allocated in the table, | |
16 | and the address registers will contain the starting index to the block | |
17 | and the data register will contain the subindex within that block. | |
18 | ||
19 | This also introduces a new irq_chip for cleaner irq migration (in the process | |
20 | context, as opposed to the current irq migration in the context of an interrupt; | |
21 | the interrupt-remapping infrastructure will help us achieve this). | |
22 | ||
23 | As MSI is edge triggered, irq migration is a simple atomic update (of vector | |
24 | and cpu destination) of the IRTE and flushing the hardware cache. | |
25 | ||
26 | Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com> | |
27 | Cc: akpm@linux-foundation.org | |
28 | Cc: arjan@linux.intel.com | |
29 | Cc: andi@firstfloor.org | |
30 | Cc: ebiederm@xmission.com | |
31 | Cc: jbarnes@virtuousgeek.org | |
32 | Cc: steiner@sgi.com | |
33 | Signed-off-by: Ingo Molnar <mingo@elte.hu> | |
34 | ||
35 | --- | |
36 | arch/x86/kernel/io_apic_64.c | 230 +++++++++++++++++++++++++++++++++++++++++-- | |
37 | drivers/pci/intr_remapping.c | 11 ++ | |
38 | include/asm-x86/msidef.h | 4 | |
39 | include/linux/dmar.h | 1 | |
40 | 4 files changed, 238 insertions(+), 8 deletions(-) | |
41 | ||
42 | Index: linux-2.6.26/arch/x86/kernel/io_apic_64.c | |
43 | =================================================================== | |
44 | --- linux-2.6.26.orig/arch/x86/kernel/io_apic_64.c | |
45 | +++ linux-2.6.26/arch/x86/kernel/io_apic_64.c | |
46 | @@ -2295,6 +2295,9 @@ void destroy_irq(unsigned int irq) | |
47 | ||
48 | dynamic_irq_cleanup(irq); | |
49 | ||
50 | +#ifdef CONFIG_INTR_REMAP | |
51 | + free_irte(irq); | |
52 | +#endif | |
53 | spin_lock_irqsave(&vector_lock, flags); | |
54 | __clear_irq_vector(irq); | |
55 | spin_unlock_irqrestore(&vector_lock, flags); | |
56 | @@ -2313,11 +2316,42 @@ static int msi_compose_msg(struct pci_de | |
57 | ||
58 | tmp = TARGET_CPUS; | |
59 | err = assign_irq_vector(irq, tmp); | |
60 | - if (!err) { | |
61 | - cpus_and(tmp, cfg->domain, tmp); | |
62 | - dest = cpu_mask_to_apicid(tmp); | |
63 | + if (err) | |
64 | + return err; | |
65 | + | |
66 | + cpus_and(tmp, cfg->domain, tmp); | |
67 | + dest = cpu_mask_to_apicid(tmp); | |
68 | + | |
69 | +#ifdef CONFIG_INTR_REMAP | |
70 | + if (irq_remapped(irq)) { | |
71 | + struct irte irte; | |
72 | + int ir_index; | |
73 | + u16 sub_handle; | |
74 | + | |
75 | + ir_index = map_irq_to_irte_handle(irq, &sub_handle); | |
76 | + BUG_ON(ir_index == -1); | |
77 | + | |
78 | + memset (&irte, 0, sizeof(irte)); | |
79 | + | |
80 | + irte.present = 1; | |
81 | + irte.dst_mode = INT_DEST_MODE; | |
82 | + irte.trigger_mode = 0; /* edge */ | |
83 | + irte.dlvry_mode = INT_DELIVERY_MODE; | |
84 | + irte.vector = cfg->vector; | |
85 | + irte.dest_id = IRTE_DEST(dest); | |
86 | + | |
87 | + modify_irte(irq, &irte); | |
88 | ||
89 | msg->address_hi = MSI_ADDR_BASE_HI; | |
90 | + msg->data = sub_handle; | |
91 | + msg->address_lo = MSI_ADDR_BASE_LO | MSI_ADDR_IR_EXT_INT | | |
92 | + MSI_ADDR_IR_SHV | | |
93 | + MSI_ADDR_IR_INDEX1(ir_index) | | |
94 | + MSI_ADDR_IR_INDEX2(ir_index); | |
95 | + } else | |
96 | +#endif | |
97 | + { | |
98 | + msg->address_hi = MSI_ADDR_BASE_HI; | |
99 | msg->address_lo = | |
100 | MSI_ADDR_BASE_LO | | |
101 | ((INT_DEST_MODE == 0) ? | |
102 | @@ -2367,6 +2401,55 @@ static void set_msi_irq_affinity(unsigne | |
103 | write_msi_msg(irq, &msg); | |
104 | irq_desc[irq].affinity = mask; | |
105 | } | |
106 | + | |
107 | +#ifdef CONFIG_INTR_REMAP | |
108 | +/* | |
109 | + * Migrate the MSI irq to another cpumask. This migration is | |
110 | + * done in the process context using interrupt-remapping hardware. | |
111 | + */ | |
112 | +static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask) | |
113 | +{ | |
114 | + struct irq_cfg *cfg = irq_cfg + irq; | |
115 | + unsigned int dest; | |
116 | + cpumask_t tmp, cleanup_mask; | |
117 | + struct irte irte; | |
118 | + | |
119 | + cpus_and(tmp, mask, cpu_online_map); | |
120 | + if (cpus_empty(tmp)) | |
121 | + return; | |
122 | + | |
123 | + if (get_irte(irq, &irte)) | |
124 | + return; | |
125 | + | |
126 | + if (assign_irq_vector(irq, mask)) | |
127 | + return; | |
128 | + | |
129 | + cpus_and(tmp, cfg->domain, mask); | |
130 | + dest = cpu_mask_to_apicid(tmp); | |
131 | + | |
132 | + irte.vector = cfg->vector; | |
133 | + irte.dest_id = IRTE_DEST(dest); | |
134 | + | |
135 | + /* | |
136 | + * atomically update the IRTE with the new destination and vector. | |
137 | + */ | |
138 | + modify_irte(irq, &irte); | |
139 | + | |
140 | + /* | |
141 | + * After this point, all the interrupts will start arriving | |
142 | + * at the new destination. So, time to cleanup the previous | |
143 | + * vector allocation. | |
144 | + */ | |
145 | + if (cfg->move_in_progress) { | |
146 | + cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); | |
147 | + cfg->move_cleanup_count = cpus_weight(cleanup_mask); | |
148 | + send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); | |
149 | + cfg->move_in_progress = 0; | |
150 | + } | |
151 | + | |
152 | + irq_desc[irq].affinity = mask; | |
153 | +} | |
154 | +#endif | |
155 | #endif /* CONFIG_SMP */ | |
156 | ||
157 | /* | |
158 | @@ -2384,26 +2467,157 @@ static struct irq_chip msi_chip = { | |
159 | .retrigger = ioapic_retrigger_irq, | |
160 | }; | |
161 | ||
162 | -int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc) | |
163 | +#ifdef CONFIG_INTR_REMAP | |
164 | +static struct irq_chip msi_ir_chip = { | |
165 | + .name = "IR-PCI-MSI", | |
166 | + .unmask = unmask_msi_irq, | |
167 | + .mask = mask_msi_irq, | |
168 | + .ack = ack_x2apic_edge, | |
169 | +#ifdef CONFIG_SMP | |
170 | + .set_affinity = ir_set_msi_irq_affinity, | |
171 | +#endif | |
172 | + .retrigger = ioapic_retrigger_irq, | |
173 | +}; | |
174 | + | |
175 | +/* | |
176 | + * Map the PCI dev to the corresponding remapping hardware unit | |
177 | + * and allocate 'nvec' consecutive interrupt-remapping table entries | |
178 | + * in it. | |
179 | + */ | |
180 | +static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec) | |
181 | { | |
182 | + struct intel_iommu *iommu; | |
183 | + int index; | |
184 | + | |
185 | + iommu = map_dev_to_ir(dev); | |
186 | + if (!iommu) { | |
187 | + printk(KERN_ERR | |
188 | + "Unable to map PCI %s to iommu\n", pci_name(dev)); | |
189 | + return -ENOENT; | |
190 | + } | |
191 | + | |
192 | + index = alloc_irte(iommu, irq, nvec); | |
193 | + if (index < 0) { | |
194 | + printk(KERN_ERR | |
195 | + "Unable to allocate %d IRTE for PCI %s\n", nvec, | |
196 | + pci_name(dev)); | |
197 | + return -ENOSPC; | |
198 | + } | |
199 | + return index; | |
200 | +} | |
201 | +#endif | |
202 | + | |
203 | +static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq) | |
204 | +{ | |
205 | + int ret; | |
206 | struct msi_msg msg; | |
207 | + | |
208 | + ret = msi_compose_msg(dev, irq, &msg); | |
209 | + if (ret < 0) | |
210 | + return ret; | |
211 | + | |
212 | + set_irq_msi(irq, desc); | |
213 | + write_msi_msg(irq, &msg); | |
214 | + | |
215 | +#ifdef CONFIG_INTR_REMAP | |
216 | + if (irq_remapped(irq)) { | |
217 | + struct irq_desc *desc = irq_desc + irq; | |
218 | + /* | |
219 | + * irq migration in process context | |
220 | + */ | |
221 | + desc->status |= IRQ_MOVE_PCNTXT; | |
222 | + set_irq_chip_and_handler_name(irq, &msi_ir_chip, handle_edge_irq, "edge"); | |
223 | + } else | |
224 | +#endif | |
225 | + set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge"); | |
226 | + | |
227 | + return 0; | |
228 | +} | |
229 | + | |
230 | +int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc) | |
231 | +{ | |
232 | int irq, ret; | |
233 | + | |
234 | irq = create_irq(); | |
235 | if (irq < 0) | |
236 | return irq; | |
237 | ||
238 | - ret = msi_compose_msg(dev, irq, &msg); | |
239 | +#ifdef CONFIG_INTR_REMAP | |
240 | + if (!intr_remapping_enabled) | |
241 | + goto no_ir; | |
242 | + | |
243 | + ret = msi_alloc_irte(dev, irq, 1); | |
244 | + if (ret < 0) | |
245 | + goto error; | |
246 | +no_ir: | |
247 | +#endif | |
248 | + ret = setup_msi_irq(dev, desc, irq); | |
249 | if (ret < 0) { | |
250 | destroy_irq(irq); | |
251 | return ret; | |
252 | } | |
253 | + return 0; | |
254 | ||
255 | - set_irq_msi(irq, desc); | |
256 | - write_msi_msg(irq, &msg); | |
257 | +#ifdef CONFIG_INTR_REMAP | |
258 | +error: | |
259 | + destroy_irq(irq); | |
260 | + return ret; | |
261 | +#endif | |
262 | +} | |
263 | ||
264 | - set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge"); | |
265 | +int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) | |
266 | +{ | |
267 | + int irq, ret, sub_handle; | |
268 | + struct msi_desc *desc; | |
269 | +#ifdef CONFIG_INTR_REMAP | |
270 | + struct intel_iommu *iommu = 0; | |
271 | + int index = 0; | |
272 | +#endif | |
273 | ||
274 | + sub_handle = 0; | |
275 | + list_for_each_entry(desc, &dev->msi_list, list) { | |
276 | + irq = create_irq(); | |
277 | + if (irq < 0) | |
278 | + return irq; | |
279 | +#ifdef CONFIG_INTR_REMAP | |
280 | + if (!intr_remapping_enabled) | |
281 | + goto no_ir; | |
282 | + | |
283 | + if (!sub_handle) { | |
284 | + /* | |
285 | + * allocate the consecutive block of IRTE's | |
286 | + * for 'nvec' | |
287 | + */ | |
288 | + index = msi_alloc_irte(dev, irq, nvec); | |
289 | + if (index < 0) { | |
290 | + ret = index; | |
291 | + goto error; | |
292 | + } | |
293 | + } else { | |
294 | + iommu = map_dev_to_ir(dev); | |
295 | + if (!iommu) { | |
296 | + ret = -ENOENT; | |
297 | + goto error; | |
298 | + } | |
299 | + /* | |
300 | + * setup the mapping between the irq and the IRTE | |
301 | + * base index, the sub_handle pointing to the | |
302 | + * appropriate interrupt remap table entry. | |
303 | + */ | |
304 | + set_irte_irq(irq, iommu, index, sub_handle); | |
305 | + } | |
306 | +no_ir: | |
307 | +#endif | |
308 | + ret = setup_msi_irq(dev, desc, irq); | |
309 | + if (ret < 0) | |
310 | + goto error; | |
311 | + sub_handle++; | |
312 | + } | |
313 | return 0; | |
314 | + | |
315 | +error: | |
316 | + destroy_irq(irq); | |
317 | + return ret; | |
318 | } | |
319 | ||
320 | void arch_teardown_msi_irq(unsigned int irq) | |
321 | Index: linux-2.6.26/drivers/pci/intr_remapping.c | |
322 | =================================================================== | |
323 | --- linux-2.6.26.orig/drivers/pci/intr_remapping.c | |
324 | +++ linux-2.6.26/drivers/pci/intr_remapping.c | |
325 | @@ -230,6 +230,17 @@ struct intel_iommu *map_ioapic_to_ir(int | |
326 | return NULL; | |
327 | } | |
328 | ||
329 | +struct intel_iommu *map_dev_to_ir(struct pci_dev *dev) | |
330 | +{ | |
331 | + struct dmar_drhd_unit *drhd; | |
332 | + | |
333 | + drhd = dmar_find_matched_drhd_unit(dev); | |
334 | + if (!drhd) | |
335 | + return NULL; | |
336 | + | |
337 | + return drhd->iommu; | |
338 | +} | |
339 | + | |
340 | int free_irte(int irq) | |
341 | { | |
342 | int index, i; | |
343 | Index: linux-2.6.26/include/asm-x86/msidef.h | |
344 | =================================================================== | |
345 | --- linux-2.6.26.orig/include/asm-x86/msidef.h | |
346 | +++ linux-2.6.26/include/asm-x86/msidef.h | |
347 | @@ -48,4 +48,8 @@ | |
348 | #define MSI_ADDR_DEST_ID(dest) (((dest) << MSI_ADDR_DEST_ID_SHIFT) & \ | |
349 | MSI_ADDR_DEST_ID_MASK) | |
350 | ||
351 | +#define MSI_ADDR_IR_EXT_INT (1 << 4) | |
352 | +#define MSI_ADDR_IR_SHV (1 << 3) | |
353 | +#define MSI_ADDR_IR_INDEX1(index) ((index & 0x8000) >> 13) | |
354 | +#define MSI_ADDR_IR_INDEX2(index) ((index & 0x7fff) << 5) | |
355 | #endif /* ASM_MSIDEF_H */ | |
356 | Index: linux-2.6.26/include/linux/dmar.h | |
357 | =================================================================== | |
358 | --- linux-2.6.26.orig/include/linux/dmar.h | |
359 | +++ linux-2.6.26/include/linux/dmar.h | |
360 | @@ -109,6 +109,7 @@ extern int flush_irte(int irq); | |
361 | extern int free_irte(int irq); | |
362 | ||
363 | extern int irq_remapped(int irq); | |
364 | +extern struct intel_iommu *map_dev_to_ir(struct pci_dev *dev); | |
365 | extern struct intel_iommu *map_ioapic_to_ir(int apic); | |
366 | #else | |
367 | #define irq_remapped(irq) (0) |