]> git.ipfire.org Git - people/pmueller/ipfire-2.x.git/blame - src/patches/suse-2.6.27.25/patches.arch/ppc-axon-missing-msi-workaround-5.diff
Revert "Move xen patchset to new version's subdir."
[people/pmueller/ipfire-2.x.git] / src / patches / suse-2.6.27.25 / patches.arch / ppc-axon-missing-msi-workaround-5.diff
CommitLineData
8f69975d
BS
1From: Arnd Bergmann <arnd.bergmann@de.ibm.com>
2Subject: powerpc/cell/axon-msi: retry on missing interrupt
3References: bnc#445964,bnc#467633
4
5The MSI capture logic on the axon bridge can sometimes
6lose interrupts in case of high DMA and interrupt load,
7when it signals an MSI interrupt to the MPIC interrupt
8controller while we are already handling another MSI.
9
10Each MSI vector gets written into a FIFO buffer in main
11memory using DMA, and that DMA access is normally flushed
12by the actual interrupt packet on the IOIF. An MMIO
13register in the MSIC holds the position of the last
14entry in the FIFO buffer that was written. However,
15reading that position does not flush the DMA, so that
16we can observe stale data in the buffer.
17
18In a stress test, we have observed the DMA to arrive
19up to 14 microseconds after reading the register.
20We can reliably detect this condition by writing
21an invalid MSI vector into the FIFO buffer after
22reading from it, assuming that all MSIs we get
23are valid. After detecting an invalid MSI vector,
24we udelay(1) in the interrupt cascade for up to
25100 times before giving up.
26
27Signed-off-by: Arnd Bergmann <arnd@arndb.de>
28Acked-by: John Jolly <jjolly@novell.com>
29
30commit 23e0e8afafd9ac065d81506524adf3339584044b
31Author: Arnd Bergmann <arnd@arndb.de>
32Date: Fri Dec 12 09:19:50 2008 +0000
33
34 powerpc/cell/axon-msi: Fix MSI after kexec
35
36 Commit d015fe995 'powerpc/cell/axon-msi: Retry on missing interrupt'
37 has turned a rare failure to kexec on QS22 into a reproducible
38 error, which we have now analysed.
39
40 The problem is that after a kexec, the MSIC hardware still points
41 into the middle of the old ring buffer. We set up the ring buffer
42 during reboot, but not the offset into it. On older kernels, this
43 would cause a storm of thousands of spurious interrupts after a
44 kexec, which would most of the time get dropped silently.
45
46 With the new code, we time out on each interrupt, waiting for
47 it to become valid. If more interrupts come in that we time
48 out on, this goes on indefinitely, which eventually leads to
49 a hard crash.
50
51 The solution in this commit is to read the current offset from
52 the MSIC when reinitializing it. This now works correctly, as
53 expected.
54
55 Reported-by: Dirk Herrendoerfer <d.herrendoerfer@de.ibm.com>
56 Signed-off-by: Arnd Bergmann <arnd@arndb.de>
57 Acked-by: Michael Ellerman <michael@ellerman.id.au>
58 Signed-off-by: Paul Mackerras <paulus@samba.org>
59
60
61---
62 arch/powerpc/platforms/cell/axon_msi.c | 39 ++++++++++++++++++++++++++++-----
63 1 file changed, 34 insertions(+), 5 deletions(-)
64
65--- a/arch/powerpc/platforms/cell/axon_msi.c
66+++ b/arch/powerpc/platforms/cell/axon_msi.c
67@@ -95,6 +95,7 @@ static void axon_msi_cascade(unsigned in
68 struct axon_msic *msic = get_irq_data(irq);
69 u32 write_offset, msi;
70 int idx;
71+ int retry = 0;
72
73 write_offset = dcr_read(msic->dcr_host, MSIC_WRITE_OFFSET_REG);
74 pr_debug("axon_msi: original write_offset 0x%x\n", write_offset);
75@@ -102,7 +103,7 @@ static void axon_msi_cascade(unsigned in
76 /* write_offset doesn't wrap properly, so we have to mask it */
77 write_offset &= MSIC_FIFO_SIZE_MASK;
78
79- while (msic->read_offset != write_offset) {
80+ while (msic->read_offset != write_offset && retry < 100) {
81 idx = msic->read_offset / sizeof(__le32);
82 msi = le32_to_cpu(msic->fifo_virt[idx]);
83 msi &= 0xFFFF;
84@@ -110,13 +111,37 @@ static void axon_msi_cascade(unsigned in
85 pr_debug("axon_msi: woff %x roff %x msi %x\n",
86 write_offset, msic->read_offset, msi);
87
88+ if (msi < NR_IRQS && irq_map[msi].host == msic->irq_host) {
89+ generic_handle_irq(msi);
90+ msic->fifo_virt[idx] = cpu_to_le32(0xffffffff);
91+ } else {
92+ /*
93+ * Reading the MSIC_WRITE_OFFSET_REG does not
94+ * reliably flush the outstanding DMA to the
95+ * FIFO buffer. Here we were reading stale
96+ * data, so we need to retry.
97+ */
98+ udelay(1);
99+ retry++;
100+ pr_debug("axon_msi: invalid irq 0x%x!\n", msi);
101+ continue;
102+ }
103+
104+ if (retry) {
105+ pr_debug("axon_msi: late irq 0x%x, retry %d\n",
106+ msi, retry);
107+ retry = 0;
108+ }
109+
110 msic->read_offset += MSIC_FIFO_ENTRY_SIZE;
111 msic->read_offset &= MSIC_FIFO_SIZE_MASK;
112+ }
113
114- if (msi < NR_IRQS && irq_map[msi].host == msic->irq_host)
115- generic_handle_irq(msi);
116- else
117- pr_debug("axon_msi: invalid irq 0x%x!\n", msi);
118+ if (retry) {
119+ printk(KERN_WARNING "axon_msi: irq timed out\n");
120+
121+ msic->read_offset += MSIC_FIFO_ENTRY_SIZE;
122+ msic->read_offset &= MSIC_FIFO_SIZE_MASK;
123 }
124
125 desc->chip->eoi(irq);
126@@ -364,6 +389,7 @@ static int axon_msi_probe(struct of_devi
127 dn->full_name);
128 goto out_free_fifo;
129 }
130+ memset(msic->fifo_virt, 0xff, MSIC_FIFO_SIZE_BYTES);
131
132 msic->irq_host = irq_alloc_host(dn, IRQ_HOST_MAP_NOMAP,
133 NR_IRQS, &msic_host_ops, 0);
134@@ -387,6 +413,9 @@ static int axon_msi_probe(struct of_devi
135 MSIC_CTRL_IRQ_ENABLE | MSIC_CTRL_ENABLE |
136 MSIC_CTRL_FIFO_SIZE);
137
138+ msic->read_offset = dcr_read(msic->dcr_host, MSIC_WRITE_OFFSET_REG)
139+ & MSIC_FIFO_SIZE_MASK;
140+
141 device->dev.platform_data = msic;
142
143 ppc_md.setup_msi_irqs = axon_msi_setup_msi_irqs;