From: Arnd Bergmann <arnd.bergmann@de.ibm.com>
Subject: powerpc/cell/axon-msi: retry on missing interrupt
References: bnc#445964,bnc#467633

The MSI capture logic on the axon bridge can sometimes
lose interrupts in case of high DMA and interrupt load,
when it signals an MSI interrupt to the MPIC interrupt
controller while we are already handling another MSI.

Each MSI vector gets written into a FIFO buffer in main
memory using DMA, and that DMA access is normally flushed
by the actual interrupt packet on the IOIF. An MMIO
register in the MSIC holds the position of the last
entry in the FIFO buffer that was written. However,
reading that position does not flush the DMA, so that
we can observe stale data in the buffer.

In a stress test, we have observed the DMA to arrive
up to 14 microseconds after reading the register.
We can reliably detect this condition by writing
an invalid MSI vector into the FIFO buffer after
reading from it, assuming that all MSIs we get
are valid. After detecting an invalid MSI vector,
we udelay(1) in the interrupt cascade up to
100 times before giving up.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: John Jolly <jjolly@novell.com>
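
For illustration only (not part of the patch), the detection scheme
described above boils down to the following self-contained userspace
sketch. The names, FIFO size and delay are made up; the real code is
in axon_msi_cascade() in the diff below. Each consumed slot is
overwritten with an invalid sentinel, so reading the sentinel back
means the DMA for that entry has not landed yet, and we retry a
bounded number of times:

/* Hedged sketch of the sentinel-based retry idea; not driver code. */
#include <stdint.h>
#include <stdio.h>
#include <time.h>

#define FIFO_ENTRIES   8                /* made-up FIFO size */
#define INVALID_ENTRY  0xffffffffu      /* sentinel written back after consuming a slot */
#define MAX_RETRIES    100

static uint32_t fifo[FIFO_ENTRIES];
static unsigned int read_idx;

/* Consume entries up to write_idx, retrying while a slot still holds the sentinel. */
static void consume(unsigned int write_idx)
{
	int retry = 0;

	while (read_idx != write_idx && retry < MAX_RETRIES) {
		uint32_t msi = fifo[read_idx];

		if (msi != INVALID_ENTRY) {
			printf("handling MSI vector 0x%x\n", (unsigned)msi);
			fifo[read_idx] = INVALID_ENTRY; /* mark the slot as consumed */
		} else {
			/* Stale data: the DMA for this entry has not arrived yet. */
			struct timespec ts = { 0, 1000 };	/* ~1us, like udelay(1) */
			nanosleep(&ts, NULL);
			retry++;
			continue;
		}

		retry = 0;
		read_idx = (read_idx + 1) % FIFO_ENTRIES;
	}

	if (retry)
		fprintf(stderr, "gave up waiting for FIFO entry %u\n", read_idx);
}

int main(void)
{
	for (unsigned int i = 0; i < FIFO_ENTRIES; i++)
		fifo[i] = INVALID_ENTRY;

	fifo[0] = 0x20;	/* entry 0 has arrived in memory; entry 1 has not,
			 * its DMA is still in flight */
	consume(2);
	return 0;
}

Compiled and run, this handles the first entry and then gives up on the
second one, whose DMA never arrives, mirroring the "irq timed out"
warning added by the patch.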

commit 23e0e8afafd9ac065d81506524adf3339584044b
Author: Arnd Bergmann <arnd@arndb.de>
Date:   Fri Dec 12 09:19:50 2008 +0000

    powerpc/cell/axon-msi: Fix MSI after kexec

    Commit d015fe995 'powerpc/cell/axon-msi: Retry on missing interrupt'
    has turned a rare failure to kexec on QS22 into a reproducible
    error, which we have now analysed.

    The problem is that after a kexec, the MSIC hardware still points
    into the middle of the old ring buffer. We set up the ring buffer
    during reboot, but not the offset into it. On older kernels, this
    would cause a storm of thousands of spurious interrupts after a
    kexec, which would most of the time get dropped silently.
    With the new code, we time out on each interrupt while waiting
    for it to become valid. If interrupts that we time out on keep
    coming in, this goes on indefinitely and eventually leads to
    a hard crash.

    The solution in this commit is to read the current offset from
    the MSIC when reinitializing it. This now works correctly, as
    expected.

    Reported-by: Dirk Herrendoerfer <d.herrendoerfer@de.ibm.com>
    Signed-off-by: Arnd Bergmann <arnd@arndb.de>
    Acked-by: Michael Ellerman <michael@ellerman.id.au>
    Signed-off-by: Paul Mackerras <paulus@samba.org>

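Again for illustration only (made-up names and values, userspace rather
than driver code), the kexec fix amounts to initializing the software
read offset from the hardware write-offset register at setup time,
masked to the power-of-two FIFO size because, as the driver comment
notes, the register does not wrap properly:

#include <stdint.h>
#include <stdio.h>

#define FIFO_SIZE_BYTES  0x10000u               /* made-up power-of-two FIFO size */
#define FIFO_SIZE_MASK   (FIFO_SIZE_BYTES - 1)

/* Stand-in for reading the hardware write-offset register after a kexec. */
static uint32_t read_hw_write_offset(void)
{
	return 0x1a2b8;	/* the counter can run past the FIFO size */
}

int main(void)
{
	uint32_t raw = read_hw_write_offset();

	/*
	 * Start consuming at the masked current write offset instead of 0,
	 * so a ring buffer the hardware was already using before the kexec
	 * is not misread from the beginning.
	 */
	uint32_t read_offset = raw & FIFO_SIZE_MASK;

	printf("raw write offset 0x%x -> initial read offset 0x%x\n",
	       (unsigned)raw, (unsigned)read_offset);
	return 0;
}

In the same spirit, the memset of the FIFO to 0xff in the probe path
below makes every slot start out as an invalid vector, just like the
0xffffffff written back after each consumed entry.
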
---
 arch/powerpc/platforms/cell/axon_msi.c |   39 ++++++++++++++++++++++++++++-----
 1 file changed, 34 insertions(+), 5 deletions(-)

--- a/arch/powerpc/platforms/cell/axon_msi.c
+++ b/arch/powerpc/platforms/cell/axon_msi.c
@@ -95,6 +95,7 @@ static void axon_msi_cascade(unsigned in
 	struct axon_msic *msic = get_irq_data(irq);
 	u32 write_offset, msi;
 	int idx;
+	int retry = 0;
 
 	write_offset = dcr_read(msic->dcr_host, MSIC_WRITE_OFFSET_REG);
 	pr_debug("axon_msi: original write_offset 0x%x\n", write_offset);
@@ -102,7 +103,7 @@ static void axon_msi_cascade(unsigned in
 	/* write_offset doesn't wrap properly, so we have to mask it */
 	write_offset &= MSIC_FIFO_SIZE_MASK;
 
-	while (msic->read_offset != write_offset) {
+	while (msic->read_offset != write_offset && retry < 100) {
 		idx = msic->read_offset / sizeof(__le32);
 		msi = le32_to_cpu(msic->fifo_virt[idx]);
 		msi &= 0xFFFF;
@@ -110,13 +111,37 @@ static void axon_msi_cascade(unsigned in
 		pr_debug("axon_msi: woff %x roff %x msi %x\n",
 			 write_offset, msic->read_offset, msi);
 
+		if (msi < NR_IRQS && irq_map[msi].host == msic->irq_host) {
+			generic_handle_irq(msi);
+			msic->fifo_virt[idx] = cpu_to_le32(0xffffffff);
+		} else {
+			/*
+			 * Reading the MSIC_WRITE_OFFSET_REG does not
+			 * reliably flush the outstanding DMA to the
+			 * FIFO buffer. Here we were reading stale
+			 * data, so we need to retry.
+			 */
+			udelay(1);
+			retry++;
+			pr_debug("axon_msi: invalid irq 0x%x!\n", msi);
+			continue;
+		}
+
+		if (retry) {
+			pr_debug("axon_msi: late irq 0x%x, retry %d\n",
+				 msi, retry);
+			retry = 0;
+		}
+
 		msic->read_offset += MSIC_FIFO_ENTRY_SIZE;
 		msic->read_offset &= MSIC_FIFO_SIZE_MASK;
+	}
 
-		if (msi < NR_IRQS && irq_map[msi].host == msic->irq_host)
-			generic_handle_irq(msi);
-		else
-			pr_debug("axon_msi: invalid irq 0x%x!\n", msi);
+	if (retry) {
+		printk(KERN_WARNING "axon_msi: irq timed out\n");
+
+		msic->read_offset += MSIC_FIFO_ENTRY_SIZE;
+		msic->read_offset &= MSIC_FIFO_SIZE_MASK;
 	}
 
 	desc->chip->eoi(irq);
@@ -364,6 +389,7 @@ static int axon_msi_probe(struct of_devi
 		       dn->full_name);
 		goto out_free_fifo;
 	}
+	memset(msic->fifo_virt, 0xff, MSIC_FIFO_SIZE_BYTES);
 
 	msic->irq_host = irq_alloc_host(dn, IRQ_HOST_MAP_NOMAP,
 					NR_IRQS, &msic_host_ops, 0);
@@ -387,6 +413,9 @@ static int axon_msi_probe(struct of_devi
 	       MSIC_CTRL_IRQ_ENABLE | MSIC_CTRL_ENABLE |
 	       MSIC_CTRL_FIFO_SIZE);
 
+	msic->read_offset = dcr_read(msic->dcr_host, MSIC_WRITE_OFFSET_REG)
+				& MSIC_FIFO_SIZE_MASK;
+
 	device->dev.platform_data = msic;
 
 	ppc_md.setup_msi_irqs = axon_msi_setup_msi_irqs;