]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/blob - queue-5.10/xen-events-close-evtchn-after-mapping-cleanup.patch
Linux 5.4.274
[thirdparty/kernel/stable-queue.git] / queue-5.10 / xen-events-close-evtchn-after-mapping-cleanup.patch
1 From fa765c4b4aed2d64266b694520ecb025c862c5a9 Mon Sep 17 00:00:00 2001
2 From: Maximilian Heyne <mheyne@amazon.de>
3 Date: Wed, 24 Jan 2024 16:31:28 +0000
4 Subject: xen/events: close evtchn after mapping cleanup
5
6 From: Maximilian Heyne <mheyne@amazon.de>
7
8 commit fa765c4b4aed2d64266b694520ecb025c862c5a9 upstream.
9
10 shutdown_pirq and startup_pirq are not taking the
11 irq_mapping_update_lock because they can't due to lock inversion. Both
12 are called with the irq_desc->lock being taken. The lock order,
13 however, is first irq_mapping_update_lock and then irq_desc->lock.
14
15 This opens multiple races:
16 - shutdown_pirq can be interrupted by a function that allocates an event
17 channel:
18
19 CPU0 CPU1
20 shutdown_pirq {
21 xen_evtchn_close(e)
22 __startup_pirq {
23 EVTCHNOP_bind_pirq
24 -> returns just freed evtchn e
25 set_evtchn_to_irq(e, irq)
26 }
27 xen_irq_info_cleanup() {
28 set_evtchn_to_irq(e, -1)
29 }
30 }
31
32 Assume here that event channel e refers to the same event channel
33 number.
34 After this race the evtchn_to_irq mapping for e is invalid (-1).
35
36 - __startup_pirq races with __unbind_from_irq in a similar way. Because
37 __startup_pirq doesn't take irq_mapping_update_lock it can grab the
38 evtchn that __unbind_from_irq is currently freeing and cleaning up. In
39 this case even though the event channel is allocated, its mapping can
40 be unset in evtchn_to_irq.
41
42 The fix is to first clean up the mappings and then close the event
43 channel. In this way, when an event channel gets allocated, its
44 potential previous evtchn_to_irq mappings are guaranteed to be unset already.
45 This is also the reverse order of the allocation where first the event
46 channel is allocated and then the mappings are set up.
47
48 On a 5.10 kernel prior to commit 3fcdaf3d7634 ("xen/events: modify internal
49 [un]bind interfaces"), we hit a BUG like the following during probing of NVMe
50 devices. The issue is that during nvme_setup_io_queues, pci_free_irq
51 is called for every device which results in a call to shutdown_pirq.
52 With many nvme devices it's therefore likely to hit this race during
53 boot because there will be multiple calls to shutdown_pirq and
54 startup_pirq potentially running in parallel.
55
56 ------------[ cut here ]------------
57 blkfront: xvda: barrier or flush: disabled; persistent grants: enabled; indirect descriptors: enabled; bounce buffer: enabled
58 kernel BUG at drivers/xen/events/events_base.c:499!
59 invalid opcode: 0000 [#1] SMP PTI
60 CPU: 44 PID: 375 Comm: kworker/u257:23 Not tainted 5.10.201-191.748.amzn2.x86_64 #1
61 Hardware name: Xen HVM domU, BIOS 4.11.amazon 08/24/2006
62 Workqueue: nvme-reset-wq nvme_reset_work
63 RIP: 0010:bind_evtchn_to_cpu+0xdf/0xf0
64 Code: 5d 41 5e c3 cc cc cc cc 44 89 f7 e8 2b 55 ad ff 49 89 c5 48 85 c0 0f 84 64 ff ff ff 4c 8b 68 30 41 83 fe ff 0f 85 60 ff ff ff <0f> 0b 66 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 0f 1f 44 00 00
65 RSP: 0000:ffffc9000d533b08 EFLAGS: 00010046
66 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000006
67 RDX: 0000000000000028 RSI: 00000000ffffffff RDI: 00000000ffffffff
68 RBP: ffff888107419680 R08: 0000000000000000 R09: ffffffff82d72b00
69 R10: 0000000000000000 R11: 0000000000000000 R12: 00000000000001ed
70 R13: 0000000000000000 R14: 00000000ffffffff R15: 0000000000000002
71 FS: 0000000000000000(0000) GS:ffff88bc8b500000(0000) knlGS:0000000000000000
72 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
73 CR2: 0000000000000000 CR3: 0000000002610001 CR4: 00000000001706e0
74 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
75 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
76 Call Trace:
77 ? show_trace_log_lvl+0x1c1/0x2d9
78 ? show_trace_log_lvl+0x1c1/0x2d9
79 ? set_affinity_irq+0xdc/0x1c0
80 ? __die_body.cold+0x8/0xd
81 ? die+0x2b/0x50
82 ? do_trap+0x90/0x110
83 ? bind_evtchn_to_cpu+0xdf/0xf0
84 ? do_error_trap+0x65/0x80
85 ? bind_evtchn_to_cpu+0xdf/0xf0
86 ? exc_invalid_op+0x4e/0x70
87 ? bind_evtchn_to_cpu+0xdf/0xf0
88 ? asm_exc_invalid_op+0x12/0x20
89 ? bind_evtchn_to_cpu+0xdf/0xf0
90 ? bind_evtchn_to_cpu+0xc5/0xf0
91 set_affinity_irq+0xdc/0x1c0
92 irq_do_set_affinity+0x1d7/0x1f0
93 irq_setup_affinity+0xd6/0x1a0
94 irq_startup+0x8a/0xf0
95 __setup_irq+0x639/0x6d0
96 ? nvme_suspend+0x150/0x150
97 request_threaded_irq+0x10c/0x180
98 ? nvme_suspend+0x150/0x150
99 pci_request_irq+0xa8/0xf0
100 ? __blk_mq_free_request+0x74/0xa0
101 queue_request_irq+0x6f/0x80
102 nvme_create_queue+0x1af/0x200
103 nvme_create_io_queues+0xbd/0xf0
104 nvme_setup_io_queues+0x246/0x320
105 ? nvme_irq_check+0x30/0x30
106 nvme_reset_work+0x1c8/0x400
107 process_one_work+0x1b0/0x350
108 worker_thread+0x49/0x310
109 ? process_one_work+0x350/0x350
110 kthread+0x11b/0x140
111 ? __kthread_bind_mask+0x60/0x60
112 ret_from_fork+0x22/0x30
113 Modules linked in:
114 ---[ end trace a11715de1eee1873 ]---
115
116 Fixes: d46a78b05c0e ("xen: implement pirq type event channels")
117 Cc: stable@vger.kernel.org
118 Co-debugged-by: Andrew Panyakin <apanyaki@amazon.com>
119 Signed-off-by: Maximilian Heyne <mheyne@amazon.de>
120 Reviewed-by: Juergen Gross <jgross@suse.com>
121 Link: https://lore.kernel.org/r/20240124163130.31324-1-mheyne@amazon.de
122 Signed-off-by: Juergen Gross <jgross@suse.com>
123 [apanyaki: backport to v5.10-stable]
124 Signed-off-by: Andrew Paniakin <apanyaki@amazon.com>
125 Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
126 ---
127 drivers/xen/events/events_base.c | 5 ++---
128 1 file changed, 2 insertions(+), 3 deletions(-)
129
130 --- a/drivers/xen/events/events_base.c
131 +++ b/drivers/xen/events/events_base.c
132 @@ -885,8 +885,8 @@ static void shutdown_pirq(struct irq_dat
133 return;
134
135 do_mask(info, EVT_MASK_REASON_EXPLICIT);
136 - xen_evtchn_close(evtchn);
137 xen_irq_info_cleanup(info);
138 + xen_evtchn_close(evtchn);
139 }
140
141 static void enable_pirq(struct irq_data *data)
142 @@ -929,8 +929,6 @@ static void __unbind_from_irq(unsigned i
143 if (VALID_EVTCHN(evtchn)) {
144 unsigned int cpu = cpu_from_irq(irq);
145
146 - xen_evtchn_close(evtchn);
147 -
148 switch (type_from_irq(irq)) {
149 case IRQT_VIRQ:
150 per_cpu(virq_to_irq, cpu)[virq_from_irq(irq)] = -1;
151 @@ -943,6 +941,7 @@ static void __unbind_from_irq(unsigned i
152 }
153
154 xen_irq_info_cleanup(info);
155 + xen_evtchn_close(evtchn);
156 }
157
158 xen_free_irq(irq);