]>
Commit | Line | Data |
---|---|---|
ec71c73b GKH |
1 | From bc5add09764c123f58942a37c8335247e683d234 Mon Sep 17 00:00:00 2001 |
2 | From: "Michael J. Ruhl" <michael.j.ruhl@intel.com> | |
3 | Date: Tue, 26 Feb 2019 08:45:35 -0800 | |
4 | Subject: IB/hfi1: Close race condition on user context disable and close | |
5 | ||
6 | From: Michael J. Ruhl <michael.j.ruhl@intel.com> | |
7 | ||
8 | commit bc5add09764c123f58942a37c8335247e683d234 upstream. | |
9 | ||
10 | When disabling and removing a receive context, it is possible for an | |
11 | asynchronous event (i.e IRQ) to occur. Because of this, there is a race | |
12 | between cleaning up the context, and the context being used by the | |
13 | asynchronous event. | |
14 | ||
15 | cpu 0 (context cleanup) | |
16 | rc->ref_count-- (ref_count == 0) | |
17 | hfi1_rcd_free() | |
18 | cpu 1 (IRQ (with rcd index)) | |
19 | rcd_get_by_index() | |
20 | lock | |
21 | ref_count+++ <-- reference count race (WARNING) | |
22 | return rcd | |
23 | unlock | |
24 | cpu 0 | |
25 | hfi1_free_ctxtdata() <-- incorrect free location | |
26 | lock | |
27 | remove rcd from array | |
28 | unlock | |
29 | free rcd | |
30 | ||
31 | This race will cause the following WARNING trace: | |
32 | ||
33 | WARNING: CPU: 0 PID: 175027 at include/linux/kref.h:52 hfi1_rcd_get_by_index+0x84/0xa0 [hfi1] | |
34 | CPU: 0 PID: 175027 Comm: IMB-MPI1 Kdump: loaded Tainted: G OE ------------ 3.10.0-957.el7.x86_64 #1 | |
35 | Hardware name: Intel Corporation S2600KP/S2600KP, BIOS SE5C610.86B.11.01.0076.C4.111920150602 11/19/2015 | |
36 | Call Trace: | |
37 | dump_stack+0x19/0x1b | |
38 | __warn+0xd8/0x100 | |
39 | warn_slowpath_null+0x1d/0x20 | |
40 | hfi1_rcd_get_by_index+0x84/0xa0 [hfi1] | |
41 | is_rcv_urgent_int+0x24/0x90 [hfi1] | |
42 | general_interrupt+0x1b6/0x210 [hfi1] | |
43 | __handle_irq_event_percpu+0x44/0x1c0 | |
44 | handle_irq_event_percpu+0x32/0x80 | |
45 | handle_irq_event+0x3c/0x60 | |
46 | handle_edge_irq+0x7f/0x150 | |
47 | handle_irq+0xe4/0x1a0 | |
48 | do_IRQ+0x4d/0xf0 | |
49 | common_interrupt+0x162/0x162 | |
50 | ||
51 | The race can also lead to a use after free which could be similar to: | |
52 | ||
53 | general protection fault: 0000 1 SMP | |
54 | CPU: 71 PID: 177147 Comm: IMB-MPI1 Kdump: loaded Tainted: G W OE ------------ 3.10.0-957.el7.x86_64 #1 | |
55 | Hardware name: Intel Corporation S2600KP/S2600KP, BIOS SE5C610.86B.11.01.0076.C4.111920150602 11/19/2015 | |
56 | task: ffff9962a8098000 ti: ffff99717a508000 task.ti: ffff99717a508000 __kmalloc+0x94/0x230 | |
57 | Call Trace: | |
58 | ? hfi1_user_sdma_process_request+0x9c8/0x1250 [hfi1] | |
59 | hfi1_user_sdma_process_request+0x9c8/0x1250 [hfi1] | |
60 | hfi1_aio_write+0xba/0x110 [hfi1] | |
61 | do_sync_readv_writev+0x7b/0xd0 | |
62 | do_readv_writev+0xce/0x260 | |
63 | ? handle_mm_fault+0x39d/0x9b0 | |
64 | ? pick_next_task_fair+0x5f/0x1b0 | |
65 | ? sched_clock_cpu+0x85/0xc0 | |
66 | ? __schedule+0x13a/0x890 | |
67 | vfs_writev+0x35/0x60 | |
68 | SyS_writev+0x7f/0x110 | |
69 | system_call_fastpath+0x22/0x27 | |
70 | ||
71 | Use the appropriate kref API to verify access. | |
72 | ||
73 | Reorder context cleanup to ensure context removal before cleanup occurs | |
74 | correctly. | |
75 | ||
76 | Cc: stable@vger.kernel.org # v4.14.0+ | |
77 | Fixes: f683c80ca68e ("IB/hfi1: Resolve kernel panics by reference counting receive contexts") | |
78 | Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com> | |
79 | Signed-off-by: Michael J. Ruhl <michael.j.ruhl@intel.com> | |
80 | Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com> | |
81 | Signed-off-by: Jason Gunthorpe <jgg@mellanox.com> | |
82 | Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> | |
83 | ||
84 | --- | |
85 | drivers/infiniband/hw/hfi1/hfi.h | 2 +- | |
86 | drivers/infiniband/hw/hfi1/init.c | 14 +++++++++----- | |
87 | 2 files changed, 10 insertions(+), 6 deletions(-) | |
88 | ||
89 | --- a/drivers/infiniband/hw/hfi1/hfi.h | |
90 | +++ b/drivers/infiniband/hw/hfi1/hfi.h | |
91 | @@ -1425,7 +1425,7 @@ void hfi1_init_pportdata(struct pci_dev | |
92 | struct hfi1_devdata *dd, u8 hw_pidx, u8 port); | |
93 | void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd); | |
94 | int hfi1_rcd_put(struct hfi1_ctxtdata *rcd); | |
95 | -void hfi1_rcd_get(struct hfi1_ctxtdata *rcd); | |
96 | +int hfi1_rcd_get(struct hfi1_ctxtdata *rcd); | |
97 | struct hfi1_ctxtdata *hfi1_rcd_get_by_index_safe(struct hfi1_devdata *dd, | |
98 | u16 ctxt); | |
99 | struct hfi1_ctxtdata *hfi1_rcd_get_by_index(struct hfi1_devdata *dd, u16 ctxt); | |
100 | --- a/drivers/infiniband/hw/hfi1/init.c | |
101 | +++ b/drivers/infiniband/hw/hfi1/init.c | |
102 | @@ -213,12 +213,12 @@ static void hfi1_rcd_free(struct kref *k | |
103 | struct hfi1_ctxtdata *rcd = | |
104 | container_of(kref, struct hfi1_ctxtdata, kref); | |
105 | ||
106 | - hfi1_free_ctxtdata(rcd->dd, rcd); | |
107 | - | |
108 | spin_lock_irqsave(&rcd->dd->uctxt_lock, flags); | |
109 | rcd->dd->rcd[rcd->ctxt] = NULL; | |
110 | spin_unlock_irqrestore(&rcd->dd->uctxt_lock, flags); | |
111 | ||
112 | + hfi1_free_ctxtdata(rcd->dd, rcd); | |
113 | + | |
114 | kfree(rcd); | |
115 | } | |
116 | ||
117 | @@ -241,10 +241,13 @@ int hfi1_rcd_put(struct hfi1_ctxtdata *r | |
118 | * @rcd: pointer to an initialized rcd data structure | |
119 | * | |
120 | * Use this to get a reference after the init. | |
121 | + * | |
122 | + * Return : reflect kref_get_unless_zero(), which returns non-zero on | |
123 | + * increment, otherwise 0. | |
124 | */ | |
125 | -void hfi1_rcd_get(struct hfi1_ctxtdata *rcd) | |
126 | +int hfi1_rcd_get(struct hfi1_ctxtdata *rcd) | |
127 | { | |
128 | - kref_get(&rcd->kref); | |
129 | + return kref_get_unless_zero(&rcd->kref); | |
130 | } | |
131 | ||
132 | /** | |
133 | @@ -324,7 +327,8 @@ struct hfi1_ctxtdata *hfi1_rcd_get_by_in | |
134 | spin_lock_irqsave(&dd->uctxt_lock, flags); | |
135 | if (dd->rcd[ctxt]) { | |
136 | rcd = dd->rcd[ctxt]; | |
137 | - hfi1_rcd_get(rcd); | |
138 | + if (!hfi1_rcd_get(rcd)) | |
139 | + rcd = NULL; | |
140 | } | |
141 | spin_unlock_irqrestore(&dd->uctxt_lock, flags); | |
142 |