git.ipfire.org Git - thirdparty/kernel/stable-queue.git/blob - queue-5.1/ib-hfi1-correct-tid-qp-rcd-to-match-verbs-context.patch
5.1-stable patches
[thirdparty/kernel/stable-queue.git] / queue-5.1 / ib-hfi1-correct-tid-qp-rcd-to-match-verbs-context.patch
1 From cc78076af14e1478c1a8fb18997674b5f8cbe3c8 Mon Sep 17 00:00:00 2001
2 From: Mike Marciniszyn <mike.marciniszyn@intel.com>
3 Date: Mon, 10 Jun 2019 12:28:18 -0400
4 Subject: IB/hfi1: Correct tid qp rcd to match verbs context
5
6 From: Mike Marciniszyn <mike.marciniszyn@intel.com>
7
8 commit cc78076af14e1478c1a8fb18997674b5f8cbe3c8 upstream.
9
10 The qp priv rcd pointer doesn't match the context being used for verbs
11 causing issues when 9B and kdeth packets are processed by different
12 receive contexts and hence different CPUs.
13
14 When running on different CPUs the following panic can occur:
15
16 WARNING: CPU: 3 PID: 2584 at lib/list_debug.c:59 __list_del_entry+0xa1/0xd0
17 list_del corruption. prev->next should be ffff9a7ac31f7a30, but was ffff9a7c3bc89230
18 CPU: 3 PID: 2584 Comm: z_wr_iss Kdump: loaded Tainted: P OE ------------ 3.10.0-862.2.3.el7_lustre.x86_64 #1
19 Call Trace:
20 <IRQ> [<ffffffffb7b0d78e>] dump_stack+0x19/0x1b
21 [<ffffffffb74916d8>] __warn+0xd8/0x100
22 [<ffffffffb749175f>] warn_slowpath_fmt+0x5f/0x80
23 [<ffffffffb7768671>] __list_del_entry+0xa1/0xd0
24 [<ffffffffc0c7a945>] process_rcv_qp_work+0xb5/0x160 [hfi1]
25 [<ffffffffc0c7bc2b>] handle_receive_interrupt_nodma_rtail+0x20b/0x2b0 [hfi1]
26 [<ffffffffc0c70683>] receive_context_interrupt+0x23/0x40 [hfi1]
27 [<ffffffffb7540a94>] __handle_irq_event_percpu+0x44/0x1c0
28 [<ffffffffb7540c42>] handle_irq_event_percpu+0x32/0x80
29 [<ffffffffb7540ccc>] handle_irq_event+0x3c/0x60
30 [<ffffffffb7543a1f>] handle_edge_irq+0x7f/0x150
31 [<ffffffffb742d504>] handle_irq+0xe4/0x1a0
32 [<ffffffffb7b23f7d>] do_IRQ+0x4d/0xf0
33 [<ffffffffb7b16362>] common_interrupt+0x162/0x162
34 <EOI> [<ffffffffb775a326>] ? memcpy+0x6/0x110
35 [<ffffffffc109210d>] ? abd_copy_from_buf_off_cb+0x1d/0x30 [zfs]
36 [<ffffffffc10920f0>] ? abd_copy_to_buf_off_cb+0x30/0x30 [zfs]
37 [<ffffffffc1093257>] abd_iterate_func+0x97/0x120 [zfs]
38 [<ffffffffc10934d9>] abd_copy_from_buf_off+0x39/0x60 [zfs]
39 [<ffffffffc109b828>] arc_write_ready+0x178/0x300 [zfs]
40 [<ffffffffb7b11032>] ? mutex_lock+0x12/0x2f
41 [<ffffffffb7b11032>] ? mutex_lock+0x12/0x2f
42 [<ffffffffc1164d05>] zio_ready+0x65/0x3d0 [zfs]
43 [<ffffffffc04d725e>] ? tsd_get_by_thread+0x2e/0x50 [spl]
44 [<ffffffffc04d1318>] ? taskq_member+0x18/0x30 [spl]
45 [<ffffffffc115ef22>] zio_execute+0xa2/0x100 [zfs]
46 [<ffffffffc04d1d2c>] taskq_thread+0x2ac/0x4f0 [spl]
47 [<ffffffffb74cee80>] ? wake_up_state+0x20/0x20
48 [<ffffffffc115ee80>] ? zio_taskq_member.isra.7.constprop.10+0x80/0x80 [zfs]
49 [<ffffffffc04d1a80>] ? taskq_thread_spawn+0x60/0x60 [spl]
50 [<ffffffffb74bae31>] kthread+0xd1/0xe0
51 [<ffffffffb74bad60>] ? insert_kthread_work+0x40/0x40
52 [<ffffffffb7b1f5f7>] ret_from_fork_nospec_begin+0x21/0x21
53 [<ffffffffb74bad60>] ? insert_kthread_work+0x40/0x40
54
55 Fix by reading the map entry in the same manner as the hardware so that
56 the kdeth and verbs contexts match.
57
58 Cc: <stable@vger.kernel.org>
59 Fixes: 5190f052a365 ("IB/hfi1: Allow the driver to initialize QP priv struct")
60 Reviewed-by: Kaike Wan <kaike.wan@intel.com>
61 Signed-off-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
62 Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
63 Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
64 Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
65
66 ---
67 drivers/infiniband/hw/hfi1/chip.c | 13 +++++++++++++
68 drivers/infiniband/hw/hfi1/chip.h | 1 +
69 drivers/infiniband/hw/hfi1/tid_rdma.c | 4 +---
70 3 files changed, 15 insertions(+), 3 deletions(-)
71
72 --- a/drivers/infiniband/hw/hfi1/chip.c
73 +++ b/drivers/infiniband/hw/hfi1/chip.c
74 @@ -14028,6 +14028,19 @@ static void init_kdeth_qp(struct hfi1_de
75 }
76
77 /**
78 + * hfi1_get_qp_map - read one byte-wide entry of the RCV_QP_MAP_TABLE CSRs
79 + * @dd: device data
80 + * @idx: index to read; picks byte (idx % 8) of the 64-bit CSR at (idx / 8) * 8
81 + */
82 +u8 hfi1_get_qp_map(struct hfi1_devdata *dd, u8 idx)
83 +{
84 + u64 reg = read_csr(dd, RCV_QP_MAP_TABLE + (idx / 8) * 8); /* CSR holding entry idx */
85 +
86 + reg >>= (idx % 8) * 8; /* move the selected byte into bits 7:0 */
87 + return reg; /* implicit truncation to u8 keeps only that byte */
88 +}
89 +
90 +/**
91 * init_qpmap_table
92 * @dd - device data
93 * @first_ctxt - first context
94 --- a/drivers/infiniband/hw/hfi1/chip.h
95 +++ b/drivers/infiniband/hw/hfi1/chip.h
96 @@ -1442,6 +1442,7 @@ void clear_all_interrupts(struct hfi1_de
97 void remap_intr(struct hfi1_devdata *dd, int isrc, int msix_intr);
98 void remap_sdma_interrupts(struct hfi1_devdata *dd, int engine, int msix_intr);
99 void reset_interrupts(struct hfi1_devdata *dd);
100 +u8 hfi1_get_qp_map(struct hfi1_devdata *dd, u8 idx);
101
102 /*
103 * Interrupt source table.
104 --- a/drivers/infiniband/hw/hfi1/tid_rdma.c
105 +++ b/drivers/infiniband/hw/hfi1/tid_rdma.c
106 @@ -305,9 +305,7 @@ static struct hfi1_ctxtdata *qp_to_rcd(s
107 if (qp->ibqp.qp_num == 0)
108 ctxt = 0;
109 else
110 - ctxt = ((qp->ibqp.qp_num >> dd->qos_shift) %
111 - (dd->n_krcv_queues - 1)) + 1;
112 -
113 + ctxt = hfi1_get_qp_map(dd, qp->ibqp.qp_num >> dd->qos_shift);
114 return dd->rcd[ctxt];
115 }
116