git.ipfire.org Git - thirdparty/kernel/stable.git/commitdiff
IB/hfi1: Correct tid qp rcd to match verbs context
author Mike Marciniszyn <mike.marciniszyn@intel.com>
Mon, 10 Jun 2019 16:28:18 +0000 (12:28 -0400)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Tue, 25 Jun 2019 03:34:45 +0000 (11:34 +0800)
commit cc78076af14e1478c1a8fb18997674b5f8cbe3c8 upstream.

The qp priv rcd pointer doesn't match the context being used for verbs,
causing issues when 9B and kdeth packets are processed by different
receive contexts and hence different CPUs.

When running on different CPUs the following panic can occur:

 WARNING: CPU: 3 PID: 2584 at lib/list_debug.c:59 __list_del_entry+0xa1/0xd0
 list_del corruption. prev->next should be ffff9a7ac31f7a30, but was ffff9a7c3bc89230
 CPU: 3 PID: 2584 Comm: z_wr_iss Kdump: loaded Tainted: P           OE  ------------   3.10.0-862.2.3.el7_lustre.x86_64 #1
 Call Trace:
  <IRQ>  [<ffffffffb7b0d78e>] dump_stack+0x19/0x1b
  [<ffffffffb74916d8>] __warn+0xd8/0x100
  [<ffffffffb749175f>] warn_slowpath_fmt+0x5f/0x80
  [<ffffffffb7768671>] __list_del_entry+0xa1/0xd0
  [<ffffffffc0c7a945>] process_rcv_qp_work+0xb5/0x160 [hfi1]
  [<ffffffffc0c7bc2b>] handle_receive_interrupt_nodma_rtail+0x20b/0x2b0 [hfi1]
  [<ffffffffc0c70683>] receive_context_interrupt+0x23/0x40 [hfi1]
  [<ffffffffb7540a94>] __handle_irq_event_percpu+0x44/0x1c0
  [<ffffffffb7540c42>] handle_irq_event_percpu+0x32/0x80
  [<ffffffffb7540ccc>] handle_irq_event+0x3c/0x60
  [<ffffffffb7543a1f>] handle_edge_irq+0x7f/0x150
  [<ffffffffb742d504>] handle_irq+0xe4/0x1a0
  [<ffffffffb7b23f7d>] do_IRQ+0x4d/0xf0
  [<ffffffffb7b16362>] common_interrupt+0x162/0x162
  <EOI>  [<ffffffffb775a326>] ? memcpy+0x6/0x110
  [<ffffffffc109210d>] ? abd_copy_from_buf_off_cb+0x1d/0x30 [zfs]
  [<ffffffffc10920f0>] ? abd_copy_to_buf_off_cb+0x30/0x30 [zfs]
  [<ffffffffc1093257>] abd_iterate_func+0x97/0x120 [zfs]
  [<ffffffffc10934d9>] abd_copy_from_buf_off+0x39/0x60 [zfs]
  [<ffffffffc109b828>] arc_write_ready+0x178/0x300 [zfs]
  [<ffffffffb7b11032>] ? mutex_lock+0x12/0x2f
  [<ffffffffb7b11032>] ? mutex_lock+0x12/0x2f
  [<ffffffffc1164d05>] zio_ready+0x65/0x3d0 [zfs]
  [<ffffffffc04d725e>] ? tsd_get_by_thread+0x2e/0x50 [spl]
  [<ffffffffc04d1318>] ? taskq_member+0x18/0x30 [spl]
  [<ffffffffc115ef22>] zio_execute+0xa2/0x100 [zfs]
  [<ffffffffc04d1d2c>] taskq_thread+0x2ac/0x4f0 [spl]
  [<ffffffffb74cee80>] ? wake_up_state+0x20/0x20
  [<ffffffffc115ee80>] ? zio_taskq_member.isra.7.constprop.10+0x80/0x80 [zfs]
  [<ffffffffc04d1a80>] ? taskq_thread_spawn+0x60/0x60 [spl]
  [<ffffffffb74bae31>] kthread+0xd1/0xe0
  [<ffffffffb74bad60>] ? insert_kthread_work+0x40/0x40
  [<ffffffffb7b1f5f7>] ret_from_fork_nospec_begin+0x21/0x21
  [<ffffffffb74bad60>] ? insert_kthread_work+0x40/0x40

Fix by reading the map entry in the same manner as the hardware so that
the kdeth and verbs contexts match.

Cc: <stable@vger.kernel.org>
Fixes: 5190f052a365 ("IB/hfi1: Allow the driver to initialize QP priv struct")
Reviewed-by: Kaike Wan <kaike.wan@intel.com>
Signed-off-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
drivers/infiniband/hw/hfi1/chip.c
drivers/infiniband/hw/hfi1/chip.h
drivers/infiniband/hw/hfi1/tid_rdma.c

index 9784c6c0d2ecfbbca031871f54fcc415602029fc..e02d9a739e9c620d2424b07889d8943640918494 100644 (file)
@@ -14027,6 +14027,19 @@ static void init_kdeth_qp(struct hfi1_devdata *dd)
                  RCV_BTH_QP_KDETH_QP_SHIFT);
 }
 
+/**
+ * hfi1_get_qp_map - read one entry of the receive QP map table
+ * @dd: device data
+ * @idx: index to read
+ *
+ * Reads the 64-bit CSR at RCV_QP_MAP_TABLE + (idx / 8) * 8 and shifts it
+ * right by (idx % 8) * 8 bits, i.e. the table is treated as eight one-byte
+ * entries per register with the lowest-numbered entry in the least
+ * significant byte.
+ *
+ * Return: the selected entry; the u8 return type truncates the shifted
+ * register value to its low byte.
+ */
+u8 hfi1_get_qp_map(struct hfi1_devdata *dd, u8 idx)
+{
+       u64 reg = read_csr(dd, RCV_QP_MAP_TABLE + (idx / 8) * 8);
+
+       reg >>= (idx % 8) * 8;
+       return reg;
+}
+
 /**
  * init_qpmap_table
  * @dd - device data
index 6c27c1c6a86872c55964d6677f3a3525cb79620a..a5c61400b29593ed28b02f42e197eee8fe47a327 100644 (file)
@@ -1442,6 +1442,7 @@ void clear_all_interrupts(struct hfi1_devdata *dd);
 void remap_intr(struct hfi1_devdata *dd, int isrc, int msix_intr);
 void remap_sdma_interrupts(struct hfi1_devdata *dd, int engine, int msix_intr);
 void reset_interrupts(struct hfi1_devdata *dd);
+u8 hfi1_get_qp_map(struct hfi1_devdata *dd, u8 idx);
 
 /*
  * Interrupt source table.
index 43cbce7a19ea43f42af2464a782221da2ee386bf..e0851f01a804ef04fd77f3574d78c5dc97c95992 100644 (file)
@@ -305,9 +305,7 @@ static struct hfi1_ctxtdata *qp_to_rcd(struct rvt_dev_info *rdi,
        if (qp->ibqp.qp_num == 0)
                ctxt = 0;
        else
-               ctxt = ((qp->ibqp.qp_num >> dd->qos_shift) %
-                       (dd->n_krcv_queues - 1)) + 1;
-
+               ctxt = hfi1_get_qp_map(dd, qp->ibqp.qp_num >> dd->qos_shift);
        return dd->rcd[ctxt];
 }