git.ipfire.org Git - thirdparty/linux.git/commitdiff
IB/hfi1: Correct tid qp rcd to match verbs context
author: Mike Marciniszyn <mike.marciniszyn@intel.com>
Mon, 10 Jun 2019 16:28:18 +0000 (12:28 -0400)
committer: Jason Gunthorpe <jgg@mellanox.com>
Tue, 11 Jun 2019 20:06:45 +0000 (17:06 -0300)
The qp priv rcd pointer doesn't match the context being used for verbs
causing issues when 9B and kdeth packets are processed by different
receive contexts and hence different CPUs.

When running on different CPUs the following panic can occur:

 WARNING: CPU: 3 PID: 2584 at lib/list_debug.c:59 __list_del_entry+0xa1/0xd0
 list_del corruption. prev->next should be ffff9a7ac31f7a30, but was ffff9a7c3bc89230
 CPU: 3 PID: 2584 Comm: z_wr_iss Kdump: loaded Tainted: P           OE  ------------   3.10.0-862.2.3.el7_lustre.x86_64 #1
 Call Trace:
  <IRQ>  [<ffffffffb7b0d78e>] dump_stack+0x19/0x1b
  [<ffffffffb74916d8>] __warn+0xd8/0x100
  [<ffffffffb749175f>] warn_slowpath_fmt+0x5f/0x80
  [<ffffffffb7768671>] __list_del_entry+0xa1/0xd0
  [<ffffffffc0c7a945>] process_rcv_qp_work+0xb5/0x160 [hfi1]
  [<ffffffffc0c7bc2b>] handle_receive_interrupt_nodma_rtail+0x20b/0x2b0 [hfi1]
  [<ffffffffc0c70683>] receive_context_interrupt+0x23/0x40 [hfi1]
  [<ffffffffb7540a94>] __handle_irq_event_percpu+0x44/0x1c0
  [<ffffffffb7540c42>] handle_irq_event_percpu+0x32/0x80
  [<ffffffffb7540ccc>] handle_irq_event+0x3c/0x60
  [<ffffffffb7543a1f>] handle_edge_irq+0x7f/0x150
  [<ffffffffb742d504>] handle_irq+0xe4/0x1a0
  [<ffffffffb7b23f7d>] do_IRQ+0x4d/0xf0
  [<ffffffffb7b16362>] common_interrupt+0x162/0x162
  <EOI>  [<ffffffffb775a326>] ? memcpy+0x6/0x110
  [<ffffffffc109210d>] ? abd_copy_from_buf_off_cb+0x1d/0x30 [zfs]
  [<ffffffffc10920f0>] ? abd_copy_to_buf_off_cb+0x30/0x30 [zfs]
  [<ffffffffc1093257>] abd_iterate_func+0x97/0x120 [zfs]
  [<ffffffffc10934d9>] abd_copy_from_buf_off+0x39/0x60 [zfs]
  [<ffffffffc109b828>] arc_write_ready+0x178/0x300 [zfs]
  [<ffffffffb7b11032>] ? mutex_lock+0x12/0x2f
  [<ffffffffb7b11032>] ? mutex_lock+0x12/0x2f
  [<ffffffffc1164d05>] zio_ready+0x65/0x3d0 [zfs]
  [<ffffffffc04d725e>] ? tsd_get_by_thread+0x2e/0x50 [spl]
  [<ffffffffc04d1318>] ? taskq_member+0x18/0x30 [spl]
  [<ffffffffc115ef22>] zio_execute+0xa2/0x100 [zfs]
  [<ffffffffc04d1d2c>] taskq_thread+0x2ac/0x4f0 [spl]
  [<ffffffffb74cee80>] ? wake_up_state+0x20/0x20
  [<ffffffffc115ee80>] ? zio_taskq_member.isra.7.constprop.10+0x80/0x80 [zfs]
  [<ffffffffc04d1a80>] ? taskq_thread_spawn+0x60/0x60 [spl]
  [<ffffffffb74bae31>] kthread+0xd1/0xe0
  [<ffffffffb74bad60>] ? insert_kthread_work+0x40/0x40
  [<ffffffffb7b1f5f7>] ret_from_fork_nospec_begin+0x21/0x21
  [<ffffffffb74bad60>] ? insert_kthread_work+0x40/0x40

Fix by reading the map entry in the same manner as the hardware so that
the kdeth and verbs contexts match.

Cc: <stable@vger.kernel.org>
Fixes: 5190f052a365 ("IB/hfi1: Allow the driver to initialize QP priv struct")
Reviewed-by: Kaike Wan <kaike.wan@intel.com>
Signed-off-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
drivers/infiniband/hw/hfi1/chip.c
drivers/infiniband/hw/hfi1/chip.h
drivers/infiniband/hw/hfi1/tid_rdma.c

index 4221a99ee7f462a57de45a5ef3dff5f80e5e3f3c..d5b643a1d9fd2e11365787a93eeace5cd35f3bc8 100644 (file)
@@ -14031,6 +14031,19 @@ static void init_kdeth_qp(struct hfi1_devdata *dd)
                  RCV_BTH_QP_KDETH_QP_SHIFT);
 }
 
+/**
+ * hfi1_get_qp_map
+ * @dd: device data
+ * @idx: index to read
+ */
+u8 hfi1_get_qp_map(struct hfi1_devdata *dd, u8 idx)
+{
+       u64 reg = read_csr(dd, RCV_QP_MAP_TABLE + (idx / 8) * 8);
+
+       reg >>= (idx % 8) * 8;
+       return reg;
+}
+
 /**
  * init_qpmap_table
  * @dd - device data
index 4e6c3556ec48668d7f65f0a605e6d3821ac3d604..b76cf81f927f2ad11b373d0ded2d2c1e442677c7 100644 (file)
@@ -1445,6 +1445,7 @@ void clear_all_interrupts(struct hfi1_devdata *dd);
 void remap_intr(struct hfi1_devdata *dd, int isrc, int msix_intr);
 void remap_sdma_interrupts(struct hfi1_devdata *dd, int engine, int msix_intr);
 void reset_interrupts(struct hfi1_devdata *dd);
+u8 hfi1_get_qp_map(struct hfi1_devdata *dd, u8 idx);
 
 /*
  * Interrupt source table.
index 6fb93032fbefcb7e74e411b42c0bf2cc98688602..aa9c8d3ef87b6b40e1634bf580b5253d1b8ddb30 100644 (file)
@@ -312,9 +312,7 @@ static struct hfi1_ctxtdata *qp_to_rcd(struct rvt_dev_info *rdi,
        if (qp->ibqp.qp_num == 0)
                ctxt = 0;
        else
-               ctxt = ((qp->ibqp.qp_num >> dd->qos_shift) %
-                       (dd->n_krcv_queues - 1)) + 1;
-
+               ctxt = hfi1_get_qp_map(dd, qp->ibqp.qp_num >> dd->qos_shift);
        return dd->rcd[ctxt];
 }