]> git.ipfire.org Git - people/ms/linux.git/commitdiff
IB/hfi1: Fix early init panic
authorMike Marciniszyn <mike.marciniszyn@cornelisnetworks.com>
Mon, 29 Nov 2021 19:20:03 +0000 (14:20 -0500)
committerJason Gunthorpe <jgg@nvidia.com>
Tue, 7 Dec 2021 17:22:54 +0000 (13:22 -0400)
The following trace can be observed with an init failure such as firmware
load failures:

  BUG: unable to handle kernel NULL pointer dereference at 0000000000000000
  PGD 0 P4D 0
  Oops: 0010 [#1] SMP PTI
  CPU: 0 PID: 537 Comm: kworker/0:3 Tainted: G           OE    --------- -  - 4.18.0-240.el8.x86_64 #1
  Workqueue: events work_for_cpu_fn
  RIP: 0010:0x0
  Code: Bad RIP value.
  RSP: 0000:ffffae5f878a3c98 EFLAGS: 00010046
  RAX: 0000000000000000 RBX: ffff95e48e025c00 RCX: 0000000000000000
  RDX: 0000000000000001 RSI: 0000000000000000 RDI: ffff95e48e025c00
  RBP: ffff95e4bf3660a4 R08: 0000000000000000 R09: ffffffff86d5e100
  R10: ffff95e49e1de600 R11: 0000000000000001 R12: ffff95e4bf366180
  R13: ffff95e48e025c00 R14: ffff95e4bf366028 R15: ffff95e4bf366000
  FS:  0000000000000000(0000) GS:ffff95e4df200000(0000) knlGS:0000000000000000
  CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
  CR2: ffffffffffffffd6 CR3: 0000000f86a0a003 CR4: 00000000001606f0
  Call Trace:
   receive_context_interrupt+0x1f/0x40 [hfi1]
   __free_irq+0x201/0x300
   free_irq+0x2e/0x60
   pci_free_irq+0x18/0x30
   msix_free_irq.part.2+0x46/0x80 [hfi1]
   msix_clean_up_interrupts+0x2b/0x70 [hfi1]
   hfi1_init_dd+0x640/0x1a90 [hfi1]
   do_init_one.isra.19+0x34d/0x680 [hfi1]
   local_pci_probe+0x41/0x90
   work_for_cpu_fn+0x16/0x20
   process_one_work+0x1a7/0x360
   worker_thread+0x1cf/0x390
   ? create_worker+0x1a0/0x1a0
   kthread+0x112/0x130
   ? kthread_flush_work_fn+0x10/0x10
   ret_from_fork+0x35/0x40

The free_irq() results in a callback to the registered interrupt handler,
and rcd->do_interrupt is NULL because the receive context data structures
are not fully initialized.

Fix by ensuring that the do_interrupt is always assigned and adding a
guards in the slow path handler to detect and handle a partially
initialized receive context and noop the receive.

Link: https://lore.kernel.org/r/20211129192003.101968.33612.stgit@awfm-01.cornelisnetworks.com
Cc: stable@vger.kernel.org
Fixes: b0ba3c18d6bf ("IB/hfi1: Move normal functions from hfi1_devdata to const array")
Signed-off-by: Mike Marciniszyn <mike.marciniszyn@cornelisnetworks.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@cornelisnetworks.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
drivers/infiniband/hw/hfi1/chip.c
drivers/infiniband/hw/hfi1/driver.c
drivers/infiniband/hw/hfi1/init.c

index ec37f4fd8e96bac28efd784d757bc6ff12e1dbb0..f1245c94ae2629d329d68e19349360b37bf7f725 100644 (file)
@@ -8415,6 +8415,8 @@ static void receive_interrupt_common(struct hfi1_ctxtdata *rcd)
  */
 static void __hfi1_rcd_eoi_intr(struct hfi1_ctxtdata *rcd)
 {
+       if (!rcd->rcvhdrq)
+               return;
        clear_recv_intr(rcd);
        if (check_packet_present(rcd))
                force_recv_intr(rcd);
index 61f341c3005cb972dc56a8c34e64fbbc7cd03131..e2c634af40e990103d9010c5e0eab5cd80faa663 100644 (file)
@@ -1012,6 +1012,8 @@ int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread)
        struct hfi1_packet packet;
        int skip_pkt = 0;
 
+       if (!rcd->rcvhdrq)
+               return RCV_PKT_OK;
        /* Control context will always use the slow path interrupt handler */
        needset = (rcd->ctxt == HFI1_CTRL_CTXT) ? 0 : 1;
 
index 8e1236be46e11a230446757873b4f6428824073d..6422dd6cae6060baaa4d05aa5c360c19bc700f5b 100644 (file)
@@ -113,7 +113,6 @@ static int hfi1_create_kctxt(struct hfi1_devdata *dd,
        rcd->fast_handler = get_dma_rtail_setting(rcd) ?
                                handle_receive_interrupt_dma_rtail :
                                handle_receive_interrupt_nodma_rtail;
-       rcd->slow_handler = handle_receive_interrupt;
 
        hfi1_set_seq_cnt(rcd, 1);
 
@@ -334,6 +333,8 @@ int hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, int numa,
                rcd->numa_id = numa;
                rcd->rcv_array_groups = dd->rcv_entries.ngroups;
                rcd->rhf_rcv_function_map = normal_rhf_rcv_functions;
+               rcd->slow_handler = handle_receive_interrupt;
+               rcd->do_interrupt = rcd->slow_handler;
                rcd->msix_intr = CCE_NUM_MSIX_VECTORS;
 
                mutex_init(&rcd->exp_mutex);
@@ -898,8 +899,6 @@ int hfi1_init(struct hfi1_devdata *dd, int reinit)
                if (!rcd)
                        continue;
 
-               rcd->do_interrupt = &handle_receive_interrupt;
-
                lastfail = hfi1_create_rcvhdrq(dd, rcd);
                if (!lastfail)
                        lastfail = hfi1_setup_eagerbufs(rcd);