git.ipfire.org Git - people/ms/linux.git/commitdiff
IB/hfi1: Fix probe time panic when AIP is enabled with a buggy BIOS
author Mike Marciniszyn <mike.marciniszyn@cornelisnetworks.com>
Mon, 29 Mar 2021 13:48:19 +0000 (09:48 -0400)
committer Jason Gunthorpe <jgg@nvidia.com>
Wed, 7 Apr 2021 18:31:59 +0000 (15:31 -0300)
A panic can result when AIP is enabled:

  BUG: unable to handle kernel NULL pointer dereference at 000000000000000
  PGD 0 P4D 0
  Oops: 0000 1 SMP PTI
  CPU: 70 PID: 981 Comm: systemd-udevd Tainted: G OE --------- - - 4.18.0-240.el8.x86_64 #1
  Hardware name: Intel Corporation S2600KP/S2600KP, BIOS SE5C610.86B.01.01.0005.101720141054 10/17/2014
  RIP: 0010:__bitmap_and+0x1b/0x70
  RSP: 0018:ffff99aa0845f9f0 EFLAGS: 00010246
  RAX: 0000000000000000 RBX: ffff8d5a6fc18000 RCX: 0000000000000048
  RDX: 0000000000000000 RSI: ffffffffc06336f0 RDI: ffff8d5a8fa67750
  RBP: 0000000000000079 R08: 0000000fffffffff R09: 0000000000000000
  R10: 0000000000000000 R11: 0000000000000001 R12: ffffffffc06336f0
  R13: 00000000000000a0 R14: ffff8d5a6fc18000 R15: 0000000000000003
  FS: 00007fec137a5980(0000) GS:ffff8d5a9fa80000(0000) knlGS:0000000000000000
  CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
  CR2: 0000000000000000 CR3: 0000000a04b48002 CR4: 00000000001606e0
  Call Trace:
  hfi1_num_netdev_contexts+0x7c/0x110 [hfi1]
  hfi1_init_dd+0xd7f/0x1a90 [hfi1]
  ? pci_bus_read_config_dword+0x49/0x70
  ? pci_mmcfg_read+0x3e/0xe0
  do_init_one.isra.18+0x336/0x640 [hfi1]
  local_pci_probe+0x41/0x90
  pci_device_probe+0x105/0x1c0
  really_probe+0x212/0x440
  driver_probe_device+0x49/0xc0
  device_driver_attach+0x50/0x60
  __driver_attach+0x61/0x130
  ? device_driver_attach+0x60/0x60
  bus_for_each_dev+0x77/0xc0
  ? klist_add_tail+0x3b/0x70
  bus_add_driver+0x14d/0x1e0
  ? dev_init+0x10b/0x10b [hfi1]
  driver_register+0x6b/0xb0
  ? dev_init+0x10b/0x10b [hfi1]
  hfi1_mod_init+0x1e6/0x20a [hfi1]
  do_one_initcall+0x46/0x1c3
  ? free_unref_page_commit+0x91/0x100
  ? _cond_resched+0x15/0x30
  ? kmem_cache_alloc_trace+0x140/0x1c0
  do_init_module+0x5a/0x220
  load_module+0x14b4/0x17e0
  ? __do_sys_finit_module+0xa8/0x110
  __do_sys_finit_module+0xa8/0x110
  do_syscall_64+0x5b/0x1a0

The issue happens when pcibus_to_node() returns NUMA_NO_NODE.

Fix this issue by moving the initialization of dd->node to hfi1_devdata
allocation, removing the other pcibus_to_node() calls in the probe path,
and using dd->node instead.

Affinity logic is adjusted to use a new field dd->affinity_entry as a
guard instead of dd->node.

Fixes: 4730f4a6c6b2 ("IB/hfi1: Activate the dummy netdev")
Link: https://lore.kernel.org/r/1617025700-31865-4-git-send-email-dennis.dalessandro@cornelisnetworks.com
Cc: stable@vger.kernel.org
Signed-off-by: Mike Marciniszyn <mike.marciniszyn@cornelisnetworks.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@cornelisnetworks.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
drivers/infiniband/hw/hfi1/affinity.c
drivers/infiniband/hw/hfi1/hfi.h
drivers/infiniband/hw/hfi1/init.c
drivers/infiniband/hw/hfi1/netdev_rx.c

index 2a91b8d95e12fd58e6ef2f7e8ddf4173bab9bd32..04b1e8f021f642b1044a793e6c1babaca735c11f 100644 (file)
@@ -632,22 +632,11 @@ static void _dev_comp_vect_cpu_mask_clean_up(struct hfi1_devdata *dd,
  */
 int hfi1_dev_affinity_init(struct hfi1_devdata *dd)
 {
-       int node = pcibus_to_node(dd->pcidev->bus);
        struct hfi1_affinity_node *entry;
        const struct cpumask *local_mask;
        int curr_cpu, possible, i, ret;
        bool new_entry = false;
 
-       /*
-        * If the BIOS does not have the NUMA node information set, select
-        * NUMA 0 so we get consistent performance.
-        */
-       if (node < 0) {
-               dd_dev_err(dd, "Invalid PCI NUMA node. Performance may be affected\n");
-               node = 0;
-       }
-       dd->node = node;
-
        local_mask = cpumask_of_node(dd->node);
        if (cpumask_first(local_mask) >= nr_cpu_ids)
                local_mask = topology_core_cpumask(0);
@@ -660,7 +649,7 @@ int hfi1_dev_affinity_init(struct hfi1_devdata *dd)
         * create an entry in the global affinity structure and initialize it.
         */
        if (!entry) {
-               entry = node_affinity_allocate(node);
+               entry = node_affinity_allocate(dd->node);
                if (!entry) {
                        dd_dev_err(dd,
                                   "Unable to allocate global affinity node\n");
@@ -751,6 +740,7 @@ int hfi1_dev_affinity_init(struct hfi1_devdata *dd)
        if (new_entry)
                node_affinity_add_tail(entry);
 
+       dd->affinity_entry = entry;
        mutex_unlock(&node_affinity.lock);
 
        return 0;
@@ -766,10 +756,9 @@ void hfi1_dev_affinity_clean_up(struct hfi1_devdata *dd)
 {
        struct hfi1_affinity_node *entry;
 
-       if (dd->node < 0)
-               return;
-
        mutex_lock(&node_affinity.lock);
+       if (!dd->affinity_entry)
+               goto unlock;
        entry = node_affinity_lookup(dd->node);
        if (!entry)
                goto unlock;
@@ -780,8 +769,8 @@ void hfi1_dev_affinity_clean_up(struct hfi1_devdata *dd)
         */
        _dev_comp_vect_cpu_mask_clean_up(dd, entry);
 unlock:
+       dd->affinity_entry = NULL;
        mutex_unlock(&node_affinity.lock);
-       dd->node = NUMA_NO_NODE;
 }
 
 /*
index e09e8244a94c4e2201ce73ea79fd26d04414e349..2a9a040569ebb7b5220b17166e70847b2478123e 100644 (file)
@@ -1409,6 +1409,7 @@ struct hfi1_devdata {
        spinlock_t irq_src_lock;
        int vnic_num_vports;
        struct net_device *dummy_netdev;
+       struct hfi1_affinity_node *affinity_entry;
 
        /* Keeps track of IPoIB RSM rule users */
        atomic_t ipoib_rsm_usr_num;
index cb7ad12888219774925200f22723ff381332aa81..786c6316273f74964e74650229d193a702dfeaba 100644 (file)
@@ -1277,7 +1277,6 @@ static struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev,
        dd->pport = (struct hfi1_pportdata *)(dd + 1);
        dd->pcidev = pdev;
        pci_set_drvdata(pdev, dd);
-       dd->node = NUMA_NO_NODE;
 
        ret = xa_alloc_irq(&hfi1_dev_table, &dd->unit, dd, xa_limit_32b,
                        GFP_KERNEL);
@@ -1287,6 +1286,15 @@ static struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev,
                goto bail;
        }
        rvt_set_ibdev_name(&dd->verbs_dev.rdi, "%s_%d", class_name(), dd->unit);
+       /*
+        * If the BIOS does not have the NUMA node information set, select
+        * NUMA 0 so we get consistent performance.
+        */
+       dd->node = pcibus_to_node(pdev->bus);
+       if (dd->node == NUMA_NO_NODE) {
+               dd_dev_err(dd, "Invalid PCI NUMA node. Performance may be affected\n");
+               dd->node = 0;
+       }
 
        /*
         * Initialize all locks for the device. This needs to be as early as
index 1fb6e1a0e4e1d64d6d60adb240c560b2ed2705e8..1bcab992ac266dbb51e1a28740d6b9cd84b4d54f 100644 (file)
@@ -173,8 +173,7 @@ u32 hfi1_num_netdev_contexts(struct hfi1_devdata *dd, u32 available_contexts,
                return 0;
        }
 
-       cpumask_and(node_cpu_mask, cpu_mask,
-                   cpumask_of_node(pcibus_to_node(dd->pcidev->bus)));
+       cpumask_and(node_cpu_mask, cpu_mask, cpumask_of_node(dd->node));
 
        available_cpus = cpumask_weight(node_cpu_mask);