]> git.ipfire.org Git - thirdparty/linux.git/commitdiff
s390/ap: Fix locking issue in SE bind and associate sysfs functions
authorHarald Freudenberger <freude@linux.ibm.com>
Wed, 3 Jun 2026 13:04:56 +0000 (15:04 +0200)
committerAlexander Gordeev <agordeev@linux.ibm.com>
Tue, 9 Jun 2026 17:33:00 +0000 (19:33 +0200)
Revisit and reorganize the locking and lock coverage of the
ap->lock spinlock as used in the two sysfs functions
se_bind_store() and se_associate_store().

A kernel run reported a possible deadlock situation, caused by
holding the spinlock (ap->lock) while triggering a uevent.
The fix rearranges the code protected by the spinlock by excluding
the uevent invocation, which does not require protection.

Additionally, the start of the protected region is moved earlier
to cover more lines, ensuring a consistent view of the AP queue
state between reading and updating its struct fields.

=====================================================
WARNING: SOFTIRQ-safe -> SOFTIRQ-unsafe lock order detected
7.1.0-20260601.rc6.git12.516b5dbd4d4a.300.fc44.s390x+debug #1 Not tainted
-----------------------------------------------------
setupseguest.sh/11034 [HC0[0]:SC0[2]:HE1:SE0] is trying to acquire:
000001c991f498e8 (fs_reclaim){+.+.}-{0:0}, at: __kmalloc_cache_noprof+0x5a/0x6d0
and this task is already holding:
000000c4a1a12378 (&aq->lock){+.-.}-{2:2}, at: se_bind_store+0x96/0x3a0
which would create a new lock dependency:
 (&aq->lock){+.-.}-{2:2} -> (fs_reclaim){+.+.}-{0:0}
but this new dependency connects a SOFTIRQ-irq-safe lock:
 (&aq->lock){+.-.}-{2:2}
... which became SOFTIRQ-irq-safe at:
  __lock_acquire+0x5ae/0x15a0
  lock_acquire+0x14c/0x400
  _raw_spin_lock_bh+0x58/0xb0
  ap_tasklet_fn+0x72/0xd0
  tasklet_action_common+0x174/0x1b0
  handle_softirqs+0x180/0x5c0
  irq_exit_rcu+0x196/0x200
  do_ext_irq+0x12a/0x4d0
  ext_int_handler+0xc6/0xf0
  folio_zero_user+0x1c6/0x240
  folio_zero_user+0x182/0x240
  vma_alloc_anon_folio_pmd+0xa0/0x1d0
  __do_huge_pmd_anonymous_page+0x3a/0x200
  __handle_mm_fault+0x56c/0x590
  handle_mm_fault+0xa2/0x370
  do_exception+0x292/0x590
  __do_pgm_check+0x136/0x3e0
  pgm_check_handler+0x114/0x160
to a SOFTIRQ-irq-unsafe lock:
 (fs_reclaim){+.+.}-{0:0}
... which became SOFTIRQ-irq-unsafe at:
...
  __lock_acquire+0x5ae/0x15a0
  lock_acquire+0x14c/0x400
  __fs_reclaim_acquire+0x44/0x50
  fs_reclaim_acquire+0xbe/0x100
  fs_reclaim_correct_nesting+0x20/0x70
  dotest+0x5e/0x148
  locking_selftest+0x2854/0x2a88
  start_kernel+0x3b2/0x4f0
  startup_continue+0x2e/0x40
other info that might help us debug this:
 Possible interrupt unsafe locking scenario:
       CPU0                    CPU1
       ----                    ----
  lock(fs_reclaim);
       local_irq_disable();
       lock(&aq->lock);
       lock(fs_reclaim);
  <Interrupt>
    lock(&aq->lock);
 *** DEADLOCK ***
4 locks held by setupseguest.sh/11034:
 #0: 000000c485d01440 (sb_writers#4){.+.+}-{0:0}, at: vfs_write+0x2fc/0x380
 #1: 000000c4d2283288 (&of->mutex#2){+.+.}-{3:3}, at: kernfs_fop_write_iter+0x12a0x270
 #2: 000000c4a1830e48 (kn->active#172){.+.+}-{0:0}, at: kernfs_fop_write_iter+0x1e/0x270
 #3: 000000c4a1a12378 (&aq->lock){+.-.}-{2:2}, at: se_bind_store+0x96/0x3a0
the dependencies between SOFTIRQ-irq-safe lock and the holding lock:
-> (&aq->lock){+.-.}-{2:2} {
   HARDIRQ-ON-W at:
    __lock_acquire+0x5ae/0x15a0
    lock_acquire+0x14c/0x400
    _raw_spin_lock_bh+0x58/0xb0
    ap_queue_init_state+0x2e/0x50
    ap_scan_domains+0x5d6/0x620
    ap_scan_adapter+0x4c0/0x810
    ap_scan_bus+0x70/0x350
    ap_scan_bus_wq_callback+0x56/0x80
    process_one_work+0x2ba/0x820
    worker_thread+0x21a/0x400
    kthread+0x164/0x190
    __ret_from_fork+0x4c/0x340
    ret_from_fork+0xa/0x30
   IN-SOFTIRQ-W at:
    __lock_acquire+0x5ae/0x15a0
    lock_acquire+0x14c/0x400
    _raw_spin_lock_bh+0x58/0xb0
    ap_tasklet_fn+0x72/0xd0
    tasklet_action_common+0x174/0x1b0
    handle_softirqs+0x180/0x5c0
    irq_exit_rcu+0x196/0x200
    do_ext_irq+0x12a/0x4d0
    ext_int_handler+0xc6/0xf0
    folio_zero_user+0x1c6/0x240
    folio_zero_user+0x182/0x240
    vma_alloc_anon_folio_pmd+0xa0/0x1d0
    __do_huge_pmd_anonymous_page+0x3a/0x200
    __handle_mm_fault+0x56c/0x590
    handle_mm_fault+0xa2/0x370
    do_exception+0x292/0x590
    __do_pgm_check+0x136/0x3e0
    pgm_check_handler+0x114/0x160
   INITIAL USE at:
   __lock_acquire+0x5ae/0x15a0
   lock_acquire+0x14c/0x400
   _raw_spin_lock_bh+0x58/0xb0
   ap_queue_init_state+0x2e/0x50
   ap_scan_domains+0x5d6/0x620
   ap_scan_adapter+0x4c0/0x810
   ap_scan_bus+0x70/0x350
   ap_scan_bus_wq_callback+0x56/0x80
   process_one_work+0x2ba/0x820
   worker_thread+0x21a/0x400
   kthread+0x164/0x190
   __ret_from_fork+0x4c/0x340
   ret_from_fork+0xa/0x30
 }
 ... key      at: [<000001c9936e8aa0>] __key.7+0x0/0x10
the dependencies between the lock to be acquired
 and SOFTIRQ-irq-unsafe lock:
-> (fs_reclaim){+.+.}-{0:0} {
   HARDIRQ-ON-W at:
    __lock_acquire+0x5ae/0x15a0
    lock_acquire+0x14c/0x400
    __fs_reclaim_acquire+0x44/0x50
    fs_reclaim_acquire+0xbe/0x100
    fs_reclaim_correct_nesting+0x20/0x70
    dotest+0x5e/0x148
    locking_selftest+0x2854/0x2a88
    start_kernel+0x3b2/0x4f0
    startup_continue+0x2e/0x40
   SOFTIRQ-ON-W at:
    __lock_acquire+0x5ae/0x15a0
    lock_acquire+0x14c/0x400
    __fs_reclaim_acquire+0x44/0x50
    fs_reclaim_acquire+0xbe/0x100
    fs_reclaim_correct_nesting+0x20/0x70
    dotest+0x5e/0x148
    locking_selftest+0x2854/0x2a88
    start_kernel+0x3b2/0x4f0
    startup_continue+0x2e/0x40
   INITIAL USE at:
   __lock_acquire+0x5ae/0x15a0
   lock_acquire+0x14c/0x400
   __fs_reclaim_acquire+0x44/0x50
   fs_reclaim_acquire+0xbe/0x100
   fs_reclaim_correct_nesting+0x20/0x70
   dotest+0x5e/0x148
   locking_selftest+0x2854/0x2a88
   start_kernel+0x3b2/0x4f0
   startup_continue+0x2e/0x40
 }
 ... key      at: [<000001c991f498e8>] __fs_reclaim_map+0x0/0x30
 ... acquired at:
   check_prev_add+0x178/0xf40
   __lock_acquire+0x12aa/0x15a0
   lock_acquire+0x14c/0x400
   __fs_reclaim_acquire+0x44/0x50
   fs_reclaim_acquire+0xbe/0x100
   __kmalloc_cache_noprof+0x5a/0x6d0
   kobject_uevent_env+0xd4/0x420
   ap_send_se_bind_uevent+0x48/0x70
   se_bind_store+0x146/0x3a0
   kernfs_fop_write_iter+0x18c/0x270
   vfs_write+0x23c/0x380
   ksys_write+0x88/0x120
   __do_syscall+0x170/0x750
   system_call+0x72/0x90
stack backtrace:
CPU: 6 UID: 0 PID: 11034 Comm: setupseguest.sh Not tainted 7.1.0-20260601.rc6.git2.516b5dbd4d4a.300.fc44.s390x+debug #1 PREEMPT
Hardware name: IBM 9175 ME1 701 (KVM/Linux)
Call Trace:
 [<000001c98ffa0a7e>] dump_stack_lvl+0xae/0x108
 [<000001c9900a6d7a>] print_bad_irq_dependency+0x47a/0x480
 [<000001c9900a7184>] check_irq_usage+0x404/0x4c0
 [<000001c9900a73b8>] check_prev_add+0x178/0xf40
 [<000001c9900aaf1a>] __lock_acquire+0x12aa/0x15a0
 [<000001c9900ab35c>] lock_acquire+0x14c/0x400
 [<000001c9903be454>] __fs_reclaim_acquire+0x44/0x50
 [<000001c9903be51e>] fs_reclaim_acquire+0xbe/0x100
 [<000001c9903cf4ca>] __kmalloc_cache_noprof+0x5a/0x6d0
 [<000001c9910ca9d4>] kobject_uevent_env+0xd4/0x420
 [<000001c990d84098>] ap_send_se_bind_uevent+0x48/0x70
 [<000001c990d87416>] se_bind_store+0x146/0x3a0
 [<000001c99057da7c>] kernfs_fop_write_iter+0x18c/0x270
 [<000001c99047712c>] vfs_write+0x23c/0x380
 [<000001c990477438>] ksys_write+0x88/0x120
 [<000001c9910f64e0>] __do_syscall+0x170/0x750
 [<000001c99110a412>] system_call+0x72/0x90
INFO: lockdep is turned off.

Fixes: 4179c3984227 ("s390/ap: Implement SE bind and associate uevents")
Reported-by: Ingo Franzki <ifranzki@linux.ibm.com>
Suggested-by: Finn Callies <fcallies@linux.ibm.com>
Reviewed-by: Finn Callies <fcallies@linux.ibm.com>
Signed-off-by: Harald Freudenberger <freude@linux.ibm.com>
Signed-off-by: Alexander Gordeev <agordeev@linux.ibm.com>
drivers/s390/crypto/ap_queue.c

index 232b786d81d140d051d5c16336cc396858887f95..bc03b2101e397bc0fcea90eb9457ce3135859e12 100644 (file)
@@ -961,34 +961,37 @@ static ssize_t se_bind_store(struct device *dev,
                __ap_flush_queue(aq);
                aq->rapq_fbit = 1;
                _ap_queue_init_state(aq);
-               rc = count;
-               goto out;
+               spin_unlock_bh(&aq->lock);
+               return count;
        }
 
+       /* lock this ap to have fetch and update in an atomic way */
+       spin_lock_bh(&aq->lock);
+
        /* Bind. Check current SE bind state */
        status = ap_test_queue(aq->qid, 1, &hwinfo);
        if (status.response_code) {
                AP_DBF_WARN("%s RC 0x%02x on tapq(0x%02x.%04x)\n",
                            __func__, status.response_code,
                            AP_QID_CARD(aq->qid), AP_QID_QUEUE(aq->qid));
-               return -EIO;
+               rc = -EIO;
+               goto error;
        }
 
        /* Update BS state */
-       spin_lock_bh(&aq->lock);
        aq->se_bstate = hwinfo.bs;
        if (hwinfo.bs != AP_BS_Q_AVAIL_FOR_BINDING) {
                AP_DBF_WARN("%s bind attempt with bs %d on queue 0x%02x.%04x\n",
                            __func__, hwinfo.bs,
                            AP_QID_CARD(aq->qid), AP_QID_QUEUE(aq->qid));
                rc = -EINVAL;
-               goto out;
+               goto error;
        }
 
        /* Check SM state */
        if (aq->sm_state < AP_SM_STATE_IDLE) {
                rc = -EBUSY;
-               goto out;
+               goto error;
        }
 
        /* invoke BAPQ */
@@ -998,7 +1001,7 @@ static ssize_t se_bind_store(struct device *dev,
                            __func__, status.response_code,
                            AP_QID_CARD(aq->qid), AP_QID_QUEUE(aq->qid));
                rc = -EIO;
-               goto out;
+               goto error;
        }
        aq->assoc_idx = ASSOC_IDX_INVALID;
 
@@ -1009,7 +1012,7 @@ static ssize_t se_bind_store(struct device *dev,
                            __func__, status.response_code,
                            AP_QID_CARD(aq->qid), AP_QID_QUEUE(aq->qid));
                rc = -EIO;
-               goto out;
+               goto error;
        }
        aq->se_bstate = hwinfo.bs;
        if (!(hwinfo.bs == AP_BS_Q_USABLE ||
@@ -1018,16 +1021,19 @@ static ssize_t se_bind_store(struct device *dev,
                            __func__, hwinfo.bs,
                            AP_QID_CARD(aq->qid), AP_QID_QUEUE(aq->qid));
                rc = -EIO;
-               goto out;
+               goto error;
        }
 
        /* SE bind was successful */
+       spin_unlock_bh(&aq->lock);
+
        AP_DBF_INFO("%s bapq(0x%02x.%04x) success\n", __func__,
                    AP_QID_CARD(aq->qid), AP_QID_QUEUE(aq->qid));
        ap_send_se_bind_uevent(&aq->ap_dev);
-       rc = count;
 
-out:
+       return count;
+
+error:
        spin_unlock_bh(&aq->lock);
        return rc;
 }
@@ -1093,15 +1099,18 @@ static ssize_t se_associate_store(struct device *dev,
        if (value >= ASSOC_IDX_INVALID)
                return -EINVAL;
 
+       /* lock this ap to have fetch and update in an atomic way */
+       spin_lock_bh(&aq->lock);
+
        /* check current SE bind state */
        status = ap_test_queue(aq->qid, 1, &hwinfo);
        if (status.response_code) {
                AP_DBF_WARN("%s RC 0x%02x on tapq(0x%02x.%04x)\n",
                            __func__, status.response_code,
                            AP_QID_CARD(aq->qid), AP_QID_QUEUE(aq->qid));
-               return -EIO;
+               rc = -EIO;
+               goto out;
        }
-       spin_lock_bh(&aq->lock);
        aq->se_bstate = hwinfo.bs;
        if (hwinfo.bs != AP_BS_Q_USABLE_NO_SECURE_KEY) {
                AP_DBF_WARN("%s association attempt with bs %d on queue 0x%02x.%04x\n",
@@ -1125,17 +1134,16 @@ static ssize_t se_associate_store(struct device *dev,
                aq->sm_state = AP_SM_STATE_ASSOC_WAIT;
                aq->assoc_idx = value;
                ap_wait(ap_sm_event(aq, AP_SM_EVENT_POLL));
+               rc = count;
                break;
        default:
                AP_DBF_WARN("%s RC 0x%02x on aapq(0x%02x.%04x)\n",
                            __func__, status.response_code,
                            AP_QID_CARD(aq->qid), AP_QID_QUEUE(aq->qid));
                rc = -EIO;
-               goto out;
+               break;
        }
 
-       rc = count;
-
 out:
        spin_unlock_bh(&aq->lock);
        return rc;