From: Harald Freudenberger Date: Wed, 3 Jun 2026 13:04:56 +0000 (+0200) Subject: s390/ap: Fix locking issue in SE bind and associate sysfs functions X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=7ecac01cf1c094a55c687572bbc874f75e15eb84;p=thirdparty%2Flinux.git s390/ap: Fix locking issue in SE bind and associate sysfs functions Revisit and reorganize the locking and lock coverage of the ap->lock spinlock as used in the two sysfs functions se_bind_store() and se_associate_store(). A kernel run reported a possible deadlock situation, caused by holding the spinlock (ap->lock) while triggering a uevent. The fix rearranges the code protected by the spinlock by excluding the uevent invocation, which does not require protection. Additionally, the start of the protected region is moved earlier to cover more lines, ensuring a consistent view of the AP queue state between reading and updating its struct fields. ===================================================== WARNING: SOFTIRQ-safe -> SOFTIRQ-unsafe lock order detected 7.1.0-20260601.rc6.git12.516b5dbd4d4a.300.fc44.s390x+debug #1 Not tainted ----------------------------------------------------- setupseguest.sh/11034 [HC0[0]:SC0[2]:HE1:SE0] is trying to acquire: 000001c991f498e8 (fs_reclaim){+.+.}-{0:0}, at: __kmalloc_cache_noprof+0x5a/0x6d0 and this task is already holding: 000000c4a1a12378 (&aq->lock){+.-.}-{2:2}, at: se_bind_store+0x96/0x3a0 which would create a new lock dependency: (&aq->lock){+.-.}-{2:2} -> (fs_reclaim){+.+.}-{0:0} but this new dependency connects a SOFTIRQ-irq-safe lock: (&aq->lock){+.-.}-{2:2} ... which became SOFTIRQ-irq-safe at: __lock_acquire+0x5ae/0x15a0 lock_acquire+0x14c/0x400 _raw_spin_lock_bh+0x58/0xb0 ap_tasklet_fn+0x72/0xd0 tasklet_action_common+0x174/0x1b0 handle_softirqs+0x180/0x5c0 irq_exit_rcu+0x196/0x200 do_ext_irq+0x12a/0x4d0 ext_int_handler+0xc6/0xf0 folio_zero_user+0x1c6/0x240 folio_zero_user+0x182/0x240 vma_alloc_anon_folio_pmd+0xa0/0x1d0 __do_huge_pmd_anonymous_page+0x3a/0x200 __handle_mm_fault+0x56c/0x590 handle_mm_fault+0xa2/0x370 do_exception+0x292/0x590 __do_pgm_check+0x136/0x3e0 pgm_check_handler+0x114/0x160 to a SOFTIRQ-irq-unsafe lock: (fs_reclaim){+.+.}-{0:0} ... which became SOFTIRQ-irq-unsafe at: ... __lock_acquire+0x5ae/0x15a0 lock_acquire+0x14c/0x400 __fs_reclaim_acquire+0x44/0x50 fs_reclaim_acquire+0xbe/0x100 fs_reclaim_correct_nesting+0x20/0x70 dotest+0x5e/0x148 locking_selftest+0x2854/0x2a88 start_kernel+0x3b2/0x4f0 startup_continue+0x2e/0x40 other info that might help us debug this: Possible interrupt unsafe locking scenario: CPU0 CPU1 ---- ---- lock(fs_reclaim); local_irq_disable(); lock(&aq->lock); lock(fs_reclaim); lock(&aq->lock); *** DEADLOCK *** 4 locks held by setupseguest.sh/11034: #0: 000000c485d01440 (sb_writers#4){.+.+}-{0:0}, at: vfs_write+0x2fc/0x380 #1: 000000c4d2283288 (&of->mutex#2){+.+.}-{3:3}, at: kernfs_fop_write_iter+0x12a0x270 #2: 000000c4a1830e48 (kn->active#172){.+.+}-{0:0}, at: kernfs_fop_write_iter+0x1e/0x270 #3: 000000c4a1a12378 (&aq->lock){+.-.}-{2:2}, at: se_bind_store+0x96/0x3a0 the dependencies between SOFTIRQ-irq-safe lock and the holding lock: -> (&aq->lock){+.-.}-{2:2} { HARDIRQ-ON-W at: __lock_acquire+0x5ae/0x15a0 lock_acquire+0x14c/0x400 _raw_spin_lock_bh+0x58/0xb0 ap_queue_init_state+0x2e/0x50 ap_scan_domains+0x5d6/0x620 ap_scan_adapter+0x4c0/0x810 ap_scan_bus+0x70/0x350 ap_scan_bus_wq_callback+0x56/0x80 process_one_work+0x2ba/0x820 worker_thread+0x21a/0x400 kthread+0x164/0x190 __ret_from_fork+0x4c/0x340 ret_from_fork+0xa/0x30 IN-SOFTIRQ-W at: __lock_acquire+0x5ae/0x15a0 lock_acquire+0x14c/0x400 _raw_spin_lock_bh+0x58/0xb0 ap_tasklet_fn+0x72/0xd0 tasklet_action_common+0x174/0x1b0 handle_softirqs+0x180/0x5c0 irq_exit_rcu+0x196/0x200 do_ext_irq+0x12a/0x4d0 ext_int_handler+0xc6/0xf0 folio_zero_user+0x1c6/0x240 folio_zero_user+0x182/0x240 vma_alloc_anon_folio_pmd+0xa0/0x1d0 __do_huge_pmd_anonymous_page+0x3a/0x200 __handle_mm_fault+0x56c/0x590 handle_mm_fault+0xa2/0x370 do_exception+0x292/0x590 __do_pgm_check+0x136/0x3e0 pgm_check_handler+0x114/0x160 INITIAL USE at: __lock_acquire+0x5ae/0x15a0 lock_acquire+0x14c/0x400 _raw_spin_lock_bh+0x58/0xb0 ap_queue_init_state+0x2e/0x50 ap_scan_domains+0x5d6/0x620 ap_scan_adapter+0x4c0/0x810 ap_scan_bus+0x70/0x350 ap_scan_bus_wq_callback+0x56/0x80 process_one_work+0x2ba/0x820 worker_thread+0x21a/0x400 kthread+0x164/0x190 __ret_from_fork+0x4c/0x340 ret_from_fork+0xa/0x30 } ... key at: [<000001c9936e8aa0>] __key.7+0x0/0x10 the dependencies between the lock to be acquired and SOFTIRQ-irq-unsafe lock: -> (fs_reclaim){+.+.}-{0:0} { HARDIRQ-ON-W at: __lock_acquire+0x5ae/0x15a0 lock_acquire+0x14c/0x400 __fs_reclaim_acquire+0x44/0x50 fs_reclaim_acquire+0xbe/0x100 fs_reclaim_correct_nesting+0x20/0x70 dotest+0x5e/0x148 locking_selftest+0x2854/0x2a88 start_kernel+0x3b2/0x4f0 startup_continue+0x2e/0x40 SOFTIRQ-ON-W at: __lock_acquire+0x5ae/0x15a0 lock_acquire+0x14c/0x400 __fs_reclaim_acquire+0x44/0x50 fs_reclaim_acquire+0xbe/0x100 fs_reclaim_correct_nesting+0x20/0x70 dotest+0x5e/0x148 locking_selftest+0x2854/0x2a88 start_kernel+0x3b2/0x4f0 startup_continue+0x2e/0x40 INITIAL USE at: __lock_acquire+0x5ae/0x15a0 lock_acquire+0x14c/0x400 __fs_reclaim_acquire+0x44/0x50 fs_reclaim_acquire+0xbe/0x100 fs_reclaim_correct_nesting+0x20/0x70 dotest+0x5e/0x148 locking_selftest+0x2854/0x2a88 start_kernel+0x3b2/0x4f0 startup_continue+0x2e/0x40 } ... key at: [<000001c991f498e8>] __fs_reclaim_map+0x0/0x30 ... acquired at: check_prev_add+0x178/0xf40 __lock_acquire+0x12aa/0x15a0 lock_acquire+0x14c/0x400 __fs_reclaim_acquire+0x44/0x50 fs_reclaim_acquire+0xbe/0x100 __kmalloc_cache_noprof+0x5a/0x6d0 kobject_uevent_env+0xd4/0x420 ap_send_se_bind_uevent+0x48/0x70 se_bind_store+0x146/0x3a0 kernfs_fop_write_iter+0x18c/0x270 vfs_write+0x23c/0x380 ksys_write+0x88/0x120 __do_syscall+0x170/0x750 system_call+0x72/0x90 stack backtrace: CPU: 6 UID: 0 PID: 11034 Comm: setupseguest.sh Not tainted 7.1.0-20260601.rc6.git2.516b5dbd4d4a.300.fc44.s390x+debug #1 PREEMPT Hardware name: IBM 9175 ME1 701 (KVM/Linux) Call Trace: [<000001c98ffa0a7e>] dump_stack_lvl+0xae/0x108 [<000001c9900a6d7a>] print_bad_irq_dependency+0x47a/0x480 [<000001c9900a7184>] check_irq_usage+0x404/0x4c0 [<000001c9900a73b8>] check_prev_add+0x178/0xf40 [<000001c9900aaf1a>] __lock_acquire+0x12aa/0x15a0 [<000001c9900ab35c>] lock_acquire+0x14c/0x400 [<000001c9903be454>] __fs_reclaim_acquire+0x44/0x50 [<000001c9903be51e>] fs_reclaim_acquire+0xbe/0x100 [<000001c9903cf4ca>] __kmalloc_cache_noprof+0x5a/0x6d0 [<000001c9910ca9d4>] kobject_uevent_env+0xd4/0x420 [<000001c990d84098>] ap_send_se_bind_uevent+0x48/0x70 [<000001c990d87416>] se_bind_store+0x146/0x3a0 [<000001c99057da7c>] kernfs_fop_write_iter+0x18c/0x270 [<000001c99047712c>] vfs_write+0x23c/0x380 [<000001c990477438>] ksys_write+0x88/0x120 [<000001c9910f64e0>] __do_syscall+0x170/0x750 [<000001c99110a412>] system_call+0x72/0x90 INFO: lockdep is turned off. Fixes: 4179c3984227 ("s390/ap: Implement SE bind and associate uevents") Reported-by: Ingo Franzki Suggested-by: Finn Callies Reviewed-by: Finn Callies Signed-off-by: Harald Freudenberger Signed-off-by: Alexander Gordeev --- diff --git a/drivers/s390/crypto/ap_queue.c b/drivers/s390/crypto/ap_queue.c index 232b786d81d14..bc03b2101e397 100644 --- a/drivers/s390/crypto/ap_queue.c +++ b/drivers/s390/crypto/ap_queue.c @@ -961,34 +961,37 @@ static ssize_t se_bind_store(struct device *dev, __ap_flush_queue(aq); aq->rapq_fbit = 1; _ap_queue_init_state(aq); - rc = count; - goto out; + spin_unlock_bh(&aq->lock); + return count; } + /* lock this ap to have fetch and update in an atomic way */ + spin_lock_bh(&aq->lock); + /* Bind. Check current SE bind state */ status = ap_test_queue(aq->qid, 1, &hwinfo); if (status.response_code) { AP_DBF_WARN("%s RC 0x%02x on tapq(0x%02x.%04x)\n", __func__, status.response_code, AP_QID_CARD(aq->qid), AP_QID_QUEUE(aq->qid)); - return -EIO; + rc = -EIO; + goto error; } /* Update BS state */ - spin_lock_bh(&aq->lock); aq->se_bstate = hwinfo.bs; if (hwinfo.bs != AP_BS_Q_AVAIL_FOR_BINDING) { AP_DBF_WARN("%s bind attempt with bs %d on queue 0x%02x.%04x\n", __func__, hwinfo.bs, AP_QID_CARD(aq->qid), AP_QID_QUEUE(aq->qid)); rc = -EINVAL; - goto out; + goto error; } /* Check SM state */ if (aq->sm_state < AP_SM_STATE_IDLE) { rc = -EBUSY; - goto out; + goto error; } /* invoke BAPQ */ @@ -998,7 +1001,7 @@ static ssize_t se_bind_store(struct device *dev, __func__, status.response_code, AP_QID_CARD(aq->qid), AP_QID_QUEUE(aq->qid)); rc = -EIO; - goto out; + goto error; } aq->assoc_idx = ASSOC_IDX_INVALID; @@ -1009,7 +1012,7 @@ static ssize_t se_bind_store(struct device *dev, __func__, status.response_code, AP_QID_CARD(aq->qid), AP_QID_QUEUE(aq->qid)); rc = -EIO; - goto out; + goto error; } aq->se_bstate = hwinfo.bs; if (!(hwinfo.bs == AP_BS_Q_USABLE || @@ -1018,16 +1021,19 @@ static ssize_t se_bind_store(struct device *dev, __func__, hwinfo.bs, AP_QID_CARD(aq->qid), AP_QID_QUEUE(aq->qid)); rc = -EIO; - goto out; + goto error; } /* SE bind was successful */ + spin_unlock_bh(&aq->lock); + AP_DBF_INFO("%s bapq(0x%02x.%04x) success\n", __func__, AP_QID_CARD(aq->qid), AP_QID_QUEUE(aq->qid)); ap_send_se_bind_uevent(&aq->ap_dev); - rc = count; -out: + return count; + +error: spin_unlock_bh(&aq->lock); return rc; } @@ -1093,15 +1099,18 @@ static ssize_t se_associate_store(struct device *dev, if (value >= ASSOC_IDX_INVALID) return -EINVAL; + /* lock this ap to have fetch and update in an atomic way */ + spin_lock_bh(&aq->lock); + /* check current SE bind state */ status = ap_test_queue(aq->qid, 1, &hwinfo); if (status.response_code) { AP_DBF_WARN("%s RC 0x%02x on tapq(0x%02x.%04x)\n", __func__, status.response_code, AP_QID_CARD(aq->qid), AP_QID_QUEUE(aq->qid)); - return -EIO; + rc = -EIO; + goto out; } - spin_lock_bh(&aq->lock); aq->se_bstate = hwinfo.bs; if (hwinfo.bs != AP_BS_Q_USABLE_NO_SECURE_KEY) { AP_DBF_WARN("%s association attempt with bs %d on queue 0x%02x.%04x\n", @@ -1125,17 +1134,16 @@ static ssize_t se_associate_store(struct device *dev, aq->sm_state = AP_SM_STATE_ASSOC_WAIT; aq->assoc_idx = value; ap_wait(ap_sm_event(aq, AP_SM_EVENT_POLL)); + rc = count; break; default: AP_DBF_WARN("%s RC 0x%02x on aapq(0x%02x.%04x)\n", __func__, status.response_code, AP_QID_CARD(aq->qid), AP_QID_QUEUE(aq->qid)); rc = -EIO; - goto out; + break; } - rc = count; - out: spin_unlock_bh(&aq->lock); return rc;