Revisit and reorganize the locking and lock coverage of the
ap->lock spinlock as used in the two sysfs functions
se_bind_store() and se_associate_store().
A kernel run reported a possible deadlock situation, caused by
holding the spinlock (ap->lock) while triggering a uevent.
The fix rearranges the code protected by the spinlock by excluding
the uevent invocation, which does not require protection.
Additionally, the start of the protected region is moved earlier
to cover more lines, ensuring a consistent view of the AP queue
state between reading and updating its struct fields.
=====================================================
WARNING: SOFTIRQ-safe -> SOFTIRQ-unsafe lock order detected
7.1.0-
20260601.rc6.git12.
516b5dbd4d4a.300.fc44.s390x+debug #1 Not tainted
-----------------------------------------------------
setupseguest.sh/11034 [HC0[0]:SC0[2]:HE1:SE0] is trying to acquire:
000001c991f498e8 (fs_reclaim){+.+.}-{0:0}, at: __kmalloc_cache_noprof+0x5a/0x6d0
and this task is already holding:
000000c4a1a12378 (&aq->lock){+.-.}-{2:2}, at: se_bind_store+0x96/0x3a0
which would create a new lock dependency:
(&aq->lock){+.-.}-{2:2} -> (fs_reclaim){+.+.}-{0:0}
but this new dependency connects a SOFTIRQ-irq-safe lock:
(&aq->lock){+.-.}-{2:2}
... which became SOFTIRQ-irq-safe at:
__lock_acquire+0x5ae/0x15a0
lock_acquire+0x14c/0x400
_raw_spin_lock_bh+0x58/0xb0
ap_tasklet_fn+0x72/0xd0
tasklet_action_common+0x174/0x1b0
handle_softirqs+0x180/0x5c0
irq_exit_rcu+0x196/0x200
do_ext_irq+0x12a/0x4d0
ext_int_handler+0xc6/0xf0
folio_zero_user+0x1c6/0x240
folio_zero_user+0x182/0x240
vma_alloc_anon_folio_pmd+0xa0/0x1d0
__do_huge_pmd_anonymous_page+0x3a/0x200
__handle_mm_fault+0x56c/0x590
handle_mm_fault+0xa2/0x370
do_exception+0x292/0x590
__do_pgm_check+0x136/0x3e0
pgm_check_handler+0x114/0x160
to a SOFTIRQ-irq-unsafe lock:
(fs_reclaim){+.+.}-{0:0}
... which became SOFTIRQ-irq-unsafe at:
...
__lock_acquire+0x5ae/0x15a0
lock_acquire+0x14c/0x400
__fs_reclaim_acquire+0x44/0x50
fs_reclaim_acquire+0xbe/0x100
fs_reclaim_correct_nesting+0x20/0x70
dotest+0x5e/0x148
locking_selftest+0x2854/0x2a88
start_kernel+0x3b2/0x4f0
startup_continue+0x2e/0x40
other info that might help us debug this:
Possible interrupt unsafe locking scenario:
CPU0 CPU1
---- ----
lock(fs_reclaim);
local_irq_disable();
lock(&aq->lock);
lock(fs_reclaim);
<Interrupt>
lock(&aq->lock);
*** DEADLOCK ***
4 locks held by setupseguest.sh/11034:
#0:
000000c485d01440 (sb_writers#4){.+.+}-{0:0}, at: vfs_write+0x2fc/0x380
#1:
000000c4d2283288 (&of->mutex#2){+.+.}-{3:3}, at: kernfs_fop_write_iter+0x12a0x270
#2:
000000c4a1830e48 (kn->active#172){.+.+}-{0:0}, at: kernfs_fop_write_iter+0x1e/0x270
#3:
000000c4a1a12378 (&aq->lock){+.-.}-{2:2}, at: se_bind_store+0x96/0x3a0
the dependencies between SOFTIRQ-irq-safe lock and the holding lock:
-> (&aq->lock){+.-.}-{2:2} {
HARDIRQ-ON-W at:
__lock_acquire+0x5ae/0x15a0
lock_acquire+0x14c/0x400
_raw_spin_lock_bh+0x58/0xb0
ap_queue_init_state+0x2e/0x50
ap_scan_domains+0x5d6/0x620
ap_scan_adapter+0x4c0/0x810
ap_scan_bus+0x70/0x350
ap_scan_bus_wq_callback+0x56/0x80
process_one_work+0x2ba/0x820
worker_thread+0x21a/0x400
kthread+0x164/0x190
__ret_from_fork+0x4c/0x340
ret_from_fork+0xa/0x30
IN-SOFTIRQ-W at:
__lock_acquire+0x5ae/0x15a0
lock_acquire+0x14c/0x400
_raw_spin_lock_bh+0x58/0xb0
ap_tasklet_fn+0x72/0xd0
tasklet_action_common+0x174/0x1b0
handle_softirqs+0x180/0x5c0
irq_exit_rcu+0x196/0x200
do_ext_irq+0x12a/0x4d0
ext_int_handler+0xc6/0xf0
folio_zero_user+0x1c6/0x240
folio_zero_user+0x182/0x240
vma_alloc_anon_folio_pmd+0xa0/0x1d0
__do_huge_pmd_anonymous_page+0x3a/0x200
__handle_mm_fault+0x56c/0x590
handle_mm_fault+0xa2/0x370
do_exception+0x292/0x590
__do_pgm_check+0x136/0x3e0
pgm_check_handler+0x114/0x160
INITIAL USE at:
__lock_acquire+0x5ae/0x15a0
lock_acquire+0x14c/0x400
_raw_spin_lock_bh+0x58/0xb0
ap_queue_init_state+0x2e/0x50
ap_scan_domains+0x5d6/0x620
ap_scan_adapter+0x4c0/0x810
ap_scan_bus+0x70/0x350
ap_scan_bus_wq_callback+0x56/0x80
process_one_work+0x2ba/0x820
worker_thread+0x21a/0x400
kthread+0x164/0x190
__ret_from_fork+0x4c/0x340
ret_from_fork+0xa/0x30
}
... key at: [<
000001c9936e8aa0>] __key.7+0x0/0x10
the dependencies between the lock to be acquired
and SOFTIRQ-irq-unsafe lock:
-> (fs_reclaim){+.+.}-{0:0} {
HARDIRQ-ON-W at:
__lock_acquire+0x5ae/0x15a0
lock_acquire+0x14c/0x400
__fs_reclaim_acquire+0x44/0x50
fs_reclaim_acquire+0xbe/0x100
fs_reclaim_correct_nesting+0x20/0x70
dotest+0x5e/0x148
locking_selftest+0x2854/0x2a88
start_kernel+0x3b2/0x4f0
startup_continue+0x2e/0x40
SOFTIRQ-ON-W at:
__lock_acquire+0x5ae/0x15a0
lock_acquire+0x14c/0x400
__fs_reclaim_acquire+0x44/0x50
fs_reclaim_acquire+0xbe/0x100
fs_reclaim_correct_nesting+0x20/0x70
dotest+0x5e/0x148
locking_selftest+0x2854/0x2a88
start_kernel+0x3b2/0x4f0
startup_continue+0x2e/0x40
INITIAL USE at:
__lock_acquire+0x5ae/0x15a0
lock_acquire+0x14c/0x400
__fs_reclaim_acquire+0x44/0x50
fs_reclaim_acquire+0xbe/0x100
fs_reclaim_correct_nesting+0x20/0x70
dotest+0x5e/0x148
locking_selftest+0x2854/0x2a88
start_kernel+0x3b2/0x4f0
startup_continue+0x2e/0x40
}
... key at: [<
000001c991f498e8>] __fs_reclaim_map+0x0/0x30
... acquired at:
check_prev_add+0x178/0xf40
__lock_acquire+0x12aa/0x15a0
lock_acquire+0x14c/0x400
__fs_reclaim_acquire+0x44/0x50
fs_reclaim_acquire+0xbe/0x100
__kmalloc_cache_noprof+0x5a/0x6d0
kobject_uevent_env+0xd4/0x420
ap_send_se_bind_uevent+0x48/0x70
se_bind_store+0x146/0x3a0
kernfs_fop_write_iter+0x18c/0x270
vfs_write+0x23c/0x380
ksys_write+0x88/0x120
__do_syscall+0x170/0x750
system_call+0x72/0x90
stack backtrace:
CPU: 6 UID: 0 PID: 11034 Comm: setupseguest.sh Not tainted 7.1.0-
20260601.rc6.git2.
516b5dbd4d4a.300.fc44.s390x+debug #1 PREEMPT
Hardware name: IBM 9175 ME1 701 (KVM/Linux)
Call Trace:
[<
000001c98ffa0a7e>] dump_stack_lvl+0xae/0x108
[<
000001c9900a6d7a>] print_bad_irq_dependency+0x47a/0x480
[<
000001c9900a7184>] check_irq_usage+0x404/0x4c0
[<
000001c9900a73b8>] check_prev_add+0x178/0xf40
[<
000001c9900aaf1a>] __lock_acquire+0x12aa/0x15a0
[<
000001c9900ab35c>] lock_acquire+0x14c/0x400
[<
000001c9903be454>] __fs_reclaim_acquire+0x44/0x50
[<
000001c9903be51e>] fs_reclaim_acquire+0xbe/0x100
[<
000001c9903cf4ca>] __kmalloc_cache_noprof+0x5a/0x6d0
[<
000001c9910ca9d4>] kobject_uevent_env+0xd4/0x420
[<
000001c990d84098>] ap_send_se_bind_uevent+0x48/0x70
[<
000001c990d87416>] se_bind_store+0x146/0x3a0
[<
000001c99057da7c>] kernfs_fop_write_iter+0x18c/0x270
[<
000001c99047712c>] vfs_write+0x23c/0x380
[<
000001c990477438>] ksys_write+0x88/0x120
[<
000001c9910f64e0>] __do_syscall+0x170/0x750
[<
000001c99110a412>] system_call+0x72/0x90
INFO: lockdep is turned off.
Fixes: 4179c3984227 ("s390/ap: Implement SE bind and associate uevents")
Reported-by: Ingo Franzki <ifranzki@linux.ibm.com>
Suggested-by: Finn Callies <fcallies@linux.ibm.com>
Reviewed-by: Finn Callies <fcallies@linux.ibm.com>
Signed-off-by: Harald Freudenberger <freude@linux.ibm.com>
Signed-off-by: Alexander Gordeev <agordeev@linux.ibm.com>
__ap_flush_queue(aq);
aq->rapq_fbit = 1;
_ap_queue_init_state(aq);
- rc = count;
- goto out;
+ spin_unlock_bh(&aq->lock);
+ return count;
}
+ /* lock this ap to have fetch and update in an atomic way */
+ spin_lock_bh(&aq->lock);
+
/* Bind. Check current SE bind state */
status = ap_test_queue(aq->qid, 1, &hwinfo);
if (status.response_code) {
AP_DBF_WARN("%s RC 0x%02x on tapq(0x%02x.%04x)\n",
__func__, status.response_code,
AP_QID_CARD(aq->qid), AP_QID_QUEUE(aq->qid));
- return -EIO;
+ rc = -EIO;
+ goto error;
}
/* Update BS state */
- spin_lock_bh(&aq->lock);
aq->se_bstate = hwinfo.bs;
if (hwinfo.bs != AP_BS_Q_AVAIL_FOR_BINDING) {
AP_DBF_WARN("%s bind attempt with bs %d on queue 0x%02x.%04x\n",
__func__, hwinfo.bs,
AP_QID_CARD(aq->qid), AP_QID_QUEUE(aq->qid));
rc = -EINVAL;
- goto out;
+ goto error;
}
/* Check SM state */
if (aq->sm_state < AP_SM_STATE_IDLE) {
rc = -EBUSY;
- goto out;
+ goto error;
}
/* invoke BAPQ */
__func__, status.response_code,
AP_QID_CARD(aq->qid), AP_QID_QUEUE(aq->qid));
rc = -EIO;
- goto out;
+ goto error;
}
aq->assoc_idx = ASSOC_IDX_INVALID;
__func__, status.response_code,
AP_QID_CARD(aq->qid), AP_QID_QUEUE(aq->qid));
rc = -EIO;
- goto out;
+ goto error;
}
aq->se_bstate = hwinfo.bs;
if (!(hwinfo.bs == AP_BS_Q_USABLE ||
__func__, hwinfo.bs,
AP_QID_CARD(aq->qid), AP_QID_QUEUE(aq->qid));
rc = -EIO;
- goto out;
+ goto error;
}
/* SE bind was successful */
+ spin_unlock_bh(&aq->lock);
+
AP_DBF_INFO("%s bapq(0x%02x.%04x) success\n", __func__,
AP_QID_CARD(aq->qid), AP_QID_QUEUE(aq->qid));
ap_send_se_bind_uevent(&aq->ap_dev);
- rc = count;
-out:
+ return count;
+
+error:
spin_unlock_bh(&aq->lock);
return rc;
}
if (value >= ASSOC_IDX_INVALID)
return -EINVAL;
+ /* lock this ap to have fetch and update in an atomic way */
+ spin_lock_bh(&aq->lock);
+
/* check current SE bind state */
status = ap_test_queue(aq->qid, 1, &hwinfo);
if (status.response_code) {
AP_DBF_WARN("%s RC 0x%02x on tapq(0x%02x.%04x)\n",
__func__, status.response_code,
AP_QID_CARD(aq->qid), AP_QID_QUEUE(aq->qid));
- return -EIO;
+ rc = -EIO;
+ goto out;
}
- spin_lock_bh(&aq->lock);
aq->se_bstate = hwinfo.bs;
if (hwinfo.bs != AP_BS_Q_USABLE_NO_SECURE_KEY) {
AP_DBF_WARN("%s association attempt with bs %d on queue 0x%02x.%04x\n",
aq->sm_state = AP_SM_STATE_ASSOC_WAIT;
aq->assoc_idx = value;
ap_wait(ap_sm_event(aq, AP_SM_EVENT_POLL));
+ rc = count;
break;
default:
AP_DBF_WARN("%s RC 0x%02x on aapq(0x%02x.%04x)\n",
__func__, status.response_code,
AP_QID_CARD(aq->qid), AP_QID_QUEUE(aq->qid));
rc = -EIO;
- goto out;
+ break;
}
- rc = count;
-
out:
spin_unlock_bh(&aq->lock);
return rc;