git.ipfire.org Git - thirdparty/linux.git/commitdiff
selftests/ublk: add test for async partition scan
author Ming Lei <ming.lei@redhat.com>
Tue, 23 Dec 2025 03:27:41 +0000 (11:27 +0800)
committer Jens Axboe <axboe@kernel.dk>
Sun, 28 Dec 2025 16:25:26 +0000 (09:25 -0700)
Add test_generic_15.sh to verify that async partition scan prevents
IO hang when reading partition tables.

The test creates ublk devices with fault_inject target and very large
delay (60s) to simulate blocked partition table reads, then kills the
daemon to verify proper state transitions without hanging:

1. Without recovery support:
   - Create device with fault_inject and 60s delay
   - Kill daemon while partition scan may be blocked
   - Verify device transitions to DEAD state

2. With recovery support (-r 1):
   - Create device with fault_inject, 60s delay, and recovery
   - Kill daemon while partition scan may be blocked
   - Verify device transitions to QUIESCED state

Before the async partition scan fix, killing the daemon during
partition scan would cause deadlock as partition scan held ub->mutex
while waiting for IO. With the async fix, partition scan happens in
a work function and flush_work() ensures proper synchronization.

Add _add_ublk_dev_no_settle() helper function to skip udevadm settle,
which would otherwise hang waiting for partition scan events to
complete when partition table read is delayed.

Signed-off-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
tools/testing/selftests/ublk/Makefile
tools/testing/selftests/ublk/test_common.sh
tools/testing/selftests/ublk/test_generic_15.sh [new file with mode: 0755]

index 837977b62417107fcad59bf7ca2ec3c79239e156..eb0e6cfb00ad305a21b4a2db8ae58424cd88978a 100644 (file)
@@ -22,6 +22,7 @@ TEST_PROGS += test_generic_11.sh
 TEST_PROGS += test_generic_12.sh
 TEST_PROGS += test_generic_13.sh
 TEST_PROGS += test_generic_14.sh
+TEST_PROGS += test_generic_15.sh
 
 TEST_PROGS += test_null_01.sh
 TEST_PROGS += test_null_02.sh
index 6f1c042de40e797dc30744e31de521ae08f39c2f..ea9a5f3eb70abd2d842165515ab48cf6bea97bfa 100755 (executable)
@@ -178,8 +178,9 @@ _have_feature()
 _create_ublk_dev() {
        local dev_id;
        local cmd=$1
+       local settle=$2
 
-       shift 1
+       shift 2
 
        if [ ! -c /dev/ublk-control ]; then
                return ${UBLK_SKIP_CODE}
@@ -194,7 +195,10 @@ _create_ublk_dev() {
                echo "fail to add ublk dev $*"
                return 255
        fi
-       udevadm settle
+
+       if [ "$settle" = "yes" ]; then
+               udevadm settle
+       fi
 
        if [[ "$dev_id" =~ ^[0-9]+$ ]]; then
                echo "${dev_id}"
@@ -204,14 +208,18 @@ _create_ublk_dev() {
 }
 
 _add_ublk_dev() {
-       _create_ublk_dev "add" "$@"
+       _create_ublk_dev "add" "yes" "$@"
+}
+
+_add_ublk_dev_no_settle() {
+       _create_ublk_dev "add" "no" "$@"
 }
 
 _recover_ublk_dev() {
        local dev_id
        local state
 
-       dev_id=$(_create_ublk_dev "recover" "$@")
+       dev_id=$(_create_ublk_dev "recover" "yes" "$@")
        for ((j=0;j<20;j++)); do
                state=$(_get_ublk_dev_state "${dev_id}")
                [ "$state" == "LIVE" ] && break
diff --git a/tools/testing/selftests/ublk/test_generic_15.sh b/tools/testing/selftests/ublk/test_generic_15.sh
new file mode 100755 (executable)
index 0000000..7637936
--- /dev/null
@@ -0,0 +1,68 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
+
+TID="generic_15"
+ERR_CODE=0
+
+_test_partition_scan_no_hang()
+{
+       local recovery_flag=$1
+       local expected_state=$2
+       local dev_id
+       local state
+       local daemon_pid
+       local start_time
+       local elapsed
+
+       # Create ublk device with fault_inject target and very large delay
+       # to simulate hang during partition table read
+       # --delay_us 60000000 = 60 seconds delay
+       # Use _add_ublk_dev_no_settle to avoid udevadm settle hang waiting
+       # for partition scan events to complete
+       if [ "$recovery_flag" = "yes" ]; then
+               echo "Testing partition scan with recovery support..."
+               dev_id=$(_add_ublk_dev_no_settle -t fault_inject -q 1 -d 1 --delay_us 60000000 -r 1)
+       else
+               echo "Testing partition scan without recovery..."
+               dev_id=$(_add_ublk_dev_no_settle -t fault_inject -q 1 -d 1 --delay_us 60000000)
+       fi
+
+       _check_add_dev "$TID" $?
+
+       # The add command should return quickly because partition scan is async.
+       # Now sleep briefly to let the async partition scan work start and hit
+       # the delay in the fault_inject handler.
+       sleep 1
+
+       # Kill the ublk daemon while partition scan is potentially blocked
+       # And check state transitions properly
+       start_time=${SECONDS}
+       daemon_pid=$(_get_ublk_daemon_pid "${dev_id}")
+       state=$(__ublk_kill_daemon "${dev_id}" "${expected_state}")
+       elapsed=$((SECONDS - start_time))
+
+       # Verify the device transitioned to expected state
+       if [ "$state" != "${expected_state}" ]; then
+               echo "FAIL: Device state is $state, expected ${expected_state}"
+               ERR_CODE=255
+               ${UBLK_PROG} del -n "${dev_id}" > /dev/null 2>&1
+               return
+       fi
+       echo "PASS: Device transitioned to ${expected_state} in ${elapsed}s without hanging"
+
+       # Clean up the device
+       ${UBLK_PROG} del -n "${dev_id}" > /dev/null 2>&1
+}
+
+_prep_test "partition_scan" "verify async partition scan prevents IO hang"
+
+# Test 1: Without recovery support - should transition to DEAD
+_test_partition_scan_no_hang "no" "DEAD"
+
+# Test 2: With recovery support - should transition to QUIESCED
+_test_partition_scan_no_hang "yes" "QUIESCED"
+
+_cleanup_test "partition_scan"
+_show_result $TID $ERR_CODE