]>
Commit | Line | Data |
---|---|---|
76414229 GKH |
1 | From 95a49603707d982b25d17c5b70e220a05556a2f9 Mon Sep 17 00:00:00 2001 |
2 | From: Ming Lei <tom.leiming@gmail.com> | |
3 | Date: Wed, 22 Mar 2017 10:14:43 +0800 | |
4 | Subject: blk-mq: don't complete un-started request in timeout handler | |
5 | ||
6 | From: Ming Lei <tom.leiming@gmail.com> | |
7 | ||
8 | commit 95a49603707d982b25d17c5b70e220a05556a2f9 upstream. | |
9 | ||
10 | When iterating busy requests in the timeout handler, | |
11 | if the STARTED flag of a request isn't set, that means | |
12 | the request is still being processed in the block layer or driver, and | |
13 | hasn't been submitted to hardware yet. | |
14 | ||
15 | In the current implementation of blk_mq_check_expired(), | |
16 | if the request queue is dying, un-started requests are | |
17 | completed/freed immediately. This is | |
18 | wrong, and can cause rq corruption or double allocation[1][2] | |
19 | when doing I/O and removing & resetting an NVMe device at the same time. | |
20 | ||
21 | This patch fixes several issues reported by Yi Zhang. | |
22 | ||
23 | [1]. oops log 1 | |
24 | [ 581.789754] ------------[ cut here ]------------ | |
25 | [ 581.789758] kernel BUG at block/blk-mq.c:374! | |
26 | [ 581.789760] invalid opcode: 0000 [#1] SMP | |
27 | [ 581.789761] Modules linked in: vfat fat ipmi_ssif intel_rapl sb_edac | |
28 | edac_core x86_pkg_temp_thermal intel_powerclamp coretemp kvm_intel kvm nvme | |
29 | irqbypass crct10dif_pclmul nvme_core crc32_pclmul ghash_clmulni_intel | |
30 | intel_cstate ipmi_si mei_me ipmi_devintf intel_uncore sg ipmi_msghandler | |
31 | intel_rapl_perf iTCO_wdt mei iTCO_vendor_support mxm_wmi lpc_ich dcdbas shpchp | |
32 | pcspkr acpi_power_meter wmi nfsd auth_rpcgss nfs_acl lockd dm_multipath grace | |
33 | sunrpc ip_tables xfs libcrc32c sd_mod mgag200 i2c_algo_bit drm_kms_helper | |
34 | syscopyarea sysfillrect sysimgblt fb_sys_fops ttm drm ahci libahci | |
35 | crc32c_intel tg3 libata megaraid_sas i2c_core ptp fjes pps_core dm_mirror | |
36 | dm_region_hash dm_log dm_mod | |
37 | [ 581.789796] CPU: 1 PID: 1617 Comm: kworker/1:1H Not tainted 4.10.0.bz1420297+ #4 | |
38 | [ 581.789797] Hardware name: Dell Inc. PowerEdge R730xd/072T6D, BIOS 2.2.5 09/06/2016 | |
39 | [ 581.789804] Workqueue: kblockd blk_mq_timeout_work | |
40 | [ 581.789806] task: ffff8804721c8000 task.stack: ffffc90006ee4000 | |
41 | [ 581.789809] RIP: 0010:blk_mq_end_request+0x58/0x70 | |
42 | [ 581.789810] RSP: 0018:ffffc90006ee7d50 EFLAGS: 00010202 | |
43 | [ 581.789811] RAX: 0000000000000001 RBX: ffff8802e4195340 RCX: ffff88028e2f4b88 | |
44 | [ 581.789812] RDX: 0000000000001000 RSI: 0000000000001000 RDI: 0000000000000000 | |
45 | [ 581.789813] RBP: ffffc90006ee7d60 R08: 0000000000000003 R09: ffff88028e2f4b00 | |
46 | [ 581.789814] R10: 0000000000001000 R11: 0000000000000001 R12: 00000000fffffffb | |
47 | [ 581.789815] R13: ffff88042abe5780 R14: 000000000000002d R15: ffff88046fbdff80 | |
48 | [ 581.789817] FS: 0000000000000000(0000) GS:ffff88047fc00000(0000) knlGS:0000000000000000 | |
49 | [ 581.789818] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 | |
50 | [ 581.789819] CR2: 00007f64f403a008 CR3: 000000014d078000 CR4: 00000000001406e0 | |
51 | [ 581.789820] Call Trace: | |
52 | [ 581.789825] blk_mq_check_expired+0x76/0x80 | |
53 | [ 581.789828] bt_iter+0x45/0x50 | |
54 | [ 581.789830] blk_mq_queue_tag_busy_iter+0xdd/0x1f0 | |
55 | [ 581.789832] ? blk_mq_rq_timed_out+0x70/0x70 | |
56 | [ 581.789833] ? blk_mq_rq_timed_out+0x70/0x70 | |
57 | [ 581.789840] ? __switch_to+0x140/0x450 | |
58 | [ 581.789841] blk_mq_timeout_work+0x88/0x170 | |
59 | [ 581.789845] process_one_work+0x165/0x410 | |
60 | [ 581.789847] worker_thread+0x137/0x4c0 | |
61 | [ 581.789851] kthread+0x101/0x140 | |
62 | [ 581.789853] ? rescuer_thread+0x3b0/0x3b0 | |
63 | [ 581.789855] ? kthread_park+0x90/0x90 | |
64 | [ 581.789860] ret_from_fork+0x2c/0x40 | |
65 | [ 581.789861] Code: 48 85 c0 74 0d 44 89 e6 48 89 df ff d0 5b 41 5c 5d c3 48 | |
66 | 8b bb 70 01 00 00 48 85 ff 75 0f 48 89 df e8 7d f0 ff ff 5b 41 5c 5d c3 <0f> | |
67 | 0b e8 71 f0 ff ff 90 eb e9 0f 1f 40 00 66 2e 0f 1f 84 00 00 | |
68 | [ 581.789882] RIP: blk_mq_end_request+0x58/0x70 RSP: ffffc90006ee7d50 | |
69 | [ 581.789889] ---[ end trace bcaf03d9a14a0a70 ]--- | |
70 | ||
71 | [2]. oops log 2 | |
72 | [ 6984.857362] BUG: unable to handle kernel NULL pointer dereference at 0000000000000010 | |
73 | [ 6984.857372] IP: nvme_queue_rq+0x6e6/0x8cd [nvme] | |
74 | [ 6984.857373] PGD 0 | |
75 | [ 6984.857374] | |
76 | [ 6984.857376] Oops: 0000 [#1] SMP | |
77 | [ 6984.857379] Modules linked in: ipmi_ssif vfat fat intel_rapl sb_edac | |
78 | edac_core x86_pkg_temp_thermal intel_powerclamp coretemp kvm_intel kvm | |
79 | irqbypass crct10dif_pclmul crc32_pclmul ghash_clmulni_intel ipmi_si iTCO_wdt | |
80 | iTCO_vendor_support mxm_wmi ipmi_devintf intel_cstate sg dcdbas intel_uncore | |
81 | mei_me intel_rapl_perf mei pcspkr lpc_ich ipmi_msghandler shpchp | |
82 | acpi_power_meter wmi nfsd auth_rpcgss dm_multipath nfs_acl lockd grace sunrpc | |
83 | ip_tables xfs libcrc32c sd_mod mgag200 i2c_algo_bit drm_kms_helper syscopyarea | |
84 | sysfillrect crc32c_intel sysimgblt fb_sys_fops ttm nvme drm nvme_core ahci | |
85 | libahci i2c_core tg3 libata ptp megaraid_sas pps_core fjes dm_mirror | |
86 | dm_region_hash dm_log dm_mod | |
87 | [ 6984.857416] CPU: 7 PID: 1635 Comm: kworker/7:1H Not tainted | |
88 | 4.10.0-2.el7.bz1420297.x86_64 #1 | |
89 | [ 6984.857417] Hardware name: Dell Inc. PowerEdge R730xd/072T6D, BIOS 2.2.5 09/06/2016 | |
90 | [ 6984.857427] Workqueue: kblockd blk_mq_run_work_fn | |
91 | [ 6984.857429] task: ffff880476e3da00 task.stack: ffffc90002e90000 | |
92 | [ 6984.857432] RIP: 0010:nvme_queue_rq+0x6e6/0x8cd [nvme] | |
93 | [ 6984.857433] RSP: 0018:ffffc90002e93c50 EFLAGS: 00010246 | |
94 | [ 6984.857434] RAX: 0000000000000000 RBX: ffff880275646600 RCX: 0000000000001000 | |
95 | [ 6984.857435] RDX: 0000000000000fff RSI: 00000002fba2a000 RDI: ffff8804734e6950 | |
96 | [ 6984.857436] RBP: ffffc90002e93d30 R08: 0000000000002000 R09: 0000000000001000 | |
97 | [ 6984.857437] R10: 0000000000001000 R11: 0000000000000000 R12: ffff8804741d8000 | |
98 | [ 6984.857438] R13: 0000000000000040 R14: ffff880475649f80 R15: ffff8804734e6780 | |
99 | [ 6984.857439] FS: 0000000000000000(0000) GS:ffff88047fcc0000(0000) knlGS:0000000000000000 | |
100 | [ 6984.857440] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 | |
101 | [ 6984.857442] CR2: 0000000000000010 CR3: 0000000001c09000 CR4: 00000000001406e0 | |
102 | [ 6984.857443] Call Trace: | |
103 | [ 6984.857451] ? mempool_free+0x2b/0x80 | |
104 | [ 6984.857455] ? bio_free+0x4e/0x60 | |
105 | [ 6984.857459] blk_mq_dispatch_rq_list+0xf5/0x230 | |
106 | [ 6984.857462] blk_mq_process_rq_list+0x133/0x170 | |
107 | [ 6984.857465] __blk_mq_run_hw_queue+0x8c/0xa0 | |
108 | [ 6984.857467] blk_mq_run_work_fn+0x12/0x20 | |
109 | [ 6984.857473] process_one_work+0x165/0x410 | |
110 | [ 6984.857475] worker_thread+0x137/0x4c0 | |
111 | [ 6984.857478] kthread+0x101/0x140 | |
112 | [ 6984.857480] ? rescuer_thread+0x3b0/0x3b0 | |
113 | [ 6984.857481] ? kthread_park+0x90/0x90 | |
114 | [ 6984.857489] ret_from_fork+0x2c/0x40 | |
115 | [ 6984.857490] Code: 8b bd 70 ff ff ff 89 95 50 ff ff ff 89 8d 58 ff ff ff 44 | |
116 | 89 95 60 ff ff ff e8 b7 dd 12 e1 8b 95 50 ff ff ff 48 89 85 68 ff ff ff <4c> | |
117 | 8b 48 10 44 8b 58 18 8b 8d 58 ff ff ff 44 8b 95 60 ff ff ff | |
118 | [ 6984.857511] RIP: nvme_queue_rq+0x6e6/0x8cd [nvme] RSP: ffffc90002e93c50 | |
119 | [ 6984.857512] CR2: 0000000000000010 | |
120 | [ 6984.895359] ---[ end trace 2d7ceb528432bf83 ]--- | |
121 | ||
122 | Reported-by: Yi Zhang <yizhan@redhat.com> | |
123 | Tested-by: Yi Zhang <yizhan@redhat.com> | |
124 | Reviewed-by: Bart Van Assche <bart.vanassche@sandisk.com> | |
125 | Reviewed-by: Hannes Reinecke <hare@suse.com> | |
126 | Signed-off-by: Ming Lei <tom.leiming@gmail.com> | |
127 | Signed-off-by: Jens Axboe <axboe@fb.com> | |
128 | Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> | |
129 | ||
130 | --- | |
131 | block/blk-mq.c | 11 +---------- | |
132 | 1 file changed, 1 insertion(+), 10 deletions(-) | |
133 | ||
134 | --- a/block/blk-mq.c | |
135 | +++ b/block/blk-mq.c | |
136 | @@ -678,17 +678,8 @@ static void blk_mq_check_expired(struct | |
137 | { | |
138 | struct blk_mq_timeout_data *data = priv; | |
139 | ||
140 | - if (!test_bit(REQ_ATOM_STARTED, &rq->atomic_flags)) { | |
141 | - /* | |
142 | - * If a request wasn't started before the queue was | |
143 | - * marked dying, kill it here or it'll go unnoticed. | |
144 | - */ | |
145 | - if (unlikely(blk_queue_dying(rq->q))) { | |
146 | - rq->errors = -EIO; | |
147 | - blk_mq_end_request(rq, rq->errors); | |
148 | - } | |
149 | + if (!test_bit(REQ_ATOM_STARTED, &rq->atomic_flags)) | |
150 | return; | |
151 | - } | |
152 | ||
153 | if (time_after_eq(jiffies, rq->deadline)) { | |
154 | if (!blk_mark_rq_complete(rq)) |