]>
Commit | Line | Data |
---|---|---|
72fc6108 GKH |
1 | From foo@baz Mon Apr 9 10:16:32 CEST 2018 |
2 | From: Ming Lei <ming.lei@redhat.com> | |
3 | Date: Sat, 6 Jan 2018 16:27:39 +0800 | |
4 | Subject: blk-mq: avoid to map CPU into stale hw queue | |
5 | ||
6 | From: Ming Lei <ming.lei@redhat.com> | |
7 | ||
8 | ||
9 | [ Upstream commit 7d4901a90d02500c8011472a060f9b2e60e6e605 ] | |
10 | ||
11 | blk_mq_pci_map_queues() may not map one CPU into any hw queue, but its | |
12 | previous map isn't cleared yet, and may point to one stale hw queue | |
13 | index. | |
14 | ||
15 | This patch fixes the following issue by clearing the mapping table before | |
16 | setting it up in blk_mq_pci_map_queues(). | |
17 | ||
18 | This patch fixes the following issue reported by Zhang Yi: | |
19 | ||
20 | [ 101.202734] BUG: unable to handle kernel NULL pointer dereference at 0000000094d3013f | |
21 | [ 101.211487] IP: blk_mq_map_swqueue+0xbc/0x200 | |
22 | [ 101.216346] PGD 0 P4D 0 | |
23 | [ 101.219171] Oops: 0000 [#1] SMP | |
24 | [ 101.222674] Modules linked in: sunrpc ipmi_ssif vfat fat intel_rapl sb_edac x86_pkg_temp_thermal intel_powerclamp coretemp kvm_intel kvm irqbypass crct10dif_pclmul crc32_pclmul ghash_clmulni_intel intel_cstate intel_uncore mxm_wmi intel_rapl_perf iTCO_wdt ipmi_si ipmi_devintf pcspkr iTCO_vendor_support sg dcdbas ipmi_msghandler wmi mei_me lpc_ich shpchp mei acpi_power_meter dm_multipath ip_tables xfs libcrc32c sd_mod mgag200 i2c_algo_bit drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm drm ahci libahci crc32c_intel libata tg3 nvme nvme_core megaraid_sas ptp i2c_core pps_core dm_mirror dm_region_hash dm_log dm_mod | |
25 | [ 101.284881] CPU: 0 PID: 504 Comm: kworker/u25:5 Not tainted 4.15.0-rc2 #1 | |
26 | [ 101.292455] Hardware name: Dell Inc. PowerEdge R730xd/072T6D, BIOS 2.5.5 08/16/2017 | |
27 | [ 101.301001] Workqueue: nvme-wq nvme_reset_work [nvme] | |
28 | [ 101.306636] task: 00000000f2c53190 task.stack: 000000002da874f9 | |
29 | [ 101.313241] RIP: 0010:blk_mq_map_swqueue+0xbc/0x200 | |
30 | [ 101.318681] RSP: 0018:ffffc9000234fd70 EFLAGS: 00010282 | |
31 | [ 101.324511] RAX: ffff88047ffc9480 RBX: ffff88047e130850 RCX: 0000000000000000 | |
32 | [ 101.332471] RDX: ffffe8ffffd40580 RSI: ffff88047e509b40 RDI: ffff88046f37a008 | |
33 | [ 101.340432] RBP: 000000000000000b R08: ffff88046f37a008 R09: 0000000011f94280 | |
34 | [ 101.348392] R10: ffff88047ffd4d00 R11: 0000000000000000 R12: ffff88046f37a008 | |
35 | [ 101.356353] R13: ffff88047e130f38 R14: 000000000000000b R15: ffff88046f37a558 | |
36 | [ 101.364314] FS: 0000000000000000(0000) GS:ffff880277c00000(0000) knlGS:0000000000000000 | |
37 | [ 101.373342] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 | |
38 | [ 101.379753] CR2: 0000000000000098 CR3: 000000047f409004 CR4: 00000000001606f0 | |
39 | [ 101.387714] Call Trace: | |
40 | [ 101.390445] blk_mq_update_nr_hw_queues+0xbf/0x130 | |
41 | [ 101.395791] nvme_reset_work+0x6f4/0xc06 [nvme] | |
42 | [ 101.400848] ? pick_next_task_fair+0x290/0x5f0 | |
43 | [ 101.405807] ? __switch_to+0x1f5/0x430 | |
44 | [ 101.409988] ? put_prev_entity+0x2f/0xd0 | |
45 | [ 101.414365] process_one_work+0x141/0x340 | |
46 | [ 101.418836] worker_thread+0x47/0x3e0 | |
47 | [ 101.422921] kthread+0xf5/0x130 | |
48 | [ 101.426424] ? rescuer_thread+0x380/0x380 | |
49 | [ 101.430896] ? kthread_associate_blkcg+0x90/0x90 | |
50 | [ 101.436048] ret_from_fork+0x1f/0x30 | |
51 | [ 101.440034] Code: 48 83 3c ca 00 0f 84 2b 01 00 00 48 63 cd 48 8b 93 10 01 00 00 8b 0c 88 48 8b 83 20 01 00 00 4a 03 14 f5 60 04 af 81 48 8b 0c c8 <48> 8b 81 98 00 00 00 f0 4c 0f ab 30 8b 81 f8 00 00 00 89 42 44 | |
52 | [ 101.461116] RIP: blk_mq_map_swqueue+0xbc/0x200 RSP: ffffc9000234fd70 | |
53 | [ 101.468205] CR2: 0000000000000098 | |
54 | [ 101.471907] ---[ end trace 5fe710f98228a3ca ]--- | |
55 | [ 101.482489] Kernel panic - not syncing: Fatal exception | |
56 | [ 101.488505] Kernel Offset: disabled | |
57 | [ 101.497752] ---[ end Kernel panic - not syncing: Fatal exception | |
58 | ||
59 | Reviewed-by: Christoph Hellwig <hch@lst.de> | |
60 | Suggested-by: Christoph Hellwig <hch@lst.de> | |
61 | Reported-by: Yi Zhang <yi.zhang@redhat.com> | |
62 | Tested-by: Yi Zhang <yi.zhang@redhat.com> | |
63 | Signed-off-by: Ming Lei <ming.lei@redhat.com> | |
64 | Signed-off-by: Jens Axboe <axboe@kernel.dk> | |
65 | Signed-off-by: Sasha Levin <alexander.levin@microsoft.com> | |
66 | Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> | |
67 | --- | |
68 | block/blk-mq.c | 22 ++++++++++++++++++++-- | |
69 | 1 file changed, 20 insertions(+), 2 deletions(-) | |
70 | ||
71 | --- a/block/blk-mq.c | |
72 | +++ b/block/blk-mq.c | |
73 | @@ -2603,9 +2603,27 @@ static int blk_mq_alloc_rq_maps(struct b | |
74 | ||
75 | static int blk_mq_update_queue_map(struct blk_mq_tag_set *set) | |
76 | { | |
77 | - if (set->ops->map_queues) | |
78 | + if (set->ops->map_queues) { | |
79 | + int cpu; | |
80 | + /* | |
81 | + * transport .map_queues is usually done in the following | |
82 | + * way: | |
83 | + * | |
84 | + * for (queue = 0; queue < set->nr_hw_queues; queue++) { | |
85 | + * mask = get_cpu_mask(queue) | |
86 | + * for_each_cpu(cpu, mask) | |
87 | + * set->mq_map[cpu] = queue; | |
88 | + * } | |
89 | + * | |
90 | + * When we need to remap, the table has to be cleared for | |
91 | + * killing stale mapping since one CPU may not be mapped | |
92 | + * to any hw queue. | |
93 | + */ | |
94 | + for_each_possible_cpu(cpu) | |
95 | + set->mq_map[cpu] = 0; | |
96 | + | |
97 | return set->ops->map_queues(set); | |
98 | - else | |
99 | + } else | |
100 | return blk_mq_map_queues(set); | |
101 | } | |
102 |