]>
Commit | Line | Data |
---|---|---|
9515b5a6 GKH |
1 | From 200612ec33e555a356eebc717630b866ae2b694f Mon Sep 17 00:00:00 2001 |
2 | From: Jeff Moyer <jmoyer@redhat.com> | |
3 | Date: Fri, 8 Aug 2014 11:03:41 -0400 | |
4 | Subject: dm table: propagate QUEUE_FLAG_NO_SG_MERGE | |
5 | ||
6 | From: Jeff Moyer <jmoyer@redhat.com> | |
7 | ||
8 | commit 200612ec33e555a356eebc717630b866ae2b694f upstream. | |
9 | ||
10 | Commit 05f1dd5 ("block: add queue flag for disabling SG merging") | |
11 | introduced a new queue flag: QUEUE_FLAG_NO_SG_MERGE. This gets set by | |
12 | default in blk_mq_init_queue for mq-enabled devices. The effect of | |
13 | the flag is to bypass the SG segment merging. Instead, the | |
14 | bio->bi_vcnt is used as the number of hardware segments. | |
15 | ||
16 | With a device mapper target on top of a device with | |
17 | QUEUE_FLAG_NO_SG_MERGE set, we can end up sending down more segments | |
18 | than a driver is prepared to handle. I ran into this when backporting | |
19 | the virtio_blk mq support. It triggered this BUG_ON, in | |
20 | virtio_queue_rq: | |
21 | ||
22 | BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems); | |
23 | ||
24 | The queue's max is set here: | |
25 | blk_queue_max_segments(q, vblk->sg_elems-2); | |
26 | ||
27 | Basically, what happens is that a bio is built up for the dm device | |
28 | (which does not have the QUEUE_FLAG_NO_SG_MERGE flag set) using | |
29 | bio_add_page. That path will call into __blk_recalc_rq_segments, so | |
30 | what you end up with is bi_phys_segments being much smaller than bi_vcnt | |
31 | (and bi_vcnt grows beyond the maximum sg elements). Then, when the bio | |
32 | is submitted, it gets cloned. When the cloned bio is submitted, it will | |
33 | end up in blk_recount_segments, here: | |
34 | ||
35 | if (test_bit(QUEUE_FLAG_NO_SG_MERGE, &q->queue_flags)) | |
36 | bio->bi_phys_segments = bio->bi_vcnt; | |
37 | ||
38 | and now we've set bio->bi_phys_segments to a number that is beyond what | |
39 | was registered as queue_max_segments by the driver. | |
40 | ||
41 | The right way to fix this is to propagate the queue flag up the stack. | |
42 | ||
43 | The rules for propagating the flag are simple: | |
44 | - if the flag is set for any underlying device, it must be set for the | |
45 | upper device | |
46 | - consequently, if the flag is not set for any underlying device, it | |
47 | should not be set for the upper device. | |
48 | ||
49 | Signed-off-by: Jeff Moyer <jmoyer@redhat.com> | |
50 | Signed-off-by: Mike Snitzer <snitzer@redhat.com> | |
51 | Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> | |
52 | ||
53 | --- | |
54 | drivers/md/dm-table.c | 13 +++++++++++++ | |
55 | 1 file changed, 13 insertions(+) | |
56 | ||
57 | --- a/drivers/md/dm-table.c | |
58 | +++ b/drivers/md/dm-table.c | |
59 | @@ -1386,6 +1386,14 @@ static int device_is_not_random(struct d | |
60 | return q && !blk_queue_add_random(q); | |
61 | } | |
62 | ||
63 | +static int queue_supports_sg_merge(struct dm_target *ti, struct dm_dev *dev, | |
64 | + sector_t start, sector_t len, void *data) | |
65 | +{ | |
66 | + struct request_queue *q = bdev_get_queue(dev->bdev); | |
67 | + | |
68 | + return q && !test_bit(QUEUE_FLAG_NO_SG_MERGE, &q->queue_flags); | |
69 | +} | |
70 | + | |
71 | static bool dm_table_all_devices_attribute(struct dm_table *t, | |
72 | iterate_devices_callout_fn func) | |
73 | { | |
74 | @@ -1464,6 +1472,11 @@ void dm_table_set_restrictions(struct dm | |
75 | if (!dm_table_supports_write_same(t)) | |
76 | q->limits.max_write_same_sectors = 0; | |
77 | ||
78 | + if (dm_table_all_devices_attribute(t, queue_supports_sg_merge)) | |
79 | + queue_flag_clear_unlocked(QUEUE_FLAG_NO_SG_MERGE, q); | |
80 | + else | |
81 | + queue_flag_set_unlocked(QUEUE_FLAG_NO_SG_MERGE, q); | |
82 | + | |
83 | dm_table_set_integrity(t); | |
84 | ||
85 | /* |