Commit: a709ecc8 (Greg Kroah-Hartman)
1 | From neilb@suse.de Wed Apr 21 15:45:49 2010 |
2 | From: Neil Brown <neilb@suse.de> | |
3 | Date: Wed, 31 Mar 2010 12:07:16 +1100 | |
4 | Subject: md: deal with merge_bvec_fn in component devices better. | |
5 | To: stable@kernel.org | |
6 | Message-ID: <20100331120716.14054609@notabene.brown> | |
7 | ||
8 | From: NeilBrown <neilb@suse.de> | |
9 | ||
10 | commit 627a2d3c29427637f4c5d31ccc7fcbd8d312cd71 upstream. | |
11 | ||
12 | If a component device has a merge_bvec_fn then as we never call it | |
13 | we must ensure we never need to. Currently this is done by setting | |
14 | max_sector to 1 PAGE, however this does not stop a bio being created | |
15 | with several sub-page iovecs that would violate the merge_bvec_fn. | |
16 | ||
17 | So instead set max_phys_segments to 1 and set the segment boundary to the | |
18 | same as a page boundary to ensure there is only ever one single-page | |
19 | segment of IO requested at a time. | |
20 | ||
21 | This can particularly be an issue when 'xen' is used as it is | |
22 | known to submit multiple small buffers in a single bio. | |
23 | ||
24 | Signed-off-by: NeilBrown <neilb@suse.de> | |
25 | Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> | |
26 | ||
27 | --- | |
28 | drivers/md/linear.c | 12 +++++++----- | |
29 | drivers/md/multipath.c | 20 ++++++++++++-------- | |
30 | drivers/md/raid0.c | 13 +++++++------ | |
31 | drivers/md/raid10.c | 28 +++++++++++++++++----------- | |
32 | 4 files changed, 43 insertions(+), 30 deletions(-) | |
33 | ||
34 | --- a/drivers/md/linear.c | |
35 | +++ b/drivers/md/linear.c | |
36 | @@ -172,12 +172,14 @@ static linear_conf_t *linear_conf(mddev_ | |
37 | disk_stack_limits(mddev->gendisk, rdev->bdev, | |
38 | rdev->data_offset << 9); | |
39 | /* as we don't honour merge_bvec_fn, we must never risk | |
40 | - * violating it, so limit ->max_sector to one PAGE, as | |
41 | - * a one page request is never in violation. | |
42 | + * violating it, so limit max_phys_segments to 1 lying within | |
43 | + * a single page. | |
44 | */ | |
45 | - if (rdev->bdev->bd_disk->queue->merge_bvec_fn && | |
46 | - queue_max_sectors(mddev->queue) > (PAGE_SIZE>>9)) | |
47 | - blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9); | |
48 | + if (rdev->bdev->bd_disk->queue->merge_bvec_fn) { | |
49 | + blk_queue_max_phys_segments(mddev->queue, 1); | |
50 | + blk_queue_segment_boundary(mddev->queue, | |
51 | + PAGE_CACHE_SIZE - 1); | |
52 | + } | |
53 | ||
54 | conf->array_sectors += rdev->sectors; | |
55 | cnt++; | |
56 | --- a/drivers/md/multipath.c | |
57 | +++ b/drivers/md/multipath.c | |
58 | @@ -301,14 +301,16 @@ static int multipath_add_disk(mddev_t *m | |
59 | rdev->data_offset << 9); | |
60 | ||
61 | /* as we don't honour merge_bvec_fn, we must never risk | |
62 | - * violating it, so limit ->max_sector to one PAGE, as | |
63 | - * a one page request is never in violation. | |
64 | + * violating it, so limit ->max_phys_segments to one, lying | |
65 | + * within a single page. | |
66 | * (Note: it is very unlikely that a device with | |
67 | * merge_bvec_fn will be involved in multipath.) | |
68 | */ | |
69 | - if (q->merge_bvec_fn && | |
70 | - queue_max_sectors(q) > (PAGE_SIZE>>9)) | |
71 | - blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9); | |
72 | + if (q->merge_bvec_fn) { | |
73 | + blk_queue_max_phys_segments(mddev->queue, 1); | |
74 | + blk_queue_segment_boundary(mddev->queue, | |
75 | + PAGE_CACHE_SIZE - 1); | |
76 | + } | |
77 | ||
78 | conf->working_disks++; | |
79 | mddev->degraded--; | |
80 | @@ -476,9 +478,11 @@ static int multipath_run (mddev_t *mddev | |
81 | /* as we don't honour merge_bvec_fn, we must never risk | |
82 | * violating it, not that we ever expect a device with | |
83 | * a merge_bvec_fn to be involved in multipath */ | |
84 | - if (rdev->bdev->bd_disk->queue->merge_bvec_fn && | |
85 | - queue_max_sectors(mddev->queue) > (PAGE_SIZE>>9)) | |
86 | - blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9); | |
87 | + if (rdev->bdev->bd_disk->queue->merge_bvec_fn) { | |
88 | + blk_queue_max_phys_segments(mddev->queue, 1); | |
89 | + blk_queue_segment_boundary(mddev->queue, | |
90 | + PAGE_CACHE_SIZE - 1); | |
91 | + } | |
92 | ||
93 | if (!test_bit(Faulty, &rdev->flags)) | |
94 | conf->working_disks++; | |
95 | --- a/drivers/md/raid0.c | |
96 | +++ b/drivers/md/raid0.c | |
97 | @@ -176,14 +176,15 @@ static int create_strip_zones(mddev_t *m | |
98 | disk_stack_limits(mddev->gendisk, rdev1->bdev, | |
99 | rdev1->data_offset << 9); | |
100 | /* as we don't honour merge_bvec_fn, we must never risk | |
101 | - * violating it, so limit ->max_sector to one PAGE, as | |
102 | - * a one page request is never in violation. | |
103 | + * violating it, so limit ->max_phys_segments to 1, lying within | |
104 | + * a single page. | |
105 | */ | |
106 | ||
107 | - if (rdev1->bdev->bd_disk->queue->merge_bvec_fn && | |
108 | - queue_max_sectors(mddev->queue) > (PAGE_SIZE>>9)) | |
109 | - blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9); | |
110 | - | |
111 | + if (rdev1->bdev->bd_disk->queue->merge_bvec_fn) { | |
112 | + blk_queue_max_phys_segments(mddev->queue, 1); | |
113 | + blk_queue_segment_boundary(mddev->queue, | |
114 | + PAGE_CACHE_SIZE - 1); | |
115 | + } | |
116 | if (!smallest || (rdev1->sectors < smallest->sectors)) | |
117 | smallest = rdev1; | |
118 | cnt++; | |
119 | --- a/drivers/md/raid10.c | |
120 | +++ b/drivers/md/raid10.c | |
121 | @@ -1155,13 +1155,17 @@ static int raid10_add_disk(mddev_t *mdde | |
122 | ||
123 | disk_stack_limits(mddev->gendisk, rdev->bdev, | |
124 | rdev->data_offset << 9); | |
125 | - /* as we don't honour merge_bvec_fn, we must never risk | |
126 | - * violating it, so limit ->max_sector to one PAGE, as | |
127 | - * a one page request is never in violation. | |
128 | + /* as we don't honour merge_bvec_fn, we must | |
129 | + * never risk violating it, so limit | |
130 | + * ->max_phys_segments to one lying with a single | |
131 | + * page, as a one page request is never in | |
132 | + * violation. | |
133 | */ | |
134 | - if (rdev->bdev->bd_disk->queue->merge_bvec_fn && | |
135 | - queue_max_sectors(mddev->queue) > (PAGE_SIZE>>9)) | |
136 | - blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9); | |
137 | + if (rdev->bdev->bd_disk->queue->merge_bvec_fn) { | |
138 | + blk_queue_max_phys_segments(mddev->queue, 1); | |
139 | + blk_queue_segment_boundary(mddev->queue, | |
140 | + PAGE_CACHE_SIZE - 1); | |
141 | + } | |
142 | ||
143 | p->head_position = 0; | |
144 | rdev->raid_disk = mirror; | |
145 | @@ -2155,12 +2159,14 @@ static int run(mddev_t *mddev) | |
146 | disk_stack_limits(mddev->gendisk, rdev->bdev, | |
147 | rdev->data_offset << 9); | |
148 | /* as we don't honour merge_bvec_fn, we must never risk | |
149 | - * violating it, so limit ->max_sector to one PAGE, as | |
150 | - * a one page request is never in violation. | |
151 | + * violating it, so limit max_phys_segments to 1 lying | |
152 | + * within a single page. | |
153 | */ | |
154 | - if (rdev->bdev->bd_disk->queue->merge_bvec_fn && | |
155 | - queue_max_sectors(mddev->queue) > (PAGE_SIZE>>9)) | |
156 | - blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9); | |
157 | + if (rdev->bdev->bd_disk->queue->merge_bvec_fn) { | |
158 | + blk_queue_max_phys_segments(mddev->queue, 1); | |
159 | + blk_queue_segment_boundary(mddev->queue, | |
160 | + PAGE_CACHE_SIZE - 1); | |
161 | + } | |
162 | ||
163 | disk->head_position = 0; | |
164 | } |