]>
Commit | Line | Data |
---|---|---|
00e5a55c BS |
1 | From b522adcde9c4d3fb7b579cfa9160d8bde7744be8 Mon Sep 17 00:00:00 2001 |
2 | From: Dan Williams <dan.j.williams@intel.com> | |
3 | Date: Tue, 31 Mar 2009 15:00:31 +1100 | |
4 | Subject: [PATCH] md: 'array_size' sysfs attribute | |
5 | ||
6 | Allow userspace to set the size of the array according to the following | |
7 | semantics: | |
8 | ||
9 | 1/ size must be <= the size returned by mddev->pers->size(mddev, 0, 0) |
10 | a) If size is set before the array is running, do_md_run will fail | |
11 | if size is greater than the default size | |
12 | b) A reshape attempt that reduces the default size to less than the set | |
13 | array size should be blocked | |
14 | 2/ once userspace sets the size the kernel will not change it | |
15 | 3/ writing 'default' to this attribute returns control of the size to the | |
16 | kernel and reverts to the size reported by the personality | |
17 | ||
18 | Also, convert locations that need to know the default size from directly | |
19 | reading ->array_sectors to <pers>_size. Resync/reshape operations | |
20 | always follow the default size. | |
21 | ||
22 | Finally, fixup other locations that read a number of 1k-blocks from | |
23 | userspace to use strict_blocks_to_sectors() which checks for unsigned | |
24 | long long to sector_t overflow and blocks to sectors overflow. | |
25 | ||
26 | Reviewed-by: Andre Noll <maan@systemlinux.org> | |
27 | Signed-off-by: Dan Williams <dan.j.williams@intel.com> | |
28 | Acked-by: NeilBrown <neilb@suse.de> | |
29 | --- | |
30 | drivers/md/md.c | 101 +++++++++++++++++++++++++++++++++++++++++++++- | |
31 | drivers/md/raid0.c | 2 | |
32 | drivers/md/raid1.c | 6 +- | |
33 | drivers/md/raid10.c | 2 | |
34 | drivers/md/raid5.c | 9 +++- | |
35 | include/linux/raid/md_k.h | 3 + | |
36 | 6 files changed, 116 insertions(+), 7 deletions(-) | |
37 | ||
38 | --- linux-2.6.27-SLE11_BRANCH.orig/drivers/md/md.c | |
39 | +++ linux-2.6.27-SLE11_BRANCH/drivers/md/md.c | |
40 | @@ -300,6 +300,11 @@ static inline int mddev_lock(mddev_t * m | |
41 | return mutex_lock_interruptible(&mddev->reconfig_mutex); | |
42 | } | |
43 | ||
44 | +static inline int mddev_is_locked(mddev_t *mddev) | |
45 | +{ | |
46 | + return mutex_is_locked(&mddev->reconfig_mutex); | |
47 | +} | |
48 | + | |
49 | static inline int mddev_trylock(mddev_t * mddev) | |
50 | { | |
51 | return mutex_trylock(&mddev->reconfig_mutex); | |
52 | @@ -2146,6 +2151,25 @@ static int overlaps(sector_t s1, sector_ | |
53 | return 1; | |
54 | } | |
55 | ||
56 | +static int strict_blocks_to_sectors(const char *buf, sector_t *sectors) | |
57 | +{ | |
58 | + unsigned long long blocks; | |
59 | + sector_t new; | |
60 | + | |
61 | + if (strict_strtoull(buf, 10, &blocks) < 0) | |
62 | + return -EINVAL; | |
63 | + | |
64 | + if (blocks & 1ULL << (8 * sizeof(blocks) - 1)) | |
65 | + return -EINVAL; /* sector conversion overflow */ | |
66 | + | |
67 | + new = blocks * 2; | |
68 | + if (new != blocks * 2) | |
69 | + return -EINVAL; /* unsigned long long to sector_t overflow */ | |
70 | + | |
71 | + *sectors = new; | |
72 | + return 0; | |
73 | +} | |
74 | + | |
75 | static ssize_t | |
76 | rdev_size_store(mdk_rdev_t *rdev, const char *buf, size_t len) | |
77 | { | |
78 | @@ -3398,6 +3422,57 @@ static struct md_sysfs_entry md_reshape_ | |
79 | __ATTR(reshape_position, S_IRUGO|S_IWUSR, reshape_position_show, | |
80 | reshape_position_store); | |
81 | ||
82 | +static ssize_t | |
83 | +array_size_show(mddev_t *mddev, char *page) | |
84 | +{ | |
85 | + if (mddev->external_size) | |
86 | + return sprintf(page, "%llu\n", | |
87 | + (unsigned long long)mddev->array_sectors/2); | |
88 | + else | |
89 | + return sprintf(page, "default\n"); | |
90 | +} | |
91 | + | |
92 | +static ssize_t | |
93 | +array_size_store(mddev_t *mddev, const char *buf, size_t len) | |
94 | +{ | |
95 | + sector_t sectors; | |
96 | + | |
97 | + if (strncmp(buf, "default", 7) == 0) { | |
98 | + if (mddev->pers) | |
99 | + sectors = mddev->pers->size(mddev, 0, 0); | |
100 | + else | |
101 | + sectors = mddev->array_sectors; | |
102 | + | |
103 | + mddev->external_size = 0; | |
104 | + } else { | |
105 | + if (strict_blocks_to_sectors(buf, &sectors) < 0) | |
106 | + return -EINVAL; | |
107 | + if (mddev->pers && mddev->pers->size(mddev, 0, 0) < sectors) | |
108 | + return -EINVAL; | |
109 | + | |
110 | + mddev->external_size = 1; | |
111 | + } | |
112 | + | |
113 | + mddev->array_sectors = sectors; | |
114 | + set_capacity(mddev->gendisk, mddev->array_sectors); | |
115 | + if (mddev->pers) { | |
116 | + struct block_device *bdev = bdget_disk(mddev->gendisk, 0); | |
117 | + | |
118 | + if (bdev) { | |
119 | + mutex_lock(&bdev->bd_inode->i_mutex); | |
120 | + i_size_write(bdev->bd_inode, | |
121 | + (loff_t)mddev->array_sectors << 9); | |
122 | + mutex_unlock(&bdev->bd_inode->i_mutex); | |
123 | + bdput(bdev); | |
124 | + } | |
125 | + } | |
126 | + | |
127 | + return len; | |
128 | +} | |
129 | + | |
130 | +static struct md_sysfs_entry md_array_size = | |
131 | +__ATTR(array_size, S_IRUGO|S_IWUSR, array_size_show, | |
132 | + array_size_store); | |
133 | ||
134 | static struct attribute *md_default_attrs[] = { | |
135 | &md_level.attr, | |
136 | @@ -3411,6 +3486,7 @@ static struct attribute *md_default_attr | |
137 | &md_safe_delay.attr, | |
138 | &md_array_state.attr, | |
139 | &md_reshape_position.attr, | |
140 | + &md_array_size.attr, | |
141 | NULL, | |
142 | }; | |
143 | ||
144 | @@ -3722,7 +3798,17 @@ static int do_md_run(mddev_t * mddev) | |
145 | err = mddev->pers->run(mddev); | |
146 | if (err) | |
147 | printk(KERN_ERR "md: pers->run() failed ...\n"); | |
148 | - else if (mddev->pers->sync_request) { | |
149 | + else if (mddev->pers->size(mddev, 0, 0) < mddev->array_sectors) { | |
150 | + WARN_ONCE(!mddev->external_size, "%s: default size too small," | |
151 | + " but 'external_size' not in effect?\n", __func__); | |
152 | + printk(KERN_ERR | |
153 | + "md: invalid array_size %llu > default size %llu\n", | |
154 | + (unsigned long long)mddev->array_sectors / 2, | |
155 | + (unsigned long long)mddev->pers->size(mddev, 0, 0) / 2); | |
156 | + err = -EINVAL; | |
157 | + mddev->pers->stop(mddev); | |
158 | + } | |
159 | + if (err == 0 && mddev->pers->sync_request) { | |
160 | err = bitmap_create(mddev); | |
161 | if (err) { | |
162 | printk(KERN_ERR "%s: failed to create bitmap (%d)\n", | |
163 | @@ -4664,10 +4750,23 @@ static int set_array_info(mddev_t * mdde | |
164 | ||
165 | void md_set_array_sectors(mddev_t *mddev, sector_t array_sectors) | |
166 | { | |
167 | + WARN(!mddev_is_locked(mddev), "%s: unlocked mddev!\n", __func__); | |
168 | + | |
169 | + if (mddev->external_size) | |
170 | + return; | |
171 | + | |
172 | mddev->array_sectors = array_sectors; | |
173 | } | |
174 | EXPORT_SYMBOL(md_set_array_sectors); | |
175 | ||
176 | +void md_set_array_sectors_lock(mddev_t *mddev, sector_t array_sectors) | |
177 | +{ | |
178 | + mddev_lock(mddev); | |
179 | + md_set_array_sectors(mddev, array_sectors); | |
180 | + mddev_unlock(mddev); | |
181 | +} | |
182 | +EXPORT_SYMBOL(md_set_array_sectors_lock); | |
183 | + | |
184 | static int update_size(mddev_t *mddev, sector_t num_sectors) | |
185 | { | |
186 | mdk_rdev_t * rdev; | |
187 | --- linux-2.6.27-SLE11_BRANCH.orig/drivers/md/raid0.c | |
188 | +++ linux-2.6.27-SLE11_BRANCH/drivers/md/raid0.c | |
189 | @@ -314,7 +314,7 @@ static int raid0_run (mddev_t *mddev) | |
190 | printk("raid0 : conf->hash_spacing is %llu blocks.\n", | |
191 | (unsigned long long)conf->hash_spacing); | |
192 | { | |
193 | - sector_t s = mddev->array_sectors / 2; | |
194 | + sector_t s = raid0_size(mddev, 0, 0) / 2; | |
195 | sector_t space = conf->hash_spacing; | |
196 | int round; | |
197 | conf->preshift = 0; | |
198 | --- linux-2.6.27-SLE11_BRANCH.orig/drivers/md/raid1.c | |
199 | +++ linux-2.6.27-SLE11_BRANCH/drivers/md/raid1.c | |
200 | @@ -2116,14 +2116,16 @@ static int raid1_resize(mddev_t *mddev, | |
201 | * worth it. | |
202 | */ | |
203 | md_set_array_sectors(mddev, raid1_size(mddev, sectors, 0)); | |
204 | + if (mddev->array_sectors > raid1_size(mddev, sectors, 0)) | |
205 | + return -EINVAL; | |
206 | set_capacity(mddev->gendisk, mddev->array_sectors); | |
207 | mddev->changed = 1; | |
208 | - if (mddev->array_sectors / 2 > mddev->size && | |
209 | + if (sectors / 2 > mddev->size && | |
210 | mddev->recovery_cp == MaxSector) { | |
211 | mddev->recovery_cp = mddev->size << 1; | |
212 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); | |
213 | } | |
214 | - mddev->size = mddev->array_sectors / 2; | |
215 | + mddev->size = sectors / 2; | |
216 | mddev->resync_max_sectors = sectors; | |
217 | return 0; | |
218 | } | |
219 | --- linux-2.6.27-SLE11_BRANCH.orig/drivers/md/raid10.c | |
220 | +++ linux-2.6.27-SLE11_BRANCH/drivers/md/raid10.c | |
221 | @@ -2187,7 +2187,7 @@ static int run(mddev_t *mddev) | |
222 | * Ok, everything is just fine now | |
223 | */ | |
224 | md_set_array_sectors(mddev, raid10_size(mddev, 0, 0)); | |
225 | - mddev->resync_max_sectors = mddev->array_sectors; | |
226 | + mddev->resync_max_sectors = raid10_size(mddev, 0, 0); | |
227 | ||
228 | mddev->queue->unplug_fn = raid10_unplug; | |
229 | mddev->queue->backing_dev_info.congested_fn = raid10_congested; | |
230 | --- linux-2.6.27-SLE11_BRANCH.orig/drivers/md/raid5.c | |
231 | +++ linux-2.6.27-SLE11_BRANCH/drivers/md/raid5.c | |
232 | @@ -3700,6 +3700,8 @@ static int make_request(struct request_q | |
233 | return 0; | |
234 | } | |
235 | ||
236 | +static sector_t raid5_size(mddev_t *mddev, sector_t sectors, int raid_disks); | |
237 | + | |
238 | static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped) | |
239 | { | |
240 | /* reshaping is quite different to recovery/resync so it is | |
241 | @@ -3778,7 +3780,7 @@ static sector_t reshape_request(mddev_t | |
242 | j == sh->qd_idx) | |
243 | continue; | |
244 | s = compute_blocknr(sh, j); | |
245 | - if (s < mddev->array_sectors) { | |
246 | + if (s < raid5_size(mddev, 0, 0)) { | |
247 | skipped = 1; | |
248 | continue; | |
249 | } | |
250 | @@ -4660,6 +4662,9 @@ static int raid5_resize(mddev_t *mddev, | |
251 | sectors &= ~((sector_t)mddev->chunk_size/512 - 1); | |
252 | md_set_array_sectors(mddev, raid5_size(mddev, sectors, | |
253 | mddev->raid_disks)); | |
254 | + if (mddev->array_sectors > | |
255 | + raid5_size(mddev, sectors, mddev->raid_disks)) | |
256 | + return -EINVAL; | |
257 | set_capacity(mddev->gendisk, mddev->array_sectors); | |
258 | mddev->changed = 1; | |
259 | if (sectors/2 > mddev->size && mddev->recovery_cp == MaxSector) { | |
260 | @@ -4798,7 +4803,7 @@ static void end_reshape(raid5_conf_t *co | |
261 | if (!test_bit(MD_RECOVERY_INTR, &conf->mddev->recovery)) { | |
262 | mddev_t *mddev = conf->mddev; | |
263 | ||
264 | - md_set_array_sectors(mddev, raid5_size(mddev, 0, | |
265 | + md_set_array_sectors_lock(mddev, raid5_size(mddev, 0, | |
266 | conf->raid_disks)); | |
267 | set_capacity(mddev->gendisk, mddev->array_sectors); | |
268 | mddev->changed = 1; | |
269 | --- linux-2.6.27-SLE11_BRANCH.orig/include/linux/raid/md_k.h | |
270 | +++ linux-2.6.27-SLE11_BRANCH/include/linux/raid/md_k.h | |
271 | @@ -152,6 +152,8 @@ struct mddev_s | |
272 | int max_disks; | |
273 | sector_t size; /* used size of component devices */ | |
274 | sector_t array_sectors; /* exported array size */ | |
275 | + int external_size; /* size managed | |
276 | + * externally */ | |
277 | __u64 events; | |
278 | ||
279 | char uuid[16]; | |
280 | @@ -394,3 +396,4 @@ static inline void safe_put_page(struct | |
281 | #endif /* CONFIG_BLOCK */ | |
282 | #endif | |
283 | ||
284 | +extern void md_set_array_sectors_lock(mddev_t *mddev, sector_t array_sectors); |