]>
Commit | Line | Data |
---|---|---|
e5329c37 NB |
1 | /* |
2 | * mdadm - manage Linux "md" devices aka RAID arrays. | |
3 | * | |
4f589ad0 | 4 | * Copyright (C) 2001-2006 Neil Brown <neilb@suse.de> |
e5329c37 NB |
5 | * |
6 | * | |
7 | * This program is free software; you can redistribute it and/or modify | |
8 | * it under the terms of the GNU General Public License as published by | |
9 | * the Free Software Foundation; either version 2 of the License, or | |
10 | * (at your option) any later version. | |
11 | * | |
12 | * This program is distributed in the hope that it will be useful, | |
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
15 | * GNU General Public License for more details. | |
16 | * | |
17 | * You should have received a copy of the GNU General Public License | |
18 | * along with this program; if not, write to the Free Software | |
19 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
20 | * | |
21 | * Author: Neil Brown | |
22 | * Email: <neilb@cse.unsw.edu.au> | |
23 | * Paper: Neil Brown | |
24 | * School of Computer Science and Engineering | |
25 | * The University of New South Wales | |
26 | * Sydney, 2052 | |
27 | * Australia | |
28 | */ | |
29 | #include "mdadm.h" | |
30 | #include "dlink.h" | |
31 | ||
32 | #if ! defined(__BIG_ENDIAN) && ! defined(__LITTLE_ENDIAN) | |
33 | #error no endian defined | |
34 | #endif | |
35 | #include "md_u.h" | |
36 | #include "md_p.h" | |
37 | ||
38 | int Grow_Add_device(char *devname, int fd, char *newdev) | |
39 | { | |
40 | /* Add a device to an active array. | |
41 | * Currently, just extend a linear array. | |
42 | * This requires writing a new superblock on the | |
43 | * new device, calling the kernel to add the device, | |
44 | * and if that succeeds, update the superblock on | |
45 | * all other devices. | |
46 | * This means that we need to *find* all other devices. | |
47 | */ | |
4b1ac34b NB |
48 | struct mdinfo info; |
49 | ||
e5329c37 NB |
50 | struct stat stb; |
51 | int nfd, fd2; | |
52 | int d, nd; | |
82d9eba6 | 53 | struct supertype *st = NULL; |
aba69144 | 54 | |
e5329c37 | 55 | |
4b1ac34b | 56 | if (ioctl(fd, GET_ARRAY_INFO, &info.array) < 0) { |
e5329c37 NB |
57 | fprintf(stderr, Name ": cannot get array info for %s\n", devname); |
58 | return 1; | |
59 | } | |
60 | ||
1686dc25 | 61 | st = super_by_fd(fd); |
82d9eba6 | 62 | if (!st) { |
f9ce90ba NB |
63 | fprintf(stderr, Name ": cannot handle arrays with superblock version %d\n", info.array.major_version); |
64 | return 1; | |
65 | } | |
66 | ||
4b1ac34b | 67 | if (info.array.level != -1) { |
e5329c37 NB |
68 | fprintf(stderr, Name ": can only add devices to linear arrays\n"); |
69 | return 1; | |
70 | } | |
71 | ||
6416d527 | 72 | nfd = open(newdev, O_RDWR|O_EXCL|O_DIRECT); |
e5329c37 NB |
73 | if (nfd < 0) { |
74 | fprintf(stderr, Name ": cannot open %s\n", newdev); | |
75 | return 1; | |
76 | } | |
77 | fstat(nfd, &stb); | |
78 | if ((stb.st_mode & S_IFMT) != S_IFBLK) { | |
79 | fprintf(stderr, Name ": %s is not a block device!\n", newdev); | |
80 | close(nfd); | |
81 | return 1; | |
82 | } | |
83 | /* now check out all the devices and make sure we can read the superblock */ | |
4b1ac34b | 84 | for (d=0 ; d < info.array.raid_disks ; d++) { |
e5329c37 NB |
85 | mdu_disk_info_t disk; |
86 | char *dv; | |
87 | ||
88 | disk.number = d; | |
89 | if (ioctl(fd, GET_DISK_INFO, &disk) < 0) { | |
90 | fprintf(stderr, Name ": cannot get device detail for device %d\n", | |
91 | d); | |
92 | return 1; | |
93 | } | |
16c6fa80 | 94 | dv = map_dev(disk.major, disk.minor, 1); |
e5329c37 NB |
95 | if (!dv) { |
96 | fprintf(stderr, Name ": cannot find device file for device %d\n", | |
97 | d); | |
98 | return 1; | |
99 | } | |
16c6fa80 | 100 | fd2 = dev_open(dv, O_RDWR); |
e5329c37 NB |
101 | if (!fd2) { |
102 | fprintf(stderr, Name ": cannot open device file %s\n", dv); | |
103 | return 1; | |
104 | } | |
3da92f27 NB |
105 | st->ss->free_super(st); |
106 | ||
107 | if (st->ss->load_super(st, fd2, NULL)) { | |
e5329c37 NB |
108 | fprintf(stderr, Name ": cannot find super block on %s\n", dv); |
109 | close(fd2); | |
110 | return 1; | |
111 | } | |
112 | close(fd2); | |
113 | } | |
114 | /* Ok, looks good. Lets update the superblock and write it out to | |
115 | * newdev. | |
116 | */ | |
aba69144 | 117 | |
4b1ac34b NB |
118 | info.disk.number = d; |
119 | info.disk.major = major(stb.st_rdev); | |
120 | info.disk.minor = minor(stb.st_rdev); | |
121 | info.disk.raid_disk = d; | |
122 | info.disk.state = (1 << MD_DISK_SYNC) | (1 << MD_DISK_ACTIVE); | |
3da92f27 | 123 | st->ss->update_super(st, &info, "linear-grow-new", newdev, |
f752781f | 124 | 0, 0, NULL); |
e5329c37 | 125 | |
3da92f27 | 126 | if (st->ss->store_super(st, nfd)) { |
f752781f NB |
127 | fprintf(stderr, Name ": Cannot store new superblock on %s\n", |
128 | newdev); | |
e5329c37 NB |
129 | close(nfd); |
130 | return 1; | |
131 | } | |
e5329c37 | 132 | close(nfd); |
4b1ac34b NB |
133 | |
134 | if (ioctl(fd, ADD_NEW_DISK, &info.disk) != 0) { | |
e5329c37 NB |
135 | fprintf(stderr, Name ": Cannot add new disk to this array\n"); |
136 | return 1; | |
137 | } | |
138 | /* Well, that seems to have worked. | |
139 | * Now go through and update all superblocks | |
140 | */ | |
141 | ||
4b1ac34b | 142 | if (ioctl(fd, GET_ARRAY_INFO, &info.array) < 0) { |
e5329c37 NB |
143 | fprintf(stderr, Name ": cannot get array info for %s\n", devname); |
144 | return 1; | |
145 | } | |
146 | ||
147 | nd = d; | |
4b1ac34b | 148 | for (d=0 ; d < info.array.raid_disks ; d++) { |
e5329c37 NB |
149 | mdu_disk_info_t disk; |
150 | char *dv; | |
151 | ||
152 | disk.number = d; | |
153 | if (ioctl(fd, GET_DISK_INFO, &disk) < 0) { | |
154 | fprintf(stderr, Name ": cannot get device detail for device %d\n", | |
155 | d); | |
156 | return 1; | |
157 | } | |
16c6fa80 | 158 | dv = map_dev(disk.major, disk.minor, 1); |
e5329c37 NB |
159 | if (!dv) { |
160 | fprintf(stderr, Name ": cannot find device file for device %d\n", | |
161 | d); | |
162 | return 1; | |
163 | } | |
16c6fa80 | 164 | fd2 = dev_open(dv, O_RDWR); |
e5329c37 NB |
165 | if (fd2 < 0) { |
166 | fprintf(stderr, Name ": cannot open device file %s\n", dv); | |
167 | return 1; | |
168 | } | |
3da92f27 | 169 | if (st->ss->load_super(st, fd2, NULL)) { |
e5329c37 NB |
170 | fprintf(stderr, Name ": cannot find super block on %s\n", dv); |
171 | close(fd); | |
172 | return 1; | |
173 | } | |
4b1ac34b NB |
174 | info.array.raid_disks = nd+1; |
175 | info.array.nr_disks = nd+1; | |
176 | info.array.active_disks = nd+1; | |
177 | info.array.working_disks = nd+1; | |
f752781f | 178 | |
3da92f27 | 179 | st->ss->update_super(st, &info, "linear-grow-update", dv, |
f752781f | 180 | 0, 0, NULL); |
aba69144 | 181 | |
3da92f27 | 182 | if (st->ss->store_super(st, fd2)) { |
e5329c37 NB |
183 | fprintf(stderr, Name ": Cannot store new superblock on %s\n", dv); |
184 | close(fd2); | |
185 | return 1; | |
186 | } | |
187 | close(fd2); | |
188 | } | |
189 | ||
190 | return 0; | |
191 | } | |
f5e166fe | 192 | |
8fac0577 | 193 | int Grow_addbitmap(char *devname, int fd, char *file, int chunk, int delay, int write_behind, int force) |
f5e166fe NB |
194 | { |
195 | /* | |
196 | * First check that array doesn't have a bitmap | |
197 | * Then create the bitmap | |
198 | * Then add it | |
199 | * | |
200 | * For internal bitmaps, we need to check the version, | |
201 | * find all the active devices, and write the bitmap block | |
202 | * to all devices | |
203 | */ | |
204 | mdu_bitmap_file_t bmf; | |
205 | mdu_array_info_t array; | |
206 | struct supertype *st; | |
dcec9ee5 NB |
207 | int major = BITMAP_MAJOR_HI; |
208 | int vers = md_get_version(fd); | |
8fac0577 | 209 | unsigned long long bitmapsize, array_size; |
dcec9ee5 NB |
210 | |
211 | if (vers < 9003) { | |
212 | major = BITMAP_MAJOR_HOSTENDIAN; | |
213 | #ifdef __BIG_ENDIAN | |
214 | fprintf(stderr, Name ": Warning - bitmaps created on this kernel are not portable\n" | |
215 | " between different architectured. Consider upgrading the Linux kernel.\n"); | |
216 | #endif | |
217 | } | |
f5e166fe NB |
218 | |
219 | if (ioctl(fd, GET_BITMAP_FILE, &bmf) != 0) { | |
353632d9 | 220 | if (errno == ENOMEM) |
f5e166fe NB |
221 | fprintf(stderr, Name ": Memory allocation failure.\n"); |
222 | else | |
223 | fprintf(stderr, Name ": bitmaps not supported by this kernel.\n"); | |
224 | return 1; | |
225 | } | |
226 | if (bmf.pathname[0]) { | |
fe80f49b NB |
227 | if (strcmp(file,"none")==0) { |
228 | if (ioctl(fd, SET_BITMAP_FILE, -1)!= 0) { | |
229 | fprintf(stderr, Name ": failed to remove bitmap %s\n", | |
230 | bmf.pathname); | |
231 | return 1; | |
232 | } | |
233 | return 0; | |
234 | } | |
f5e166fe NB |
235 | fprintf(stderr, Name ": %s already has a bitmap (%s)\n", |
236 | devname, bmf.pathname); | |
237 | return 1; | |
238 | } | |
239 | if (ioctl(fd, GET_ARRAY_INFO, &array) != 0) { | |
240 | fprintf(stderr, Name ": cannot get array status for %s\n", devname); | |
241 | return 1; | |
242 | } | |
243 | if (array.state & (1<<MD_SB_BITMAP_PRESENT)) { | |
fe80f49b NB |
244 | if (strcmp(file, "none")==0) { |
245 | array.state &= ~(1<<MD_SB_BITMAP_PRESENT); | |
246 | if (ioctl(fd, SET_ARRAY_INFO, &array)!= 0) { | |
247 | fprintf(stderr, Name ": failed to remove internal bitmap.\n"); | |
248 | return 1; | |
249 | } | |
250 | return 0; | |
251 | } | |
f5e166fe NB |
252 | fprintf(stderr, Name ": Internal bitmap already present on %s\n", |
253 | devname); | |
254 | return 1; | |
255 | } | |
5b28bd56 NB |
256 | if (array.level <= 0) { |
257 | fprintf(stderr, Name ": Bitmaps not meaningful with level %s\n", | |
258 | map_num(pers, array.level)?:"of this array"); | |
259 | return 1; | |
260 | } | |
8fac0577 NB |
261 | bitmapsize = array.size; |
262 | bitmapsize <<= 1; | |
beae1dfe | 263 | if (get_dev_size(fd, NULL, &array_size) && |
8fac0577 NB |
264 | array_size > (0x7fffffffULL<<9)) { |
265 | /* Array is big enough that we cannot trust array.size | |
266 | * try other approaches | |
267 | */ | |
268 | bitmapsize = get_component_size(fd); | |
269 | } | |
8fac0577 NB |
270 | if (bitmapsize == 0) { |
271 | fprintf(stderr, Name ": Cannot reliably determine size of array to create bitmap - sorry.\n"); | |
272 | return 1; | |
273 | } | |
274 | ||
f9c25f1d | 275 | if (array.level == 10) { |
8686f3ed | 276 | int ncopies = (array.layout&255)*((array.layout>>8)&255); |
f9c25f1d NB |
277 | bitmapsize = bitmapsize * array.raid_disks / ncopies; |
278 | } | |
279 | ||
1686dc25 | 280 | st = super_by_fd(fd); |
f5e166fe NB |
281 | if (!st) { |
282 | fprintf(stderr, Name ": Cannot understand version %d.%d\n", | |
283 | array.major_version, array.minor_version); | |
284 | return 1; | |
285 | } | |
fe80f49b NB |
286 | if (strcmp(file, "none") == 0) { |
287 | fprintf(stderr, Name ": no bitmap found on %s\n", devname); | |
288 | return 1; | |
289 | } else if (strcmp(file, "internal") == 0) { | |
f5e166fe | 290 | int d; |
ea329559 | 291 | for (d=0; d< st->max_devs; d++) { |
f5e166fe NB |
292 | mdu_disk_info_t disk; |
293 | char *dv; | |
294 | disk.number = d; | |
295 | if (ioctl(fd, GET_DISK_INFO, &disk) < 0) | |
296 | continue; | |
297 | if (disk.major == 0 && | |
298 | disk.minor == 0) | |
299 | continue; | |
300 | if ((disk.state & (1<<MD_DISK_SYNC))==0) | |
301 | continue; | |
16c6fa80 | 302 | dv = map_dev(disk.major, disk.minor, 1); |
f5e166fe | 303 | if (dv) { |
16c6fa80 | 304 | int fd2 = dev_open(dv, O_RDWR); |
f5e166fe NB |
305 | if (fd2 < 0) |
306 | continue; | |
3da92f27 | 307 | if (st->ss->load_super(st, fd2, NULL)==0) { |
199171a2 | 308 | if (st->ss->add_internal_bitmap( |
3da92f27 | 309 | st, |
199171a2 NB |
310 | &chunk, delay, write_behind, |
311 | bitmapsize, 0, major) | |
312 | ) | |
3da92f27 | 313 | st->ss->write_bitmap(st, fd2); |
21e92547 NB |
314 | else { |
315 | fprintf(stderr, Name ": failed to create internal bitmap - chunksize problem.\n"); | |
316 | close(fd2); | |
317 | return 1; | |
318 | } | |
f5e166fe NB |
319 | } |
320 | close(fd2); | |
321 | } | |
322 | } | |
323 | array.state |= (1<<MD_SB_BITMAP_PRESENT); | |
324 | if (ioctl(fd, SET_ARRAY_INFO, &array)!= 0) { | |
325 | fprintf(stderr, Name ": failed to set internal bitmap.\n"); | |
326 | return 1; | |
327 | } | |
fe80f49b NB |
328 | } else { |
329 | int uuid[4]; | |
330 | int bitmap_fd; | |
331 | int d; | |
332 | int max_devs = st->max_devs; | |
fe80f49b NB |
333 | |
334 | /* try to load a superblock */ | |
335 | for (d=0; d<max_devs; d++) { | |
336 | mdu_disk_info_t disk; | |
337 | char *dv; | |
338 | int fd2; | |
339 | disk.number = d; | |
340 | if (ioctl(fd, GET_DISK_INFO, &disk) < 0) | |
341 | continue; | |
342 | if ((disk.major==0 && disk.minor==0) || | |
343 | (disk.state & (1<<MD_DISK_REMOVED))) | |
344 | continue; | |
16c6fa80 | 345 | dv = map_dev(disk.major, disk.minor, 1); |
fe80f49b | 346 | if (!dv) continue; |
16c6fa80 | 347 | fd2 = dev_open(dv, O_RDONLY); |
fe80f49b | 348 | if (fd2 >= 0 && |
3da92f27 | 349 | st->ss->load_super(st, fd2, NULL) == 0) { |
fe80f49b | 350 | close(fd2); |
3da92f27 | 351 | st->ss->uuid_from_super(st, uuid); |
fe80f49b NB |
352 | break; |
353 | } | |
354 | close(fd2); | |
355 | } | |
356 | if (d == max_devs) { | |
357 | fprintf(stderr, Name ": cannot find UUID for array!\n"); | |
358 | return 1; | |
359 | } | |
8fac0577 | 360 | if (CreateBitmap(file, force, (char*)uuid, chunk, |
f9c25f1d | 361 | delay, write_behind, bitmapsize, major)) { |
fe80f49b NB |
362 | return 1; |
363 | } | |
364 | bitmap_fd = open(file, O_RDWR); | |
365 | if (bitmap_fd < 0) { | |
8fac0577 | 366 | fprintf(stderr, Name ": weird: %s cannot be opened\n", |
fe80f49b NB |
367 | file); |
368 | return 1; | |
369 | } | |
370 | if (ioctl(fd, SET_BITMAP_FILE, bitmap_fd) < 0) { | |
371 | fprintf(stderr, Name ": Cannot set bitmap file for %s: %s\n", | |
372 | devname, strerror(errno)); | |
373 | return 1; | |
374 | } | |
375 | } | |
f5e166fe NB |
376 | |
377 | return 0; | |
378 | } | |
379 | ||
e86c9dd6 NB |
380 | |
381 | /* | |
382 | * When reshaping an array we might need to backup some data. | |
383 | * This is written to all spares with a 'super_block' describing it. | |
384 | * The superblock goes 1K from the end of the used space on the | |
385 | * device. | |
386 | * It is written after the backup is complete. | |
387 | * It has the following structure. | |
388 | */ | |
389 | ||
390 | struct mdp_backup_super { | |
391 | char magic[16]; /* md_backup_data-1 */ | |
392 | __u8 set_uuid[16]; | |
393 | __u64 mtime; | |
394 | /* start/sizes in 512byte sectors */ | |
395 | __u64 devstart; | |
396 | __u64 arraystart; | |
397 | __u64 length; | |
398 | __u32 sb_csum; /* csum of preceeding bytes. */ | |
94a20f0c N |
399 | __u8 pad[512-68]; |
400 | } __attribute__((aligned(512))) bsb; | |
e86c9dd6 NB |
401 | |
/*
 * Checksum stored in the sb_csum field of the backup superblock.
 *
 * Starting from zero, the running value is shifted left by three bits
 * and a byte is added, once for each of the 'len' bytes; the result is
 * returned converted with __cpu_to_le32().
 *
 * NOTE(review): the byte added on every step is buf[0], never buf[i].
 * That looks unintended, but the value is written to disk by
 * Grow_reshape(), so changing it would change what gets stored -
 * confirm compatibility requirements before touching it.
 */
int bsb_csum(char *buf, int len)
{
	int sum = 0;
	int remaining = len;

	while (remaining-- > 0)
		sum = (sum << 3) + buf[0];

	return __cpu_to_le32(sum);
}
410 | ||
06b0d786 | 411 | int Grow_reshape(char *devname, int fd, int quiet, char *backup_file, |
e86c9dd6 NB |
412 | long long size, |
413 | int level, int layout, int chunksize, int raid_disks) | |
414 | { | |
415 | /* Make some changes in the shape of an array. | |
416 | * The kernel must support the change. | |
417 | * Different reshapes have subtly different meaning for different | |
418 | * levels, so we need to check the current state of the array | |
419 | * and go from there. | |
420 | */ | |
421 | struct mdu_array_info_s array; | |
422 | char *c; | |
423 | ||
e86c9dd6 NB |
424 | struct supertype *st; |
425 | ||
426 | int nlevel, olevel; | |
427 | int nchunk, ochunk; | |
428 | int nlayout, olayout; | |
429 | int ndisks, odisks; | |
430 | int ndata, odata; | |
431 | unsigned long long nstripe, ostripe, last_block; | |
432 | int *fdlist; | |
433 | unsigned long long *offsets; | |
434 | int d, i, spares; | |
435 | int nrdisks; | |
436 | int err; | |
e86c9dd6 | 437 | |
7e0f6979 | 438 | struct mdinfo *sra; |
06c7f68e | 439 | struct mdinfo *sd; |
e86c9dd6 NB |
440 | |
441 | if (ioctl(fd, GET_ARRAY_INFO, &array) < 0) { | |
442 | fprintf(stderr, Name ": %s is not an active md array - aborting\n", | |
443 | devname); | |
444 | return 1; | |
445 | } | |
446 | c = map_num(pers, array.level); | |
447 | if (c == NULL) c = "-unknown-"; | |
448 | switch(array.level) { | |
449 | default: /* raid0, linear, multipath cannot be reconfigured */ | |
450 | fprintf(stderr, Name ": %s array %s cannot be reshaped.\n", | |
451 | c, devname); | |
452 | return 1; | |
453 | ||
454 | case LEVEL_FAULTY: /* only 'layout' change is permitted */ | |
455 | ||
456 | if (size >= 0) { | |
457 | fprintf(stderr, Name ": %s: Cannot change size of a 'faulty' array\n", | |
458 | devname); | |
459 | return 1; | |
460 | } | |
461 | if (level != UnSet && level != LEVEL_FAULTY) { | |
462 | fprintf(stderr, Name ": %s: Cannot change RAID level of a 'faulty' array\n", | |
463 | devname); | |
464 | return 1; | |
465 | } | |
466 | if (chunksize || raid_disks) { | |
467 | fprintf(stderr, Name ": %s: Cannot change chunksize or disks of a 'faulty' array\n", | |
468 | devname); | |
469 | return 1; | |
470 | } | |
471 | if (layout == UnSet) | |
472 | return 0; /* nothing to do.... */ | |
473 | ||
474 | array.layout = layout; | |
475 | if (ioctl(fd, SET_ARRAY_INFO, &array) != 0) { | |
476 | fprintf(stderr, Name ": Cannot set layout for %s: %s\n", | |
477 | devname, strerror(errno)); | |
478 | return 1; | |
479 | } | |
480 | if (!quiet) | |
481 | printf("layout for %s set to %d\n", devname, array.layout); | |
482 | return 0; | |
483 | ||
484 | case 1: /* raid_disks and size can each be changed. They are independant */ | |
485 | ||
486 | if (level != UnSet && level != 1) { | |
487 | fprintf(stderr, Name ": %s: Cannot change RAID level of a RAID1 array.\n", | |
488 | devname); | |
489 | return 1; | |
490 | } | |
491 | if (chunksize || layout != UnSet) { | |
492 | fprintf(stderr, Name ": %s: Cannot change chunk size of layout for a RAID1 array.\n", | |
493 | devname); | |
494 | return 1; | |
495 | } | |
496 | ||
497 | /* Each can trigger a resync/recovery which will block the | |
498 | * other from happening. Later we could block | |
499 | * resync for the duration via 'sync_action'... | |
500 | */ | |
9860f271 | 501 | if (raid_disks > 0) { |
e86c9dd6 | 502 | array.raid_disks = raid_disks; |
9860f271 NB |
503 | if (ioctl(fd, SET_ARRAY_INFO, &array) != 0) { |
504 | fprintf(stderr, Name ": Cannot set raid-devices for %s: %s\n", | |
505 | devname, strerror(errno)); | |
506 | return 1; | |
507 | } | |
508 | } | |
509 | if (size >= 0) { | |
e86c9dd6 | 510 | array.size = size; |
9860f271 NB |
511 | if (ioctl(fd, SET_ARRAY_INFO, &array) != 0) { |
512 | fprintf(stderr, Name ": Cannot set device size for %s: %s\n", | |
513 | devname, strerror(errno)); | |
514 | return 1; | |
515 | } | |
e86c9dd6 NB |
516 | } |
517 | return 0; | |
518 | ||
519 | case 4: | |
520 | case 5: | |
521 | case 6: | |
1686dc25 NB |
522 | st = super_by_fd(fd); |
523 | ||
758d3a8e | 524 | /* size can be changed independently. |
e86c9dd6 NB |
525 | * layout/chunksize/raid_disks/level can be changed |
526 | * though the kernel may not support it all. | |
527 | * If 'suspend_lo' is not present in devfs, then | |
528 | * these cannot be changed. | |
529 | */ | |
530 | if (size >= 0) { | |
531 | /* Cannot change other details as well.. */ | |
532 | if (layout != UnSet || | |
533 | chunksize != 0 || | |
534 | raid_disks != 0 || | |
535 | level != UnSet) { | |
536 | fprintf(stderr, Name ": %s: Cannot change shape as well as size of a %s array.\n", | |
537 | devname, c); | |
538 | return 1; | |
539 | } | |
540 | array.size = size; | |
541 | if (ioctl(fd, SET_ARRAY_INFO, &array) != 0) { | |
542 | fprintf(stderr, Name ": Cannot set device size/shape for %s: %s\n", | |
543 | devname, strerror(errno)); | |
544 | return 1; | |
545 | } | |
546 | return 0; | |
547 | } | |
548 | /* Ok, just change the shape. This can be awkward. | |
549 | * There are three possibilities. | |
550 | * 1/ The array will shrink. We don't support this | |
551 | * possibility. Maybe one day... | |
552 | * 2/ The array will not change size. This is easy enough | |
553 | * to do, but not reliably. If the process is aborted | |
554 | * the array *will* be corrupted. So maybe we can allow | |
555 | * this but only if the user is really certain. e.g. | |
556 | * --really-risk-everything | |
557 | * 3/ The array will grow. This can be reliably achieved. | |
558 | * However the kernel's restripe routines will cheerfully | |
559 | * overwrite some early data before it is safe. So we | |
560 | * need to make a backup of the early parts of the array | |
561 | * and be ready to restore it if rebuild aborts very early. | |
562 | * | |
563 | * We backup data by writing it to all spares (there must be | |
564 | * at least 1, so even raid6->raid5 requires a spare to be | |
565 | * present). | |
566 | * | |
567 | * So: we enumerate the devices in the array and | |
568 | * make sure we can open all of them. | |
569 | * Then we freeze the early part of the array and | |
570 | * backup to the various spares. | |
571 | * Then we request changes and start the reshape. | |
572 | * Monitor progress until it has passed the danger zone. | |
573 | * and finally invalidate the copied data and unfreeze the | |
574 | * start of the array. | |
575 | * | |
576 | * Before we can do this we need to decide: | |
577 | * - will the array grow? Just calculate size | |
578 | * - how much needs to be saved: count stripes. | |
579 | * - where to save data... good question. | |
580 | * | |
581 | */ | |
582 | nlevel = olevel = array.level; | |
583 | nchunk = ochunk = array.chunk_size; | |
584 | nlayout = olayout = array.layout; | |
585 | ndisks = odisks = array.raid_disks; | |
586 | ||
587 | if (level != UnSet) nlevel = level; | |
588 | if (chunksize) nchunk = chunksize; | |
589 | if (layout != UnSet) nlayout = layout; | |
590 | if (raid_disks) ndisks = raid_disks; | |
591 | ||
592 | odata = odisks-1; | |
593 | if (olevel == 6) odata--; /* number of data disks */ | |
594 | ndata = ndisks-1; | |
595 | if (nlevel == 6) ndata--; | |
596 | ||
597 | if (ndata < odata) { | |
598 | fprintf(stderr, Name ": %s: Cannot reduce number of data disks (yet).\n", | |
599 | devname); | |
600 | return 1; | |
601 | } | |
602 | if (ndata == odata) { | |
603 | fprintf(stderr, Name ": %s: Cannot reshape array without increasing size (yet).\n", | |
604 | devname); | |
605 | return 1; | |
606 | } | |
607 | /* Well, it is growing... so how much do we need to backup. | |
608 | * Need to backup a full number of new-stripes, such that the | |
609 | * last one does not over-write any place that it would be read | |
610 | * from | |
611 | */ | |
612 | nstripe = ostripe = 0; | |
353632d9 | 613 | while (nstripe >= ostripe) { |
e86c9dd6 NB |
614 | nstripe += nchunk/512; |
615 | last_block = nstripe * ndata; | |
353632d9 | 616 | ostripe = last_block / odata / (ochunk/512) * (ochunk/512); |
e86c9dd6 | 617 | } |
0aa389dc NB |
618 | fprintf(stderr, Name ": Need to backup %lluK of critical " |
619 | "section..\n", last_block/2); | |
e86c9dd6 NB |
620 | |
621 | sra = sysfs_read(fd, 0, | |
758d3a8e NB |
622 | GET_COMPONENT|GET_DEVS|GET_OFFSET|GET_STATE| |
623 | GET_CACHE); | |
e86c9dd6 NB |
624 | if (!sra) { |
625 | fprintf(stderr, Name ": %s: Cannot get array details from sysfs\n", | |
626 | devname); | |
627 | return 1; | |
628 | } | |
629 | ||
630 | if (last_block >= sra->component_size/2) { | |
631 | fprintf(stderr, Name ": %s: Something wrong - reshape aborted\n", | |
632 | devname); | |
633 | return 1; | |
634 | } | |
7e0f6979 | 635 | if (sra->array.spare_disks == 0 && backup_file == NULL) { |
06b0d786 | 636 | fprintf(stderr, Name ": %s: Cannot grow - need a spare or backup-file to backup critical section\n", |
353632d9 NB |
637 | devname); |
638 | return 1; | |
639 | } | |
e86c9dd6 | 640 | |
7e0f6979 | 641 | nrdisks = array.nr_disks + sra->array.spare_disks; |
e86c9dd6 NB |
642 | /* Now we need to open all these devices so we can read/write. |
643 | */ | |
06b0d786 NB |
644 | fdlist = malloc((1+nrdisks) * sizeof(int)); |
645 | offsets = malloc((1+nrdisks) * sizeof(offsets[0])); | |
e86c9dd6 NB |
646 | if (!fdlist || !offsets) { |
647 | fprintf(stderr, Name ": malloc failed: grow aborted\n"); | |
648 | return 1; | |
649 | } | |
06b0d786 | 650 | for (d=0; d <= nrdisks; d++) |
e86c9dd6 NB |
651 | fdlist[d] = -1; |
652 | d = array.raid_disks; | |
653 | for (sd = sra->devs; sd; sd=sd->next) { | |
06c7f68e | 654 | if (sd->disk.state & (1<<MD_DISK_FAULTY)) |
e86c9dd6 | 655 | continue; |
06c7f68e NB |
656 | if (sd->disk.state & (1<<MD_DISK_SYNC)) { |
657 | char *dn = map_dev(sd->disk.major, | |
658 | sd->disk.minor, 1); | |
659 | fdlist[sd->disk.raid_disk] | |
660 | = dev_open(dn, O_RDONLY); | |
661 | offsets[sd->disk.raid_disk] = sd->data_offset; | |
662 | if (fdlist[sd->disk.raid_disk] < 0) { | |
e86c9dd6 | 663 | fprintf(stderr, Name ": %s: cannot open component %s\n", |
e81cdd9f | 664 | devname, dn?dn:"-unknown-"); |
e86c9dd6 NB |
665 | goto abort; |
666 | } | |
667 | } else { | |
668 | /* spare */ | |
06c7f68e NB |
669 | char *dn = map_dev(sd->disk.major, |
670 | sd->disk.minor, 1); | |
16c6fa80 | 671 | fdlist[d] = dev_open(dn, O_RDWR); |
06c7f68e | 672 | offsets[d] = sd->data_offset; |
e86c9dd6 NB |
673 | if (fdlist[d]<0) { |
674 | fprintf(stderr, Name ": %s: cannot open component %s\n", | |
e81cdd9f | 675 | devname, dn?dn:"-unknown"); |
e86c9dd6 NB |
676 | goto abort; |
677 | } | |
678 | d++; | |
679 | } | |
680 | } | |
681 | for (i=0 ; i<array.raid_disks; i++) | |
682 | if (fdlist[i] < 0) { | |
683 | fprintf(stderr, Name ": %s: failed to find device %d. Array might be degraded.\n" | |
684 | " --grow aborted\n", devname, i); | |
685 | goto abort; | |
686 | } | |
7e0f6979 | 687 | spares = sra->array.spare_disks; |
06b0d786 | 688 | if (backup_file) { |
fb97b4d6 | 689 | fdlist[d] = open(backup_file, O_RDWR|O_CREAT|O_EXCL, S_IRUSR | S_IWUSR); |
06b0d786 NB |
690 | if (fdlist[d] < 0) { |
691 | fprintf(stderr, Name ": %s: cannot create backup file %s: %s\n", | |
692 | devname, backup_file, strerror(errno)); | |
693 | goto abort; | |
694 | } | |
695 | offsets[d] = 8; | |
696 | d++; | |
697 | spares++; | |
698 | } | |
e86c9dd6 | 699 | if (fdlist[array.raid_disks] < 0) { |
06b0d786 | 700 | fprintf(stderr, Name ": %s: failed to find a spare and no backup-file given - --grow aborted\n", |
e86c9dd6 NB |
701 | devname); |
702 | goto abort; | |
703 | } | |
704 | ||
705 | /* Find a superblock */ | |
3da92f27 | 706 | if (st->ss->load_super(st, fdlist[0], NULL)) { |
e86c9dd6 NB |
707 | fprintf(stderr, Name ": %s: Cannot find a superblock\n", |
708 | devname); | |
709 | goto abort; | |
710 | } | |
711 | ||
2efedc7b NB |
712 | |
713 | memcpy(bsb.magic, "md_backup_data-1", 16); | |
3da92f27 | 714 | st->ss->uuid_from_super(st, (int*)&bsb.set_uuid); |
2efedc7b NB |
715 | bsb.mtime = __cpu_to_le64(time(0)); |
716 | bsb.arraystart = 0; | |
717 | bsb.length = __cpu_to_le64(last_block); | |
718 | ||
719 | /* Decide offset for the backup, llseek the spares, and write | |
720 | * a leading superblock 4K earlier. | |
721 | */ | |
e86c9dd6 | 722 | for (i=array.raid_disks; i<d; i++) { |
94a20f0c N |
723 | char abuf[4096+512]; |
724 | char *buf = (char*)(((unsigned long)abuf+511)& ~511); | |
06b0d786 NB |
725 | if (i==d-1 && backup_file) { |
726 | /* This is the backup file */ | |
727 | offsets[i] = 8; | |
728 | } else | |
729 | offsets[i] += sra->component_size - last_block - 8; | |
2efedc7b NB |
730 | if (lseek64(fdlist[i], (offsets[i]<<9) - 4096, 0) |
731 | != (offsets[i]<<9) - 4096) { | |
e86c9dd6 NB |
732 | fprintf(stderr, Name ": could not seek...\n"); |
733 | goto abort; | |
734 | } | |
94a20f0c | 735 | memset(buf, 0, 4096); |
2efedc7b NB |
736 | bsb.devstart = __cpu_to_le64(offsets[i]); |
737 | bsb.sb_csum = bsb_csum((char*)&bsb, ((char*)&bsb.sb_csum)-((char*)&bsb)); | |
738 | memcpy(buf, &bsb, sizeof(bsb)); | |
739 | if (write(fdlist[i], buf, 4096) != 4096) { | |
740 | fprintf(stderr, Name ": could not write leading superblock\n"); | |
741 | goto abort; | |
742 | } | |
e86c9dd6 NB |
743 | } |
744 | array.level = nlevel; | |
745 | array.raid_disks = ndisks; | |
746 | array.chunk_size = nchunk; | |
747 | array.layout = nlayout; | |
748 | if (ioctl(fd, SET_ARRAY_INFO, &array) != 0) { | |
758d3a8e NB |
749 | if (errno == ENOSPC) { |
750 | /* stripe cache is not big enough. | |
751 | * It needs to be 4 times chunksize_size, | |
752 | * and we assume pagesize is 4K | |
753 | */ | |
754 | if (sra->cache_size < 4 * (nchunk/4096)) { | |
755 | sysfs_set_num(sra, NULL, | |
756 | "stripe_cache_size", | |
757 | 4 * (nchunk/4096) +1); | |
758 | if (ioctl(fd, SET_ARRAY_INFO, | |
759 | &array) == 0) | |
760 | goto ok; | |
761 | } | |
762 | } | |
e86c9dd6 NB |
763 | fprintf(stderr, Name ": Cannot set device size/shape for %s: %s\n", |
764 | devname, strerror(errno)); | |
765 | goto abort; | |
766 | } | |
758d3a8e | 767 | ok: ; |
e86c9dd6 NB |
768 | |
769 | /* suspend the relevant region */ | |
770 | sysfs_set_num(sra, NULL, "suspend_hi", 0); /* just in case */ | |
771 | if (sysfs_set_num(sra, NULL, "suspend_lo", 0) < 0 || | |
772 | sysfs_set_num(sra, NULL, "suspend_hi", last_block) < 0) { | |
773 | fprintf(stderr, Name ": %s: failed to suspend device.\n", | |
774 | devname); | |
775 | goto abort_resume; | |
776 | } | |
777 | ||
778 | ||
779 | err = save_stripes(fdlist, offsets, | |
780 | odisks, ochunk, olevel, olayout, | |
781 | spares, fdlist+odisks, | |
06b0d786 | 782 | 0ULL, last_block*512); |
e86c9dd6 NB |
783 | |
784 | /* abort if there was an error */ | |
785 | if (err < 0) { | |
786 | fprintf(stderr, Name ": %s: failed to save critical region\n", | |
787 | devname); | |
788 | goto abort_resume; | |
789 | } | |
2efedc7b | 790 | |
e86c9dd6 | 791 | for (i=odisks; i<d ; i++) { |
353632d9 | 792 | bsb.devstart = __cpu_to_le64(offsets[i]); |
e86c9dd6 | 793 | bsb.sb_csum = bsb_csum((char*)&bsb, ((char*)&bsb.sb_csum)-((char*)&bsb)); |
206c5eae | 794 | if (lseek64(fdlist[i], (offsets[i]+last_block)<<9, 0) < 0 || |
06b0d786 NB |
795 | write(fdlist[i], &bsb, sizeof(bsb)) != sizeof(bsb) || |
796 | fsync(fdlist[i]) != 0) { | |
94a20f0c | 797 | fprintf(stderr, Name ": %s: failed to save metadata for critical region backups.\n", |
206c5eae NB |
798 | devname); |
799 | goto abort_resume; | |
800 | } | |
e86c9dd6 NB |
801 | } |
802 | ||
803 | /* start the reshape happening */ | |
804 | if (sysfs_set_str(sra, NULL, "sync_action", "reshape") < 0) { | |
805 | fprintf(stderr, Name ": %s: failed to initiate reshape\n", | |
806 | devname); | |
807 | goto abort_resume; | |
808 | } | |
809 | /* wait for reshape to pass the critical region */ | |
810 | while(1) { | |
811 | unsigned long long comp; | |
48924014 | 812 | |
206c5eae NB |
813 | if (sysfs_get_ll(sra, NULL, "sync_completed", &comp)<0) { |
814 | sleep(5); | |
e86c9dd6 | 815 | break; |
206c5eae | 816 | } |
e86c9dd6 NB |
817 | if (comp >= nstripe) |
818 | break; | |
93ecfa01 N |
819 | if (comp == 0) { |
820 | /* Maybe it finished already */ | |
821 | char action[20]; | |
822 | if (sysfs_get_str(sra, NULL, "sync_action", | |
823 | action, 20) > 0 && | |
824 | strncmp(action, "reshape", 7) != 0) | |
825 | break; | |
826 | } | |
e86c9dd6 NB |
827 | sleep(1); |
828 | } | |
aba69144 | 829 | |
e86c9dd6 NB |
830 | /* invalidate superblocks */ |
831 | memset(&bsb, 0, sizeof(bsb)); | |
832 | for (i=odisks; i<d ; i++) { | |
833 | lseek64(fdlist[i], (offsets[i]+last_block)<<9, 0); | |
9fca7d62 NB |
834 | if (write(fdlist[i], &bsb, sizeof(bsb)) < 0) { |
835 | fprintf(stderr, Name ": %s: failed to invalidate metadata for raid disk %d\n", | |
836 | devname, i); | |
837 | } | |
e86c9dd6 NB |
838 | } |
839 | ||
840 | /* unsuspend. */ | |
841 | sysfs_set_num(sra, NULL, "suspend_lo", last_block); | |
842 | ||
843 | for (i=0; i<d; i++) | |
844 | if (fdlist[i] >= 0) | |
845 | close(fdlist[i]); | |
846 | free(fdlist); | |
847 | free(offsets); | |
06b0d786 NB |
848 | if (backup_file) |
849 | unlink(backup_file); | |
e86c9dd6 | 850 | |
0aa389dc | 851 | fprintf(stderr, Name ": ... critical section passed.\n"); |
e86c9dd6 NB |
852 | break; |
853 | } | |
854 | return 0; | |
855 | ||
856 | ||
857 | abort_resume: | |
858 | sysfs_set_num(sra, NULL, "suspend_lo", last_block); | |
859 | abort: | |
860 | for (i=0; i<array.nr_disks; i++) | |
861 | if (fdlist[i] >= 0) | |
862 | close(fdlist[i]); | |
863 | free(fdlist); | |
864 | free(offsets); | |
06b0d786 NB |
865 | if (backup_file) |
866 | unlink(backup_file); | |
e86c9dd6 NB |
867 | return 1; |
868 | ||
869 | } | |
353632d9 NB |
870 | |
871 | /* | |
872 | * If any spare contains md_back_data-1 which is recent wrt mtime, | |
873 | * write that data into the array and update the super blocks with | |
874 | * the new reshape_progress | |
875 | */ | |
06b0d786 | 876 | int Grow_restart(struct supertype *st, struct mdinfo *info, int *fdlist, int cnt, char *backup_file) |
353632d9 NB |
877 | { |
878 | int i, j; | |
879 | int old_disks; | |
353632d9 | 880 | unsigned long long *offsets; |
6e9eac4f NB |
881 | unsigned long long nstripe, ostripe, last_block; |
882 | int ndata, odata; | |
353632d9 NB |
883 | |
884 | if (info->delta_disks < 0) | |
885 | return 1; /* cannot handle a shrink */ | |
886 | if (info->new_level != info->array.level || | |
887 | info->new_layout != info->array.layout || | |
888 | info->new_chunk != info->array.chunk_size) | |
889 | return 1; /* Can only handle change in disks */ | |
890 | ||
891 | old_disks = info->array.raid_disks - info->delta_disks; | |
892 | ||
06b0d786 | 893 | for (i=old_disks-(backup_file?1:0); i<cnt; i++) { |
353632d9 | 894 | struct mdinfo dinfo; |
2efedc7b | 895 | char buf[4096]; |
06b0d786 | 896 | int fd; |
353632d9 NB |
897 | |
898 | /* This was a spare and may have some saved data on it. | |
899 | * Load the superblock, find and load the | |
900 | * backup_super_block. | |
901 | * If either fail, go on to next device. | |
902 | * If the backup contains no new info, just return | |
206c5eae | 903 | * else restore data and update all superblocks |
353632d9 | 904 | */ |
06b0d786 NB |
905 | if (i == old_disks-1) { |
906 | fd = open(backup_file, O_RDONLY); | |
907 | if (fd<0) | |
908 | continue; | |
06b0d786 NB |
909 | } else { |
910 | fd = fdlist[i]; | |
911 | if (fd < 0) | |
912 | continue; | |
3da92f27 | 913 | if (st->ss->load_super(st, fd, NULL)) |
06b0d786 | 914 | continue; |
353632d9 | 915 | |
3da92f27 NB |
916 | st->ss->getinfo_super(st, &dinfo); |
917 | st->ss->free_super(st); | |
918 | ||
06b0d786 NB |
919 | if (lseek64(fd, |
920 | (dinfo.data_offset + dinfo.component_size - 8) <<9, | |
921 | 0) < 0) | |
922 | continue; /* Cannot seek */ | |
923 | } | |
924 | if (read(fd, &bsb, sizeof(bsb)) != sizeof(bsb)) | |
353632d9 NB |
925 | continue; /* Cannot read */ |
926 | if (memcmp(bsb.magic, "md_backup_data-1", 16) != 0) | |
927 | continue; | |
928 | if (bsb.sb_csum != bsb_csum((char*)&bsb, ((char*)&bsb.sb_csum)-((char*)&bsb))) | |
929 | continue; /* bad checksum */ | |
930 | if (memcmp(bsb.set_uuid,info->uuid, 16) != 0) | |
931 | continue; /* Wrong uuid */ | |
932 | ||
933 | if (info->array.utime > __le64_to_cpu(bsb.mtime) + 3600 || | |
934 | info->array.utime < __le64_to_cpu(bsb.mtime)) | |
935 | continue; /* time stamp is too bad */ | |
936 | ||
937 | if (__le64_to_cpu(bsb.arraystart) != 0) | |
938 | continue; /* Can only handle backup from start of array */ | |
939 | if (__le64_to_cpu(bsb.length) < | |
940 | info->reshape_progress) | |
941 | continue; /* No new data here */ | |
942 | ||
06b0d786 | 943 | if (lseek64(fd, __le64_to_cpu(bsb.devstart)*512, 0)< 0) |
353632d9 | 944 | continue; /* Cannot seek */ |
2efedc7b | 945 | /* There should be a duplicate backup superblock 4k before here */ |
06b0d786 NB |
946 | if (lseek64(fd, -4096, 1) < 0 || |
947 | read(fd, buf, 4096) != 4096 || | |
9860f271 | 948 | memcmp(buf, &bsb, sizeof(bsb)) != 0) |
2efedc7b NB |
949 | continue; /* Cannot find leading superblock */ |
950 | ||
353632d9 NB |
951 | /* Now need the data offsets for all devices. */ |
952 | offsets = malloc(sizeof(*offsets)*info->array.raid_disks); | |
953 | for(j=0; j<info->array.raid_disks; j++) { | |
954 | if (fdlist[j] < 0) | |
955 | continue; | |
3da92f27 | 956 | if (st->ss->load_super(st, fdlist[j], NULL)) |
353632d9 NB |
957 | /* FIXME should be this be an error */ |
958 | continue; | |
3da92f27 NB |
959 | st->ss->getinfo_super(st, &dinfo); |
960 | st->ss->free_super(st); | |
353632d9 NB |
961 | offsets[j] = dinfo.data_offset; |
962 | } | |
963 | printf(Name ": restoring critical section\n"); | |
964 | ||
965 | if (restore_stripes(fdlist, offsets, | |
966 | info->array.raid_disks, | |
967 | info->new_chunk, | |
968 | info->new_level, | |
969 | info->new_layout, | |
06b0d786 | 970 | fd, __le64_to_cpu(bsb.devstart)*512, |
353632d9 NB |
971 | 0, __le64_to_cpu(bsb.length)*512)) { |
972 | /* didn't succeed, so giveup */ | |
2295250a | 973 | return 1; |
353632d9 NB |
974 | } |
975 | ||
976 | /* Ok, so the data is restored. Let's update those superblocks. */ | |
977 | ||
978 | for (j=0; j<info->array.raid_disks; j++) { | |
979 | if (fdlist[j] < 0) continue; | |
3da92f27 | 980 | if (st->ss->load_super(st, fdlist[j], NULL)) |
353632d9 | 981 | continue; |
3da92f27 | 982 | st->ss->getinfo_super(st, &dinfo); |
353632d9 | 983 | dinfo.reshape_progress = __le64_to_cpu(bsb.length); |
3da92f27 | 984 | st->ss->update_super(st, &dinfo, |
68c7d6d7 NB |
985 | "_reshape_progress", |
986 | NULL,0, 0, NULL); | |
3da92f27 NB |
987 | st->ss->store_super(st, fdlist[j]); |
988 | st->ss->free_super(st); | |
353632d9 NB |
989 | } |
990 | ||
991 | /* And we are done! */ | |
992 | return 0; | |
993 | } | |
6e9eac4f NB |
994 | /* Didn't find any backup data, try to see if any |
995 | * was needed. | |
996 | */ | |
997 | nstripe = ostripe = 0; | |
998 | odata = info->array.raid_disks - info->delta_disks - 1; | |
999 | if (info->array.level == 6) odata--; /* number of data disks */ | |
1000 | ndata = info->array.raid_disks - 1; | |
1001 | if (info->new_level == 6) ndata--; | |
1002 | last_block = 0; | |
1003 | while (nstripe >= ostripe) { | |
1004 | nstripe += info->new_chunk / 512; | |
1005 | last_block = nstripe * ndata; | |
1006 | ostripe = last_block / odata / (info->array.chunk_size/512) * | |
1007 | (info->array.chunk_size/512); | |
1008 | } | |
1009 | ||
1010 | if (info->reshape_progress >= last_block) | |
1011 | return 0; | |
1012 | /* needed to recover critical section! */ | |
2295250a | 1013 | return 1; |
353632d9 | 1014 | } |