]> git.ipfire.org Git - thirdparty/mdadm.git/blob - Grow.c
restripe: support saving when not all devices are present.
[thirdparty/mdadm.git] / Grow.c
1 /*
2 * mdadm - manage Linux "md" devices aka RAID arrays.
3 *
4 * Copyright (C) 2001-2006 Neil Brown <neilb@suse.de>
5 *
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 *
21 * Author: Neil Brown
22 * Email: <neilb@cse.unsw.edu.au>
23 * Paper: Neil Brown
24 * School of Computer Science and Engineering
25 * The University of New South Wales
26 * Sydney, 2052
27 * Australia
28 */
29 #include "mdadm.h"
30 #include "dlink.h"
31
32 #if ! defined(__BIG_ENDIAN) && ! defined(__LITTLE_ENDIAN)
33 #error no endian defined
34 #endif
35 #include "md_u.h"
36 #include "md_p.h"
37
38 int Grow_Add_device(char *devname, int fd, char *newdev)
39 {
40 /* Add a device to an active array.
41 * Currently, just extend a linear array.
42 * This requires writing a new superblock on the
43 * new device, calling the kernel to add the device,
44 * and if that succeeds, update the superblock on
45 * all other devices.
46 * This means that we need to *find* all other devices.
47 */
48 struct mdinfo info;
49
50 struct stat stb;
51 int nfd, fd2;
52 int d, nd;
53 struct supertype *st = NULL;
54
55
56 if (ioctl(fd, GET_ARRAY_INFO, &info.array) < 0) {
57 fprintf(stderr, Name ": cannot get array info for %s\n", devname);
58 return 1;
59 }
60
61 st = super_by_fd(fd);
62 if (!st) {
63 fprintf(stderr, Name ": cannot handle arrays with superblock version %d\n", info.array.major_version);
64 return 1;
65 }
66
67 if (info.array.level != -1) {
68 fprintf(stderr, Name ": can only add devices to linear arrays\n");
69 return 1;
70 }
71
72 nfd = open(newdev, O_RDWR|O_EXCL|O_DIRECT);
73 if (nfd < 0) {
74 fprintf(stderr, Name ": cannot open %s\n", newdev);
75 return 1;
76 }
77 fstat(nfd, &stb);
78 if ((stb.st_mode & S_IFMT) != S_IFBLK) {
79 fprintf(stderr, Name ": %s is not a block device!\n", newdev);
80 close(nfd);
81 return 1;
82 }
83 /* now check out all the devices and make sure we can read the superblock */
84 for (d=0 ; d < info.array.raid_disks ; d++) {
85 mdu_disk_info_t disk;
86 char *dv;
87
88 disk.number = d;
89 if (ioctl(fd, GET_DISK_INFO, &disk) < 0) {
90 fprintf(stderr, Name ": cannot get device detail for device %d\n",
91 d);
92 return 1;
93 }
94 dv = map_dev(disk.major, disk.minor, 1);
95 if (!dv) {
96 fprintf(stderr, Name ": cannot find device file for device %d\n",
97 d);
98 return 1;
99 }
100 fd2 = dev_open(dv, O_RDWR);
101 if (!fd2) {
102 fprintf(stderr, Name ": cannot open device file %s\n", dv);
103 return 1;
104 }
105 st->ss->free_super(st);
106
107 if (st->ss->load_super(st, fd2, NULL)) {
108 fprintf(stderr, Name ": cannot find super block on %s\n", dv);
109 close(fd2);
110 return 1;
111 }
112 close(fd2);
113 }
114 /* Ok, looks good. Lets update the superblock and write it out to
115 * newdev.
116 */
117
118 info.disk.number = d;
119 info.disk.major = major(stb.st_rdev);
120 info.disk.minor = minor(stb.st_rdev);
121 info.disk.raid_disk = d;
122 info.disk.state = (1 << MD_DISK_SYNC) | (1 << MD_DISK_ACTIVE);
123 st->ss->update_super(st, &info, "linear-grow-new", newdev,
124 0, 0, NULL);
125
126 if (st->ss->store_super(st, nfd)) {
127 fprintf(stderr, Name ": Cannot store new superblock on %s\n",
128 newdev);
129 close(nfd);
130 return 1;
131 }
132 close(nfd);
133
134 if (ioctl(fd, ADD_NEW_DISK, &info.disk) != 0) {
135 fprintf(stderr, Name ": Cannot add new disk to this array\n");
136 return 1;
137 }
138 /* Well, that seems to have worked.
139 * Now go through and update all superblocks
140 */
141
142 if (ioctl(fd, GET_ARRAY_INFO, &info.array) < 0) {
143 fprintf(stderr, Name ": cannot get array info for %s\n", devname);
144 return 1;
145 }
146
147 nd = d;
148 for (d=0 ; d < info.array.raid_disks ; d++) {
149 mdu_disk_info_t disk;
150 char *dv;
151
152 disk.number = d;
153 if (ioctl(fd, GET_DISK_INFO, &disk) < 0) {
154 fprintf(stderr, Name ": cannot get device detail for device %d\n",
155 d);
156 return 1;
157 }
158 dv = map_dev(disk.major, disk.minor, 1);
159 if (!dv) {
160 fprintf(stderr, Name ": cannot find device file for device %d\n",
161 d);
162 return 1;
163 }
164 fd2 = dev_open(dv, O_RDWR);
165 if (fd2 < 0) {
166 fprintf(stderr, Name ": cannot open device file %s\n", dv);
167 return 1;
168 }
169 if (st->ss->load_super(st, fd2, NULL)) {
170 fprintf(stderr, Name ": cannot find super block on %s\n", dv);
171 close(fd);
172 return 1;
173 }
174 info.array.raid_disks = nd+1;
175 info.array.nr_disks = nd+1;
176 info.array.active_disks = nd+1;
177 info.array.working_disks = nd+1;
178
179 st->ss->update_super(st, &info, "linear-grow-update", dv,
180 0, 0, NULL);
181
182 if (st->ss->store_super(st, fd2)) {
183 fprintf(stderr, Name ": Cannot store new superblock on %s\n", dv);
184 close(fd2);
185 return 1;
186 }
187 close(fd2);
188 }
189
190 return 0;
191 }
192
193 int Grow_addbitmap(char *devname, int fd, char *file, int chunk, int delay, int write_behind, int force)
194 {
195 /*
196 * First check that array doesn't have a bitmap
197 * Then create the bitmap
198 * Then add it
199 *
200 * For internal bitmaps, we need to check the version,
201 * find all the active devices, and write the bitmap block
202 * to all devices
203 */
204 mdu_bitmap_file_t bmf;
205 mdu_array_info_t array;
206 struct supertype *st;
207 int major = BITMAP_MAJOR_HI;
208 int vers = md_get_version(fd);
209 unsigned long long bitmapsize, array_size;
210
211 if (vers < 9003) {
212 major = BITMAP_MAJOR_HOSTENDIAN;
213 #ifdef __BIG_ENDIAN
214 fprintf(stderr, Name ": Warning - bitmaps created on this kernel are not portable\n"
215 " between different architectured. Consider upgrading the Linux kernel.\n");
216 #endif
217 }
218
219 if (ioctl(fd, GET_BITMAP_FILE, &bmf) != 0) {
220 if (errno == ENOMEM)
221 fprintf(stderr, Name ": Memory allocation failure.\n");
222 else
223 fprintf(stderr, Name ": bitmaps not supported by this kernel.\n");
224 return 1;
225 }
226 if (bmf.pathname[0]) {
227 if (strcmp(file,"none")==0) {
228 if (ioctl(fd, SET_BITMAP_FILE, -1)!= 0) {
229 fprintf(stderr, Name ": failed to remove bitmap %s\n",
230 bmf.pathname);
231 return 1;
232 }
233 return 0;
234 }
235 fprintf(stderr, Name ": %s already has a bitmap (%s)\n",
236 devname, bmf.pathname);
237 return 1;
238 }
239 if (ioctl(fd, GET_ARRAY_INFO, &array) != 0) {
240 fprintf(stderr, Name ": cannot get array status for %s\n", devname);
241 return 1;
242 }
243 if (array.state & (1<<MD_SB_BITMAP_PRESENT)) {
244 if (strcmp(file, "none")==0) {
245 array.state &= ~(1<<MD_SB_BITMAP_PRESENT);
246 if (ioctl(fd, SET_ARRAY_INFO, &array)!= 0) {
247 fprintf(stderr, Name ": failed to remove internal bitmap.\n");
248 return 1;
249 }
250 return 0;
251 }
252 fprintf(stderr, Name ": Internal bitmap already present on %s\n",
253 devname);
254 return 1;
255 }
256 if (array.level <= 0) {
257 fprintf(stderr, Name ": Bitmaps not meaningful with level %s\n",
258 map_num(pers, array.level)?:"of this array");
259 return 1;
260 }
261 bitmapsize = array.size;
262 bitmapsize <<= 1;
263 if (get_dev_size(fd, NULL, &array_size) &&
264 array_size > (0x7fffffffULL<<9)) {
265 /* Array is big enough that we cannot trust array.size
266 * try other approaches
267 */
268 bitmapsize = get_component_size(fd);
269 }
270 if (bitmapsize == 0) {
271 fprintf(stderr, Name ": Cannot reliably determine size of array to create bitmap - sorry.\n");
272 return 1;
273 }
274
275 if (array.level == 10) {
276 int ncopies = (array.layout&255)*((array.layout>>8)&255);
277 bitmapsize = bitmapsize * array.raid_disks / ncopies;
278 }
279
280 st = super_by_fd(fd);
281 if (!st) {
282 fprintf(stderr, Name ": Cannot understand version %d.%d\n",
283 array.major_version, array.minor_version);
284 return 1;
285 }
286 if (strcmp(file, "none") == 0) {
287 fprintf(stderr, Name ": no bitmap found on %s\n", devname);
288 return 1;
289 } else if (strcmp(file, "internal") == 0) {
290 int d;
291 for (d=0; d< st->max_devs; d++) {
292 mdu_disk_info_t disk;
293 char *dv;
294 disk.number = d;
295 if (ioctl(fd, GET_DISK_INFO, &disk) < 0)
296 continue;
297 if (disk.major == 0 &&
298 disk.minor == 0)
299 continue;
300 if ((disk.state & (1<<MD_DISK_SYNC))==0)
301 continue;
302 dv = map_dev(disk.major, disk.minor, 1);
303 if (dv) {
304 int fd2 = dev_open(dv, O_RDWR);
305 if (fd2 < 0)
306 continue;
307 if (st->ss->load_super(st, fd2, NULL)==0) {
308 if (st->ss->add_internal_bitmap(
309 st,
310 &chunk, delay, write_behind,
311 bitmapsize, 0, major)
312 )
313 st->ss->write_bitmap(st, fd2);
314 else {
315 fprintf(stderr, Name ": failed to create internal bitmap - chunksize problem.\n");
316 close(fd2);
317 return 1;
318 }
319 }
320 close(fd2);
321 }
322 }
323 array.state |= (1<<MD_SB_BITMAP_PRESENT);
324 if (ioctl(fd, SET_ARRAY_INFO, &array)!= 0) {
325 fprintf(stderr, Name ": failed to set internal bitmap.\n");
326 return 1;
327 }
328 } else {
329 int uuid[4];
330 int bitmap_fd;
331 int d;
332 int max_devs = st->max_devs;
333
334 /* try to load a superblock */
335 for (d=0; d<max_devs; d++) {
336 mdu_disk_info_t disk;
337 char *dv;
338 int fd2;
339 disk.number = d;
340 if (ioctl(fd, GET_DISK_INFO, &disk) < 0)
341 continue;
342 if ((disk.major==0 && disk.minor==0) ||
343 (disk.state & (1<<MD_DISK_REMOVED)))
344 continue;
345 dv = map_dev(disk.major, disk.minor, 1);
346 if (!dv) continue;
347 fd2 = dev_open(dv, O_RDONLY);
348 if (fd2 >= 0 &&
349 st->ss->load_super(st, fd2, NULL) == 0) {
350 close(fd2);
351 st->ss->uuid_from_super(st, uuid);
352 break;
353 }
354 close(fd2);
355 }
356 if (d == max_devs) {
357 fprintf(stderr, Name ": cannot find UUID for array!\n");
358 return 1;
359 }
360 if (CreateBitmap(file, force, (char*)uuid, chunk,
361 delay, write_behind, bitmapsize, major)) {
362 return 1;
363 }
364 bitmap_fd = open(file, O_RDWR);
365 if (bitmap_fd < 0) {
366 fprintf(stderr, Name ": weird: %s cannot be opened\n",
367 file);
368 return 1;
369 }
370 if (ioctl(fd, SET_BITMAP_FILE, bitmap_fd) < 0) {
371 fprintf(stderr, Name ": Cannot set bitmap file for %s: %s\n",
372 devname, strerror(errno));
373 return 1;
374 }
375 }
376
377 return 0;
378 }
379
380
381 /*
382 * When reshaping an array we might need to backup some data.
383 * This is written to all spares with a 'super_block' describing it.
384 * The superblock goes 1K form the end of the used space on the
385 * device.
386 * It if written after the backup is complete.
387 * It has the following structure.
388 */
389
390 struct mdp_backup_super {
391 char magic[16]; /* md_backup_data-1 */
392 __u8 set_uuid[16];
393 __u64 mtime;
394 /* start/sizes in 512byte sectors */
395 __u64 devstart;
396 __u64 arraystart;
397 __u64 length;
398 __u32 sb_csum; /* csum of preceeding bytes. */
399 __u8 pad[512-68];
400 } __attribute__((aligned(512))) bsb;
401
/*
 * Checksum stored in, and checked against, the sb_csum field of the
 * backup superblock.
 *
 * buf/len describe the bytes preceding sb_csum.  The result is
 * returned already converted with __cpu_to_le32().
 *
 * The accumulator is unsigned so that the repeated <<3 wraps instead
 * of overflowing a signed int (undefined behaviour); the resulting bit
 * pattern is the same as the signed computation gives on two's
 * complement machines.
 *
 * NOTE(review): every iteration adds buf[0], not buf[i], so only the
 * first byte and 'len' influence the result.  That looks unintended,
 * but this value is written into backup superblocks and compared when
 * they are read back, so changing it would reject backups written
 * with the current formula.  Left as is - confirm before "fixing".
 */
int bsb_csum(char *buf, int len)
{
	int i;
	unsigned int csum = 0;

	for (i = 0; i < len; i++)
		csum = (csum << 3) + (unsigned int)buf[0];
	return (int)__cpu_to_le32(csum);
}
410
411 int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
412 long long size,
413 int level, char *layout_str, int chunksize, int raid_disks)
414 {
415 /* Make some changes in the shape of an array.
416 * The kernel must support the change.
417 * Different reshapes have subtly different meaning for different
418 * levels, so we need to check the current state of the array
419 * and go from there.
420 */
421 struct mdu_array_info_s array;
422 char *c;
423
424 struct supertype *st;
425
426 int nlevel, olevel;
427 int nchunk, ochunk;
428 int nlayout, olayout;
429 int ndisks, odisks;
430 int ndata, odata;
431 unsigned long long nstripe, ostripe, last_block;
432 int *fdlist;
433 unsigned long long *offsets;
434 int d, i, spares;
435 int nrdisks;
436 int err;
437 char *buf;
438
439 struct mdinfo *sra;
440 struct mdinfo *sd;
441
442 if (ioctl(fd, GET_ARRAY_INFO, &array) < 0) {
443 fprintf(stderr, Name ": %s is not an active md array - aborting\n",
444 devname);
445 return 1;
446 }
447 c = map_num(pers, array.level);
448 if (c == NULL) c = "-unknown-";
449 switch(array.level) {
450 default: /* raid0, linear, multipath cannot be reconfigured */
451 fprintf(stderr, Name ": %s array %s cannot be reshaped.\n",
452 c, devname);
453 return 1;
454
455 case LEVEL_FAULTY: /* only 'layout' change is permitted */
456
457 if (size >= 0) {
458 fprintf(stderr, Name ": %s: Cannot change size of a 'faulty' array\n",
459 devname);
460 return 1;
461 }
462 if (level != UnSet && level != LEVEL_FAULTY) {
463 fprintf(stderr, Name ": %s: Cannot change RAID level of a 'faulty' array\n",
464 devname);
465 return 1;
466 }
467 if (chunksize || raid_disks) {
468 fprintf(stderr, Name ": %s: Cannot change chunksize or disks of a 'faulty' array\n",
469 devname);
470 return 1;
471 }
472 if (layout_str == NULL)
473 return 0; /* nothing to do.... */
474
475 array.layout = parse_layout_faulty(layout_str);
476 if (array.layout < 0) {
477 fprintf(stderr, Name ": %s: layout %s not understood for 'faulty' array\n",
478 devname, layout_str);
479 return 1;
480 }
481 if (ioctl(fd, SET_ARRAY_INFO, &array) != 0) {
482 fprintf(stderr, Name ": Cannot set layout for %s: %s\n",
483 devname, strerror(errno));
484 return 1;
485 }
486 if (!quiet)
487 printf("layout for %s set to %d\n", devname, array.layout);
488 return 0;
489
490 case 1: /* raid_disks and size can each be changed. They are independant */
491
492 if (level != UnSet && level != 1) {
493 fprintf(stderr, Name ": %s: Cannot change RAID level of a RAID1 array.\n",
494 devname);
495 return 1;
496 }
497 if (chunksize || layout_str != NULL) {
498 fprintf(stderr, Name ": %s: Cannot change chunk size of layout for a RAID1 array.\n",
499 devname);
500 return 1;
501 }
502
503 /* Each can trigger a resync/recovery which will block the
504 * other from happening. Later we could block
505 * resync for the duration via 'sync_action'...
506 */
507 if (raid_disks > 0) {
508 array.raid_disks = raid_disks;
509 if (ioctl(fd, SET_ARRAY_INFO, &array) != 0) {
510 fprintf(stderr, Name ": Cannot set raid-devices for %s: %s\n",
511 devname, strerror(errno));
512 return 1;
513 }
514 }
515 if (size >= 0) {
516 int rv;
517 array.size = size;
518 if (array.size != size) {
519 /* got truncated to 32bit, write to
520 * component_size instead
521 */
522 sra = sysfs_read(fd, 0, 0);
523 if (sra)
524 rv = sysfs_set_num(sra, NULL,
525 "component_size", size);
526 else
527 rv = -1;
528 } else
529 rv = ioctl(fd, SET_ARRAY_INFO, &array);
530 if (rv != 0) {
531 fprintf(stderr, Name ": Cannot set device size for %s: %s\n",
532 devname, strerror(errno));
533 return 1;
534 }
535 }
536 return 0;
537
538 case 4:
539 case 5:
540 case 6:
541 st = super_by_fd(fd);
542
543 /* size can be changed independently.
544 * layout/chunksize/raid_disks/level can be changed
545 * though the kernel may not support it all.
546 * If 'suspend_lo' is not present in devfs, then
547 * these cannot be changed.
548 */
549 if (size >= 0) {
550 /* Cannot change other details as well.. */
551 if (layout_str != NULL ||
552 chunksize != 0 ||
553 raid_disks != 0 ||
554 level != UnSet) {
555 fprintf(stderr, Name ": %s: Cannot change shape as well as size of a %s array.\n",
556 devname, c);
557 return 1;
558 }
559 array.size = size;
560 if (ioctl(fd, SET_ARRAY_INFO, &array) != 0) {
561 fprintf(stderr, Name ": Cannot set device size/shape for %s: %s\n",
562 devname, strerror(errno));
563 return 1;
564 }
565 return 0;
566 }
567 /* Ok, just change the shape. This can be awkward.
568 * There are three possibilities.
569 * 1/ The array will shrink. We don't support this
570 * possibility. Maybe one day...
571 * 2/ The array will not change size. This is easy enough
572 * to do, but not reliably. If the process is aborted
573 * the array *will* be corrupted. So maybe we can allow
574 * this but only if the user is really certain. e.g.
575 * --really-risk-everything
576 * 3/ The array will grow. This can be reliably achieved.
577 * However the kernel's restripe routines will cheerfully
578 * overwrite some early data before it is safe. So we
579 * need to make a backup of the early parts of the array
580 * and be ready to restore it if rebuild aborts very early.
581 *
582 * We backup data by writing it to all spares (there must be
583 * at least 1, so even raid6->raid5 requires a spare to be
584 * present).
585 *
586 * So: we enumerate the devices in the array and
587 * make sure we can open all of them.
588 * Then we freeze the early part of the array and
589 * backup to the various spares.
590 * Then we request changes and start the reshape.
591 * Monitor progress until it has passed the danger zone.
592 * and finally invalidate the copied data and unfreeze the
593 * start of the array.
594 *
595 * Before we can do this we need to decide:
596 * - will the array grow? Just calculate size
597 * - how much needs to be saved: count stripes.
598 * - where to save data... good question.
599 *
600 */
601 nlevel = olevel = array.level;
602 nchunk = ochunk = array.chunk_size;
603 nlayout = olayout = array.layout;
604 ndisks = odisks = array.raid_disks;
605
606 if (level != UnSet) nlevel = level;
607 if (chunksize) nchunk = chunksize;
608 if (layout_str != NULL)
609 switch(nlevel) {
610 case 4: /* ignore layout */
611 break;
612 case 5:
613 nlayout = map_name(r5layout, layout_str);
614 if (nlayout == UnSet) {
615 fprintf(stderr, Name ": layout %s not understood for raid5.\n",
616 layout_str);
617 return 1;
618 }
619 break;
620
621 case 6:
622 nlayout = map_name(r6layout, layout_str);
623 if (nlayout == UnSet) {
624 fprintf(stderr, Name ": layout %s not understood for raid6.\n",
625 layout_str);
626 return 1;
627 }
628 break;
629 }
630 if (raid_disks) ndisks = raid_disks;
631
632 odata = odisks-1;
633 if (olevel == 6) odata--; /* number of data disks */
634 ndata = ndisks-1;
635 if (nlevel == 6) ndata--;
636
637 if (ndata < odata) {
638 fprintf(stderr, Name ": %s: Cannot reduce number of data disks (yet).\n",
639 devname);
640 return 1;
641 }
642 if (ndata == odata) {
643 fprintf(stderr, Name ": %s: Cannot reshape array without increasing size (yet).\n",
644 devname);
645 return 1;
646 }
647 /* Well, it is growing... so how much do we need to backup.
648 * Need to backup a full number of new-stripes, such that the
649 * last one does not over-write any place that it would be read
650 * from
651 */
652 nstripe = ostripe = 0;
653 while (nstripe >= ostripe) {
654 nstripe += nchunk/512;
655 last_block = nstripe * ndata;
656 ostripe = last_block / odata / (ochunk/512) * (ochunk/512);
657 }
658 fprintf(stderr, Name ": Need to backup %lluK of critical "
659 "section..\n", last_block/2);
660
661 sra = sysfs_read(fd, 0,
662 GET_COMPONENT|GET_DEVS|GET_OFFSET|GET_STATE|
663 GET_CACHE);
664 if (!sra) {
665 fprintf(stderr, Name ": %s: Cannot get array details from sysfs\n",
666 devname);
667 return 1;
668 }
669
670 if (last_block >= sra->component_size/2) {
671 fprintf(stderr, Name ": %s: Something wrong - reshape aborted\n",
672 devname);
673 return 1;
674 }
675 if (sra->array.spare_disks == 0 && backup_file == NULL) {
676 fprintf(stderr, Name ": %s: Cannot grow - need a spare or backup-file to backup critical section\n",
677 devname);
678 return 1;
679 }
680
681 nrdisks = array.nr_disks + sra->array.spare_disks;
682 /* Now we need to open all these devices so we can read/write.
683 */
684 fdlist = malloc((1+nrdisks) * sizeof(int));
685 offsets = malloc((1+nrdisks) * sizeof(offsets[0]));
686 if (!fdlist || !offsets) {
687 fprintf(stderr, Name ": malloc failed: grow aborted\n");
688 return 1;
689 }
690 for (d=0; d <= nrdisks; d++)
691 fdlist[d] = -1;
692 d = array.raid_disks;
693 for (sd = sra->devs; sd; sd=sd->next) {
694 if (sd->disk.state & (1<<MD_DISK_FAULTY))
695 continue;
696 if (sd->disk.state & (1<<MD_DISK_SYNC)) {
697 char *dn = map_dev(sd->disk.major,
698 sd->disk.minor, 1);
699 fdlist[sd->disk.raid_disk]
700 = dev_open(dn, O_RDONLY);
701 offsets[sd->disk.raid_disk] = sd->data_offset;
702 if (fdlist[sd->disk.raid_disk] < 0) {
703 fprintf(stderr, Name ": %s: cannot open component %s\n",
704 devname, dn?dn:"-unknown-");
705 goto abort;
706 }
707 } else {
708 /* spare */
709 char *dn = map_dev(sd->disk.major,
710 sd->disk.minor, 1);
711 fdlist[d] = dev_open(dn, O_RDWR);
712 offsets[d] = sd->data_offset;
713 if (fdlist[d]<0) {
714 fprintf(stderr, Name ": %s: cannot open component %s\n",
715 devname, dn?dn:"-unknown");
716 goto abort;
717 }
718 d++;
719 }
720 }
721 for (i=0 ; i<array.raid_disks; i++)
722 if (fdlist[i] < 0) {
723 fprintf(stderr, Name ": %s: failed to find device %d. Array might be degraded.\n"
724 " --grow aborted\n", devname, i);
725 goto abort;
726 }
727 spares = sra->array.spare_disks;
728 if (backup_file) {
729 fdlist[d] = open(backup_file, O_RDWR|O_CREAT|O_EXCL, S_IRUSR | S_IWUSR);
730 if (fdlist[d] < 0) {
731 fprintf(stderr, Name ": %s: cannot create backup file %s: %s\n",
732 devname, backup_file, strerror(errno));
733 goto abort;
734 }
735 offsets[d] = 8;
736 d++;
737 spares++;
738 }
739 if (fdlist[array.raid_disks] < 0) {
740 fprintf(stderr, Name ": %s: failed to find a spare and no backup-file given - --grow aborted\n",
741 devname);
742 goto abort;
743 }
744
745 /* Find a superblock */
746 if (st->ss->load_super(st, fdlist[0], NULL)) {
747 fprintf(stderr, Name ": %s: Cannot find a superblock\n",
748 devname);
749 goto abort;
750 }
751
752
753 memcpy(bsb.magic, "md_backup_data-1", 16);
754 st->ss->uuid_from_super(st, (int*)&bsb.set_uuid);
755 bsb.mtime = __cpu_to_le64(time(0));
756 bsb.arraystart = 0;
757 bsb.length = __cpu_to_le64(last_block);
758
759 /* Decide offset for the backup, llseek the spares, and write
760 * a leading superblock 4K earlier.
761 */
762 for (i=array.raid_disks; i<d; i++) {
763 char abuf[4096+512];
764 char *buf = (char*)(((unsigned long)abuf+511)& ~511);
765 if (i==d-1 && backup_file) {
766 /* This is the backup file */
767 offsets[i] = 8;
768 } else
769 offsets[i] += sra->component_size - last_block - 8;
770 if (lseek64(fdlist[i], (offsets[i]<<9) - 4096, 0)
771 != (offsets[i]<<9) - 4096) {
772 fprintf(stderr, Name ": could not seek...\n");
773 goto abort;
774 }
775 memset(buf, 0, 4096);
776 bsb.devstart = __cpu_to_le64(offsets[i]);
777 bsb.sb_csum = bsb_csum((char*)&bsb, ((char*)&bsb.sb_csum)-((char*)&bsb));
778 memcpy(buf, &bsb, sizeof(bsb));
779 if (write(fdlist[i], buf, 4096) != 4096) {
780 fprintf(stderr, Name ": could not write leading superblock\n");
781 goto abort;
782 }
783 }
784 array.level = nlevel;
785 array.raid_disks = ndisks;
786 array.chunk_size = nchunk;
787 array.layout = nlayout;
788 if (ioctl(fd, SET_ARRAY_INFO, &array) != 0) {
789 if (errno == ENOSPC) {
790 /* stripe cache is not big enough.
791 * It needs to be 4 times chunksize_size,
792 * and we assume pagesize is 4K
793 */
794 if (sra->cache_size < 4 * (nchunk/4096)) {
795 sysfs_set_num(sra, NULL,
796 "stripe_cache_size",
797 4 * (nchunk/4096) +1);
798 if (ioctl(fd, SET_ARRAY_INFO,
799 &array) == 0)
800 goto ok;
801 }
802 }
803 fprintf(stderr, Name ": Cannot set device size/shape for %s: %s\n",
804 devname, strerror(errno));
805 goto abort;
806 }
807 ok: ;
808
809 /* suspend the relevant region */
810 sysfs_set_num(sra, NULL, "suspend_hi", 0); /* just in case */
811 if (sysfs_set_num(sra, NULL, "suspend_lo", 0) < 0 ||
812 sysfs_set_num(sra, NULL, "suspend_hi", last_block) < 0) {
813 fprintf(stderr, Name ": %s: failed to suspend device.\n",
814 devname);
815 goto abort_resume;
816 }
817
818 buf = malloc(odisks * ochunk);
819
820 err = save_stripes(fdlist, offsets,
821 odisks, ochunk, olevel, olayout,
822 spares, fdlist+odisks,
823 0ULL, last_block*512, buf);
824
825 /* abort if there was an error */
826 if (err < 0) {
827 fprintf(stderr, Name ": %s: failed to save critical region\n",
828 devname);
829 goto abort_resume;
830 }
831
832 for (i=odisks; i<d ; i++) {
833 bsb.devstart = __cpu_to_le64(offsets[i]);
834 bsb.sb_csum = bsb_csum((char*)&bsb, ((char*)&bsb.sb_csum)-((char*)&bsb));
835 if (lseek64(fdlist[i], (offsets[i]+last_block)<<9, 0) < 0 ||
836 write(fdlist[i], &bsb, sizeof(bsb)) != sizeof(bsb) ||
837 fsync(fdlist[i]) != 0) {
838 fprintf(stderr, Name ": %s: failed to save metadata for critical region backups.\n",
839 devname);
840 goto abort_resume;
841 }
842 }
843
844 /* start the reshape happening */
845 if (sysfs_set_str(sra, NULL, "sync_action", "reshape") < 0) {
846 fprintf(stderr, Name ": %s: failed to initiate reshape\n",
847 devname);
848 goto abort_resume;
849 }
850 /* wait for reshape to pass the critical region */
851 while(1) {
852 unsigned long long comp;
853
854 if (sysfs_get_ll(sra, NULL, "sync_completed", &comp)<0) {
855 sleep(5);
856 break;
857 }
858 if (comp >= nstripe)
859 break;
860 if (comp == 0) {
861 /* Maybe it finished already */
862 char action[20];
863 if (sysfs_get_str(sra, NULL, "sync_action",
864 action, 20) > 0 &&
865 strncmp(action, "reshape", 7) != 0)
866 break;
867 }
868 sleep(1);
869 }
870
871 /* invalidate superblocks */
872 memset(&bsb, 0, sizeof(bsb));
873 for (i=odisks; i<d ; i++) {
874 lseek64(fdlist[i], (offsets[i]+last_block)<<9, 0);
875 if (write(fdlist[i], &bsb, sizeof(bsb)) < 0) {
876 fprintf(stderr, Name ": %s: failed to invalidate metadata for raid disk %d\n",
877 devname, i);
878 }
879 }
880
881 /* unsuspend. */
882 sysfs_set_num(sra, NULL, "suspend_lo", last_block);
883
884 for (i=0; i<d; i++)
885 if (fdlist[i] >= 0)
886 close(fdlist[i]);
887 free(fdlist);
888 free(offsets);
889 if (backup_file)
890 unlink(backup_file);
891
892 fprintf(stderr, Name ": ... critical section passed.\n");
893 break;
894 }
895 return 0;
896
897
898 abort_resume:
899 sysfs_set_num(sra, NULL, "suspend_lo", last_block);
900 abort:
901 for (i=0; i<array.nr_disks; i++)
902 if (fdlist[i] >= 0)
903 close(fdlist[i]);
904 free(fdlist);
905 free(offsets);
906 if (backup_file)
907 unlink(backup_file);
908 return 1;
909
910 }
911
912 /*
913 * If any spare contains md_back_data-1 which is recent wrt mtime,
914 * write that data into the array and update the super blocks with
915 * the new reshape_progress
916 */
917 int Grow_restart(struct supertype *st, struct mdinfo *info, int *fdlist, int cnt, char *backup_file)
918 {
919 int i, j;
920 int old_disks;
921 unsigned long long *offsets;
922 unsigned long long nstripe, ostripe, last_block;
923 int ndata, odata;
924
925 if (info->delta_disks < 0)
926 return 1; /* cannot handle a shrink */
927 if (info->new_level != info->array.level ||
928 info->new_layout != info->array.layout ||
929 info->new_chunk != info->array.chunk_size)
930 return 1; /* Can only handle change in disks */
931
932 old_disks = info->array.raid_disks - info->delta_disks;
933
934 for (i=old_disks-(backup_file?1:0); i<cnt; i++) {
935 struct mdinfo dinfo;
936 char buf[4096];
937 int fd;
938
939 /* This was a spare and may have some saved data on it.
940 * Load the superblock, find and load the
941 * backup_super_block.
942 * If either fail, go on to next device.
943 * If the backup contains no new info, just return
944 * else restore data and update all superblocks
945 */
946 if (i == old_disks-1) {
947 fd = open(backup_file, O_RDONLY);
948 if (fd<0)
949 continue;
950 } else {
951 fd = fdlist[i];
952 if (fd < 0)
953 continue;
954 if (st->ss->load_super(st, fd, NULL))
955 continue;
956
957 st->ss->getinfo_super(st, &dinfo);
958 st->ss->free_super(st);
959
960 if (lseek64(fd,
961 (dinfo.data_offset + dinfo.component_size - 8) <<9,
962 0) < 0)
963 continue; /* Cannot seek */
964 }
965 if (read(fd, &bsb, sizeof(bsb)) != sizeof(bsb))
966 continue; /* Cannot read */
967 if (memcmp(bsb.magic, "md_backup_data-1", 16) != 0)
968 continue;
969 if (bsb.sb_csum != bsb_csum((char*)&bsb, ((char*)&bsb.sb_csum)-((char*)&bsb)))
970 continue; /* bad checksum */
971 if (memcmp(bsb.set_uuid,info->uuid, 16) != 0)
972 continue; /* Wrong uuid */
973
974 if (info->array.utime > __le64_to_cpu(bsb.mtime) + 3600 ||
975 info->array.utime < __le64_to_cpu(bsb.mtime))
976 continue; /* time stamp is too bad */
977
978 if (__le64_to_cpu(bsb.arraystart) != 0)
979 continue; /* Can only handle backup from start of array */
980 if (__le64_to_cpu(bsb.length) <
981 info->reshape_progress)
982 continue; /* No new data here */
983
984 if (lseek64(fd, __le64_to_cpu(bsb.devstart)*512, 0)< 0)
985 continue; /* Cannot seek */
986 /* There should be a duplicate backup superblock 4k before here */
987 if (lseek64(fd, -4096, 1) < 0 ||
988 read(fd, buf, 4096) != 4096 ||
989 memcmp(buf, &bsb, sizeof(bsb)) != 0)
990 continue; /* Cannot find leading superblock */
991
992 /* Now need the data offsets for all devices. */
993 offsets = malloc(sizeof(*offsets)*info->array.raid_disks);
994 for(j=0; j<info->array.raid_disks; j++) {
995 if (fdlist[j] < 0)
996 continue;
997 if (st->ss->load_super(st, fdlist[j], NULL))
998 /* FIXME should be this be an error */
999 continue;
1000 st->ss->getinfo_super(st, &dinfo);
1001 st->ss->free_super(st);
1002 offsets[j] = dinfo.data_offset;
1003 }
1004 printf(Name ": restoring critical section\n");
1005
1006 if (restore_stripes(fdlist, offsets,
1007 info->array.raid_disks,
1008 info->new_chunk,
1009 info->new_level,
1010 info->new_layout,
1011 fd, __le64_to_cpu(bsb.devstart)*512,
1012 0, __le64_to_cpu(bsb.length)*512)) {
1013 /* didn't succeed, so giveup */
1014 return 1;
1015 }
1016
1017 /* Ok, so the data is restored. Let's update those superblocks. */
1018
1019 for (j=0; j<info->array.raid_disks; j++) {
1020 if (fdlist[j] < 0) continue;
1021 if (st->ss->load_super(st, fdlist[j], NULL))
1022 continue;
1023 st->ss->getinfo_super(st, &dinfo);
1024 dinfo.reshape_progress = __le64_to_cpu(bsb.length);
1025 st->ss->update_super(st, &dinfo,
1026 "_reshape_progress",
1027 NULL,0, 0, NULL);
1028 st->ss->store_super(st, fdlist[j]);
1029 st->ss->free_super(st);
1030 }
1031
1032 /* And we are done! */
1033 return 0;
1034 }
1035 /* Didn't find any backup data, try to see if any
1036 * was needed.
1037 */
1038 nstripe = ostripe = 0;
1039 odata = info->array.raid_disks - info->delta_disks - 1;
1040 if (info->array.level == 6) odata--; /* number of data disks */
1041 ndata = info->array.raid_disks - 1;
1042 if (info->new_level == 6) ndata--;
1043 last_block = 0;
1044 while (nstripe >= ostripe) {
1045 nstripe += info->new_chunk / 512;
1046 last_block = nstripe * ndata;
1047 ostripe = last_block / odata / (info->array.chunk_size/512) *
1048 (info->array.chunk_size/512);
1049 }
1050
1051 if (info->reshape_progress >= last_block)
1052 return 0;
1053 /* needed to recover critical section! */
1054 return 1;
1055 }