]> git.ipfire.org Git - thirdparty/mdadm.git/blob - Grow.c
Remove spaces/tabs from ends of lines.
[thirdparty/mdadm.git] / Grow.c
1 /*
2 * mdadm - manage Linux "md" devices aka RAID arrays.
3 *
4 * Copyright (C) 2001-2006 Neil Brown <neilb@suse.de>
5 *
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 *
21 * Author: Neil Brown
22 * Email: <neilb@cse.unsw.edu.au>
23 * Paper: Neil Brown
24 * School of Computer Science and Engineering
25 * The University of New South Wales
26 * Sydney, 2052
27 * Australia
28 */
29 #include "mdadm.h"
30 #include "dlink.h"
31
32 #if ! defined(__BIG_ENDIAN) && ! defined(__LITTLE_ENDIAN)
33 #error no endian defined
34 #endif
35 #include "md_u.h"
36 #include "md_p.h"
37
38 int Grow_Add_device(char *devname, int fd, char *newdev)
39 {
40 /* Add a device to an active array.
41 * Currently, just extend a linear array.
42 * This requires writing a new superblock on the
43 * new device, calling the kernel to add the device,
44 * and if that succeeds, update the superblock on
45 * all other devices.
46 * This means that we need to *find* all other devices.
47 */
48 struct mdinfo info;
49
50 void *super = NULL;
51 struct stat stb;
52 int nfd, fd2;
53 int d, nd;
54 struct supertype *st = NULL;
55
56
57 if (ioctl(fd, GET_ARRAY_INFO, &info.array) < 0) {
58 fprintf(stderr, Name ": cannot get array info for %s\n", devname);
59 return 1;
60 }
61
62 st = super_by_version(info.array.major_version, info.array.minor_version);
63 if (!st) {
64 fprintf(stderr, Name ": cannot handle arrays with superblock version %d\n", info.array.major_version);
65 return 1;
66 }
67
68 if (info.array.level != -1) {
69 fprintf(stderr, Name ": can only add devices to linear arrays\n");
70 return 1;
71 }
72
73 nfd = open(newdev, O_RDWR|O_EXCL);
74 if (nfd < 0) {
75 fprintf(stderr, Name ": cannot open %s\n", newdev);
76 return 1;
77 }
78 fstat(nfd, &stb);
79 if ((stb.st_mode & S_IFMT) != S_IFBLK) {
80 fprintf(stderr, Name ": %s is not a block device!\n", newdev);
81 close(nfd);
82 return 1;
83 }
84 /* now check out all the devices and make sure we can read the superblock */
85 for (d=0 ; d < info.array.raid_disks ; d++) {
86 mdu_disk_info_t disk;
87 char *dv;
88
89 disk.number = d;
90 if (ioctl(fd, GET_DISK_INFO, &disk) < 0) {
91 fprintf(stderr, Name ": cannot get device detail for device %d\n",
92 d);
93 return 1;
94 }
95 dv = map_dev(disk.major, disk.minor, 1);
96 if (!dv) {
97 fprintf(stderr, Name ": cannot find device file for device %d\n",
98 d);
99 return 1;
100 }
101 fd2 = dev_open(dv, O_RDWR);
102 if (!fd2) {
103 fprintf(stderr, Name ": cannot open device file %s\n", dv);
104 return 1;
105 }
106 if (super) free(super);
107 super= NULL;
108 if (st->ss->load_super(st, fd2, &super, NULL)) {
109 fprintf(stderr, Name ": cannot find super block on %s\n", dv);
110 close(fd2);
111 return 1;
112 }
113 close(fd2);
114 }
115 /* Ok, looks good. Lets update the superblock and write it out to
116 * newdev.
117 */
118
119 info.disk.number = d;
120 info.disk.major = major(stb.st_rdev);
121 info.disk.minor = minor(stb.st_rdev);
122 info.disk.raid_disk = d;
123 info.disk.state = (1 << MD_DISK_SYNC) | (1 << MD_DISK_ACTIVE);
124 st->ss->update_super(&info, super, "linear-grow-new", newdev,
125 0, 0, NULL);
126
127 if (st->ss->store_super(st, nfd, super)) {
128 fprintf(stderr, Name ": Cannot store new superblock on %s\n",
129 newdev);
130 close(nfd);
131 return 1;
132 }
133 close(nfd);
134
135 if (ioctl(fd, ADD_NEW_DISK, &info.disk) != 0) {
136 fprintf(stderr, Name ": Cannot add new disk to this array\n");
137 return 1;
138 }
139 /* Well, that seems to have worked.
140 * Now go through and update all superblocks
141 */
142
143 if (ioctl(fd, GET_ARRAY_INFO, &info.array) < 0) {
144 fprintf(stderr, Name ": cannot get array info for %s\n", devname);
145 return 1;
146 }
147
148 nd = d;
149 for (d=0 ; d < info.array.raid_disks ; d++) {
150 mdu_disk_info_t disk;
151 char *dv;
152
153 disk.number = d;
154 if (ioctl(fd, GET_DISK_INFO, &disk) < 0) {
155 fprintf(stderr, Name ": cannot get device detail for device %d\n",
156 d);
157 return 1;
158 }
159 dv = map_dev(disk.major, disk.minor, 1);
160 if (!dv) {
161 fprintf(stderr, Name ": cannot find device file for device %d\n",
162 d);
163 return 1;
164 }
165 fd2 = dev_open(dv, O_RDWR);
166 if (fd2 < 0) {
167 fprintf(stderr, Name ": cannot open device file %s\n", dv);
168 return 1;
169 }
170 if (st->ss->load_super(st, fd2, &super, NULL)) {
171 fprintf(stderr, Name ": cannot find super block on %s\n", dv);
172 close(fd);
173 return 1;
174 }
175 info.array.raid_disks = nd+1;
176 info.array.nr_disks = nd+1;
177 info.array.active_disks = nd+1;
178 info.array.working_disks = nd+1;
179
180 st->ss->update_super(&info, super, "linear-grow-update", dv,
181 0, 0, NULL);
182
183 if (st->ss->store_super(st, fd2, super)) {
184 fprintf(stderr, Name ": Cannot store new superblock on %s\n", dv);
185 close(fd2);
186 return 1;
187 }
188 close(fd2);
189 }
190
191 return 0;
192 }
193
194 int Grow_addbitmap(char *devname, int fd, char *file, int chunk, int delay, int write_behind, int force)
195 {
196 /*
197 * First check that array doesn't have a bitmap
198 * Then create the bitmap
199 * Then add it
200 *
201 * For internal bitmaps, we need to check the version,
202 * find all the active devices, and write the bitmap block
203 * to all devices
204 */
205 mdu_bitmap_file_t bmf;
206 mdu_array_info_t array;
207 struct supertype *st;
208 int major = BITMAP_MAJOR_HI;
209 int vers = md_get_version(fd);
210 unsigned long long bitmapsize, array_size;
211
212 if (vers < 9003) {
213 major = BITMAP_MAJOR_HOSTENDIAN;
214 #ifdef __BIG_ENDIAN
215 fprintf(stderr, Name ": Warning - bitmaps created on this kernel are not portable\n"
216 " between different architectured. Consider upgrading the Linux kernel.\n");
217 #endif
218 }
219
220 if (ioctl(fd, GET_BITMAP_FILE, &bmf) != 0) {
221 if (errno == ENOMEM)
222 fprintf(stderr, Name ": Memory allocation failure.\n");
223 else
224 fprintf(stderr, Name ": bitmaps not supported by this kernel.\n");
225 return 1;
226 }
227 if (bmf.pathname[0]) {
228 if (strcmp(file,"none")==0) {
229 if (ioctl(fd, SET_BITMAP_FILE, -1)!= 0) {
230 fprintf(stderr, Name ": failed to remove bitmap %s\n",
231 bmf.pathname);
232 return 1;
233 }
234 return 0;
235 }
236 fprintf(stderr, Name ": %s already has a bitmap (%s)\n",
237 devname, bmf.pathname);
238 return 1;
239 }
240 if (ioctl(fd, GET_ARRAY_INFO, &array) != 0) {
241 fprintf(stderr, Name ": cannot get array status for %s\n", devname);
242 return 1;
243 }
244 if (array.state & (1<<MD_SB_BITMAP_PRESENT)) {
245 if (strcmp(file, "none")==0) {
246 array.state &= ~(1<<MD_SB_BITMAP_PRESENT);
247 if (ioctl(fd, SET_ARRAY_INFO, &array)!= 0) {
248 fprintf(stderr, Name ": failed to remove internal bitmap.\n");
249 return 1;
250 }
251 return 0;
252 }
253 fprintf(stderr, Name ": Internal bitmap already present on %s\n",
254 devname);
255 return 1;
256 }
257 if (array.level <= 0) {
258 fprintf(stderr, Name ": Bitmaps not meaningful with level %s\n",
259 map_num(pers, array.level)?:"of this array");
260 return 1;
261 }
262 bitmapsize = array.size;
263 bitmapsize <<= 1;
264 if (get_dev_size(fd, NULL, &array_size) &&
265 array_size > (0x7fffffffULL<<9)) {
266 /* Array is big enough that we cannot trust array.size
267 * try other approaches
268 */
269 bitmapsize = get_component_size(fd);
270 }
271 if (bitmapsize == 0) {
272 fprintf(stderr, Name ": Cannot reliably determine size of array to create bitmap - sorry.\n");
273 return 1;
274 }
275
276 if (array.level == 10) {
277 int ncopies = (array.layout&255)*((array.layout>>8)&255);
278 bitmapsize = bitmapsize * array.raid_disks / ncopies;
279 }
280
281 st = super_by_version(array.major_version, array.minor_version);
282 if (!st) {
283 fprintf(stderr, Name ": Cannot understand version %d.%d\n",
284 array.major_version, array.minor_version);
285 return 1;
286 }
287 if (strcmp(file, "none") == 0) {
288 fprintf(stderr, Name ": no bitmap found on %s\n", devname);
289 return 1;
290 } else if (strcmp(file, "internal") == 0) {
291 int d;
292 for (d=0; d< st->max_devs; d++) {
293 mdu_disk_info_t disk;
294 char *dv;
295 disk.number = d;
296 if (ioctl(fd, GET_DISK_INFO, &disk) < 0)
297 continue;
298 if (disk.major == 0 &&
299 disk.minor == 0)
300 continue;
301 if ((disk.state & (1<<MD_DISK_SYNC))==0)
302 continue;
303 dv = map_dev(disk.major, disk.minor, 1);
304 if (dv) {
305 void *super;
306 int fd2 = dev_open(dv, O_RDWR);
307 if (fd2 < 0)
308 continue;
309 if (st->ss->load_super(st, fd2, &super, NULL)==0) {
310 if (st->ss->add_internal_bitmap(
311 st, super,
312 &chunk, delay, write_behind,
313 bitmapsize, 0, major)
314 )
315 st->ss->write_bitmap(st, fd2, super);
316 else {
317 fprintf(stderr, Name ": failed to create internal bitmap - chunksize problem.\n");
318 close(fd2);
319 return 1;
320 }
321 }
322 close(fd2);
323 }
324 }
325 array.state |= (1<<MD_SB_BITMAP_PRESENT);
326 if (ioctl(fd, SET_ARRAY_INFO, &array)!= 0) {
327 fprintf(stderr, Name ": failed to set internal bitmap.\n");
328 return 1;
329 }
330 } else {
331 int uuid[4];
332 int bitmap_fd;
333 int d;
334 int max_devs = st->max_devs;
335 void *super = NULL;
336
337 /* try to load a superblock */
338 for (d=0; d<max_devs; d++) {
339 mdu_disk_info_t disk;
340 char *dv;
341 int fd2;
342 disk.number = d;
343 if (ioctl(fd, GET_DISK_INFO, &disk) < 0)
344 continue;
345 if ((disk.major==0 && disk.minor==0) ||
346 (disk.state & (1<<MD_DISK_REMOVED)))
347 continue;
348 dv = map_dev(disk.major, disk.minor, 1);
349 if (!dv) continue;
350 fd2 = dev_open(dv, O_RDONLY);
351 if (fd2 >= 0 &&
352 st->ss->load_super(st, fd2, &super, NULL) == 0) {
353 close(fd2);
354 st->ss->uuid_from_super(uuid, super);
355 break;
356 }
357 close(fd2);
358 }
359 if (d == max_devs) {
360 fprintf(stderr, Name ": cannot find UUID for array!\n");
361 return 1;
362 }
363 if (CreateBitmap(file, force, (char*)uuid, chunk,
364 delay, write_behind, bitmapsize, major)) {
365 return 1;
366 }
367 bitmap_fd = open(file, O_RDWR);
368 if (bitmap_fd < 0) {
369 fprintf(stderr, Name ": weird: %s cannot be opened\n",
370 file);
371 return 1;
372 }
373 if (ioctl(fd, SET_BITMAP_FILE, bitmap_fd) < 0) {
374 fprintf(stderr, Name ": Cannot set bitmap file for %s: %s\n",
375 devname, strerror(errno));
376 return 1;
377 }
378 }
379
380 return 0;
381 }
382
383
384 /*
385 * When reshaping an array we might need to backup some data.
386 * This is written to all spares with a 'super_block' describing it.
387 * The superblock goes 4K from the end of the used space on the
388 * device.
389 * It is written after the backup is complete.
390 * It has the following structure.
391 */
392
/*
 * Descriptor stored alongside a critical-section backup.
 * The 64-bit fields and the checksum are stored little-endian
 * (they are filled through __cpu_to_le64 / __cpu_to_le32).
 * Field order and sizes define the stored layout and must not change.
 */
struct mdp_backup_super {
	char	magic[16];	/* the 16 bytes "md_backup_data-1", no terminator */
	__u8	set_uuid[16];	/* UUID of the array the backup belongs to */
	__u64	mtime;		/* time the backup was taken */

	/* The next three are counted in 512-byte sectors. */
	__u64	devstart;	/* where the saved data starts on this device */
	__u64	arraystart;	/* array address the saved data was read from */
	__u64	length;		/* amount of data saved */

	__u32	sb_csum;	/* bsb_csum() over all bytes before this field */
};
403
/*
 * bsb_csum - checksum used for struct mdp_backup_super.
 *
 * buf: start of the bytes to cover.
 * len: number of iterations to run (the callers pass the offset of
 *      the sb_csum field).
 *
 * Returns the checksum converted with __cpu_to_le32().
 *
 * NOTE: each iteration folds in buf[0], not buf[i], so the result
 * depends only on the first byte and on len.  That looks unintended,
 * but the writer (Grow_reshape) and the reader (Grow_restart) both go
 * through this function, so "fixing" it would make backups written
 * with the current formula fail verification.  It is kept as is.
 *
 * The accumulation is done in unsigned arithmetic: with a signed int
 * the repeated "<< 3" overflows after about eleven iterations, which
 * is undefined behaviour.  Unsigned wrap-around yields the same bit
 * pattern that the signed version produced on two's-complement
 * targets where the overflow wrapped.
 */
int bsb_csum(char *buf, int len)
{
	unsigned int csum = 0;
	int i;

	for (i = 0; i < len; i++)
		csum = (csum << 3) + (unsigned int)buf[0];
	return __cpu_to_le32(csum);
}
412
413 int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
414 long long size,
415 int level, int layout, int chunksize, int raid_disks)
416 {
417 /* Make some changes in the shape of an array.
418 * The kernel must support the change.
419 * Different reshapes have subtly different meaning for different
420 * levels, so we need to check the current state of the array
421 * and go from there.
422 */
423 struct mdu_array_info_s array;
424 char *c;
425
426 struct mdp_backup_super bsb;
427 struct supertype *st;
428
429 int nlevel, olevel;
430 int nchunk, ochunk;
431 int nlayout, olayout;
432 int ndisks, odisks;
433 int ndata, odata;
434 unsigned long long nstripe, ostripe, last_block;
435 int *fdlist;
436 unsigned long long *offsets;
437 int d, i, spares;
438 int nrdisks;
439 int err;
440 void *super = NULL;
441
442 struct sysarray *sra;
443 struct sysdev *sd;
444
445 if (ioctl(fd, GET_ARRAY_INFO, &array) < 0) {
446 fprintf(stderr, Name ": %s is not an active md array - aborting\n",
447 devname);
448 return 1;
449 }
450 c = map_num(pers, array.level);
451 if (c == NULL) c = "-unknown-";
452 switch(array.level) {
453 default: /* raid0, linear, multipath cannot be reconfigured */
454 fprintf(stderr, Name ": %s array %s cannot be reshaped.\n",
455 c, devname);
456 return 1;
457
458 case LEVEL_FAULTY: /* only 'layout' change is permitted */
459
460 if (size >= 0) {
461 fprintf(stderr, Name ": %s: Cannot change size of a 'faulty' array\n",
462 devname);
463 return 1;
464 }
465 if (level != UnSet && level != LEVEL_FAULTY) {
466 fprintf(stderr, Name ": %s: Cannot change RAID level of a 'faulty' array\n",
467 devname);
468 return 1;
469 }
470 if (chunksize || raid_disks) {
471 fprintf(stderr, Name ": %s: Cannot change chunksize or disks of a 'faulty' array\n",
472 devname);
473 return 1;
474 }
475 if (layout == UnSet)
476 return 0; /* nothing to do.... */
477
478 array.layout = layout;
479 if (ioctl(fd, SET_ARRAY_INFO, &array) != 0) {
480 fprintf(stderr, Name ": Cannot set layout for %s: %s\n",
481 devname, strerror(errno));
482 return 1;
483 }
484 if (!quiet)
485 printf("layout for %s set to %d\n", devname, array.layout);
486 return 0;
487
488 case 1: /* raid_disks and size can each be changed. They are independant */
489
490 if (level != UnSet && level != 1) {
491 fprintf(stderr, Name ": %s: Cannot change RAID level of a RAID1 array.\n",
492 devname);
493 return 1;
494 }
495 if (chunksize || layout != UnSet) {
496 fprintf(stderr, Name ": %s: Cannot change chunk size of layout for a RAID1 array.\n",
497 devname);
498 return 1;
499 }
500
501 /* Each can trigger a resync/recovery which will block the
502 * other from happening. Later we could block
503 * resync for the duration via 'sync_action'...
504 */
505 if (raid_disks > 0) {
506 array.raid_disks = raid_disks;
507 if (ioctl(fd, SET_ARRAY_INFO, &array) != 0) {
508 fprintf(stderr, Name ": Cannot set raid-devices for %s: %s\n",
509 devname, strerror(errno));
510 return 1;
511 }
512 }
513 if (size >= 0) {
514 array.size = size;
515 if (ioctl(fd, SET_ARRAY_INFO, &array) != 0) {
516 fprintf(stderr, Name ": Cannot set device size for %s: %s\n",
517 devname, strerror(errno));
518 return 1;
519 }
520 }
521 return 0;
522
523 case 4:
524 case 5:
525 case 6:
526 st = super_by_version(array.major_version,
527 array.minor_version);
528 /* size can be changed independently.
529 * layout/chunksize/raid_disks/level can be changed
530 * though the kernel may not support it all.
531 * If 'suspend_lo' is not present in devfs, then
532 * these cannot be changed.
533 */
534 if (size >= 0) {
535 /* Cannot change other details as well.. */
536 if (layout != UnSet ||
537 chunksize != 0 ||
538 raid_disks != 0 ||
539 level != UnSet) {
540 fprintf(stderr, Name ": %s: Cannot change shape as well as size of a %s array.\n",
541 devname, c);
542 return 1;
543 }
544 array.size = size;
545 if (ioctl(fd, SET_ARRAY_INFO, &array) != 0) {
546 fprintf(stderr, Name ": Cannot set device size/shape for %s: %s\n",
547 devname, strerror(errno));
548 return 1;
549 }
550 return 0;
551 }
552 /* Ok, just change the shape. This can be awkward.
553 * There are three possibilities.
554 * 1/ The array will shrink. We don't support this
555 * possibility. Maybe one day...
556 * 2/ The array will not change size. This is easy enough
557 * to do, but not reliably. If the process is aborted
558 * the array *will* be corrupted. So maybe we can allow
559 * this but only if the user is really certain. e.g.
560 * --really-risk-everything
561 * 3/ The array will grow. This can be reliably achieved.
562 * However the kernel's restripe routines will cheerfully
563 * overwrite some early data before it is safe. So we
564 * need to make a backup of the early parts of the array
565 * and be ready to restore it if rebuild aborts very early.
566 *
567 * We backup data by writing it to all spares (there must be
568 * at least 1, so even raid6->raid5 requires a spare to be
569 * present).
570 *
571 * So: we enumerate the devices in the array and
572 * make sure we can open all of them.
573 * Then we freeze the early part of the array and
574 * backup to the various spares.
575 * Then we request changes and start the reshape.
576 * Monitor progress until it has passed the danger zone.
577 * and finally invalidate the copied data and unfreeze the
578 * start of the array.
579 *
580 * Before we can do this we need to decide:
581 * - will the array grow? Just calculate size
582 * - how much needs to be saved: count stripes.
583 * - where to save data... good question.
584 *
585 */
586 nlevel = olevel = array.level;
587 nchunk = ochunk = array.chunk_size;
588 nlayout = olayout = array.layout;
589 ndisks = odisks = array.raid_disks;
590
591 if (level != UnSet) nlevel = level;
592 if (chunksize) nchunk = chunksize;
593 if (layout != UnSet) nlayout = layout;
594 if (raid_disks) ndisks = raid_disks;
595
596 odata = odisks-1;
597 if (olevel == 6) odata--; /* number of data disks */
598 ndata = ndisks-1;
599 if (nlevel == 6) ndata--;
600
601 if (ndata < odata) {
602 fprintf(stderr, Name ": %s: Cannot reduce number of data disks (yet).\n",
603 devname);
604 return 1;
605 }
606 if (ndata == odata) {
607 fprintf(stderr, Name ": %s: Cannot reshape array without increasing size (yet).\n",
608 devname);
609 return 1;
610 }
611 /* Well, it is growing... so how much do we need to backup.
612 * Need to backup a full number of new-stripes, such that the
613 * last one does not over-write any place that it would be read
614 * from
615 */
616 nstripe = ostripe = 0;
617 while (nstripe >= ostripe) {
618 nstripe += nchunk/512;
619 last_block = nstripe * ndata;
620 ostripe = last_block / odata / (ochunk/512) * (ochunk/512);
621 }
622 printf("mdadm: Need to backup %lluK of critical section..\n", last_block/2);
623
624 sra = sysfs_read(fd, 0,
625 GET_COMPONENT|GET_DEVS|GET_OFFSET|GET_STATE|
626 GET_CACHE);
627 if (!sra) {
628 fprintf(stderr, Name ": %s: Cannot get array details from sysfs\n",
629 devname);
630 return 1;
631 }
632
633 if (last_block >= sra->component_size/2) {
634 fprintf(stderr, Name ": %s: Something wrong - reshape aborted\n",
635 devname);
636 return 1;
637 }
638 if (sra->spares == 0 && backup_file == NULL) {
639 fprintf(stderr, Name ": %s: Cannot grow - need a spare or backup-file to backup critical section\n",
640 devname);
641 return 1;
642 }
643
644 nrdisks = array.nr_disks + sra->spares;
645 /* Now we need to open all these devices so we can read/write.
646 */
647 fdlist = malloc((1+nrdisks) * sizeof(int));
648 offsets = malloc((1+nrdisks) * sizeof(offsets[0]));
649 if (!fdlist || !offsets) {
650 fprintf(stderr, Name ": malloc failed: grow aborted\n");
651 return 1;
652 }
653 for (d=0; d <= nrdisks; d++)
654 fdlist[d] = -1;
655 d = array.raid_disks;
656 for (sd = sra->devs; sd; sd=sd->next) {
657 if (sd->state & (1<<MD_DISK_FAULTY))
658 continue;
659 if (sd->state & (1<<MD_DISK_SYNC)) {
660 char *dn = map_dev(sd->major, sd->minor, 1);
661 fdlist[sd->role] = dev_open(dn, O_RDONLY);
662 offsets[sd->role] = sd->offset;
663 if (fdlist[sd->role] < 0) {
664 fprintf(stderr, Name ": %s: cannot open component %s\n",
665 devname, dn?dn:"-unknown-");
666 goto abort;
667 }
668 } else {
669 /* spare */
670 char *dn = map_dev(sd->major, sd->minor, 1);
671 fdlist[d] = dev_open(dn, O_RDWR);
672 offsets[d] = sd->offset;
673 if (fdlist[d]<0) {
674 fprintf(stderr, Name ": %s: cannot open component %s\n",
675 devname, dn?dn:"-unknown");
676 goto abort;
677 }
678 d++;
679 }
680 }
681 for (i=0 ; i<array.raid_disks; i++)
682 if (fdlist[i] < 0) {
683 fprintf(stderr, Name ": %s: failed to find device %d. Array might be degraded.\n"
684 " --grow aborted\n", devname, i);
685 goto abort;
686 }
687 spares = sra->spares;
688 if (backup_file) {
689 fdlist[d] = open(backup_file, O_RDWR|O_CREAT|O_EXCL, 0600);
690 if (fdlist[d] < 0) {
691 fprintf(stderr, Name ": %s: cannot create backup file %s: %s\n",
692 devname, backup_file, strerror(errno));
693 goto abort;
694 }
695 offsets[d] = 8;
696 d++;
697 spares++;
698 }
699 if (fdlist[array.raid_disks] < 0) {
700 fprintf(stderr, Name ": %s: failed to find a spare and no backup-file given - --grow aborted\n",
701 devname);
702 goto abort;
703 }
704
705 /* Find a superblock */
706 if (st->ss->load_super(st, fdlist[0], &super, NULL)) {
707 fprintf(stderr, Name ": %s: Cannot find a superblock\n",
708 devname);
709 goto abort;
710 }
711
712
713 memcpy(bsb.magic, "md_backup_data-1", 16);
714 st->ss->uuid_from_super((int*)&bsb.set_uuid, super);
715 bsb.mtime = __cpu_to_le64(time(0));
716 bsb.arraystart = 0;
717 bsb.length = __cpu_to_le64(last_block);
718
719 /* Decide offset for the backup, llseek the spares, and write
720 * a leading superblock 4K earlier.
721 */
722 for (i=array.raid_disks; i<d; i++) {
723 char buf[4096];
724 if (i==d-1 && backup_file) {
725 /* This is the backup file */
726 offsets[i] = 8;
727 } else
728 offsets[i] += sra->component_size - last_block - 8;
729 if (lseek64(fdlist[i], (offsets[i]<<9) - 4096, 0)
730 != (offsets[i]<<9) - 4096) {
731 fprintf(stderr, Name ": could not seek...\n");
732 goto abort;
733 }
734 memset(buf, 0, sizeof(buf));
735 bsb.devstart = __cpu_to_le64(offsets[i]);
736 bsb.sb_csum = bsb_csum((char*)&bsb, ((char*)&bsb.sb_csum)-((char*)&bsb));
737 memcpy(buf, &bsb, sizeof(bsb));
738 if (write(fdlist[i], buf, 4096) != 4096) {
739 fprintf(stderr, Name ": could not write leading superblock\n");
740 goto abort;
741 }
742 }
743 array.level = nlevel;
744 array.raid_disks = ndisks;
745 array.chunk_size = nchunk;
746 array.layout = nlayout;
747 if (ioctl(fd, SET_ARRAY_INFO, &array) != 0) {
748 if (errno == ENOSPC) {
749 /* stripe cache is not big enough.
750 * It needs to be 4 times chunksize_size,
751 * and we assume pagesize is 4K
752 */
753 if (sra->cache_size < 4 * (nchunk/4096)) {
754 sysfs_set_num(sra, NULL,
755 "stripe_cache_size",
756 4 * (nchunk/4096) +1);
757 if (ioctl(fd, SET_ARRAY_INFO,
758 &array) == 0)
759 goto ok;
760 }
761 }
762 fprintf(stderr, Name ": Cannot set device size/shape for %s: %s\n",
763 devname, strerror(errno));
764 goto abort;
765 }
766 ok: ;
767
768 /* suspend the relevant region */
769 sysfs_set_num(sra, NULL, "suspend_hi", 0); /* just in case */
770 if (sysfs_set_num(sra, NULL, "suspend_lo", 0) < 0 ||
771 sysfs_set_num(sra, NULL, "suspend_hi", last_block) < 0) {
772 fprintf(stderr, Name ": %s: failed to suspend device.\n",
773 devname);
774 goto abort_resume;
775 }
776
777
778 err = save_stripes(fdlist, offsets,
779 odisks, ochunk, olevel, olayout,
780 spares, fdlist+odisks,
781 0ULL, last_block*512);
782
783 /* abort if there was an error */
784 if (err < 0) {
785 fprintf(stderr, Name ": %s: failed to save critical region\n",
786 devname);
787 goto abort_resume;
788 }
789
790 for (i=odisks; i<d ; i++) {
791 bsb.devstart = __cpu_to_le64(offsets[i]);
792 bsb.sb_csum = bsb_csum((char*)&bsb, ((char*)&bsb.sb_csum)-((char*)&bsb));
793 if (lseek64(fdlist[i], (offsets[i]+last_block)<<9, 0) < 0 ||
794 write(fdlist[i], &bsb, sizeof(bsb)) != sizeof(bsb) ||
795 fsync(fdlist[i]) != 0) {
796 fprintf(stderr, Name ": %s: fail to save metadata for critical region backups.\n",
797 devname);
798 goto abort_resume;
799 }
800 }
801
802 /* start the reshape happening */
803 if (sysfs_set_str(sra, NULL, "sync_action", "reshape") < 0) {
804 fprintf(stderr, Name ": %s: failed to initiate reshape\n",
805 devname);
806 goto abort_resume;
807 }
808 /* wait for reshape to pass the critical region */
809 while(1) {
810 unsigned long long comp;
811 if (sysfs_get_ll(sra, NULL, "sync_completed", &comp)<0) {
812 sleep(5);
813 break;
814 }
815 if (comp >= nstripe)
816 break;
817 sleep(1);
818 }
819
820 /* invalidate superblocks */
821 memset(&bsb, 0, sizeof(bsb));
822 for (i=odisks; i<d ; i++) {
823 lseek64(fdlist[i], (offsets[i]+last_block)<<9, 0);
824 if (write(fdlist[i], &bsb, sizeof(bsb)) < 0) {
825 fprintf(stderr, Name ": %s: failed to invalidate metadata for raid disk %d\n",
826 devname, i);
827 }
828 }
829
830 /* unsuspend. */
831 sysfs_set_num(sra, NULL, "suspend_lo", last_block);
832
833 for (i=0; i<d; i++)
834 if (fdlist[i] >= 0)
835 close(fdlist[i]);
836 free(fdlist);
837 free(offsets);
838 if (backup_file)
839 unlink(backup_file);
840
841 printf(Name ": ... critical section passed.\n");
842 break;
843 }
844 return 0;
845
846
847 abort_resume:
848 sysfs_set_num(sra, NULL, "suspend_lo", last_block);
849 abort:
850 for (i=0; i<array.nr_disks; i++)
851 if (fdlist[i] >= 0)
852 close(fdlist[i]);
853 free(fdlist);
854 free(offsets);
855 if (backup_file)
856 unlink(backup_file);
857 return 1;
858
859 }
860
861 /*
862 * If any spare contains md_backup_data-1 which is recent wrt mtime,
863 * write that data into the array and update the super blocks with
864 * the new reshape_progress
865 */
866 int Grow_restart(struct supertype *st, struct mdinfo *info, int *fdlist, int cnt, char *backup_file)
867 {
868 int i, j;
869 int old_disks;
870 unsigned long long *offsets;
871 unsigned long long nstripe, ostripe, last_block;
872 int ndata, odata;
873
874 if (info->delta_disks < 0)
875 return 1; /* cannot handle a shrink */
876 if (info->new_level != info->array.level ||
877 info->new_layout != info->array.layout ||
878 info->new_chunk != info->array.chunk_size)
879 return 1; /* Can only handle change in disks */
880
881 old_disks = info->array.raid_disks - info->delta_disks;
882
883 for (i=old_disks-(backup_file?1:0); i<cnt; i++) {
884 void *super = NULL;
885 struct mdinfo dinfo;
886 struct mdp_backup_super bsb;
887 char buf[4096];
888 int fd;
889
890 /* This was a spare and may have some saved data on it.
891 * Load the superblock, find and load the
892 * backup_super_block.
893 * If either fail, go on to next device.
894 * If the backup contains no new info, just return
895 * else restore data and update all superblocks
896 */
897 if (i == old_disks-1) {
898 fd = open(backup_file, O_RDONLY);
899 if (fd<0)
900 continue;
901 } else {
902 fd = fdlist[i];
903 if (fd < 0)
904 continue;
905 if (st->ss->load_super(st, fd, &super, NULL))
906 continue;
907
908 st->ss->getinfo_super(&dinfo, super);
909 free(super); super = NULL;
910 if (lseek64(fd,
911 (dinfo.data_offset + dinfo.component_size - 8) <<9,
912 0) < 0)
913 continue; /* Cannot seek */
914 }
915 if (read(fd, &bsb, sizeof(bsb)) != sizeof(bsb))
916 continue; /* Cannot read */
917 if (memcmp(bsb.magic, "md_backup_data-1", 16) != 0)
918 continue;
919 if (bsb.sb_csum != bsb_csum((char*)&bsb, ((char*)&bsb.sb_csum)-((char*)&bsb)))
920 continue; /* bad checksum */
921 if (memcmp(bsb.set_uuid,info->uuid, 16) != 0)
922 continue; /* Wrong uuid */
923
924 if (info->array.utime > __le64_to_cpu(bsb.mtime) + 3600 ||
925 info->array.utime < __le64_to_cpu(bsb.mtime))
926 continue; /* time stamp is too bad */
927
928 if (__le64_to_cpu(bsb.arraystart) != 0)
929 continue; /* Can only handle backup from start of array */
930 if (__le64_to_cpu(bsb.length) <
931 info->reshape_progress)
932 continue; /* No new data here */
933
934 if (lseek64(fd, __le64_to_cpu(bsb.devstart)*512, 0)< 0)
935 continue; /* Cannot seek */
936 /* There should be a duplicate backup superblock 4k before here */
937 if (lseek64(fd, -4096, 1) < 0 ||
938 read(fd, buf, 4096) != 4096 ||
939 memcmp(buf, &bsb, sizeof(bsb)) != 0)
940 continue; /* Cannot find leading superblock */
941
942 /* Now need the data offsets for all devices. */
943 offsets = malloc(sizeof(*offsets)*info->array.raid_disks);
944 for(j=0; j<info->array.raid_disks; j++) {
945 if (fdlist[j] < 0)
946 continue;
947 if (st->ss->load_super(st, fdlist[j], &super, NULL))
948 /* FIXME should be this be an error */
949 continue;
950 st->ss->getinfo_super(&dinfo, super);
951 free(super); super = NULL;
952 offsets[j] = dinfo.data_offset;
953 }
954 printf(Name ": restoring critical section\n");
955
956 if (restore_stripes(fdlist, offsets,
957 info->array.raid_disks,
958 info->new_chunk,
959 info->new_level,
960 info->new_layout,
961 fd, __le64_to_cpu(bsb.devstart)*512,
962 0, __le64_to_cpu(bsb.length)*512)) {
963 /* didn't succeed, so giveup */
964 return 1;
965 }
966
967 /* Ok, so the data is restored. Let's update those superblocks. */
968
969 for (j=0; j<info->array.raid_disks; j++) {
970 if (fdlist[j] < 0) continue;
971 if (st->ss->load_super(st, fdlist[j], &super, NULL))
972 continue;
973 st->ss->getinfo_super(&dinfo, super);
974 dinfo.reshape_progress = __le64_to_cpu(bsb.length);
975 st->ss->update_super(&dinfo, super, "_reshape_progress",NULL,0, 0, NULL);
976 st->ss->store_super(st, fdlist[j], super);
977 free(super);
978 }
979
980 /* And we are done! */
981 return 0;
982 }
983 /* Didn't find any backup data, try to see if any
984 * was needed.
985 */
986 nstripe = ostripe = 0;
987 odata = info->array.raid_disks - info->delta_disks - 1;
988 if (info->array.level == 6) odata--; /* number of data disks */
989 ndata = info->array.raid_disks - 1;
990 if (info->new_level == 6) ndata--;
991 last_block = 0;
992 while (nstripe >= ostripe) {
993 nstripe += info->new_chunk / 512;
994 last_block = nstripe * ndata;
995 ostripe = last_block / odata / (info->array.chunk_size/512) *
996 (info->array.chunk_size/512);
997 }
998
999 if (info->reshape_progress >= last_block)
1000 return 0;
1001 /* needed to recover critical section! */
1002 return 1;
1003 }