Add ANNOUNCE-2.4-pre1
[thirdparty/mdadm.git] / Grow.c
CommitLineData
e5329c37
NB
1/*
2 * mdadm - manage Linux "md" devices aka RAID arrays.
3 *
4 * Copyright (C) 2001-2004 Neil Brown <neilb@cse.unsw.edu.au>
5 *
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 *
21 * Author: Neil Brown
22 * Email: <neilb@cse.unsw.edu.au>
23 * Paper: Neil Brown
24 * School of Computer Science and Engineering
25 * The University of New South Wales
26 * Sydney, 2052
27 * Australia
28 */
29#include "mdadm.h"
30#include "dlink.h"
31
32#if ! defined(__BIG_ENDIAN) && ! defined(__LITTLE_ENDIAN)
33#error no endian defined
34#endif
35#include "md_u.h"
36#include "md_p.h"
37
38int Grow_Add_device(char *devname, int fd, char *newdev)
39{
40 /* Add a device to an active array.
41 * Currently, just extend a linear array.
42 * This requires writing a new superblock on the
43 * new device, calling the kernel to add the device,
44 * and if that succeeds, update the superblock on
45 * all other devices.
46 * This means that we need to *find* all other devices.
47 */
4b1ac34b
NB
48 struct mdinfo info;
49
50 void *super = NULL;
e5329c37
NB
51 struct stat stb;
52 int nfd, fd2;
53 int d, nd;
82d9eba6 54 struct supertype *st = NULL;
e5329c37
NB
55
56
4b1ac34b 57 if (ioctl(fd, GET_ARRAY_INFO, &info.array) < 0) {
e5329c37
NB
58 fprintf(stderr, Name ": cannot get array info for %s\n", devname);
59 return 1;
60 }
61
82d9eba6
NB
62 st = super_by_version(info.array.major_version, info.array.minor_version);
63 if (!st) {
f9ce90ba
NB
64 fprintf(stderr, Name ": cannot handle arrays with superblock version %d\n", info.array.major_version);
65 return 1;
66 }
67
4b1ac34b 68 if (info.array.level != -1) {
e5329c37
NB
69 fprintf(stderr, Name ": can only add devices to linear arrays\n");
70 return 1;
71 }
72
73 nfd = open(newdev, O_RDWR|O_EXCL);
74 if (nfd < 0) {
75 fprintf(stderr, Name ": cannot open %s\n", newdev);
76 return 1;
77 }
78 fstat(nfd, &stb);
79 if ((stb.st_mode & S_IFMT) != S_IFBLK) {
80 fprintf(stderr, Name ": %s is not a block device!\n", newdev);
81 close(nfd);
82 return 1;
83 }
84 /* now check out all the devices and make sure we can read the superblock */
4b1ac34b 85 for (d=0 ; d < info.array.raid_disks ; d++) {
e5329c37
NB
86 mdu_disk_info_t disk;
87 char *dv;
88
89 disk.number = d;
90 if (ioctl(fd, GET_DISK_INFO, &disk) < 0) {
91 fprintf(stderr, Name ": cannot get device detail for device %d\n",
92 d);
93 return 1;
94 }
95 dv = map_dev(disk.major, disk.minor);
96 if (!dv) {
97 fprintf(stderr, Name ": cannot find device file for device %d\n",
98 d);
99 return 1;
100 }
101 fd2 = open(dv, O_RDWR);
102 if (!fd2) {
103 fprintf(stderr, Name ": cannot open device file %s\n", dv);
104 return 1;
105 }
4b1ac34b
NB
106 if (super) free(super);
107 super= NULL;
82d9eba6 108 if (st->ss->load_super(st, fd2, &super, NULL)) {
e5329c37
NB
109 fprintf(stderr, Name ": cannot find super block on %s\n", dv);
110 close(fd2);
111 return 1;
112 }
113 close(fd2);
114 }
115 /* Ok, looks good. Lets update the superblock and write it out to
116 * newdev.
117 */
118
4b1ac34b
NB
119 info.disk.number = d;
120 info.disk.major = major(stb.st_rdev);
121 info.disk.minor = minor(stb.st_rdev);
122 info.disk.raid_disk = d;
123 info.disk.state = (1 << MD_DISK_SYNC) | (1 << MD_DISK_ACTIVE);
82d9eba6 124 st->ss->update_super(&info, super, "grow", newdev, 0);
e5329c37 125
96395475 126 if (st->ss->store_super(st, nfd, super)) {
e5329c37
NB
127 fprintf(stderr, Name ": Cannot store new superblock on %s\n", newdev);
128 close(nfd);
129 return 1;
130 }
e5329c37 131 close(nfd);
4b1ac34b
NB
132
133 if (ioctl(fd, ADD_NEW_DISK, &info.disk) != 0) {
e5329c37
NB
134 fprintf(stderr, Name ": Cannot add new disk to this array\n");
135 return 1;
136 }
137 /* Well, that seems to have worked.
138 * Now go through and update all superblocks
139 */
140
4b1ac34b 141 if (ioctl(fd, GET_ARRAY_INFO, &info.array) < 0) {
e5329c37
NB
142 fprintf(stderr, Name ": cannot get array info for %s\n", devname);
143 return 1;
144 }
145
146 nd = d;
4b1ac34b 147 for (d=0 ; d < info.array.raid_disks ; d++) {
e5329c37
NB
148 mdu_disk_info_t disk;
149 char *dv;
150
151 disk.number = d;
152 if (ioctl(fd, GET_DISK_INFO, &disk) < 0) {
153 fprintf(stderr, Name ": cannot get device detail for device %d\n",
154 d);
155 return 1;
156 }
157 dv = map_dev(disk.major, disk.minor);
158 if (!dv) {
159 fprintf(stderr, Name ": cannot find device file for device %d\n",
160 d);
161 return 1;
162 }
163 fd2 = open(dv, O_RDWR);
164 if (fd2 < 0) {
165 fprintf(stderr, Name ": cannot open device file %s\n", dv);
166 return 1;
167 }
82d9eba6 168 if (st->ss->load_super(st, fd2, &super, NULL)) {
e5329c37
NB
169 fprintf(stderr, Name ": cannot find super block on %s\n", dv);
170 close(fd);
171 return 1;
172 }
4b1ac34b
NB
173 info.array.raid_disks = nd+1;
174 info.array.nr_disks = nd+1;
175 info.array.active_disks = nd+1;
176 info.array.working_disks = nd+1;
177 info.disk.number = nd;
178 info.disk.major = major(stb.st_rdev);
179 info.disk.minor = minor(stb.st_rdev);
180 info.disk.raid_disk = nd;
181 info.disk.state = (1 << MD_DISK_SYNC) | (1 << MD_DISK_ACTIVE);
82d9eba6 182 st->ss->update_super(&info, super, "grow", dv, 0);
4b1ac34b 183
96395475 184 if (st->ss->store_super(st, fd2, super)) {
e5329c37
NB
185 fprintf(stderr, Name ": Cannot store new superblock on %s\n", dv);
186 close(fd2);
187 return 1;
188 }
189 close(fd2);
190 }
191
192 return 0;
193}
f5e166fe 194
8fac0577 195int Grow_addbitmap(char *devname, int fd, char *file, int chunk, int delay, int write_behind, int force)
f5e166fe
NB
196{
197 /*
198 * First check that array doesn't have a bitmap
199 * Then create the bitmap
200 * Then add it
201 *
202 * For internal bitmaps, we need to check the version,
203 * find all the active devices, and write the bitmap block
204 * to all devices
205 */
206 mdu_bitmap_file_t bmf;
207 mdu_array_info_t array;
208 struct supertype *st;
dcec9ee5
NB
209 int major = BITMAP_MAJOR_HI;
210 int vers = md_get_version(fd);
8fac0577 211 unsigned long long bitmapsize, array_size;
dcec9ee5
NB
212
213 if (vers < 9003) {
214 major = BITMAP_MAJOR_HOSTENDIAN;
215#ifdef __BIG_ENDIAN
216 fprintf(stderr, Name ": Warning - bitmaps created on this kernel are not portable\n"
217 " between different architectured. Consider upgrading the Linux kernel.\n");
218#endif
219 }
f5e166fe
NB
220
221 if (ioctl(fd, GET_BITMAP_FILE, &bmf) != 0) {
353632d9 222 if (errno == ENOMEM)
f5e166fe
NB
223 fprintf(stderr, Name ": Memory allocation failure.\n");
224 else
225 fprintf(stderr, Name ": bitmaps not supported by this kernel.\n");
226 return 1;
227 }
228 if (bmf.pathname[0]) {
fe80f49b
NB
229 if (strcmp(file,"none")==0) {
230 if (ioctl(fd, SET_BITMAP_FILE, -1)!= 0) {
231 fprintf(stderr, Name ": failed to remove bitmap %s\n",
232 bmf.pathname);
233 return 1;
234 }
235 return 0;
236 }
f5e166fe
NB
237 fprintf(stderr, Name ": %s already has a bitmap (%s)\n",
238 devname, bmf.pathname);
239 return 1;
240 }
241 if (ioctl(fd, GET_ARRAY_INFO, &array) != 0) {
242 fprintf(stderr, Name ": cannot get array status for %s\n", devname);
243 return 1;
244 }
245 if (array.state & (1<<MD_SB_BITMAP_PRESENT)) {
fe80f49b
NB
246 if (strcmp(file, "none")==0) {
247 array.state &= ~(1<<MD_SB_BITMAP_PRESENT);
248 if (ioctl(fd, SET_ARRAY_INFO, &array)!= 0) {
249 fprintf(stderr, Name ": failed to remove internal bitmap.\n");
250 return 1;
251 }
252 return 0;
253 }
f5e166fe
NB
254 fprintf(stderr, Name ": Internal bitmap already present on %s\n",
255 devname);
256 return 1;
257 }
8fac0577
NB
258 bitmapsize = array.size;
259 bitmapsize <<= 1;
260#ifdef BLKGETSIZE64
261 if (ioctl(fd, BLKGETSIZE64, &array_size) == 0 &&
262 array_size > (0x7fffffffULL<<9)) {
263 /* Array is big enough that we cannot trust array.size
264 * try other approaches
265 */
266 bitmapsize = get_component_size(fd);
267 }
268#endif
269 if (bitmapsize == 0) {
270 fprintf(stderr, Name ": Cannot reliably determine size of array to create bitmap - sorry.\n");
271 return 1;
272 }
273
f9c25f1d
NB
274 if (array.level == 10) {
275 int ncopies = (array.layout&255)*(array.layout>>8);
276 bitmapsize = bitmapsize * array.raid_disks / ncopies;
277 }
278
f5e166fe
NB
279 st = super_by_version(array.major_version, array.minor_version);
280 if (!st) {
281 fprintf(stderr, Name ": Cannot understand version %d.%d\n",
282 array.major_version, array.minor_version);
283 return 1;
284 }
fe80f49b
NB
285 if (strcmp(file, "none") == 0) {
286 fprintf(stderr, Name ": no bitmap found on %s\n", devname);
287 return 1;
288 } else if (strcmp(file, "internal") == 0) {
f5e166fe 289 int d;
ea329559 290 for (d=0; d< st->max_devs; d++) {
f5e166fe
NB
291 mdu_disk_info_t disk;
292 char *dv;
293 disk.number = d;
294 if (ioctl(fd, GET_DISK_INFO, &disk) < 0)
295 continue;
296 if (disk.major == 0 &&
297 disk.minor == 0)
298 continue;
299 if ((disk.state & (1<<MD_DISK_SYNC))==0)
300 continue;
301 dv = map_dev(disk.major, disk.minor);
302 if (dv) {
303 void *super;
304 int fd2 = open(dv, O_RDWR);
305 if (fd2 < 0)
306 continue;
307 if (st->ss->load_super(st, fd2, &super, NULL)==0) {
21e92547 308 if (st->ss->add_internal_bitmap(st, super,
e86c9dd6 309 chunk, delay, write_behind,
21e92547
NB
310 bitmapsize, 0, major))
311 st->ss->write_bitmap(st, fd2, super);
312 else {
313 fprintf(stderr, Name ": failed to create internal bitmap - chunksize problem.\n");
314 close(fd2);
315 return 1;
316 }
f5e166fe
NB
317 }
318 close(fd2);
319 }
320 }
321 array.state |= (1<<MD_SB_BITMAP_PRESENT);
322 if (ioctl(fd, SET_ARRAY_INFO, &array)!= 0) {
323 fprintf(stderr, Name ": failed to set internal bitmap.\n");
324 return 1;
325 }
fe80f49b
NB
326 } else {
327 int uuid[4];
328 int bitmap_fd;
329 int d;
330 int max_devs = st->max_devs;
331 void *super = NULL;
332 if (chunk == UnSet)
333 chunk = DEFAULT_BITMAP_CHUNK;
334
335 /* try to load a superblock */
336 for (d=0; d<max_devs; d++) {
337 mdu_disk_info_t disk;
338 char *dv;
339 int fd2;
340 disk.number = d;
341 if (ioctl(fd, GET_DISK_INFO, &disk) < 0)
342 continue;
343 if ((disk.major==0 && disk.minor==0) ||
344 (disk.state & (1<<MD_DISK_REMOVED)))
345 continue;
346 dv = map_dev(disk.major, disk.minor);
347 if (!dv) continue;
348 fd2 = open(dv, O_RDONLY);
349 if (fd2 >= 0 &&
350 st->ss->load_super(st, fd2, &super, NULL) == 0) {
351 close(fd2);
352 st->ss->uuid_from_super(uuid, super);
353 break;
354 }
355 close(fd2);
356 }
357 if (d == max_devs) {
358 fprintf(stderr, Name ": cannot find UUID for array!\n");
359 return 1;
360 }
8fac0577 361 if (CreateBitmap(file, force, (char*)uuid, chunk,
f9c25f1d 362 delay, write_behind, bitmapsize, major)) {
fe80f49b
NB
363 return 1;
364 }
365 bitmap_fd = open(file, O_RDWR);
366 if (bitmap_fd < 0) {
8fac0577 367 fprintf(stderr, Name ": weird: %s cannot be opened\n",
fe80f49b
NB
368 file);
369 return 1;
370 }
371 if (ioctl(fd, SET_BITMAP_FILE, bitmap_fd) < 0) {
372 fprintf(stderr, Name ": Cannot set bitmap file for %s: %s\n",
373 devname, strerror(errno));
374 return 1;
375 }
376 }
f5e166fe
NB
377
378 return 0;
379}
380
e86c9dd6
NB
381
382/*
383 * When reshaping an array we might need to backup some data.
384 * This is written to all spares with a 'super_block' describing it.
385 * The superblock goes 8 sectors (4K) before the end of the used space
386 * on the device.
387 * It is written after the backup is complete.
388 * It has the following structure.
389 */
390
/* Describes a backup of the critical section that Grow_reshape writes
 * to each spare and Grow_restart looks for.  The 64-bit fields are
 * stored little-endian (written with __cpu_to_le64).
 */
struct mdp_backup_super {
	char	magic[16];  /* md_backup_data-1 */
	__u8	set_uuid[16]; /* uuid taken from the array's superblock */
	__u64	mtime;      /* time(0) when the backup was written */
	/* start/sizes in 512byte sectors */
	__u64	devstart;   /* where the saved data starts on this device */
	__u64	arraystart; /* array address of the saved data, always 0 here */
	__u64	length;     /* amount of data saved */
	__u32	sb_csum;    /* csum of preceding bytes. */
};
401
int bsb_csum(char *buf, int len)
{
	/* Checksum 'len' bytes starting at 'buf' and return the result
	 * converted with __cpu_to_le32, ready to be stored in or compared
	 * against mdp_backup_super.sb_csum.
	 *
	 * Every byte takes part in the sum.  Previously only buf[0] was
	 * added on each round, so corruption after the first byte went
	 * unnoticed.  NOTE: a backup superblock written by code with that
	 * older behaviour will no longer pass verification.
	 *
	 * The sum is accumulated in an unsigned int so that the shift
	 * wraps instead of overflowing a signed int.
	 */
	int i;
	unsigned int csum = 0;
	for (i=0; i<len; i++)
		csum = (csum<<3) + (unsigned int)(int)buf[i];
	return __cpu_to_le32(csum);
}
410
411int Grow_reshape(char *devname, int fd, int quiet,
412 long long size,
413 int level, int layout, int chunksize, int raid_disks)
414{
415 /* Make some changes in the shape of an array.
416 * The kernel must support the change.
417 * Different reshapes have subtly different meaning for different
418 * levels, so we need to check the current state of the array
419 * and go from there.
420 */
421 struct mdu_array_info_s array;
422 char *c;
423
424 struct mdp_backup_super bsb;
425 struct supertype *st;
426
427 int nlevel, olevel;
428 int nchunk, ochunk;
429 int nlayout, olayout;
430 int ndisks, odisks;
431 int ndata, odata;
432 unsigned long long nstripe, ostripe, last_block;
433 int *fdlist;
434 unsigned long long *offsets;
435 int d, i, spares;
436 int nrdisks;
437 int err;
438 void *super = NULL;
439
440 struct sysarray *sra;
441 struct sysdev *sd;
442
443 if (ioctl(fd, GET_ARRAY_INFO, &array) < 0) {
444 fprintf(stderr, Name ": %s is not an active md array - aborting\n",
445 devname);
446 return 1;
447 }
448 c = map_num(pers, array.level);
449 if (c == NULL) c = "-unknown-";
450 switch(array.level) {
451 default: /* raid0, linear, multipath cannot be reconfigured */
452 fprintf(stderr, Name ": %s array %s cannot be reshaped.\n",
453 c, devname);
454 return 1;
455
456 case LEVEL_FAULTY: /* only 'layout' change is permitted */
457
458 if (size >= 0) {
459 fprintf(stderr, Name ": %s: Cannot change size of a 'faulty' array\n",
460 devname);
461 return 1;
462 }
463 if (level != UnSet && level != LEVEL_FAULTY) {
464 fprintf(stderr, Name ": %s: Cannot change RAID level of a 'faulty' array\n",
465 devname);
466 return 1;
467 }
468 if (chunksize || raid_disks) {
469 fprintf(stderr, Name ": %s: Cannot change chunksize or disks of a 'faulty' array\n",
470 devname);
471 return 1;
472 }
473 if (layout == UnSet)
474 return 0; /* nothing to do.... */
475
476 array.layout = layout;
477 if (ioctl(fd, SET_ARRAY_INFO, &array) != 0) {
478 fprintf(stderr, Name ": Cannot set layout for %s: %s\n",
479 devname, strerror(errno));
480 return 1;
481 }
482 if (!quiet)
483 printf("layout for %s set to %d\n", devname, array.layout);
484 return 0;
485
486 case 1: /* raid_disks and size can each be changed. They are independant */
487
488 if (level != UnSet && level != 1) {
489 fprintf(stderr, Name ": %s: Cannot change RAID level of a RAID1 array.\n",
490 devname);
491 return 1;
492 }
493 if (chunksize || layout != UnSet) {
494 fprintf(stderr, Name ": %s: Cannot change chunk size of layout for a RAID1 array.\n",
495 devname);
496 return 1;
497 }
498
499 /* Each can trigger a resync/recovery which will block the
500 * other from happening. Later we could block
501 * resync for the duration via 'sync_action'...
502 */
503 if (raid_disks >= 0)
504 array.raid_disks = raid_disks;
505 if (size >= 0)
506 array.size = size;
507 if (ioctl(fd, SET_ARRAY_INFO, &array) != 0) {
508 fprintf(stderr, Name ": Cannot set device size/shape for %s: %s\n",
509 devname, strerror(errno));
510 return 1;
511 }
512 return 0;
513
514 case 4:
515 case 5:
516 case 6:
517 st = super_by_version(array.major_version,
518 array.minor_version);
519 /* size can be changed independantly.
520 * layout/chunksize/raid_disks/level can be changed
521 * though the kernel may not support it all.
522 * If 'suspend_lo' is not present in devfs, then
523 * these cannot be changed.
524 */
525 if (size >= 0) {
526 /* Cannot change other details as well.. */
527 if (layout != UnSet ||
528 chunksize != 0 ||
529 raid_disks != 0 ||
530 level != UnSet) {
531 fprintf(stderr, Name ": %s: Cannot change shape as well as size of a %s array.\n",
532 devname, c);
533 return 1;
534 }
535 array.size = size;
536 if (ioctl(fd, SET_ARRAY_INFO, &array) != 0) {
537 fprintf(stderr, Name ": Cannot set device size/shape for %s: %s\n",
538 devname, strerror(errno));
539 return 1;
540 }
541 return 0;
542 }
543 /* Ok, just change the shape. This can be awkward.
544 * There are three possibilities.
545 * 1/ The array will shrink. We don't support this
546 * possibility. Maybe one day...
547 * 2/ The array will not change size. This is easy enough
548 * to do, but not reliably. If the process is aborted
549 * the array *will* be corrupted. So maybe we can allow
550 * this but only if the user is really certain. e.g.
551 * --really-risk-everything
552 * 3/ The array will grow. This can be reliably achieved.
553 * However the kernel's restripe routines will cheerfully
554 * overwrite some early data before it is safe. So we
555 * need to make a backup of the early parts of the array
556 * and be ready to restore it if rebuild aborts very early.
557 *
558 * We backup data by writing it to all spares (there must be
559 * at least 1, so even raid6->raid5 requires a spare to be
560 * present).
561 *
562 * So: we enumerate the devices in the array and
563 * make sure we can open all of them.
564 * Then we freeze the early part of the array and
565 * backup to the various spares.
566 * Then we request changes and start the reshape.
567 * Monitor progress until it has passed the danger zone.
568 * and finally invalidate the copied data and unfreeze the
569 * start of the array.
570 *
571 * Before we can do this we need to decide:
572 * - will the array grow? Just calculate size
573 * - how much needs to be saved: count stripes.
574 * - where to save data... good question.
575 *
576 */
577 nlevel = olevel = array.level;
578 nchunk = ochunk = array.chunk_size;
579 nlayout = olayout = array.layout;
580 ndisks = odisks = array.raid_disks;
581
582 if (level != UnSet) nlevel = level;
583 if (chunksize) nchunk = chunksize;
584 if (layout != UnSet) nlayout = layout;
585 if (raid_disks) ndisks = raid_disks;
586
587 odata = odisks-1;
588 if (olevel == 6) odata--; /* number of data disks */
589 ndata = ndisks-1;
590 if (nlevel == 6) ndata--;
591
592 if (ndata < odata) {
593 fprintf(stderr, Name ": %s: Cannot reduce number of data disks (yet).\n",
594 devname);
595 return 1;
596 }
597 if (ndata == odata) {
598 fprintf(stderr, Name ": %s: Cannot reshape array without increasing size (yet).\n",
599 devname);
600 return 1;
601 }
602 /* Well, it is growing... so how much do we need to backup.
603 * Need to backup a full number of new-stripes, such that the
604 * last one does not over-write any place that it would be read
605 * from
606 */
607 nstripe = ostripe = 0;
353632d9 608 while (nstripe >= ostripe) {
e86c9dd6
NB
609 nstripe += nchunk/512;
610 last_block = nstripe * ndata;
353632d9 611 ostripe = last_block / odata / (ochunk/512) * (ochunk/512);
e86c9dd6 612 }
353632d9 613 printf("mdadm: Need to backup %lluK of critical section..\n", last_block/2);
e86c9dd6
NB
614
615 sra = sysfs_read(fd, 0,
616 GET_COMPONENT|GET_DEVS|GET_OFFSET|GET_STATE);
617 if (!sra) {
618 fprintf(stderr, Name ": %s: Cannot get array details from sysfs\n",
619 devname);
620 return 1;
621 }
622
623 if (last_block >= sra->component_size/2) {
624 fprintf(stderr, Name ": %s: Something wrong - reshape aborted\n",
625 devname);
626 return 1;
627 }
353632d9
NB
628 if (sra->spares == 0) {
629 fprintf(stderr, Name ": %s: Cannot grow - need a spare to backup critical section\n",
630 devname);
631 return 1;
632 }
e86c9dd6
NB
633
634 nrdisks = array.nr_disks + sra->spares;
635 /* Now we need to open all these devices so we can read/write.
636 */
637 fdlist = malloc(nrdisks * sizeof(int));
638 offsets = malloc(nrdisks * sizeof(offsets[0]));
639 if (!fdlist || !offsets) {
640 fprintf(stderr, Name ": malloc failed: grow aborted\n");
641 return 1;
642 }
643 for (d=0; d< nrdisks; d++)
644 fdlist[d] = -1;
645 d = array.raid_disks;
646 for (sd = sra->devs; sd; sd=sd->next) {
647 if (sd->state & (1<<MD_DISK_FAULTY))
648 continue;
649 if (sd->state & (1<<MD_DISK_SYNC)) {
650 char *dn = map_dev(sd->major, sd->minor);
651 fdlist[sd->role] = open(dn, O_RDONLY);
652 offsets[sd->role] = sd->offset;
653 if (fdlist[sd->role] < 0) {
654 fprintf(stderr, Name ": %s: cannot open component %s\n",
655 devname, dn);
656 goto abort;
657 }
658 } else {
659 /* spare */
660 char *dn = map_dev(sd->major, sd->minor);
661 fdlist[d] = open(dn, O_RDWR);
662 offsets[d] = sd->offset;
663 if (fdlist[d]<0) {
664 fprintf(stderr, Name ": %s: cannot open component %s\n",
665 devname, dn);
666 goto abort;
667 }
668 d++;
669 }
670 }
671 for (i=0 ; i<array.raid_disks; i++)
672 if (fdlist[i] < 0) {
673 fprintf(stderr, Name ": %s: failed to find device %d. Array might be degraded.\n"
674 " --grow aborted\n", devname, i);
675 goto abort;
676 }
677 if (fdlist[array.raid_disks] < 0) {
678 fprintf(stderr, Name ": %s: failed to find a spare - --grow aborted\n",
679 devname);
680 goto abort;
681 }
682
683 /* Find a superblock */
684 if (st->ss->load_super(st, fdlist[0], &super, NULL)) {
685 fprintf(stderr, Name ": %s: Cannot find a superblock\n",
686 devname);
687 goto abort;
688 }
689
690 spares = sra->spares;
691
692 /* Decide offset for the backup and llseek the spares */
693 for (i=array.raid_disks; i<d; i++) {
694 offsets[i] += sra->component_size - last_block - 8;
695 if (lseek64(fdlist[i], offsets[i]<<9, 0) != offsets[i]<<9) {
696 fprintf(stderr, Name ": could not seek...\n");
697 goto abort;
698 }
699 }
700 array.level = nlevel;
701 array.raid_disks = ndisks;
702 array.chunk_size = nchunk;
703 array.layout = nlayout;
704 if (ioctl(fd, SET_ARRAY_INFO, &array) != 0) {
705 fprintf(stderr, Name ": Cannot set device size/shape for %s: %s\n",
706 devname, strerror(errno));
707 goto abort;
708 }
709
710 /* suspend the relevant region */
711 sysfs_set_num(sra, NULL, "suspend_hi", 0); /* just in case */
712 if (sysfs_set_num(sra, NULL, "suspend_lo", 0) < 0 ||
713 sysfs_set_num(sra, NULL, "suspend_hi", last_block) < 0) {
714 fprintf(stderr, Name ": %s: failed to suspend device.\n",
715 devname);
716 goto abort_resume;
717 }
718
719
720 err = save_stripes(fdlist, offsets,
721 odisks, ochunk, olevel, olayout,
722 spares, fdlist+odisks,
723 0ULL, nstripe*512);
724
725 /* abort if there was an error */
726 if (err < 0) {
727 fprintf(stderr, Name ": %s: failed to save critical region\n",
728 devname);
729 goto abort_resume;
730 }
731 /* FIXME write superblocks */
353632d9 732 memcpy(bsb.magic, "md_backup_data-1", 16);
e86c9dd6 733 st->ss->uuid_from_super((int*)&bsb.set_uuid, super);
353632d9 734 bsb.mtime = __cpu_to_le64(time(0));
e86c9dd6 735 bsb.arraystart = 0;
353632d9 736 bsb.length = __cpu_to_le64(last_block);
e86c9dd6 737 for (i=odisks; i<d ; i++) {
353632d9 738 bsb.devstart = __cpu_to_le64(offsets[i]);
e86c9dd6 739 bsb.sb_csum = bsb_csum((char*)&bsb, ((char*)&bsb.sb_csum)-((char*)&bsb));
206c5eae
NB
740 if (lseek64(fdlist[i], (offsets[i]+last_block)<<9, 0) < 0 ||
741 write(fdlist[i], &bsb, sizeof(bsb)) != sizeof(bsb)) {
742 fprintf(stderr, Name ": %s: fail to save metadata for critical region backups.\n",
743 devname);
744 goto abort_resume;
745 }
e86c9dd6
NB
746 }
747
748 /* start the reshape happening */
749 if (sysfs_set_str(sra, NULL, "sync_action", "reshape") < 0) {
750 fprintf(stderr, Name ": %s: failed to initiate reshape\n",
751 devname);
752 goto abort_resume;
753 }
754 /* wait for reshape to pass the critical region */
755 while(1) {
756 unsigned long long comp;
206c5eae
NB
757 if (sysfs_get_ll(sra, NULL, "sync_completed", &comp)<0) {
758 sleep(5);
e86c9dd6 759 break;
206c5eae 760 }
e86c9dd6
NB
761 if (comp >= nstripe)
762 break;
763 sleep(1);
764 }
f5e166fe 765
e86c9dd6
NB
766 /* invalidate superblocks */
767 memset(&bsb, 0, sizeof(bsb));
768 for (i=odisks; i<d ; i++) {
769 lseek64(fdlist[i], (offsets[i]+last_block)<<9, 0);
770 write(fdlist[i], &bsb, sizeof(bsb));
771 }
772
773 /* unsuspend. */
774 sysfs_set_num(sra, NULL, "suspend_lo", last_block);
775
776 for (i=0; i<d; i++)
777 if (fdlist[i] >= 0)
778 close(fdlist[i]);
779 free(fdlist);
780 free(offsets);
781
206c5eae 782 printf(Name ": ... critical section passed.\n");
e86c9dd6
NB
783 break;
784 }
785 return 0;
786
787
788 abort_resume:
789 sysfs_set_num(sra, NULL, "suspend_lo", last_block);
790 abort:
791 for (i=0; i<array.nr_disks; i++)
792 if (fdlist[i] >= 0)
793 close(fdlist[i]);
794 free(fdlist);
795 free(offsets);
796 return 1;
797
798}
353632d9
NB
799
800/*
801 * If any spare contains md_backup_data-1 which is recent wrt mtime,
802 * write that data into the array and update the super blocks with
803 * the new reshape_progress
804 */
805int Grow_restart(struct supertype *st, struct mdinfo *info, int *fdlist, int cnt)
806{
807 int i, j;
808 int old_disks;
809 int err = 0;
810 unsigned long long *offsets;
811
812 if (info->delta_disks < 0)
813 return 1; /* cannot handle a shrink */
814 if (info->new_level != info->array.level ||
815 info->new_layout != info->array.layout ||
816 info->new_chunk != info->array.chunk_size)
817 return 1; /* Can only handle change in disks */
818
819 old_disks = info->array.raid_disks - info->delta_disks;
820
821 for (i=old_disks; i<cnt; i++) {
822 void *super = NULL;
823 struct mdinfo dinfo;
824 struct mddev_ident_s id;
825 struct mdp_backup_super bsb;
826
827 /* This was a spare and may have some saved data on it.
828 * Load the superblock, find and load the
829 * backup_super_block.
830 * If either fail, go on to next device.
831 * If the backup contains no new info, just return
206c5eae 832 * else restore data and update all superblocks
353632d9
NB
833 */
834 if (fdlist[i] < 0)
835 continue;
836 if (st->ss->load_super(st, fdlist[i], &super, NULL))
837 continue;
838
839 st->ss->getinfo_super(&dinfo, &id, super);
840 free(super); super = NULL;
841 if (lseek64(fdlist[i],
842 (dinfo.data_offset + dinfo.component_size - 8) <<9,
843 0) < 0)
844 continue; /* Cannot seek */
845 if (read(fdlist[i], &bsb, sizeof(bsb)) != sizeof(bsb))
846 continue; /* Cannot read */
847 if (memcmp(bsb.magic, "md_backup_data-1", 16) != 0)
848 continue;
849 if (bsb.sb_csum != bsb_csum((char*)&bsb, ((char*)&bsb.sb_csum)-((char*)&bsb)))
850 continue; /* bad checksum */
851 if (memcmp(bsb.set_uuid,info->uuid, 16) != 0)
852 continue; /* Wrong uuid */
853
854 if (info->array.utime > __le64_to_cpu(bsb.mtime) + 3600 ||
855 info->array.utime < __le64_to_cpu(bsb.mtime))
856 continue; /* time stamp is too bad */
857
858 if (__le64_to_cpu(bsb.arraystart) != 0)
859 continue; /* Can only handle backup from start of array */
860 if (__le64_to_cpu(bsb.length) <
861 info->reshape_progress)
862 continue; /* No new data here */
863
864 if (lseek64(fdlist[i], __le64_to_cpu(bsb.devstart)*512, 0)< 0)
865 continue; /* Cannot seek */
866
867 /* Now need the data offsets for all devices. */
868 offsets = malloc(sizeof(*offsets)*info->array.raid_disks);
869 for(j=0; j<info->array.raid_disks; j++) {
870 if (fdlist[j] < 0)
871 continue;
872 if (st->ss->load_super(st, fdlist[j], &super, NULL))
873 /* FIXME should be this be an error */
874 continue;
875 st->ss->getinfo_super(&dinfo, &id, super);
876 free(super); super = NULL;
877 offsets[j] = dinfo.data_offset;
878 }
879 printf(Name ": restoring critical section\n");
880
881 if (restore_stripes(fdlist, offsets,
882 info->array.raid_disks,
883 info->new_chunk,
884 info->new_level,
885 info->new_layout,
886 fdlist[i], __le64_to_cpu(bsb.devstart)*512,
887 0, __le64_to_cpu(bsb.length)*512)) {
888 /* didn't succeed, so giveup */
206c5eae 889 return -1;
353632d9
NB
890 }
891
892 /* Ok, so the data is restored. Let's update those superblocks. */
893
894 for (j=0; j<info->array.raid_disks; j++) {
895 if (fdlist[j] < 0) continue;
896 if (st->ss->load_super(st, fdlist[j], &super, NULL))
897 continue;
898 st->ss->getinfo_super(&dinfo, &id, super);
899 dinfo.reshape_progress = __le64_to_cpu(bsb.length);
900 st->ss->update_super(&dinfo, super, "_reshape_progress",NULL,0);
901 st->ss->store_super(st, fdlist[j], super);
902 free(super);
903 }
904
905 /* And we are done! */
906 return 0;
907 }
908 return err;
909}