Add 'supertype' arg to almost all metadata methods.
[thirdparty/mdadm.git] / Grow.c
CommitLineData
e5329c37
NB
1/*
2 * mdadm - manage Linux "md" devices aka RAID arrays.
3 *
4f589ad0 4 * Copyright (C) 2001-2006 Neil Brown <neilb@suse.de>
e5329c37
NB
5 *
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 *
21 * Author: Neil Brown
22 * Email: <neilb@cse.unsw.edu.au>
23 * Paper: Neil Brown
24 * School of Computer Science and Engineering
25 * The University of New South Wales
26 * Sydney, 2052
27 * Australia
28 */
29#include "mdadm.h"
30#include "dlink.h"
31
32#if ! defined(__BIG_ENDIAN) && ! defined(__LITTLE_ENDIAN)
33#error no endian defined
34#endif
35#include "md_u.h"
36#include "md_p.h"
37
38int Grow_Add_device(char *devname, int fd, char *newdev)
39{
40 /* Add a device to an active array.
41 * Currently, just extend a linear array.
42 * This requires writing a new superblock on the
43 * new device, calling the kernel to add the device,
44 * and if that succeeds, update the superblock on
45 * all other devices.
46 * This means that we need to *find* all other devices.
47 */
4b1ac34b
NB
48 struct mdinfo info;
49
50 void *super = NULL;
e5329c37
NB
51 struct stat stb;
52 int nfd, fd2;
53 int d, nd;
82d9eba6 54 struct supertype *st = NULL;
aba69144 55
e5329c37 56
4b1ac34b 57 if (ioctl(fd, GET_ARRAY_INFO, &info.array) < 0) {
e5329c37
NB
58 fprintf(stderr, Name ": cannot get array info for %s\n", devname);
59 return 1;
60 }
61
82d9eba6
NB
62 st = super_by_version(info.array.major_version, info.array.minor_version);
63 if (!st) {
f9ce90ba
NB
64 fprintf(stderr, Name ": cannot handle arrays with superblock version %d\n", info.array.major_version);
65 return 1;
66 }
67
4b1ac34b 68 if (info.array.level != -1) {
e5329c37
NB
69 fprintf(stderr, Name ": can only add devices to linear arrays\n");
70 return 1;
71 }
72
73 nfd = open(newdev, O_RDWR|O_EXCL);
74 if (nfd < 0) {
75 fprintf(stderr, Name ": cannot open %s\n", newdev);
76 return 1;
77 }
78 fstat(nfd, &stb);
79 if ((stb.st_mode & S_IFMT) != S_IFBLK) {
80 fprintf(stderr, Name ": %s is not a block device!\n", newdev);
81 close(nfd);
82 return 1;
83 }
84 /* now check out all the devices and make sure we can read the superblock */
4b1ac34b 85 for (d=0 ; d < info.array.raid_disks ; d++) {
e5329c37
NB
86 mdu_disk_info_t disk;
87 char *dv;
88
89 disk.number = d;
90 if (ioctl(fd, GET_DISK_INFO, &disk) < 0) {
91 fprintf(stderr, Name ": cannot get device detail for device %d\n",
92 d);
93 return 1;
94 }
16c6fa80 95 dv = map_dev(disk.major, disk.minor, 1);
e5329c37
NB
96 if (!dv) {
97 fprintf(stderr, Name ": cannot find device file for device %d\n",
98 d);
99 return 1;
100 }
16c6fa80 101 fd2 = dev_open(dv, O_RDWR);
e5329c37
NB
102 if (!fd2) {
103 fprintf(stderr, Name ": cannot open device file %s\n", dv);
104 return 1;
105 }
df37ffc0 106 if (super)
68c7d6d7 107 st->ss->free_super(st, super);
4b1ac34b 108 super= NULL;
82d9eba6 109 if (st->ss->load_super(st, fd2, &super, NULL)) {
e5329c37
NB
110 fprintf(stderr, Name ": cannot find super block on %s\n", dv);
111 close(fd2);
112 return 1;
113 }
114 close(fd2);
115 }
116 /* Ok, looks good. Lets update the superblock and write it out to
117 * newdev.
118 */
aba69144 119
4b1ac34b
NB
120 info.disk.number = d;
121 info.disk.major = major(stb.st_rdev);
122 info.disk.minor = minor(stb.st_rdev);
123 info.disk.raid_disk = d;
124 info.disk.state = (1 << MD_DISK_SYNC) | (1 << MD_DISK_ACTIVE);
68c7d6d7 125 st->ss->update_super(st, &info, super, "linear-grow-new", newdev,
f752781f 126 0, 0, NULL);
e5329c37 127
96395475 128 if (st->ss->store_super(st, nfd, super)) {
f752781f
NB
129 fprintf(stderr, Name ": Cannot store new superblock on %s\n",
130 newdev);
e5329c37
NB
131 close(nfd);
132 return 1;
133 }
e5329c37 134 close(nfd);
4b1ac34b
NB
135
136 if (ioctl(fd, ADD_NEW_DISK, &info.disk) != 0) {
e5329c37
NB
137 fprintf(stderr, Name ": Cannot add new disk to this array\n");
138 return 1;
139 }
140 /* Well, that seems to have worked.
141 * Now go through and update all superblocks
142 */
143
4b1ac34b 144 if (ioctl(fd, GET_ARRAY_INFO, &info.array) < 0) {
e5329c37
NB
145 fprintf(stderr, Name ": cannot get array info for %s\n", devname);
146 return 1;
147 }
148
149 nd = d;
4b1ac34b 150 for (d=0 ; d < info.array.raid_disks ; d++) {
e5329c37
NB
151 mdu_disk_info_t disk;
152 char *dv;
153
154 disk.number = d;
155 if (ioctl(fd, GET_DISK_INFO, &disk) < 0) {
156 fprintf(stderr, Name ": cannot get device detail for device %d\n",
157 d);
158 return 1;
159 }
16c6fa80 160 dv = map_dev(disk.major, disk.minor, 1);
e5329c37
NB
161 if (!dv) {
162 fprintf(stderr, Name ": cannot find device file for device %d\n",
163 d);
164 return 1;
165 }
16c6fa80 166 fd2 = dev_open(dv, O_RDWR);
e5329c37
NB
167 if (fd2 < 0) {
168 fprintf(stderr, Name ": cannot open device file %s\n", dv);
169 return 1;
170 }
82d9eba6 171 if (st->ss->load_super(st, fd2, &super, NULL)) {
e5329c37
NB
172 fprintf(stderr, Name ": cannot find super block on %s\n", dv);
173 close(fd);
174 return 1;
175 }
4b1ac34b
NB
176 info.array.raid_disks = nd+1;
177 info.array.nr_disks = nd+1;
178 info.array.active_disks = nd+1;
179 info.array.working_disks = nd+1;
f752781f 180
68c7d6d7 181 st->ss->update_super(st, &info, super, "linear-grow-update", dv,
f752781f 182 0, 0, NULL);
aba69144 183
96395475 184 if (st->ss->store_super(st, fd2, super)) {
e5329c37
NB
185 fprintf(stderr, Name ": Cannot store new superblock on %s\n", dv);
186 close(fd2);
187 return 1;
188 }
189 close(fd2);
190 }
191
192 return 0;
193}
f5e166fe 194
8fac0577 195int Grow_addbitmap(char *devname, int fd, char *file, int chunk, int delay, int write_behind, int force)
f5e166fe
NB
196{
197 /*
198 * First check that array doesn't have a bitmap
199 * Then create the bitmap
200 * Then add it
201 *
202 * For internal bitmaps, we need to check the version,
203 * find all the active devices, and write the bitmap block
204 * to all devices
205 */
206 mdu_bitmap_file_t bmf;
207 mdu_array_info_t array;
208 struct supertype *st;
dcec9ee5
NB
209 int major = BITMAP_MAJOR_HI;
210 int vers = md_get_version(fd);
8fac0577 211 unsigned long long bitmapsize, array_size;
dcec9ee5
NB
212
213 if (vers < 9003) {
214 major = BITMAP_MAJOR_HOSTENDIAN;
215#ifdef __BIG_ENDIAN
216 fprintf(stderr, Name ": Warning - bitmaps created on this kernel are not portable\n"
217 " between different architectured. Consider upgrading the Linux kernel.\n");
218#endif
219 }
f5e166fe
NB
220
221 if (ioctl(fd, GET_BITMAP_FILE, &bmf) != 0) {
353632d9 222 if (errno == ENOMEM)
f5e166fe
NB
223 fprintf(stderr, Name ": Memory allocation failure.\n");
224 else
225 fprintf(stderr, Name ": bitmaps not supported by this kernel.\n");
226 return 1;
227 }
228 if (bmf.pathname[0]) {
fe80f49b
NB
229 if (strcmp(file,"none")==0) {
230 if (ioctl(fd, SET_BITMAP_FILE, -1)!= 0) {
231 fprintf(stderr, Name ": failed to remove bitmap %s\n",
232 bmf.pathname);
233 return 1;
234 }
235 return 0;
236 }
f5e166fe
NB
237 fprintf(stderr, Name ": %s already has a bitmap (%s)\n",
238 devname, bmf.pathname);
239 return 1;
240 }
241 if (ioctl(fd, GET_ARRAY_INFO, &array) != 0) {
242 fprintf(stderr, Name ": cannot get array status for %s\n", devname);
243 return 1;
244 }
245 if (array.state & (1<<MD_SB_BITMAP_PRESENT)) {
fe80f49b
NB
246 if (strcmp(file, "none")==0) {
247 array.state &= ~(1<<MD_SB_BITMAP_PRESENT);
248 if (ioctl(fd, SET_ARRAY_INFO, &array)!= 0) {
249 fprintf(stderr, Name ": failed to remove internal bitmap.\n");
250 return 1;
251 }
252 return 0;
253 }
f5e166fe
NB
254 fprintf(stderr, Name ": Internal bitmap already present on %s\n",
255 devname);
256 return 1;
257 }
5b28bd56
NB
258 if (array.level <= 0) {
259 fprintf(stderr, Name ": Bitmaps not meaningful with level %s\n",
260 map_num(pers, array.level)?:"of this array");
261 return 1;
262 }
8fac0577
NB
263 bitmapsize = array.size;
264 bitmapsize <<= 1;
beae1dfe 265 if (get_dev_size(fd, NULL, &array_size) &&
8fac0577
NB
266 array_size > (0x7fffffffULL<<9)) {
267 /* Array is big enough that we cannot trust array.size
268 * try other approaches
269 */
270 bitmapsize = get_component_size(fd);
271 }
8fac0577
NB
272 if (bitmapsize == 0) {
273 fprintf(stderr, Name ": Cannot reliably determine size of array to create bitmap - sorry.\n");
274 return 1;
275 }
276
f9c25f1d 277 if (array.level == 10) {
8686f3ed 278 int ncopies = (array.layout&255)*((array.layout>>8)&255);
f9c25f1d
NB
279 bitmapsize = bitmapsize * array.raid_disks / ncopies;
280 }
281
f5e166fe
NB
282 st = super_by_version(array.major_version, array.minor_version);
283 if (!st) {
284 fprintf(stderr, Name ": Cannot understand version %d.%d\n",
285 array.major_version, array.minor_version);
286 return 1;
287 }
fe80f49b
NB
288 if (strcmp(file, "none") == 0) {
289 fprintf(stderr, Name ": no bitmap found on %s\n", devname);
290 return 1;
291 } else if (strcmp(file, "internal") == 0) {
f5e166fe 292 int d;
ea329559 293 for (d=0; d< st->max_devs; d++) {
f5e166fe
NB
294 mdu_disk_info_t disk;
295 char *dv;
296 disk.number = d;
297 if (ioctl(fd, GET_DISK_INFO, &disk) < 0)
298 continue;
299 if (disk.major == 0 &&
300 disk.minor == 0)
301 continue;
302 if ((disk.state & (1<<MD_DISK_SYNC))==0)
303 continue;
16c6fa80 304 dv = map_dev(disk.major, disk.minor, 1);
f5e166fe
NB
305 if (dv) {
306 void *super;
16c6fa80 307 int fd2 = dev_open(dv, O_RDWR);
f5e166fe
NB
308 if (fd2 < 0)
309 continue;
310 if (st->ss->load_super(st, fd2, &super, NULL)==0) {
199171a2
NB
311 if (st->ss->add_internal_bitmap(
312 st, super,
313 &chunk, delay, write_behind,
314 bitmapsize, 0, major)
315 )
21e92547
NB
316 st->ss->write_bitmap(st, fd2, super);
317 else {
318 fprintf(stderr, Name ": failed to create internal bitmap - chunksize problem.\n");
319 close(fd2);
320 return 1;
321 }
f5e166fe
NB
322 }
323 close(fd2);
324 }
325 }
326 array.state |= (1<<MD_SB_BITMAP_PRESENT);
327 if (ioctl(fd, SET_ARRAY_INFO, &array)!= 0) {
328 fprintf(stderr, Name ": failed to set internal bitmap.\n");
329 return 1;
330 }
fe80f49b
NB
331 } else {
332 int uuid[4];
333 int bitmap_fd;
334 int d;
335 int max_devs = st->max_devs;
336 void *super = NULL;
fe80f49b
NB
337
338 /* try to load a superblock */
339 for (d=0; d<max_devs; d++) {
340 mdu_disk_info_t disk;
341 char *dv;
342 int fd2;
343 disk.number = d;
344 if (ioctl(fd, GET_DISK_INFO, &disk) < 0)
345 continue;
346 if ((disk.major==0 && disk.minor==0) ||
347 (disk.state & (1<<MD_DISK_REMOVED)))
348 continue;
16c6fa80 349 dv = map_dev(disk.major, disk.minor, 1);
fe80f49b 350 if (!dv) continue;
16c6fa80 351 fd2 = dev_open(dv, O_RDONLY);
fe80f49b
NB
352 if (fd2 >= 0 &&
353 st->ss->load_super(st, fd2, &super, NULL) == 0) {
354 close(fd2);
68c7d6d7 355 st->ss->uuid_from_super(st, uuid, super);
fe80f49b
NB
356 break;
357 }
358 close(fd2);
359 }
360 if (d == max_devs) {
361 fprintf(stderr, Name ": cannot find UUID for array!\n");
362 return 1;
363 }
8fac0577 364 if (CreateBitmap(file, force, (char*)uuid, chunk,
f9c25f1d 365 delay, write_behind, bitmapsize, major)) {
fe80f49b
NB
366 return 1;
367 }
368 bitmap_fd = open(file, O_RDWR);
369 if (bitmap_fd < 0) {
8fac0577 370 fprintf(stderr, Name ": weird: %s cannot be opened\n",
fe80f49b
NB
371 file);
372 return 1;
373 }
374 if (ioctl(fd, SET_BITMAP_FILE, bitmap_fd) < 0) {
375 fprintf(stderr, Name ": Cannot set bitmap file for %s: %s\n",
376 devname, strerror(errno));
377 return 1;
378 }
379 }
f5e166fe
NB
380
381 return 0;
382}
383
e86c9dd6
NB
384
385/*
386 * When reshaping an array we might need to backup some data.
387 * This is written to all spares with a 'super_block' describing it.
388 * The superblock goes 1K form the end of the used space on the
389 * device.
390 * It if written after the backup is complete.
391 * It has the following structure.
392 */
393
394struct mdp_backup_super {
395 char magic[16]; /* md_backup_data-1 */
396 __u8 set_uuid[16];
397 __u64 mtime;
398 /* start/sizes in 512byte sectors */
399 __u64 devstart;
400 __u64 arraystart;
401 __u64 length;
402 __u32 sb_csum; /* csum of preceeding bytes. */
403};
404
/*
 * bsb_csum - checksum used for struct mdp_backup_super.
 *
 * buf: start of the bytes to checksum.
 * len: number of rounds to perform.
 *
 * Returns the checksum converted with __cpu_to_le32().
 *
 * The arithmetic is done in unsigned int so that the repeated
 * left shift wraps instead of overflowing a signed int (which is
 * undefined behaviour); the resulting bit pattern is unchanged.
 *
 * NOTE(review): every round adds buf[0], never buf[i], so only the
 * first byte and 'len' influence the result.  That looks unintended,
 * but it is kept on purpose: both the writer (Grow_reshape) and the
 * reader (Grow_restart) use this function, and changing it would
 * reject backup superblocks written with the existing checksum.
 */
int bsb_csum(char *buf, int len)
{
	int i;
	unsigned int csum = 0;

	for (i = 0; i < len; i++)
		csum = (csum << 3) + (unsigned int)buf[0];
	return __cpu_to_le32(csum);
}
413
06b0d786 414int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
e86c9dd6
NB
415 long long size,
416 int level, int layout, int chunksize, int raid_disks)
417{
418 /* Make some changes in the shape of an array.
419 * The kernel must support the change.
420 * Different reshapes have subtly different meaning for different
421 * levels, so we need to check the current state of the array
422 * and go from there.
423 */
424 struct mdu_array_info_s array;
425 char *c;
426
427 struct mdp_backup_super bsb;
428 struct supertype *st;
429
430 int nlevel, olevel;
431 int nchunk, ochunk;
432 int nlayout, olayout;
433 int ndisks, odisks;
434 int ndata, odata;
435 unsigned long long nstripe, ostripe, last_block;
436 int *fdlist;
437 unsigned long long *offsets;
438 int d, i, spares;
439 int nrdisks;
440 int err;
441 void *super = NULL;
442
443 struct sysarray *sra;
444 struct sysdev *sd;
445
446 if (ioctl(fd, GET_ARRAY_INFO, &array) < 0) {
447 fprintf(stderr, Name ": %s is not an active md array - aborting\n",
448 devname);
449 return 1;
450 }
451 c = map_num(pers, array.level);
452 if (c == NULL) c = "-unknown-";
453 switch(array.level) {
454 default: /* raid0, linear, multipath cannot be reconfigured */
455 fprintf(stderr, Name ": %s array %s cannot be reshaped.\n",
456 c, devname);
457 return 1;
458
459 case LEVEL_FAULTY: /* only 'layout' change is permitted */
460
461 if (size >= 0) {
462 fprintf(stderr, Name ": %s: Cannot change size of a 'faulty' array\n",
463 devname);
464 return 1;
465 }
466 if (level != UnSet && level != LEVEL_FAULTY) {
467 fprintf(stderr, Name ": %s: Cannot change RAID level of a 'faulty' array\n",
468 devname);
469 return 1;
470 }
471 if (chunksize || raid_disks) {
472 fprintf(stderr, Name ": %s: Cannot change chunksize or disks of a 'faulty' array\n",
473 devname);
474 return 1;
475 }
476 if (layout == UnSet)
477 return 0; /* nothing to do.... */
478
479 array.layout = layout;
480 if (ioctl(fd, SET_ARRAY_INFO, &array) != 0) {
481 fprintf(stderr, Name ": Cannot set layout for %s: %s\n",
482 devname, strerror(errno));
483 return 1;
484 }
485 if (!quiet)
486 printf("layout for %s set to %d\n", devname, array.layout);
487 return 0;
488
489 case 1: /* raid_disks and size can each be changed. They are independant */
490
491 if (level != UnSet && level != 1) {
492 fprintf(stderr, Name ": %s: Cannot change RAID level of a RAID1 array.\n",
493 devname);
494 return 1;
495 }
496 if (chunksize || layout != UnSet) {
497 fprintf(stderr, Name ": %s: Cannot change chunk size of layout for a RAID1 array.\n",
498 devname);
499 return 1;
500 }
501
502 /* Each can trigger a resync/recovery which will block the
503 * other from happening. Later we could block
504 * resync for the duration via 'sync_action'...
505 */
9860f271 506 if (raid_disks > 0) {
e86c9dd6 507 array.raid_disks = raid_disks;
9860f271
NB
508 if (ioctl(fd, SET_ARRAY_INFO, &array) != 0) {
509 fprintf(stderr, Name ": Cannot set raid-devices for %s: %s\n",
510 devname, strerror(errno));
511 return 1;
512 }
513 }
514 if (size >= 0) {
e86c9dd6 515 array.size = size;
9860f271
NB
516 if (ioctl(fd, SET_ARRAY_INFO, &array) != 0) {
517 fprintf(stderr, Name ": Cannot set device size for %s: %s\n",
518 devname, strerror(errno));
519 return 1;
520 }
e86c9dd6
NB
521 }
522 return 0;
523
524 case 4:
525 case 5:
526 case 6:
527 st = super_by_version(array.major_version,
528 array.minor_version);
758d3a8e 529 /* size can be changed independently.
e86c9dd6
NB
530 * layout/chunksize/raid_disks/level can be changed
531 * though the kernel may not support it all.
532 * If 'suspend_lo' is not present in devfs, then
533 * these cannot be changed.
534 */
535 if (size >= 0) {
536 /* Cannot change other details as well.. */
537 if (layout != UnSet ||
538 chunksize != 0 ||
539 raid_disks != 0 ||
540 level != UnSet) {
541 fprintf(stderr, Name ": %s: Cannot change shape as well as size of a %s array.\n",
542 devname, c);
543 return 1;
544 }
545 array.size = size;
546 if (ioctl(fd, SET_ARRAY_INFO, &array) != 0) {
547 fprintf(stderr, Name ": Cannot set device size/shape for %s: %s\n",
548 devname, strerror(errno));
549 return 1;
550 }
551 return 0;
552 }
553 /* Ok, just change the shape. This can be awkward.
554 * There are three possibilities.
555 * 1/ The array will shrink. We don't support this
556 * possibility. Maybe one day...
557 * 2/ The array will not change size. This is easy enough
558 * to do, but not reliably. If the process is aborted
559 * the array *will* be corrupted. So maybe we can allow
560 * this but only if the user is really certain. e.g.
561 * --really-risk-everything
562 * 3/ The array will grow. This can be reliably achieved.
563 * However the kernel's restripe routines will cheerfully
564 * overwrite some early data before it is safe. So we
565 * need to make a backup of the early parts of the array
566 * and be ready to restore it if rebuild aborts very early.
567 *
568 * We backup data by writing it to all spares (there must be
569 * at least 1, so even raid6->raid5 requires a spare to be
570 * present).
571 *
572 * So: we enumerate the devices in the array and
573 * make sure we can open all of them.
574 * Then we freeze the early part of the array and
575 * backup to the various spares.
576 * Then we request changes and start the reshape.
577 * Monitor progress until it has passed the danger zone.
578 * and finally invalidate the copied data and unfreeze the
579 * start of the array.
580 *
581 * Before we can do this we need to decide:
582 * - will the array grow? Just calculate size
583 * - how much needs to be saved: count stripes.
584 * - where to save data... good question.
585 *
586 */
587 nlevel = olevel = array.level;
588 nchunk = ochunk = array.chunk_size;
589 nlayout = olayout = array.layout;
590 ndisks = odisks = array.raid_disks;
591
592 if (level != UnSet) nlevel = level;
593 if (chunksize) nchunk = chunksize;
594 if (layout != UnSet) nlayout = layout;
595 if (raid_disks) ndisks = raid_disks;
596
597 odata = odisks-1;
598 if (olevel == 6) odata--; /* number of data disks */
599 ndata = ndisks-1;
600 if (nlevel == 6) ndata--;
601
602 if (ndata < odata) {
603 fprintf(stderr, Name ": %s: Cannot reduce number of data disks (yet).\n",
604 devname);
605 return 1;
606 }
607 if (ndata == odata) {
608 fprintf(stderr, Name ": %s: Cannot reshape array without increasing size (yet).\n",
609 devname);
610 return 1;
611 }
612 /* Well, it is growing... so how much do we need to backup.
613 * Need to backup a full number of new-stripes, such that the
614 * last one does not over-write any place that it would be read
615 * from
616 */
617 nstripe = ostripe = 0;
353632d9 618 while (nstripe >= ostripe) {
e86c9dd6
NB
619 nstripe += nchunk/512;
620 last_block = nstripe * ndata;
353632d9 621 ostripe = last_block / odata / (ochunk/512) * (ochunk/512);
e86c9dd6 622 }
353632d9 623 printf("mdadm: Need to backup %lluK of critical section..\n", last_block/2);
e86c9dd6
NB
624
625 sra = sysfs_read(fd, 0,
758d3a8e
NB
626 GET_COMPONENT|GET_DEVS|GET_OFFSET|GET_STATE|
627 GET_CACHE);
e86c9dd6
NB
628 if (!sra) {
629 fprintf(stderr, Name ": %s: Cannot get array details from sysfs\n",
630 devname);
631 return 1;
632 }
633
634 if (last_block >= sra->component_size/2) {
635 fprintf(stderr, Name ": %s: Something wrong - reshape aborted\n",
636 devname);
637 return 1;
638 }
06b0d786
NB
639 if (sra->spares == 0 && backup_file == NULL) {
640 fprintf(stderr, Name ": %s: Cannot grow - need a spare or backup-file to backup critical section\n",
353632d9
NB
641 devname);
642 return 1;
643 }
e86c9dd6
NB
644
645 nrdisks = array.nr_disks + sra->spares;
646 /* Now we need to open all these devices so we can read/write.
647 */
06b0d786
NB
648 fdlist = malloc((1+nrdisks) * sizeof(int));
649 offsets = malloc((1+nrdisks) * sizeof(offsets[0]));
e86c9dd6
NB
650 if (!fdlist || !offsets) {
651 fprintf(stderr, Name ": malloc failed: grow aborted\n");
652 return 1;
653 }
06b0d786 654 for (d=0; d <= nrdisks; d++)
e86c9dd6
NB
655 fdlist[d] = -1;
656 d = array.raid_disks;
657 for (sd = sra->devs; sd; sd=sd->next) {
658 if (sd->state & (1<<MD_DISK_FAULTY))
659 continue;
660 if (sd->state & (1<<MD_DISK_SYNC)) {
16c6fa80
NB
661 char *dn = map_dev(sd->major, sd->minor, 1);
662 fdlist[sd->role] = dev_open(dn, O_RDONLY);
e86c9dd6
NB
663 offsets[sd->role] = sd->offset;
664 if (fdlist[sd->role] < 0) {
665 fprintf(stderr, Name ": %s: cannot open component %s\n",
e81cdd9f 666 devname, dn?dn:"-unknown-");
e86c9dd6
NB
667 goto abort;
668 }
669 } else {
670 /* spare */
16c6fa80
NB
671 char *dn = map_dev(sd->major, sd->minor, 1);
672 fdlist[d] = dev_open(dn, O_RDWR);
e86c9dd6
NB
673 offsets[d] = sd->offset;
674 if (fdlist[d]<0) {
675 fprintf(stderr, Name ": %s: cannot open component %s\n",
e81cdd9f 676 devname, dn?dn:"-unknown");
e86c9dd6
NB
677 goto abort;
678 }
679 d++;
680 }
681 }
682 for (i=0 ; i<array.raid_disks; i++)
683 if (fdlist[i] < 0) {
684 fprintf(stderr, Name ": %s: failed to find device %d. Array might be degraded.\n"
685 " --grow aborted\n", devname, i);
686 goto abort;
687 }
06b0d786
NB
688 spares = sra->spares;
689 if (backup_file) {
690 fdlist[d] = open(backup_file, O_RDWR|O_CREAT|O_EXCL, 0600);
691 if (fdlist[d] < 0) {
692 fprintf(stderr, Name ": %s: cannot create backup file %s: %s\n",
693 devname, backup_file, strerror(errno));
694 goto abort;
695 }
696 offsets[d] = 8;
697 d++;
698 spares++;
699 }
e86c9dd6 700 if (fdlist[array.raid_disks] < 0) {
06b0d786 701 fprintf(stderr, Name ": %s: failed to find a spare and no backup-file given - --grow aborted\n",
e86c9dd6
NB
702 devname);
703 goto abort;
704 }
705
706 /* Find a superblock */
707 if (st->ss->load_super(st, fdlist[0], &super, NULL)) {
708 fprintf(stderr, Name ": %s: Cannot find a superblock\n",
709 devname);
710 goto abort;
711 }
712
2efedc7b
NB
713
714 memcpy(bsb.magic, "md_backup_data-1", 16);
68c7d6d7 715 st->ss->uuid_from_super(st, (int*)&bsb.set_uuid, super);
2efedc7b
NB
716 bsb.mtime = __cpu_to_le64(time(0));
717 bsb.arraystart = 0;
718 bsb.length = __cpu_to_le64(last_block);
719
720 /* Decide offset for the backup, llseek the spares, and write
721 * a leading superblock 4K earlier.
722 */
e86c9dd6 723 for (i=array.raid_disks; i<d; i++) {
2efedc7b 724 char buf[4096];
06b0d786
NB
725 if (i==d-1 && backup_file) {
726 /* This is the backup file */
727 offsets[i] = 8;
728 } else
729 offsets[i] += sra->component_size - last_block - 8;
2efedc7b
NB
730 if (lseek64(fdlist[i], (offsets[i]<<9) - 4096, 0)
731 != (offsets[i]<<9) - 4096) {
e86c9dd6
NB
732 fprintf(stderr, Name ": could not seek...\n");
733 goto abort;
734 }
2efedc7b
NB
735 memset(buf, 0, sizeof(buf));
736 bsb.devstart = __cpu_to_le64(offsets[i]);
737 bsb.sb_csum = bsb_csum((char*)&bsb, ((char*)&bsb.sb_csum)-((char*)&bsb));
738 memcpy(buf, &bsb, sizeof(bsb));
739 if (write(fdlist[i], buf, 4096) != 4096) {
740 fprintf(stderr, Name ": could not write leading superblock\n");
741 goto abort;
742 }
e86c9dd6
NB
743 }
744 array.level = nlevel;
745 array.raid_disks = ndisks;
746 array.chunk_size = nchunk;
747 array.layout = nlayout;
748 if (ioctl(fd, SET_ARRAY_INFO, &array) != 0) {
758d3a8e
NB
749 if (errno == ENOSPC) {
750 /* stripe cache is not big enough.
751 * It needs to be 4 times chunksize_size,
752 * and we assume pagesize is 4K
753 */
754 if (sra->cache_size < 4 * (nchunk/4096)) {
755 sysfs_set_num(sra, NULL,
756 "stripe_cache_size",
757 4 * (nchunk/4096) +1);
758 if (ioctl(fd, SET_ARRAY_INFO,
759 &array) == 0)
760 goto ok;
761 }
762 }
e86c9dd6
NB
763 fprintf(stderr, Name ": Cannot set device size/shape for %s: %s\n",
764 devname, strerror(errno));
765 goto abort;
766 }
758d3a8e 767 ok: ;
e86c9dd6
NB
768
769 /* suspend the relevant region */
770 sysfs_set_num(sra, NULL, "suspend_hi", 0); /* just in case */
771 if (sysfs_set_num(sra, NULL, "suspend_lo", 0) < 0 ||
772 sysfs_set_num(sra, NULL, "suspend_hi", last_block) < 0) {
773 fprintf(stderr, Name ": %s: failed to suspend device.\n",
774 devname);
775 goto abort_resume;
776 }
777
778
779 err = save_stripes(fdlist, offsets,
780 odisks, ochunk, olevel, olayout,
781 spares, fdlist+odisks,
06b0d786 782 0ULL, last_block*512);
e86c9dd6
NB
783
784 /* abort if there was an error */
785 if (err < 0) {
786 fprintf(stderr, Name ": %s: failed to save critical region\n",
787 devname);
788 goto abort_resume;
789 }
2efedc7b 790
e86c9dd6 791 for (i=odisks; i<d ; i++) {
353632d9 792 bsb.devstart = __cpu_to_le64(offsets[i]);
e86c9dd6 793 bsb.sb_csum = bsb_csum((char*)&bsb, ((char*)&bsb.sb_csum)-((char*)&bsb));
206c5eae 794 if (lseek64(fdlist[i], (offsets[i]+last_block)<<9, 0) < 0 ||
06b0d786
NB
795 write(fdlist[i], &bsb, sizeof(bsb)) != sizeof(bsb) ||
796 fsync(fdlist[i]) != 0) {
206c5eae
NB
797 fprintf(stderr, Name ": %s: fail to save metadata for critical region backups.\n",
798 devname);
799 goto abort_resume;
800 }
e86c9dd6
NB
801 }
802
803 /* start the reshape happening */
804 if (sysfs_set_str(sra, NULL, "sync_action", "reshape") < 0) {
805 fprintf(stderr, Name ": %s: failed to initiate reshape\n",
806 devname);
807 goto abort_resume;
808 }
809 /* wait for reshape to pass the critical region */
810 while(1) {
811 unsigned long long comp;
206c5eae
NB
812 if (sysfs_get_ll(sra, NULL, "sync_completed", &comp)<0) {
813 sleep(5);
e86c9dd6 814 break;
206c5eae 815 }
e86c9dd6
NB
816 if (comp >= nstripe)
817 break;
818 sleep(1);
819 }
aba69144 820
e86c9dd6
NB
821 /* invalidate superblocks */
822 memset(&bsb, 0, sizeof(bsb));
823 for (i=odisks; i<d ; i++) {
824 lseek64(fdlist[i], (offsets[i]+last_block)<<9, 0);
9fca7d62
NB
825 if (write(fdlist[i], &bsb, sizeof(bsb)) < 0) {
826 fprintf(stderr, Name ": %s: failed to invalidate metadata for raid disk %d\n",
827 devname, i);
828 }
e86c9dd6
NB
829 }
830
831 /* unsuspend. */
832 sysfs_set_num(sra, NULL, "suspend_lo", last_block);
833
834 for (i=0; i<d; i++)
835 if (fdlist[i] >= 0)
836 close(fdlist[i]);
837 free(fdlist);
838 free(offsets);
06b0d786
NB
839 if (backup_file)
840 unlink(backup_file);
e86c9dd6 841
206c5eae 842 printf(Name ": ... critical section passed.\n");
e86c9dd6
NB
843 break;
844 }
845 return 0;
846
847
848 abort_resume:
849 sysfs_set_num(sra, NULL, "suspend_lo", last_block);
850 abort:
851 for (i=0; i<array.nr_disks; i++)
852 if (fdlist[i] >= 0)
853 close(fdlist[i]);
854 free(fdlist);
855 free(offsets);
06b0d786
NB
856 if (backup_file)
857 unlink(backup_file);
e86c9dd6
NB
858 return 1;
859
860}
353632d9
NB
861
862/*
863 * If any spare contains md_back_data-1 which is recent wrt mtime,
864 * write that data into the array and update the super blocks with
865 * the new reshape_progress
866 */
06b0d786 867int Grow_restart(struct supertype *st, struct mdinfo *info, int *fdlist, int cnt, char *backup_file)
353632d9
NB
868{
869 int i, j;
870 int old_disks;
353632d9 871 unsigned long long *offsets;
6e9eac4f
NB
872 unsigned long long nstripe, ostripe, last_block;
873 int ndata, odata;
353632d9
NB
874
875 if (info->delta_disks < 0)
876 return 1; /* cannot handle a shrink */
877 if (info->new_level != info->array.level ||
878 info->new_layout != info->array.layout ||
879 info->new_chunk != info->array.chunk_size)
880 return 1; /* Can only handle change in disks */
881
882 old_disks = info->array.raid_disks - info->delta_disks;
883
06b0d786 884 for (i=old_disks-(backup_file?1:0); i<cnt; i++) {
353632d9
NB
885 void *super = NULL;
886 struct mdinfo dinfo;
353632d9 887 struct mdp_backup_super bsb;
2efedc7b 888 char buf[4096];
06b0d786 889 int fd;
353632d9
NB
890
891 /* This was a spare and may have some saved data on it.
892 * Load the superblock, find and load the
893 * backup_super_block.
894 * If either fail, go on to next device.
895 * If the backup contains no new info, just return
206c5eae 896 * else restore data and update all superblocks
353632d9 897 */
06b0d786
NB
898 if (i == old_disks-1) {
899 fd = open(backup_file, O_RDONLY);
900 if (fd<0)
901 continue;
06b0d786
NB
902 } else {
903 fd = fdlist[i];
904 if (fd < 0)
905 continue;
906 if (st->ss->load_super(st, fd, &super, NULL))
907 continue;
353632d9 908
68c7d6d7
NB
909 st->ss->getinfo_super(st, &dinfo, super);
910 st->ss->free_super(st, super);
df37ffc0 911 super = NULL;
06b0d786
NB
912 if (lseek64(fd,
913 (dinfo.data_offset + dinfo.component_size - 8) <<9,
914 0) < 0)
915 continue; /* Cannot seek */
916 }
917 if (read(fd, &bsb, sizeof(bsb)) != sizeof(bsb))
353632d9
NB
918 continue; /* Cannot read */
919 if (memcmp(bsb.magic, "md_backup_data-1", 16) != 0)
920 continue;
921 if (bsb.sb_csum != bsb_csum((char*)&bsb, ((char*)&bsb.sb_csum)-((char*)&bsb)))
922 continue; /* bad checksum */
923 if (memcmp(bsb.set_uuid,info->uuid, 16) != 0)
924 continue; /* Wrong uuid */
925
926 if (info->array.utime > __le64_to_cpu(bsb.mtime) + 3600 ||
927 info->array.utime < __le64_to_cpu(bsb.mtime))
928 continue; /* time stamp is too bad */
929
930 if (__le64_to_cpu(bsb.arraystart) != 0)
931 continue; /* Can only handle backup from start of array */
932 if (__le64_to_cpu(bsb.length) <
933 info->reshape_progress)
934 continue; /* No new data here */
935
06b0d786 936 if (lseek64(fd, __le64_to_cpu(bsb.devstart)*512, 0)< 0)
353632d9 937 continue; /* Cannot seek */
2efedc7b 938 /* There should be a duplicate backup superblock 4k before here */
06b0d786
NB
939 if (lseek64(fd, -4096, 1) < 0 ||
940 read(fd, buf, 4096) != 4096 ||
9860f271 941 memcmp(buf, &bsb, sizeof(bsb)) != 0)
2efedc7b
NB
942 continue; /* Cannot find leading superblock */
943
353632d9
NB
944 /* Now need the data offsets for all devices. */
945 offsets = malloc(sizeof(*offsets)*info->array.raid_disks);
946 for(j=0; j<info->array.raid_disks; j++) {
947 if (fdlist[j] < 0)
948 continue;
949 if (st->ss->load_super(st, fdlist[j], &super, NULL))
950 /* FIXME should be this be an error */
951 continue;
68c7d6d7
NB
952 st->ss->getinfo_super(st, &dinfo, super);
953 st->ss->free_super(st, super);
df37ffc0 954 super = NULL;
353632d9
NB
955 offsets[j] = dinfo.data_offset;
956 }
957 printf(Name ": restoring critical section\n");
958
959 if (restore_stripes(fdlist, offsets,
960 info->array.raid_disks,
961 info->new_chunk,
962 info->new_level,
963 info->new_layout,
06b0d786 964 fd, __le64_to_cpu(bsb.devstart)*512,
353632d9
NB
965 0, __le64_to_cpu(bsb.length)*512)) {
966 /* didn't succeed, so giveup */
2295250a 967 return 1;
353632d9
NB
968 }
969
970 /* Ok, so the data is restored. Let's update those superblocks. */
971
972 for (j=0; j<info->array.raid_disks; j++) {
973 if (fdlist[j] < 0) continue;
974 if (st->ss->load_super(st, fdlist[j], &super, NULL))
975 continue;
68c7d6d7 976 st->ss->getinfo_super(st, &dinfo, super);
353632d9 977 dinfo.reshape_progress = __le64_to_cpu(bsb.length);
68c7d6d7
NB
978 st->ss->update_super(st, &dinfo, super,
979 "_reshape_progress",
980 NULL,0, 0, NULL);
353632d9 981 st->ss->store_super(st, fdlist[j], super);
68c7d6d7 982 st->ss->free_super(st, super);
353632d9
NB
983 }
984
985 /* And we are done! */
986 return 0;
987 }
6e9eac4f
NB
988 /* Didn't find any backup data, try to see if any
989 * was needed.
990 */
991 nstripe = ostripe = 0;
992 odata = info->array.raid_disks - info->delta_disks - 1;
993 if (info->array.level == 6) odata--; /* number of data disks */
994 ndata = info->array.raid_disks - 1;
995 if (info->new_level == 6) ndata--;
996 last_block = 0;
997 while (nstripe >= ostripe) {
998 nstripe += info->new_chunk / 512;
999 last_block = nstripe * ndata;
1000 ostripe = last_block / odata / (info->array.chunk_size/512) *
1001 (info->array.chunk_size/512);
1002 }
1003
1004 if (info->reshape_progress >= last_block)
1005 return 0;
1006 /* needed to recover critical section! */
2295250a 1007 return 1;
353632d9 1008}