]> git.ipfire.org Git - thirdparty/mdadm.git/blob - Grow.c
imsm: fix activate_spare off-by-one
[thirdparty/mdadm.git] / Grow.c
1 /*
2 * mdadm - manage Linux "md" devices aka RAID arrays.
3 *
4 * Copyright (C) 2001-2009 Neil Brown <neilb@suse.de>
5 *
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 *
21 * Author: Neil Brown
22 * Email: <neilb@suse.de>
23 */
24 #include "mdadm.h"
25 #include "dlink.h"
26
27 #if ! defined(__BIG_ENDIAN) && ! defined(__LITTLE_ENDIAN)
28 #error no endian defined
29 #endif
30 #include "md_u.h"
31 #include "md_p.h"
32
33 int Grow_Add_device(char *devname, int fd, char *newdev)
34 {
35 /* Add a device to an active array.
36 * Currently, just extend a linear array.
37 * This requires writing a new superblock on the
38 * new device, calling the kernel to add the device,
39 * and if that succeeds, update the superblock on
40 * all other devices.
41 * This means that we need to *find* all other devices.
42 */
43 struct mdinfo info;
44
45 struct stat stb;
46 int nfd, fd2;
47 int d, nd;
48 struct supertype *st = NULL;
49
50
51 if (ioctl(fd, GET_ARRAY_INFO, &info.array) < 0) {
52 fprintf(stderr, Name ": cannot get array info for %s\n", devname);
53 return 1;
54 }
55
56 st = super_by_fd(fd);
57 if (!st) {
58 fprintf(stderr, Name ": cannot handle arrays with superblock version %d\n", info.array.major_version);
59 return 1;
60 }
61
62 if (info.array.level != -1) {
63 fprintf(stderr, Name ": can only add devices to linear arrays\n");
64 return 1;
65 }
66
67 nfd = open(newdev, O_RDWR|O_EXCL|O_DIRECT);
68 if (nfd < 0) {
69 fprintf(stderr, Name ": cannot open %s\n", newdev);
70 return 1;
71 }
72 fstat(nfd, &stb);
73 if ((stb.st_mode & S_IFMT) != S_IFBLK) {
74 fprintf(stderr, Name ": %s is not a block device!\n", newdev);
75 close(nfd);
76 return 1;
77 }
78 /* now check out all the devices and make sure we can read the superblock */
79 for (d=0 ; d < info.array.raid_disks ; d++) {
80 mdu_disk_info_t disk;
81 char *dv;
82
83 disk.number = d;
84 if (ioctl(fd, GET_DISK_INFO, &disk) < 0) {
85 fprintf(stderr, Name ": cannot get device detail for device %d\n",
86 d);
87 return 1;
88 }
89 dv = map_dev(disk.major, disk.minor, 1);
90 if (!dv) {
91 fprintf(stderr, Name ": cannot find device file for device %d\n",
92 d);
93 return 1;
94 }
95 fd2 = dev_open(dv, O_RDWR);
96 if (!fd2) {
97 fprintf(stderr, Name ": cannot open device file %s\n", dv);
98 return 1;
99 }
100 st->ss->free_super(st);
101
102 if (st->ss->load_super(st, fd2, NULL)) {
103 fprintf(stderr, Name ": cannot find super block on %s\n", dv);
104 close(fd2);
105 return 1;
106 }
107 close(fd2);
108 }
109 /* Ok, looks good. Lets update the superblock and write it out to
110 * newdev.
111 */
112
113 info.disk.number = d;
114 info.disk.major = major(stb.st_rdev);
115 info.disk.minor = minor(stb.st_rdev);
116 info.disk.raid_disk = d;
117 info.disk.state = (1 << MD_DISK_SYNC) | (1 << MD_DISK_ACTIVE);
118 st->ss->update_super(st, &info, "linear-grow-new", newdev,
119 0, 0, NULL);
120
121 if (st->ss->store_super(st, nfd)) {
122 fprintf(stderr, Name ": Cannot store new superblock on %s\n",
123 newdev);
124 close(nfd);
125 return 1;
126 }
127 close(nfd);
128
129 if (ioctl(fd, ADD_NEW_DISK, &info.disk) != 0) {
130 fprintf(stderr, Name ": Cannot add new disk to this array\n");
131 return 1;
132 }
133 /* Well, that seems to have worked.
134 * Now go through and update all superblocks
135 */
136
137 if (ioctl(fd, GET_ARRAY_INFO, &info.array) < 0) {
138 fprintf(stderr, Name ": cannot get array info for %s\n", devname);
139 return 1;
140 }
141
142 nd = d;
143 for (d=0 ; d < info.array.raid_disks ; d++) {
144 mdu_disk_info_t disk;
145 char *dv;
146
147 disk.number = d;
148 if (ioctl(fd, GET_DISK_INFO, &disk) < 0) {
149 fprintf(stderr, Name ": cannot get device detail for device %d\n",
150 d);
151 return 1;
152 }
153 dv = map_dev(disk.major, disk.minor, 1);
154 if (!dv) {
155 fprintf(stderr, Name ": cannot find device file for device %d\n",
156 d);
157 return 1;
158 }
159 fd2 = dev_open(dv, O_RDWR);
160 if (fd2 < 0) {
161 fprintf(stderr, Name ": cannot open device file %s\n", dv);
162 return 1;
163 }
164 if (st->ss->load_super(st, fd2, NULL)) {
165 fprintf(stderr, Name ": cannot find super block on %s\n", dv);
166 close(fd);
167 return 1;
168 }
169 info.array.raid_disks = nd+1;
170 info.array.nr_disks = nd+1;
171 info.array.active_disks = nd+1;
172 info.array.working_disks = nd+1;
173
174 st->ss->update_super(st, &info, "linear-grow-update", dv,
175 0, 0, NULL);
176
177 if (st->ss->store_super(st, fd2)) {
178 fprintf(stderr, Name ": Cannot store new superblock on %s\n", dv);
179 close(fd2);
180 return 1;
181 }
182 close(fd2);
183 }
184
185 return 0;
186 }
187
188 int Grow_addbitmap(char *devname, int fd, char *file, int chunk, int delay, int write_behind, int force)
189 {
190 /*
191 * First check that array doesn't have a bitmap
192 * Then create the bitmap
193 * Then add it
194 *
195 * For internal bitmaps, we need to check the version,
196 * find all the active devices, and write the bitmap block
197 * to all devices
198 */
199 mdu_bitmap_file_t bmf;
200 mdu_array_info_t array;
201 struct supertype *st;
202 int major = BITMAP_MAJOR_HI;
203 int vers = md_get_version(fd);
204 unsigned long long bitmapsize, array_size;
205
206 if (vers < 9003) {
207 major = BITMAP_MAJOR_HOSTENDIAN;
208 #ifdef __BIG_ENDIAN
209 fprintf(stderr, Name ": Warning - bitmaps created on this kernel are not portable\n"
210 " between different architectured. Consider upgrading the Linux kernel.\n");
211 #endif
212 }
213
214 if (ioctl(fd, GET_BITMAP_FILE, &bmf) != 0) {
215 if (errno == ENOMEM)
216 fprintf(stderr, Name ": Memory allocation failure.\n");
217 else
218 fprintf(stderr, Name ": bitmaps not supported by this kernel.\n");
219 return 1;
220 }
221 if (bmf.pathname[0]) {
222 if (strcmp(file,"none")==0) {
223 if (ioctl(fd, SET_BITMAP_FILE, -1)!= 0) {
224 fprintf(stderr, Name ": failed to remove bitmap %s\n",
225 bmf.pathname);
226 return 1;
227 }
228 return 0;
229 }
230 fprintf(stderr, Name ": %s already has a bitmap (%s)\n",
231 devname, bmf.pathname);
232 return 1;
233 }
234 if (ioctl(fd, GET_ARRAY_INFO, &array) != 0) {
235 fprintf(stderr, Name ": cannot get array status for %s\n", devname);
236 return 1;
237 }
238 if (array.state & (1<<MD_SB_BITMAP_PRESENT)) {
239 if (strcmp(file, "none")==0) {
240 array.state &= ~(1<<MD_SB_BITMAP_PRESENT);
241 if (ioctl(fd, SET_ARRAY_INFO, &array)!= 0) {
242 fprintf(stderr, Name ": failed to remove internal bitmap.\n");
243 return 1;
244 }
245 return 0;
246 }
247 fprintf(stderr, Name ": Internal bitmap already present on %s\n",
248 devname);
249 return 1;
250 }
251 if (array.level <= 0) {
252 fprintf(stderr, Name ": Bitmaps not meaningful with level %s\n",
253 map_num(pers, array.level)?:"of this array");
254 return 1;
255 }
256 bitmapsize = array.size;
257 bitmapsize <<= 1;
258 if (get_dev_size(fd, NULL, &array_size) &&
259 array_size > (0x7fffffffULL<<9)) {
260 /* Array is big enough that we cannot trust array.size
261 * try other approaches
262 */
263 bitmapsize = get_component_size(fd);
264 }
265 if (bitmapsize == 0) {
266 fprintf(stderr, Name ": Cannot reliably determine size of array to create bitmap - sorry.\n");
267 return 1;
268 }
269
270 if (array.level == 10) {
271 int ncopies = (array.layout&255)*((array.layout>>8)&255);
272 bitmapsize = bitmapsize * array.raid_disks / ncopies;
273 }
274
275 st = super_by_fd(fd);
276 if (!st) {
277 fprintf(stderr, Name ": Cannot understand version %d.%d\n",
278 array.major_version, array.minor_version);
279 return 1;
280 }
281 if (strcmp(file, "none") == 0) {
282 fprintf(stderr, Name ": no bitmap found on %s\n", devname);
283 return 1;
284 } else if (strcmp(file, "internal") == 0) {
285 int d;
286 for (d=0; d< st->max_devs; d++) {
287 mdu_disk_info_t disk;
288 char *dv;
289 disk.number = d;
290 if (ioctl(fd, GET_DISK_INFO, &disk) < 0)
291 continue;
292 if (disk.major == 0 &&
293 disk.minor == 0)
294 continue;
295 if ((disk.state & (1<<MD_DISK_SYNC))==0)
296 continue;
297 dv = map_dev(disk.major, disk.minor, 1);
298 if (dv) {
299 int fd2 = dev_open(dv, O_RDWR);
300 if (fd2 < 0)
301 continue;
302 if (st->ss->load_super(st, fd2, NULL)==0) {
303 if (st->ss->add_internal_bitmap(
304 st,
305 &chunk, delay, write_behind,
306 bitmapsize, 0, major)
307 )
308 st->ss->write_bitmap(st, fd2);
309 else {
310 fprintf(stderr, Name ": failed to create internal bitmap - chunksize problem.\n");
311 close(fd2);
312 return 1;
313 }
314 }
315 close(fd2);
316 }
317 }
318 array.state |= (1<<MD_SB_BITMAP_PRESENT);
319 if (ioctl(fd, SET_ARRAY_INFO, &array)!= 0) {
320 fprintf(stderr, Name ": failed to set internal bitmap.\n");
321 return 1;
322 }
323 } else {
324 int uuid[4];
325 int bitmap_fd;
326 int d;
327 int max_devs = st->max_devs;
328
329 /* try to load a superblock */
330 for (d=0; d<max_devs; d++) {
331 mdu_disk_info_t disk;
332 char *dv;
333 int fd2;
334 disk.number = d;
335 if (ioctl(fd, GET_DISK_INFO, &disk) < 0)
336 continue;
337 if ((disk.major==0 && disk.minor==0) ||
338 (disk.state & (1<<MD_DISK_REMOVED)))
339 continue;
340 dv = map_dev(disk.major, disk.minor, 1);
341 if (!dv) continue;
342 fd2 = dev_open(dv, O_RDONLY);
343 if (fd2 >= 0 &&
344 st->ss->load_super(st, fd2, NULL) == 0) {
345 close(fd2);
346 st->ss->uuid_from_super(st, uuid);
347 break;
348 }
349 close(fd2);
350 }
351 if (d == max_devs) {
352 fprintf(stderr, Name ": cannot find UUID for array!\n");
353 return 1;
354 }
355 if (CreateBitmap(file, force, (char*)uuid, chunk,
356 delay, write_behind, bitmapsize, major)) {
357 return 1;
358 }
359 bitmap_fd = open(file, O_RDWR);
360 if (bitmap_fd < 0) {
361 fprintf(stderr, Name ": weird: %s cannot be opened\n",
362 file);
363 return 1;
364 }
365 if (ioctl(fd, SET_BITMAP_FILE, bitmap_fd) < 0) {
366 fprintf(stderr, Name ": Cannot set bitmap file for %s: %s\n",
367 devname, strerror(errno));
368 return 1;
369 }
370 }
371
372 return 0;
373 }
374
375
376 /*
377 * When reshaping an array we might need to backup some data.
378 * This is written to all spares with a 'super_block' describing it.
379 * The superblock goes 4K (8 sectors) from the end of the used space on the
380 * device.
381 * It is written after the backup is complete.
382 * It has the following structure.
383 */
384
385 struct mdp_backup_super {
386 char magic[16]; /* md_backup_data-1 */
387 __u8 set_uuid[16];
388 __u64 mtime;
389 /* start/sizes in 512byte sectors */
390 __u64 devstart;
391 __u64 arraystart;
392 __u64 length;
393 __u32 sb_csum; /* csum of preceeding bytes. */
394 __u8 pad[512-68];
395 } __attribute__((aligned(512))) bsb;
396
/*
 * bsb_csum - checksum stored in mdp_backup_super.sb_csum.
 *
 * buf: start of the backup superblock.
 * len: number of bytes preceding the sb_csum field.
 *
 * Returns the checksum converted with __cpu_to_le32().
 *
 * NOTE(review): every iteration folds in buf[0], not buf[i], so the
 * result only depends on the first byte and on len.  This looks
 * unintended, but the value is written to disk and compared when a
 * backup is read back, so switching to buf[i] would reject backups
 * written by this code.  It is therefore left exactly as is.
 *
 * The accumulator is unsigned so that the repeated left shift wraps
 * instead of overflowing a signed int (undefined behaviour); the bits
 * produced are the same as the previous two's-complement result.
 */
int bsb_csum(char *buf, int len)
{
	unsigned int csum = 0;
	int i;

	for (i = 0; i < len; i++)
		csum = (csum << 3) + (unsigned int)(int)buf[0];
	return __cpu_to_le32(csum);
}
405
406 int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
407 long long size,
408 int level, int layout, int chunksize, int raid_disks)
409 {
410 /* Make some changes in the shape of an array.
411 * The kernel must support the change.
412 * Different reshapes have subtly different meaning for different
413 * levels, so we need to check the current state of the array
414 * and go from there.
415 */
416 struct mdu_array_info_s array;
417 char *c;
418
419 struct supertype *st;
420
421 int nlevel, olevel;
422 int nchunk, ochunk;
423 int nlayout, olayout;
424 int ndisks, odisks;
425 int ndata, odata;
426 unsigned long long nstripe, ostripe, last_block;
427 int *fdlist;
428 unsigned long long *offsets;
429 int d, i, spares;
430 int nrdisks;
431 int err;
432
433 struct mdinfo *sra;
434 struct mdinfo *sd;
435
436 if (ioctl(fd, GET_ARRAY_INFO, &array) < 0) {
437 fprintf(stderr, Name ": %s is not an active md array - aborting\n",
438 devname);
439 return 1;
440 }
441 c = map_num(pers, array.level);
442 if (c == NULL) c = "-unknown-";
443 switch(array.level) {
444 default: /* raid0, linear, multipath cannot be reconfigured */
445 fprintf(stderr, Name ": %s array %s cannot be reshaped.\n",
446 c, devname);
447 return 1;
448
449 case LEVEL_FAULTY: /* only 'layout' change is permitted */
450
451 if (size >= 0) {
452 fprintf(stderr, Name ": %s: Cannot change size of a 'faulty' array\n",
453 devname);
454 return 1;
455 }
456 if (level != UnSet && level != LEVEL_FAULTY) {
457 fprintf(stderr, Name ": %s: Cannot change RAID level of a 'faulty' array\n",
458 devname);
459 return 1;
460 }
461 if (chunksize || raid_disks) {
462 fprintf(stderr, Name ": %s: Cannot change chunksize or disks of a 'faulty' array\n",
463 devname);
464 return 1;
465 }
466 if (layout == UnSet)
467 return 0; /* nothing to do.... */
468
469 array.layout = layout;
470 if (ioctl(fd, SET_ARRAY_INFO, &array) != 0) {
471 fprintf(stderr, Name ": Cannot set layout for %s: %s\n",
472 devname, strerror(errno));
473 return 1;
474 }
475 if (!quiet)
476 printf("layout for %s set to %d\n", devname, array.layout);
477 return 0;
478
479 case 1: /* raid_disks and size can each be changed. They are independant */
480
481 if (level != UnSet && level != 1) {
482 fprintf(stderr, Name ": %s: Cannot change RAID level of a RAID1 array.\n",
483 devname);
484 return 1;
485 }
486 if (chunksize || layout != UnSet) {
487 fprintf(stderr, Name ": %s: Cannot change chunk size of layout for a RAID1 array.\n",
488 devname);
489 return 1;
490 }
491
492 /* Each can trigger a resync/recovery which will block the
493 * other from happening. Later we could block
494 * resync for the duration via 'sync_action'...
495 */
496 if (raid_disks > 0) {
497 array.raid_disks = raid_disks;
498 if (ioctl(fd, SET_ARRAY_INFO, &array) != 0) {
499 fprintf(stderr, Name ": Cannot set raid-devices for %s: %s\n",
500 devname, strerror(errno));
501 return 1;
502 }
503 }
504 if (size >= 0) {
505 array.size = size;
506 if (ioctl(fd, SET_ARRAY_INFO, &array) != 0) {
507 fprintf(stderr, Name ": Cannot set device size for %s: %s\n",
508 devname, strerror(errno));
509 return 1;
510 }
511 }
512 return 0;
513
514 case 4:
515 case 5:
516 case 6:
517 st = super_by_fd(fd);
518
519 /* size can be changed independently.
520 * layout/chunksize/raid_disks/level can be changed
521 * though the kernel may not support it all.
522 * If 'suspend_lo' is not present in devfs, then
523 * these cannot be changed.
524 */
525 if (size >= 0) {
526 /* Cannot change other details as well.. */
527 if (layout != UnSet ||
528 chunksize != 0 ||
529 raid_disks != 0 ||
530 level != UnSet) {
531 fprintf(stderr, Name ": %s: Cannot change shape as well as size of a %s array.\n",
532 devname, c);
533 return 1;
534 }
535 array.size = size;
536 if (ioctl(fd, SET_ARRAY_INFO, &array) != 0) {
537 fprintf(stderr, Name ": Cannot set device size/shape for %s: %s\n",
538 devname, strerror(errno));
539 return 1;
540 }
541 return 0;
542 }
543 /* Ok, just change the shape. This can be awkward.
544 * There are three possibilities.
545 * 1/ The array will shrink. We don't support this
546 * possibility. Maybe one day...
547 * 2/ The array will not change size. This is easy enough
548 * to do, but not reliably. If the process is aborted
549 * the array *will* be corrupted. So maybe we can allow
550 * this but only if the user is really certain. e.g.
551 * --really-risk-everything
552 * 3/ The array will grow. This can be reliably achieved.
553 * However the kernel's restripe routines will cheerfully
554 * overwrite some early data before it is safe. So we
555 * need to make a backup of the early parts of the array
556 * and be ready to restore it if rebuild aborts very early.
557 *
558 * We backup data by writing it to all spares (there must be
559 * at least 1, so even raid6->raid5 requires a spare to be
560 * present).
561 *
562 * So: we enumerate the devices in the array and
563 * make sure we can open all of them.
564 * Then we freeze the early part of the array and
565 * backup to the various spares.
566 * Then we request changes and start the reshape.
567 * Monitor progress until it has passed the danger zone.
568 * and finally invalidate the copied data and unfreeze the
569 * start of the array.
570 *
571 * Before we can do this we need to decide:
572 * - will the array grow? Just calculate size
573 * - how much needs to be saved: count stripes.
574 * - where to save data... good question.
575 *
576 */
577 nlevel = olevel = array.level;
578 nchunk = ochunk = array.chunk_size;
579 nlayout = olayout = array.layout;
580 ndisks = odisks = array.raid_disks;
581
582 if (level != UnSet) nlevel = level;
583 if (chunksize) nchunk = chunksize;
584 if (layout != UnSet) nlayout = layout;
585 if (raid_disks) ndisks = raid_disks;
586
587 odata = odisks-1;
588 if (olevel == 6) odata--; /* number of data disks */
589 ndata = ndisks-1;
590 if (nlevel == 6) ndata--;
591
592 if (ndata < odata) {
593 fprintf(stderr, Name ": %s: Cannot reduce number of data disks (yet).\n",
594 devname);
595 return 1;
596 }
597 if (ndata == odata) {
598 fprintf(stderr, Name ": %s: Cannot reshape array without increasing size (yet).\n",
599 devname);
600 return 1;
601 }
602 /* Well, it is growing... so how much do we need to backup.
603 * Need to backup a full number of new-stripes, such that the
604 * last one does not over-write any place that it would be read
605 * from
606 */
607 nstripe = ostripe = 0;
608 while (nstripe >= ostripe) {
609 nstripe += nchunk/512;
610 last_block = nstripe * ndata;
611 ostripe = last_block / odata / (ochunk/512) * (ochunk/512);
612 }
613 fprintf(stderr, Name ": Need to backup %lluK of critical "
614 "section..\n", last_block/2);
615
616 sra = sysfs_read(fd, 0,
617 GET_COMPONENT|GET_DEVS|GET_OFFSET|GET_STATE|
618 GET_CACHE);
619 if (!sra) {
620 fprintf(stderr, Name ": %s: Cannot get array details from sysfs\n",
621 devname);
622 return 1;
623 }
624
625 if (last_block >= sra->component_size/2) {
626 fprintf(stderr, Name ": %s: Something wrong - reshape aborted\n",
627 devname);
628 return 1;
629 }
630 if (sra->array.spare_disks == 0 && backup_file == NULL) {
631 fprintf(stderr, Name ": %s: Cannot grow - need a spare or backup-file to backup critical section\n",
632 devname);
633 return 1;
634 }
635
636 nrdisks = array.nr_disks + sra->array.spare_disks;
637 /* Now we need to open all these devices so we can read/write.
638 */
639 fdlist = malloc((1+nrdisks) * sizeof(int));
640 offsets = malloc((1+nrdisks) * sizeof(offsets[0]));
641 if (!fdlist || !offsets) {
642 fprintf(stderr, Name ": malloc failed: grow aborted\n");
643 return 1;
644 }
645 for (d=0; d <= nrdisks; d++)
646 fdlist[d] = -1;
647 d = array.raid_disks;
648 for (sd = sra->devs; sd; sd=sd->next) {
649 if (sd->disk.state & (1<<MD_DISK_FAULTY))
650 continue;
651 if (sd->disk.state & (1<<MD_DISK_SYNC)) {
652 char *dn = map_dev(sd->disk.major,
653 sd->disk.minor, 1);
654 fdlist[sd->disk.raid_disk]
655 = dev_open(dn, O_RDONLY);
656 offsets[sd->disk.raid_disk] = sd->data_offset;
657 if (fdlist[sd->disk.raid_disk] < 0) {
658 fprintf(stderr, Name ": %s: cannot open component %s\n",
659 devname, dn?dn:"-unknown-");
660 goto abort;
661 }
662 } else {
663 /* spare */
664 char *dn = map_dev(sd->disk.major,
665 sd->disk.minor, 1);
666 fdlist[d] = dev_open(dn, O_RDWR);
667 offsets[d] = sd->data_offset;
668 if (fdlist[d]<0) {
669 fprintf(stderr, Name ": %s: cannot open component %s\n",
670 devname, dn?dn:"-unknown");
671 goto abort;
672 }
673 d++;
674 }
675 }
676 for (i=0 ; i<array.raid_disks; i++)
677 if (fdlist[i] < 0) {
678 fprintf(stderr, Name ": %s: failed to find device %d. Array might be degraded.\n"
679 " --grow aborted\n", devname, i);
680 goto abort;
681 }
682 spares = sra->array.spare_disks;
683 if (backup_file) {
684 fdlist[d] = open(backup_file, O_RDWR|O_CREAT|O_EXCL, S_IRUSR | S_IWUSR);
685 if (fdlist[d] < 0) {
686 fprintf(stderr, Name ": %s: cannot create backup file %s: %s\n",
687 devname, backup_file, strerror(errno));
688 goto abort;
689 }
690 offsets[d] = 8;
691 d++;
692 spares++;
693 }
694 if (fdlist[array.raid_disks] < 0) {
695 fprintf(stderr, Name ": %s: failed to find a spare and no backup-file given - --grow aborted\n",
696 devname);
697 goto abort;
698 }
699
700 /* Find a superblock */
701 if (st->ss->load_super(st, fdlist[0], NULL)) {
702 fprintf(stderr, Name ": %s: Cannot find a superblock\n",
703 devname);
704 goto abort;
705 }
706
707
708 memcpy(bsb.magic, "md_backup_data-1", 16);
709 st->ss->uuid_from_super(st, (int*)&bsb.set_uuid);
710 bsb.mtime = __cpu_to_le64(time(0));
711 bsb.arraystart = 0;
712 bsb.length = __cpu_to_le64(last_block);
713
714 /* Decide offset for the backup, llseek the spares, and write
715 * a leading superblock 4K earlier.
716 */
717 for (i=array.raid_disks; i<d; i++) {
718 char abuf[4096+512];
719 char *buf = (char*)(((unsigned long)abuf+511)& ~511);
720 if (i==d-1 && backup_file) {
721 /* This is the backup file */
722 offsets[i] = 8;
723 } else
724 offsets[i] += sra->component_size - last_block - 8;
725 if (lseek64(fdlist[i], (offsets[i]<<9) - 4096, 0)
726 != (offsets[i]<<9) - 4096) {
727 fprintf(stderr, Name ": could not seek...\n");
728 goto abort;
729 }
730 memset(buf, 0, 4096);
731 bsb.devstart = __cpu_to_le64(offsets[i]);
732 bsb.sb_csum = bsb_csum((char*)&bsb, ((char*)&bsb.sb_csum)-((char*)&bsb));
733 memcpy(buf, &bsb, sizeof(bsb));
734 if (write(fdlist[i], buf, 4096) != 4096) {
735 fprintf(stderr, Name ": could not write leading superblock\n");
736 goto abort;
737 }
738 }
739 array.level = nlevel;
740 array.raid_disks = ndisks;
741 array.chunk_size = nchunk;
742 array.layout = nlayout;
743 if (ioctl(fd, SET_ARRAY_INFO, &array) != 0) {
744 if (errno == ENOSPC) {
745 /* stripe cache is not big enough.
746 * It needs to be 4 times chunksize_size,
747 * and we assume pagesize is 4K
748 */
749 if (sra->cache_size < 4 * (nchunk/4096)) {
750 sysfs_set_num(sra, NULL,
751 "stripe_cache_size",
752 4 * (nchunk/4096) +1);
753 if (ioctl(fd, SET_ARRAY_INFO,
754 &array) == 0)
755 goto ok;
756 }
757 }
758 fprintf(stderr, Name ": Cannot set device size/shape for %s: %s\n",
759 devname, strerror(errno));
760 goto abort;
761 }
762 ok: ;
763
764 /* suspend the relevant region */
765 sysfs_set_num(sra, NULL, "suspend_hi", 0); /* just in case */
766 if (sysfs_set_num(sra, NULL, "suspend_lo", 0) < 0 ||
767 sysfs_set_num(sra, NULL, "suspend_hi", last_block) < 0) {
768 fprintf(stderr, Name ": %s: failed to suspend device.\n",
769 devname);
770 goto abort_resume;
771 }
772
773
774 err = save_stripes(fdlist, offsets,
775 odisks, ochunk, olevel, olayout,
776 spares, fdlist+odisks,
777 0ULL, last_block*512);
778
779 /* abort if there was an error */
780 if (err < 0) {
781 fprintf(stderr, Name ": %s: failed to save critical region\n",
782 devname);
783 goto abort_resume;
784 }
785
786 for (i=odisks; i<d ; i++) {
787 bsb.devstart = __cpu_to_le64(offsets[i]);
788 bsb.sb_csum = bsb_csum((char*)&bsb, ((char*)&bsb.sb_csum)-((char*)&bsb));
789 if (lseek64(fdlist[i], (offsets[i]+last_block)<<9, 0) < 0 ||
790 write(fdlist[i], &bsb, sizeof(bsb)) != sizeof(bsb) ||
791 fsync(fdlist[i]) != 0) {
792 fprintf(stderr, Name ": %s: failed to save metadata for critical region backups.\n",
793 devname);
794 goto abort_resume;
795 }
796 }
797
798 /* start the reshape happening */
799 if (sysfs_set_str(sra, NULL, "sync_action", "reshape") < 0) {
800 fprintf(stderr, Name ": %s: failed to initiate reshape\n",
801 devname);
802 goto abort_resume;
803 }
804 /* wait for reshape to pass the critical region */
805 while(1) {
806 unsigned long long comp;
807
808 if (sysfs_get_ll(sra, NULL, "sync_completed", &comp)<0) {
809 sleep(5);
810 break;
811 }
812 if (comp >= nstripe)
813 break;
814 if (comp == 0) {
815 /* Maybe it finished already */
816 char action[20];
817 if (sysfs_get_str(sra, NULL, "sync_action",
818 action, 20) > 0 &&
819 strncmp(action, "reshape", 7) != 0)
820 break;
821 }
822 sleep(1);
823 }
824
825 /* invalidate superblocks */
826 memset(&bsb, 0, sizeof(bsb));
827 for (i=odisks; i<d ; i++) {
828 lseek64(fdlist[i], (offsets[i]+last_block)<<9, 0);
829 if (write(fdlist[i], &bsb, sizeof(bsb)) < 0) {
830 fprintf(stderr, Name ": %s: failed to invalidate metadata for raid disk %d\n",
831 devname, i);
832 }
833 }
834
835 /* unsuspend. */
836 sysfs_set_num(sra, NULL, "suspend_lo", last_block);
837
838 for (i=0; i<d; i++)
839 if (fdlist[i] >= 0)
840 close(fdlist[i]);
841 free(fdlist);
842 free(offsets);
843 if (backup_file)
844 unlink(backup_file);
845
846 fprintf(stderr, Name ": ... critical section passed.\n");
847 break;
848 }
849 return 0;
850
851
852 abort_resume:
853 sysfs_set_num(sra, NULL, "suspend_lo", last_block);
854 abort:
855 for (i=0; i<array.nr_disks; i++)
856 if (fdlist[i] >= 0)
857 close(fdlist[i]);
858 free(fdlist);
859 free(offsets);
860 if (backup_file)
861 unlink(backup_file);
862 return 1;
863
864 }
865
866 /*
867 * If any spare contains md_backup_data-1 which is recent wrt mtime,
868 * write that data into the array and update the super blocks with
869 * the new reshape_progress
870 */
871 int Grow_restart(struct supertype *st, struct mdinfo *info, int *fdlist, int cnt, char *backup_file)
872 {
873 int i, j;
874 int old_disks;
875 unsigned long long *offsets;
876 unsigned long long nstripe, ostripe, last_block;
877 int ndata, odata;
878
879 if (info->delta_disks < 0)
880 return 1; /* cannot handle a shrink */
881 if (info->new_level != info->array.level ||
882 info->new_layout != info->array.layout ||
883 info->new_chunk != info->array.chunk_size)
884 return 1; /* Can only handle change in disks */
885
886 old_disks = info->array.raid_disks - info->delta_disks;
887
888 for (i=old_disks-(backup_file?1:0); i<cnt; i++) {
889 struct mdinfo dinfo;
890 char buf[4096];
891 int fd;
892
893 /* This was a spare and may have some saved data on it.
894 * Load the superblock, find and load the
895 * backup_super_block.
896 * If either fail, go on to next device.
897 * If the backup contains no new info, just return
898 * else restore data and update all superblocks
899 */
900 if (i == old_disks-1) {
901 fd = open(backup_file, O_RDONLY);
902 if (fd<0)
903 continue;
904 } else {
905 fd = fdlist[i];
906 if (fd < 0)
907 continue;
908 if (st->ss->load_super(st, fd, NULL))
909 continue;
910
911 st->ss->getinfo_super(st, &dinfo);
912 st->ss->free_super(st);
913
914 if (lseek64(fd,
915 (dinfo.data_offset + dinfo.component_size - 8) <<9,
916 0) < 0)
917 continue; /* Cannot seek */
918 }
919 if (read(fd, &bsb, sizeof(bsb)) != sizeof(bsb))
920 continue; /* Cannot read */
921 if (memcmp(bsb.magic, "md_backup_data-1", 16) != 0)
922 continue;
923 if (bsb.sb_csum != bsb_csum((char*)&bsb, ((char*)&bsb.sb_csum)-((char*)&bsb)))
924 continue; /* bad checksum */
925 if (memcmp(bsb.set_uuid,info->uuid, 16) != 0)
926 continue; /* Wrong uuid */
927
928 if (info->array.utime > __le64_to_cpu(bsb.mtime) + 3600 ||
929 info->array.utime < __le64_to_cpu(bsb.mtime))
930 continue; /* time stamp is too bad */
931
932 if (__le64_to_cpu(bsb.arraystart) != 0)
933 continue; /* Can only handle backup from start of array */
934 if (__le64_to_cpu(bsb.length) <
935 info->reshape_progress)
936 continue; /* No new data here */
937
938 if (lseek64(fd, __le64_to_cpu(bsb.devstart)*512, 0)< 0)
939 continue; /* Cannot seek */
940 /* There should be a duplicate backup superblock 4k before here */
941 if (lseek64(fd, -4096, 1) < 0 ||
942 read(fd, buf, 4096) != 4096 ||
943 memcmp(buf, &bsb, sizeof(bsb)) != 0)
944 continue; /* Cannot find leading superblock */
945
946 /* Now need the data offsets for all devices. */
947 offsets = malloc(sizeof(*offsets)*info->array.raid_disks);
948 for(j=0; j<info->array.raid_disks; j++) {
949 if (fdlist[j] < 0)
950 continue;
951 if (st->ss->load_super(st, fdlist[j], NULL))
952 /* FIXME should be this be an error */
953 continue;
954 st->ss->getinfo_super(st, &dinfo);
955 st->ss->free_super(st);
956 offsets[j] = dinfo.data_offset;
957 }
958 printf(Name ": restoring critical section\n");
959
960 if (restore_stripes(fdlist, offsets,
961 info->array.raid_disks,
962 info->new_chunk,
963 info->new_level,
964 info->new_layout,
965 fd, __le64_to_cpu(bsb.devstart)*512,
966 0, __le64_to_cpu(bsb.length)*512)) {
967 /* didn't succeed, so giveup */
968 return 1;
969 }
970
971 /* Ok, so the data is restored. Let's update those superblocks. */
972
973 for (j=0; j<info->array.raid_disks; j++) {
974 if (fdlist[j] < 0) continue;
975 if (st->ss->load_super(st, fdlist[j], NULL))
976 continue;
977 st->ss->getinfo_super(st, &dinfo);
978 dinfo.reshape_progress = __le64_to_cpu(bsb.length);
979 st->ss->update_super(st, &dinfo,
980 "_reshape_progress",
981 NULL,0, 0, NULL);
982 st->ss->store_super(st, fdlist[j]);
983 st->ss->free_super(st);
984 }
985
986 /* And we are done! */
987 return 0;
988 }
989 /* Didn't find any backup data, try to see if any
990 * was needed.
991 */
992 nstripe = ostripe = 0;
993 odata = info->array.raid_disks - info->delta_disks - 1;
994 if (info->array.level == 6) odata--; /* number of data disks */
995 ndata = info->array.raid_disks - 1;
996 if (info->new_level == 6) ndata--;
997 last_block = 0;
998 while (nstripe >= ostripe) {
999 nstripe += info->new_chunk / 512;
1000 last_block = nstripe * ndata;
1001 ostripe = last_block / odata / (info->array.chunk_size/512) *
1002 (info->array.chunk_size/512);
1003 }
1004
1005 if (info->reshape_progress >= last_block)
1006 return 0;
1007 /* needed to recover critical section! */
1008 return 1;
1009 }