Create missing /dev files where needed.
[thirdparty/mdadm.git] / Grow.c
1 /*
2  * mdadm - manage Linux "md" devices aka RAID arrays.
3  *
4  * Copyright (C) 2001-2004 Neil Brown <neilb@cse.unsw.edu.au>
5  *
6  *
7  *    This program is free software; you can redistribute it and/or modify
8  *    it under the terms of the GNU General Public License as published by
9  *    the Free Software Foundation; either version 2 of the License, or
10  *    (at your option) any later version.
11  *
12  *    This program is distributed in the hope that it will be useful,
13  *    but WITHOUT ANY WARRANTY; without even the implied warranty of
14  *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  *    GNU General Public License for more details.
16  *
17  *    You should have received a copy of the GNU General Public License
18  *    along with this program; if not, write to the Free Software
19  *    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
20  *
21  *    Author: Neil Brown
22  *    Email: <neilb@cse.unsw.edu.au>
23  *    Paper: Neil Brown
24  *           School of Computer Science and Engineering
25  *           The University of New South Wales
26  *           Sydney, 2052
27  *           Australia
28  */
29 #include        "mdadm.h"
30 #include        "dlink.h"
31
32 #if ! defined(__BIG_ENDIAN) && ! defined(__LITTLE_ENDIAN)
33 #error no endian defined
34 #endif
35 #include        "md_u.h"
36 #include        "md_p.h"
37
38 int Grow_Add_device(char *devname, int fd, char *newdev)
39 {
40         /* Add a device to an active array.
41          * Currently, just extend a linear array.
42          * This requires writing a new superblock on the
43          * new device, calling the kernel to add the device,
44          * and if that succeeds, update the superblock on
45          * all other devices.
46          * This means that we need to *find* all other devices.
47          */
48         struct mdinfo info;
49
50         void *super = NULL;
51         struct stat stb;
52         int nfd, fd2;
53         int d, nd;
54         struct supertype *st = NULL;
55         
56
57         if (ioctl(fd, GET_ARRAY_INFO, &info.array) < 0) {
58                 fprintf(stderr, Name ": cannot get array info for %s\n", devname);
59                 return 1;
60         }
61
62         st = super_by_version(info.array.major_version, info.array.minor_version);
63         if (!st) {
64                 fprintf(stderr, Name ": cannot handle arrays with superblock version %d\n", info.array.major_version);
65                 return 1;
66         }
67
68         if (info.array.level != -1) {
69                 fprintf(stderr, Name ": can only add devices to linear arrays\n");
70                 return 1;
71         }
72
73         nfd = open(newdev, O_RDWR|O_EXCL);
74         if (nfd < 0) {
75                 fprintf(stderr, Name ": cannot open %s\n", newdev);
76                 return 1;
77         }
78         fstat(nfd, &stb);
79         if ((stb.st_mode & S_IFMT) != S_IFBLK) {
80                 fprintf(stderr, Name ": %s is not a block device!\n", newdev);
81                 close(nfd);
82                 return 1;
83         }
84         /* now check out all the devices and make sure we can read the superblock */
85         for (d=0 ; d < info.array.raid_disks ; d++) {
86                 mdu_disk_info_t disk;
87                 char *dv;
88
89                 disk.number = d;
90                 if (ioctl(fd, GET_DISK_INFO, &disk) < 0) {
91                         fprintf(stderr, Name ": cannot get device detail for device %d\n",
92                                 d);
93                         return 1;
94                 }
95                 dv = map_dev(disk.major, disk.minor, 1);
96                 if (!dv) {
97                         fprintf(stderr, Name ": cannot find device file for device %d\n",
98                                 d);
99                         return 1;
100                 }
101                 fd2 = dev_open(dv, O_RDWR);
102                 if (!fd2) {
103                         fprintf(stderr, Name ": cannot open device file %s\n", dv);
104                         return 1;
105                 }
106                 if (super) free(super);
107                 super= NULL;
108                 if (st->ss->load_super(st, fd2, &super, NULL)) {
109                         fprintf(stderr, Name ": cannot find super block on %s\n", dv);
110                         close(fd2);
111                         return 1;
112                 }
113                 close(fd2);
114         }
115         /* Ok, looks good. Lets update the superblock and write it out to
116          * newdev.
117          */
118         
119         info.disk.number = d;
120         info.disk.major = major(stb.st_rdev);
121         info.disk.minor = minor(stb.st_rdev);
122         info.disk.raid_disk = d;
123         info.disk.state = (1 << MD_DISK_SYNC) | (1 << MD_DISK_ACTIVE);
124         st->ss->update_super(&info, super, "grow", newdev, 0);
125
126         if (st->ss->store_super(st, nfd, super)) {
127                 fprintf(stderr, Name ": Cannot store new superblock on %s\n", newdev);
128                 close(nfd);
129                 return 1;
130         }
131         close(nfd);
132
133         if (ioctl(fd, ADD_NEW_DISK, &info.disk) != 0) {
134                 fprintf(stderr, Name ": Cannot add new disk to this array\n");
135                 return 1;
136         }
137         /* Well, that seems to have worked.
138          * Now go through and update all superblocks
139          */
140
141         if (ioctl(fd, GET_ARRAY_INFO, &info.array) < 0) {
142                 fprintf(stderr, Name ": cannot get array info for %s\n", devname);
143                 return 1;
144         }
145
146         nd = d;
147         for (d=0 ; d < info.array.raid_disks ; d++) {
148                 mdu_disk_info_t disk;
149                 char *dv;
150
151                 disk.number = d;
152                 if (ioctl(fd, GET_DISK_INFO, &disk) < 0) {
153                         fprintf(stderr, Name ": cannot get device detail for device %d\n",
154                                 d);
155                         return 1;
156                 }
157                 dv = map_dev(disk.major, disk.minor, 1);
158                 if (!dv) {
159                         fprintf(stderr, Name ": cannot find device file for device %d\n",
160                                 d);
161                         return 1;
162                 }
163                 fd2 = dev_open(dv, O_RDWR);
164                 if (fd2 < 0) {
165                         fprintf(stderr, Name ": cannot open device file %s\n", dv);
166                         return 1;
167                 }
168                 if (st->ss->load_super(st, fd2, &super, NULL)) {
169                         fprintf(stderr, Name ": cannot find super block on %s\n", dv);
170                         close(fd);
171                         return 1;
172                 }
173                 info.array.raid_disks = nd+1;
174                 info.array.nr_disks = nd+1;
175                 info.array.active_disks = nd+1;
176                 info.array.working_disks = nd+1;
177                 info.disk.number = nd;
178                 info.disk.major = major(stb.st_rdev);
179                 info.disk.minor = minor(stb.st_rdev);
180                 info.disk.raid_disk = nd;
181                 info.disk.state = (1 << MD_DISK_SYNC) | (1 << MD_DISK_ACTIVE);
182                 st->ss->update_super(&info, super, "grow", dv, 0);
183                 
184                 if (st->ss->store_super(st, fd2, super)) {
185                         fprintf(stderr, Name ": Cannot store new superblock on %s\n", dv);
186                         close(fd2);
187                         return 1;
188                 }
189                 close(fd2);
190         }
191
192         return 0;
193 }
194
195 int Grow_addbitmap(char *devname, int fd, char *file, int chunk, int delay, int write_behind, int force)
196 {
197         /*
198          * First check that array doesn't have a bitmap
199          * Then create the bitmap
200          * Then add it
201          *
202          * For internal bitmaps, we need to check the version,
203          * find all the active devices, and write the bitmap block
204          * to all devices
205          */
206         mdu_bitmap_file_t bmf;
207         mdu_array_info_t array;
208         struct supertype *st;
209         int major = BITMAP_MAJOR_HI;
210         int vers = md_get_version(fd);
211         unsigned long long bitmapsize, array_size;
212
213         if (vers < 9003) {
214                 major = BITMAP_MAJOR_HOSTENDIAN;
215 #ifdef __BIG_ENDIAN
216                 fprintf(stderr, Name ": Warning - bitmaps created on this kernel are not portable\n"
217                         "  between different architectured.  Consider upgrading the Linux kernel.\n");
218 #endif
219         }
220
221         if (ioctl(fd, GET_BITMAP_FILE, &bmf) != 0) {
222                 if (errno == ENOMEM)
223                         fprintf(stderr, Name ": Memory allocation failure.\n");
224                 else
225                         fprintf(stderr, Name ": bitmaps not supported by this kernel.\n");
226                 return 1;
227         }
228         if (bmf.pathname[0]) {
229                 if (strcmp(file,"none")==0) {
230                         if (ioctl(fd, SET_BITMAP_FILE, -1)!= 0) {
231                                 fprintf(stderr, Name ": failed to remove bitmap %s\n",
232                                         bmf.pathname);
233                                 return 1;
234                         }
235                         return 0;
236                 }
237                 fprintf(stderr, Name ": %s already has a bitmap (%s)\n",
238                         devname, bmf.pathname);
239                 return 1;
240         }
241         if (ioctl(fd, GET_ARRAY_INFO, &array) != 0) {
242                 fprintf(stderr, Name ": cannot get array status for %s\n", devname);
243                 return 1;
244         }
245         if (array.state & (1<<MD_SB_BITMAP_PRESENT)) {
246                 if (strcmp(file, "none")==0) {
247                         array.state &= ~(1<<MD_SB_BITMAP_PRESENT);
248                         if (ioctl(fd, SET_ARRAY_INFO, &array)!= 0) {
249                                 fprintf(stderr, Name ": failed to remove internal bitmap.\n");
250                                 return 1;
251                         }
252                         return 0;
253                 }
254                 fprintf(stderr, Name ": Internal bitmap already present on %s\n",
255                         devname);
256                 return 1;
257         }
258         bitmapsize = array.size;
259         bitmapsize <<= 1;
260 #ifdef BLKGETSIZE64
261         if (ioctl(fd, BLKGETSIZE64, &array_size) == 0 &&
262             array_size > (0x7fffffffULL<<9)) {
263                 /* Array is big enough that we cannot trust array.size
264                  * try other approaches
265                  */
266                 bitmapsize = get_component_size(fd);
267         }
268 #endif
269         if (bitmapsize == 0) {
270                 fprintf(stderr, Name ": Cannot reliably determine size of array to create bitmap - sorry.\n");
271                 return 1;
272         }
273
274         if (array.level == 10) {
275                 int ncopies = (array.layout&255)*(array.layout>>8);
276                 bitmapsize = bitmapsize * array.raid_disks / ncopies;
277         }
278
279         st = super_by_version(array.major_version, array.minor_version);
280         if (!st) {
281                 fprintf(stderr, Name ": Cannot understand version %d.%d\n",
282                         array.major_version, array.minor_version);
283                 return 1;
284         }
285         if (strcmp(file, "none") == 0) {
286                 fprintf(stderr, Name ": no bitmap found on %s\n", devname);
287                 return 1;
288         } else if (strcmp(file, "internal") == 0) {
289                 int d;
290                 for (d=0; d< st->max_devs; d++) {
291                         mdu_disk_info_t disk;
292                         char *dv;
293                         disk.number = d;
294                         if (ioctl(fd, GET_DISK_INFO, &disk) < 0)
295                                 continue;
296                         if (disk.major == 0 &&
297                             disk.minor == 0)
298                                 continue;
299                         if ((disk.state & (1<<MD_DISK_SYNC))==0)
300                                 continue;
301                         dv = map_dev(disk.major, disk.minor, 1);
302                         if (dv) {
303                                 void *super;
304                                 int fd2 = dev_open(dv, O_RDWR);
305                                 if (fd2 < 0)
306                                         continue;
307                                 if (st->ss->load_super(st, fd2, &super, NULL)==0) {
308                                         if (st->ss->add_internal_bitmap(st, super,
309                                                                         chunk, delay, write_behind,
310                                                                         bitmapsize, 0, major))
311                                                 st->ss->write_bitmap(st, fd2, super);
312                                         else {
313                                                 fprintf(stderr, Name ": failed to create internal bitmap - chunksize problem.\n");
314                                                 close(fd2);
315                                                 return 1;
316                                         }
317                                 }
318                                 close(fd2);
319                         }
320                 }
321                 array.state |= (1<<MD_SB_BITMAP_PRESENT);
322                 if (ioctl(fd, SET_ARRAY_INFO, &array)!= 0) {
323                         fprintf(stderr, Name ": failed to set internal bitmap.\n");
324                         return 1;
325                 }
326         } else {
327                 int uuid[4];
328                 int bitmap_fd;
329                 int d;
330                 int max_devs = st->max_devs;
331                 void *super = NULL;
332                 if (chunk == UnSet)
333                         chunk = DEFAULT_BITMAP_CHUNK;
334
335                 /* try to load a superblock */
336                 for (d=0; d<max_devs; d++) {
337                         mdu_disk_info_t disk;
338                         char *dv;
339                         int fd2;
340                         disk.number = d;
341                         if (ioctl(fd, GET_DISK_INFO, &disk) < 0)
342                                 continue;
343                         if ((disk.major==0 && disk.minor==0) ||
344                             (disk.state & (1<<MD_DISK_REMOVED)))
345                                 continue;
346                         dv = map_dev(disk.major, disk.minor, 1);
347                         if (!dv) continue;
348                         fd2 = dev_open(dv, O_RDONLY);
349                         if (fd2 >= 0 &&
350                             st->ss->load_super(st, fd2, &super, NULL) == 0) {
351                                 close(fd2);
352                                 st->ss->uuid_from_super(uuid, super);
353                                 break;
354                         }
355                         close(fd2);
356                 }
357                 if (d == max_devs) {
358                         fprintf(stderr, Name ": cannot find UUID for array!\n");
359                         return 1;
360                 }
361                 if (CreateBitmap(file, force, (char*)uuid, chunk,
362                                  delay, write_behind, bitmapsize, major)) {
363                         return 1;
364                 }
365                 bitmap_fd = open(file, O_RDWR);
366                 if (bitmap_fd < 0) {
367                         fprintf(stderr, Name ": weird: %s cannot be opened\n",
368                                 file);
369                         return 1;
370                 }
371                 if (ioctl(fd, SET_BITMAP_FILE, bitmap_fd) < 0) {
372                         fprintf(stderr, Name ": Cannot set bitmap file for %s: %s\n",
373                                 devname, strerror(errno));
374                         return 1;
375                 }
376         }
377
378         return 0;
379 }
380
381
/*
 * When reshaping an array we might need to backup some data.
 * This is written to all spares with a 'super_block' describing it.
 * The superblock goes 1K from the end of the used space on the
 * device.
 * It is written after the backup is complete.
 * It has the following structure.
 */
390
391 struct mdp_backup_super {
392         char    magic[16];  /* md_backup_data-1 */
393         __u8    set_uuid[16];
394         __u64   mtime;
395         /* start/sizes in 512byte sectors */
396         __u64   devstart;
397         __u64   arraystart;
398         __u64   length;
399         __u32   sb_csum;        /* csum of preceeding bytes. */
400 };
401
/*
 * bsb_csum - checksum for a backup superblock.
 *
 * buf: start of the superblock.
 * len: number of bytes the checksum is taken over.
 *
 * Returns the checksum converted with __cpu_to_le32().
 *
 * The sum is accumulated in an unsigned variable: repeatedly shifting a
 * signed int left overflows after a handful of iterations, which is
 * undefined behaviour.  Unsigned arithmetic wraps and yields the same
 * low 32 bits.
 *
 * NOTE(review): every iteration adds buf[0], not buf[i], so only the
 * first byte and 'len' influence the result.  This looks unintended,
 * but the value is stored on disk in the backup superblock; changing it
 * would stop previously written backups from verifying.  Left as-is -
 * confirm against the code that validates the checksum before changing.
 */
int bsb_csum(char *buf, int len)
{
	int i;
	unsigned int csum = 0;

	for (i = 0; i < len; i++)
		csum = (csum << 3) + (unsigned int)buf[0];
	return __cpu_to_le32(csum);
}
410
411 int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
412                  long long size,
413                  int level, int layout, int chunksize, int raid_disks)
414 {
415         /* Make some changes in the shape of an array.
416          * The kernel must support the change.
417          * Different reshapes have subtly different meaning for different
418          * levels, so we need to check the current state of the array
419          * and go from there.
420          */
421         struct mdu_array_info_s array;
422         char *c;
423
424         struct mdp_backup_super bsb;
425         struct supertype *st;
426
427         int nlevel, olevel;
428         int nchunk, ochunk;
429         int nlayout, olayout;
430         int ndisks, odisks;
431         int ndata, odata;
432         unsigned long long nstripe, ostripe, last_block;
433         int *fdlist;
434         unsigned long long *offsets;
435         int d, i, spares;
436         int nrdisks;
437         int err;
438         void *super = NULL;
439
440         struct sysarray *sra;
441         struct sysdev *sd;
442
443         if (ioctl(fd, GET_ARRAY_INFO, &array) < 0) {
444                 fprintf(stderr, Name ": %s is not an active md array - aborting\n",
445                         devname);
446                 return 1;
447         }
448         c = map_num(pers, array.level);
449         if (c == NULL) c = "-unknown-";
450         switch(array.level) {
451         default: /* raid0, linear, multipath cannot be reconfigured */
452                 fprintf(stderr, Name ": %s array %s cannot be reshaped.\n",
453                         c, devname);
454                 return 1;
455
456         case LEVEL_FAULTY: /* only 'layout' change is permitted */
457
458                 if (size >= 0) {
459                         fprintf(stderr, Name ": %s: Cannot change size of a 'faulty' array\n",
460                                 devname);
461                         return 1;
462                 }
463                 if (level != UnSet && level != LEVEL_FAULTY) {
464                         fprintf(stderr, Name ": %s: Cannot change RAID level of a 'faulty' array\n",
465                                 devname);
466                         return 1;
467                 }
468                 if (chunksize  || raid_disks) {
469                         fprintf(stderr, Name ": %s: Cannot change chunksize or disks of a 'faulty' array\n",
470                                 devname);
471                         return 1;
472                 }
473                 if (layout == UnSet)
474                         return 0; /* nothing to do.... */
475
476                 array.layout = layout;
477                 if (ioctl(fd, SET_ARRAY_INFO, &array) != 0) {
478                         fprintf(stderr, Name ": Cannot set layout for %s: %s\n",
479                                 devname, strerror(errno));
480                         return 1;
481                 }
482                 if (!quiet)
483                         printf("layout for %s set to %d\n", devname, array.layout);
484                 return 0;
485
486         case 1: /* raid_disks and size can each be changed.  They are independant */
487
488                 if (level != UnSet && level != 1) {
489                         fprintf(stderr, Name ": %s: Cannot change RAID level of a RAID1 array.\n",
490                                 devname);
491                         return 1;
492                 }
493                 if (chunksize || layout != UnSet) {
494                         fprintf(stderr, Name ": %s: Cannot change chunk size of layout for a RAID1 array.\n",
495                                 devname);
496                         return 1;
497                 }
498
499                 /* Each can trigger a resync/recovery which will block the
500                  * other from happening.  Later we could block
501                  * resync for the duration via 'sync_action'...
502                  */
503                 if (raid_disks >= 0)
504                         array.raid_disks = raid_disks;
505                 if (size >= 0)
506                         array.size = size;
507                 if (ioctl(fd, SET_ARRAY_INFO, &array) != 0) {
508                         fprintf(stderr, Name ": Cannot set device size/shape for %s: %s\n",
509                                 devname, strerror(errno));
510                         return 1;
511                 }
512                 return 0;
513
514         case 4:
515         case 5:
516         case 6:
517                 st = super_by_version(array.major_version,
518                                       array.minor_version);
519                 /* size can be changed independantly.
520                  * layout/chunksize/raid_disks/level can be changed
521                  * though the kernel may not support it all.
522                  * If 'suspend_lo' is not present in devfs, then
523                  * these cannot be changed.
524                  */
525                 if (size >= 0) {
526                         /* Cannot change other details as well.. */
527                         if (layout != UnSet ||
528                             chunksize != 0 ||
529                             raid_disks != 0 ||
530                             level != UnSet) {
531                                 fprintf(stderr, Name ": %s: Cannot change shape as well as size of a %s array.\n",
532                                         devname, c);
533                                 return 1;
534                         }
535                         array.size = size;
536                         if (ioctl(fd, SET_ARRAY_INFO, &array) != 0) {
537                                 fprintf(stderr, Name ": Cannot set device size/shape for %s: %s\n",
538                                         devname, strerror(errno));
539                                 return 1;
540                         }
541                         return 0;
542                 }
543                 /* Ok, just change the shape. This can be awkward.
544                  *  There are three possibilities.
545                  * 1/ The array will shrink.  We don't support this
546                  *    possibility.  Maybe one day...
547                  * 2/ The array will not change size.  This is easy enough
548                  *    to do, but not reliably.  If the process is aborted
549                  *    the array *will* be corrupted.  So maybe we can allow
550                  *    this but only if the user is really certain.  e.g.
551                  *    --really-risk-everything
552                  * 3/ The array will grow. This can be reliably achieved.
553                  *    However the kernel's restripe routines will cheerfully
554                  *    overwrite some early data before it is safe.  So we
555                  *    need to make a backup of the early parts of the array
556                  *    and be ready to restore it if rebuild aborts very early.
557                  *
558                  *    We backup data by writing it to all spares (there must be
559                  *    at least 1, so even raid6->raid5 requires a spare to be
560                  *    present).
561                  *
562                  *    So: we enumerate the devices in the array and
563                  *    make sure we can open all of them.
564                  *    Then we freeze the early part of the array and
565                  *    backup to the various spares.
566                  *    Then we request changes and start the reshape.
567                  *    Monitor progress until it has passed the danger zone.
568                  *    and finally invalidate the copied data and unfreeze the
569                  *    start of the array.
570                  *
571                  *    Before we can do this we need to decide:
572                  *     - will the array grow?  Just calculate size
573                  *     - how much needs to be saved: count stripes.
574                  *     - where to save data... good question.
575                  *
576                  */
577                 nlevel = olevel = array.level;
578                 nchunk = ochunk = array.chunk_size;
579                 nlayout = olayout = array.layout;
580                 ndisks = odisks = array.raid_disks;
581
582                 if (level != UnSet) nlevel = level;
583                 if (chunksize) nchunk = chunksize;
584                 if (layout != UnSet) nlayout = layout;
585                 if (raid_disks) ndisks = raid_disks;
586
587                 odata = odisks-1;
588                 if (olevel == 6) odata--; /* number of data disks */
589                 ndata = ndisks-1;
590                 if (nlevel == 6) ndata--;
591
592                 if (ndata < odata) {
593                         fprintf(stderr, Name ": %s: Cannot reduce number of data disks (yet).\n",
594                                 devname);
595                         return 1;
596                 }
597                 if (ndata == odata) {
598                         fprintf(stderr, Name ": %s: Cannot reshape array without increasing size (yet).\n",
599                                 devname);
600                         return 1;
601                 }
602                 /* Well, it is growing... so how much do we need to backup.
603                  * Need to backup a full number of new-stripes, such that the
604                  * last one does not over-write any place that it would be read
605                  * from
606                  */
607                 nstripe = ostripe = 0;
608                 while (nstripe >= ostripe) {
609                         nstripe += nchunk/512;
610                         last_block = nstripe * ndata;
611                         ostripe = last_block / odata / (ochunk/512) * (ochunk/512);
612                 }
613                 printf("mdadm: Need to backup %lluK of critical section..\n", last_block/2);
614
615                 sra = sysfs_read(fd, 0,
616                                  GET_COMPONENT|GET_DEVS|GET_OFFSET|GET_STATE);
617                 if (!sra) {
618                         fprintf(stderr, Name ": %s: Cannot get array details from sysfs\n",
619                                 devname);
620                         return 1;
621                 }
622
623                 if (last_block >= sra->component_size/2) {
624                         fprintf(stderr, Name ": %s: Something wrong - reshape aborted\n",
625                                 devname);
626                         return 1;
627                 }
628                 if (sra->spares == 0 && backup_file == NULL) {
629                         fprintf(stderr, Name ": %s: Cannot grow - need a spare or backup-file to backup critical section\n",
630                                 devname);
631                         return 1;
632                 }
633
634                 nrdisks = array.nr_disks + sra->spares;
635                 /* Now we need to open all these devices so we can read/write.
636                  */
637                 fdlist = malloc((1+nrdisks) * sizeof(int));
638                 offsets = malloc((1+nrdisks) * sizeof(offsets[0]));
639                 if (!fdlist || !offsets) {
640                         fprintf(stderr, Name ": malloc failed: grow aborted\n");
641                         return 1;
642                 }
643                 for (d=0; d <= nrdisks; d++)
644                         fdlist[d] = -1;
645                 d = array.raid_disks;
646                 for (sd = sra->devs; sd; sd=sd->next) {
647                         if (sd->state & (1<<MD_DISK_FAULTY))
648                                 continue;
649                         if (sd->state & (1<<MD_DISK_SYNC)) {
650                                 char *dn = map_dev(sd->major, sd->minor, 1);
651                                 fdlist[sd->role] = dev_open(dn, O_RDONLY);
652                                 offsets[sd->role] = sd->offset;
653                                 if (fdlist[sd->role] < 0) {
654                                         fprintf(stderr, Name ": %s: cannot open component %s\n",
655                                                 devname, dn);
656                                         goto abort;
657                                 }
658                         } else {
659                                 /* spare */
660                                 char *dn = map_dev(sd->major, sd->minor, 1);
661                                 fdlist[d] = dev_open(dn, O_RDWR);
662                                 offsets[d] = sd->offset;
663                                 if (fdlist[d]<0) {
664                                         fprintf(stderr, Name ": %s: cannot open component %s\n",
665                                                 devname, dn);
666                                         goto abort;
667                                 }
668                                 d++;
669                         }
670                 }
671                 for (i=0 ; i<array.raid_disks; i++)
672                         if (fdlist[i] < 0) {
673                                 fprintf(stderr, Name ": %s: failed to find device %d. Array might be degraded.\n"
674                                         " --grow aborted\n", devname, i);
675                                 goto abort;
676                         }
677                 spares = sra->spares;
678                 if (backup_file) {
679                         fdlist[d] = open(backup_file, O_RDWR|O_CREAT|O_EXCL, 0600);
680                         if (fdlist[d] < 0) {
681                                 fprintf(stderr, Name ": %s: cannot create backup file %s: %s\n",
682                                         devname, backup_file, strerror(errno));
683                                 goto abort;
684                         }
685                         offsets[d] = 8;
686                         d++;
687                         spares++;
688                 }
689                 if (fdlist[array.raid_disks] < 0) {
690                         fprintf(stderr, Name ": %s: failed to find a spare and no backup-file given - --grow aborted\n",
691                                 devname);
692                         goto abort;
693                 }
694
695                 /* Find a superblock */
696                 if (st->ss->load_super(st, fdlist[0], &super, NULL)) {
697                         fprintf(stderr, Name ": %s: Cannot find a superblock\n",
698                                 devname);
699                         goto abort;
700                 }
701
702
703                 memcpy(bsb.magic, "md_backup_data-1", 16);
704                 st->ss->uuid_from_super((int*)&bsb.set_uuid, super);
705                 bsb.mtime = __cpu_to_le64(time(0));
706                 bsb.arraystart = 0;
707                 bsb.length = __cpu_to_le64(last_block);
708
709                 /* Decide offset for the backup, llseek the spares, and write
710                  * a leading superblock 4K earlier.
711                  */
712                 for (i=array.raid_disks; i<d; i++) {
713                         char buf[4096];
714                         if (i==d-1 && backup_file) {
715                                 /* This is the backup file */
716                                 offsets[i] = 8;
717                         } else
718                                 offsets[i] += sra->component_size - last_block - 8;
719                         if (lseek64(fdlist[i], (offsets[i]<<9) - 4096, 0)
720                             != (offsets[i]<<9) - 4096) {
721                                 fprintf(stderr, Name ": could not seek...\n");
722                                 goto abort;
723                         }
724                         memset(buf, 0, sizeof(buf));
725                         bsb.devstart = __cpu_to_le64(offsets[i]);
726                         bsb.sb_csum = bsb_csum((char*)&bsb, ((char*)&bsb.sb_csum)-((char*)&bsb));
727                         memcpy(buf, &bsb, sizeof(bsb));
728                         if (write(fdlist[i], buf, 4096) != 4096) {
729                                 fprintf(stderr, Name ": could not write leading superblock\n");
730                                 goto abort;
731                         }
732                 }
733                 array.level = nlevel;
734                 array.raid_disks = ndisks;
735                 array.chunk_size = nchunk;
736                 array.layout = nlayout;
737                 if (ioctl(fd, SET_ARRAY_INFO, &array) != 0) {
738                         fprintf(stderr, Name ": Cannot set device size/shape for %s: %s\n",
739                                 devname, strerror(errno));
740                         goto abort;
741                 }
742
743                 /* suspend the relevant region */
744                 sysfs_set_num(sra, NULL, "suspend_hi", 0); /* just in case */
745                 if (sysfs_set_num(sra, NULL, "suspend_lo", 0) < 0 ||
746                     sysfs_set_num(sra, NULL, "suspend_hi", last_block) < 0) {
747                         fprintf(stderr, Name ": %s: failed to suspend device.\n",
748                                 devname);
749                         goto abort_resume;
750                 }
751
752
753                 err = save_stripes(fdlist, offsets,
754                                    odisks, ochunk, olevel, olayout,
755                                    spares, fdlist+odisks,
756                                    0ULL, last_block*512);
757
758                 /* abort if there was an error */
759                 if (err < 0) {
760                         fprintf(stderr, Name ": %s: failed to save critical region\n",
761                                 devname);
762                         goto abort_resume;
763                 }
764
765                 for (i=odisks; i<d ; i++) {
766                         bsb.devstart = __cpu_to_le64(offsets[i]);
767                         bsb.sb_csum = bsb_csum((char*)&bsb, ((char*)&bsb.sb_csum)-((char*)&bsb));
768                         if (lseek64(fdlist[i], (offsets[i]+last_block)<<9, 0) < 0 ||
769                             write(fdlist[i], &bsb, sizeof(bsb)) != sizeof(bsb) ||
770                             fsync(fdlist[i]) != 0) {
771                                 fprintf(stderr, Name ": %s: fail to save metadata for critical region backups.\n",
772                                         devname);
773                                 goto abort_resume;
774                         }
775                 }
776
777                 /* start the reshape happening */
778                 if (sysfs_set_str(sra, NULL, "sync_action", "reshape") < 0) {
779                         fprintf(stderr, Name ": %s: failed to initiate reshape\n",
780                                 devname);
781                         goto abort_resume;
782                 }
783                 /* wait for reshape to pass the critical region */
784                 while(1) {
785                         unsigned long long comp;
786                         if (sysfs_get_ll(sra, NULL, "sync_completed", &comp)<0) {
787                                 sleep(5);
788                                 break;
789                         }
790                         if (comp >= nstripe)
791                                 break;
792                         sleep(1);
793                 }
794                 
795                 /* invalidate superblocks */
796                 memset(&bsb, 0, sizeof(bsb));
797                 for (i=odisks; i<d ; i++) {
798                         lseek64(fdlist[i], (offsets[i]+last_block)<<9, 0);
799                         write(fdlist[i], &bsb, sizeof(bsb));
800                 }
801
802                 /* unsuspend. */
803                 sysfs_set_num(sra, NULL, "suspend_lo", last_block);
804
805                 for (i=0; i<d; i++)
806                         if (fdlist[i] >= 0)
807                                 close(fdlist[i]);
808                 free(fdlist);
809                 free(offsets);
810                 if (backup_file)
811                         unlink(backup_file);
812
813                 printf(Name ": ... critical section passed.\n");
814                 break;
815         }
816         return 0;
817
818
819  abort_resume:
820         sysfs_set_num(sra, NULL, "suspend_lo", last_block);
821  abort:
822         for (i=0; i<array.nr_disks; i++)
823                 if (fdlist[i] >= 0)
824                         close(fdlist[i]);
825         free(fdlist);
826         free(offsets);
827         if (backup_file)
828                 unlink(backup_file);
829         return 1;
830
831 }
832
833 /*
834  * If any spare contains md_back_data-1 which is recent wrt mtime,
835  * write that data into the array and update the super blocks with
836  * the new reshape_progress
837  */
838 int Grow_restart(struct supertype *st, struct mdinfo *info, int *fdlist, int cnt, char *backup_file)
839 {
840         int i, j;
841         int old_disks;
842         int err = 0;
843         unsigned long long *offsets;
844
845         if (info->delta_disks < 0)
846                 return 1; /* cannot handle a shrink */
847         if (info->new_level != info->array.level ||
848             info->new_layout != info->array.layout ||
849             info->new_chunk != info->array.chunk_size)
850                 return 1; /* Can only handle change in disks */
851
852         old_disks = info->array.raid_disks - info->delta_disks;
853
854         for (i=old_disks-(backup_file?1:0); i<cnt; i++) {
855                 void *super = NULL;
856                 struct mdinfo dinfo;
857                 struct mdp_backup_super bsb;
858                 char buf[4096];
859                 int fd;
860
861                 /* This was a spare and may have some saved data on it.
862                  * Load the superblock, find and load the
863                  * backup_super_block.
864                  * If either fail, go on to next device.
865                  * If the backup contains no new info, just return
866                  * else restore data and update all superblocks
867                  */
868                 if (i == old_disks-1) {
869                         fd = open(backup_file, O_RDONLY);
870                         if (fd<0)
871                                 continue;
872                         if (lseek(fd, 4096, 0) != 4096)
873                                 continue;
874                 } else {
875                         fd = fdlist[i];
876                         if (fd < 0)
877                                 continue;
878                         if (st->ss->load_super(st, fd, &super, NULL))
879                                 continue;
880
881                         st->ss->getinfo_super(&dinfo, super);
882                         free(super); super = NULL;
883                         if (lseek64(fd,
884                                     (dinfo.data_offset + dinfo.component_size - 8) <<9,
885                                     0) < 0)
886                                 continue; /* Cannot seek */
887                 }
888                 if (read(fd, &bsb, sizeof(bsb)) != sizeof(bsb))
889                         continue; /* Cannot read */
890                 if (memcmp(bsb.magic, "md_backup_data-1", 16) != 0)
891                         continue;
892                 if (bsb.sb_csum != bsb_csum((char*)&bsb, ((char*)&bsb.sb_csum)-((char*)&bsb)))
893                         continue; /* bad checksum */
894                 if (memcmp(bsb.set_uuid,info->uuid, 16) != 0)
895                         continue; /* Wrong uuid */
896
897                 if (info->array.utime > __le64_to_cpu(bsb.mtime) + 3600 ||
898                     info->array.utime < __le64_to_cpu(bsb.mtime))
899                         continue; /* time stamp is too bad */
900
901                 if (__le64_to_cpu(bsb.arraystart) != 0)
902                         continue; /* Can only handle backup from start of array */
903                 if (__le64_to_cpu(bsb.length) <
904                     info->reshape_progress)
905                         continue; /* No new data here */
906
907                 if (lseek64(fd, __le64_to_cpu(bsb.devstart)*512, 0)< 0)
908                         continue; /* Cannot seek */
909                 /* There should be a duplicate backup superblock 4k before here */
910                 if (lseek64(fd, -4096, 1) < 0 ||
911                     read(fd, buf, 4096) != 4096 ||
912                     memcmp(buf, &bsb, sizeof(buf)) != 0)
913                         continue; /* Cannot find leading superblock */
914
915                 /* Now need the data offsets for all devices. */
916                 offsets = malloc(sizeof(*offsets)*info->array.raid_disks);
917                 for(j=0; j<info->array.raid_disks; j++) {
918                         if (fdlist[j] < 0)
919                                 continue;
920                         if (st->ss->load_super(st, fdlist[j], &super, NULL))
921                                 /* FIXME should be this be an error */
922                                 continue;
923                         st->ss->getinfo_super(&dinfo, super);
924                         free(super); super = NULL;
925                         offsets[j] = dinfo.data_offset;
926                 }
927                 printf(Name ": restoring critical section\n");
928
929                 if (restore_stripes(fdlist, offsets,
930                                     info->array.raid_disks,
931                                     info->new_chunk,
932                                     info->new_level,
933                                     info->new_layout,
934                                     fd, __le64_to_cpu(bsb.devstart)*512,
935                                     0, __le64_to_cpu(bsb.length)*512)) {
936                         /* didn't succeed, so giveup */
937                         return -1;
938                 }
939
940                 /* Ok, so the data is restored. Let's update those superblocks. */
941
942                 for (j=0; j<info->array.raid_disks; j++) {
943                         if (fdlist[j] < 0) continue;
944                         if (st->ss->load_super(st, fdlist[j], &super, NULL))
945                                 continue;
946                         st->ss->getinfo_super(&dinfo, super);
947                         dinfo.reshape_progress = __le64_to_cpu(bsb.length);
948                         st->ss->update_super(&dinfo, super, "_reshape_progress",NULL,0);
949                         st->ss->store_super(st, fdlist[j], super);
950                         free(super);
951                 }
952
953                 /* And we are done! */
954                 return 0;
955         }
956         return err;
957 }