]> git.ipfire.org Git - thirdparty/mdadm.git/blame - Assemble.c
mdadm-0.7.2
[thirdparty/mdadm.git] / Assemble.c
CommitLineData
64c4757e 1/*
9a9dab36 2 * mdadm - manage Linux "md" devices aka RAID arrays.
64c4757e 3 *
cd29a5c8 4 * Copyright (C) 2001-2002 Neil Brown <neilb@cse.unsw.edu.au>
64c4757e
NB
5 *
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 *
21 * Author: Neil Brown
22 * Email: <neilb@cse.unsw.edu.au>
23 * Paper: Neil Brown
24 * School of Computer Science and Engineering
25 * The University of New South Wales
26 * Sydney, 2052
27 * Australia
28 */
29
9a9dab36 30#include "mdadm.h"
64c4757e 31#include "md_u.h"
52826846 32#include "md_p.h"
64c4757e
NB
33
34int Assemble(char *mddev, int mdfd,
52826846 35 mddev_ident_t ident, char *conffile,
cd29a5c8 36 mddev_dev_t devlist,
64c4757e
NB
37 int readonly, int runstop,
38 int verbose, int force)
39{
40 /*
52826846
NB
41 * The task of Assemble is to find a collection of
42 * devices that should (according to their superblocks)
43 * form an array, and to give this collection to the MD driver.
44 * In Linux-2.4 and later, this involves submitting a
64c4757e
NB
45 * SET_ARRAY_INFO ioctl with no arg - to prepare
46 * the array - and then submit a number of
47 * ADD_NEW_DISK ioctls to add disks into
48 * the array. Finally RUN_ARRAY might
49 * be submitted to start the array.
50 *
51 * Much of the work of Assemble is in finding and/or
52 * checking the disks to make sure they look right.
53 *
54 * If mddev is not set, then scan must be and we
55 * read through the config file for dev+uuid mapping
56 * We recurse, setting mddev, for each device that
57 * - isn't running
58 * - has a valid uuid (or any uuid if !uuidset
59 *
60 * If mddev is set, we try to determine state of md.
61 * check version - must be at least 0.90.0
62 * check kernel version. must be at least 2.4.
63 * If not, we can possibly fall back on START_ARRAY
64 * Try to GET_ARRAY_INFO.
65 * If possible, give up
66 * If not, try to STOP_ARRAY just to make sure
67 *
68 * If !uuidset and scan, look in conf-file for uuid
69 * If not found, give up
cd29a5c8 70 * If !devlist and scan and uuidset, get list of devs from conf-file
64c4757e
NB
71 *
72 * For each device:
73 * Check superblock - discard if bad
74 * Check uuid (set if we don't have one) - discard if no match
75 * Check superblock similarity if we have a superbloc - discard if different
76 * Record events, devicenum, utime
77 * This should give us a list of devices for the array
78 * We should collect the most recent event and utime numbers
79 *
80 * Count disks with recent enough event count
81 * While force && !enough disks
82 * Choose newest rejected disks, update event count
83 * mark clean and rewrite superblock
84 * If recent kernel:
85 * SET_ARRAY_INFO
86 * foreach device with recent events : ADD_NEW_DISK
87 * if runstop == 1 || "enough" disks and runstop==0 -> RUN_ARRAY
88 * If old kernel:
89 * Check the device numbers in superblock are right
90 * update superblock if any changes
91 * START_ARRAY
92 *
93 */
94 int old_linux = 0;
95 int vers;
96 mdu_array_info_t array;
64c4757e
NB
97 mdp_super_t first_super, super;
98 struct {
99 char *devname;
100 int major, minor;
11a3e71d 101 int oldmajor, oldminor;
64c4757e
NB
102 long long events;
103 time_t utime;
104 int uptodate;
52826846 105 int raid_disk;
64c4757e
NB
106 } devices[MD_SB_DISKS];
107 int best[MD_SB_DISKS]; /* indexed by raid_disk */
52826846 108 int devcnt = 0, okcnt, sparecnt;
64c4757e
NB
109 int i;
110 int most_recent = 0;
cd29a5c8 111 int chosen_drive;
52826846 112 int change = 0;
cd29a5c8
NB
113 int inargv = 0;
114 int start_partial_ok = force || devlist==NULL;
64c4757e 115
64c4757e
NB
116 vers = md_get_version(mdfd);
117 if (vers <= 0) {
682c7051 118 fprintf(stderr, Name ": %s appears not to be an md device.\n");
64c4757e
NB
119 return 1;
120 }
682c7051
NB
121 if (vers < 9000) {
122 fprintf(stderr, Name ": Assemble requires driver version 0.90.0 or later.\n"
52826846 123 " Upgrade your kernel or try --build\n");
64c4757e
NB
124 return 1;
125 }
682c7051 126 if (get_linux_version() < 2004000)
64c4757e
NB
127 old_linux = 1;
128
129 if (ioctl(mdfd, GET_ARRAY_INFO, &array)>=0) {
682c7051 130 fprintf(stderr, Name ": device %s already active - cannot assemble it\n",
64c4757e
NB
131 mddev);
132 return 1;
133 }
134 ioctl(mdfd, STOP_ARRAY, NULL); /* just incase it was started but has no content */
135
136 /*
52826846
NB
137 * If any subdevs are listed, then any that don't
138 * match ident are discarded. Remainder must all match and
139 * become the array.
140 * If no subdevs, then we scan all devices in the config file, but
141 * there must be something in the identity
64c4757e 142 */
64c4757e 143
cd29a5c8 144 if (!devlist &&
52826846
NB
145 ident->uuid_set == 0 &&
146 ident->super_minor < 0 &&
147 ident->devices == NULL) {
148 fprintf(stderr, Name ": No identity information available for %s - cannot assemble.\n",
149 mddev);
150 return 1;
64c4757e 151 }
cd29a5c8 152 if (devlist == NULL)
64c4757e 153 devlist = conf_get_devs(conffile);
cd29a5c8 154 else inargv = 1;
64c4757e 155
64c4757e
NB
156 first_super.md_magic = 0;
157 for (i=0; i<MD_SB_DISKS; i++)
158 best[i] = -1;
159
82b27616
NB
160 if (verbose)
161 fprintf(stderr, Name ": looking for devices for %s\n",
162 mddev);
163
cd29a5c8 164 while ( devlist) {
64c4757e
NB
165 char *devname;
166 int this_uuid[4];
167 int dfd;
168 struct stat stb;
52826846
NB
169 int havesuper=0;
170
cd29a5c8
NB
171 devname = devlist->devname;
172 devlist = devlist->next;
64c4757e 173
52826846
NB
174 if (ident->devices &&
175 !match_oneof(ident->devices, devname))
176 continue;
177
64c4757e
NB
178 dfd = open(devname, O_RDONLY, 0);
179 if (dfd < 0) {
180 if (inargv || verbose)
682c7051 181 fprintf(stderr, Name ": cannot open device %s: %s\n",
64c4757e 182 devname, strerror(errno));
52826846
NB
183 } else if (fstat(dfd, &stb)< 0) {
184 /* Impossible! */
185 fprintf(stderr, Name ": fstat failed for %s: %s\n",
186 devname, strerror(errno));
187 close(dfd);
cd29a5c8
NB
188 } else if ((stb.st_mode & S_IFMT) != S_IFBLK) {
189 fprintf(stderr, Name ": %s is not a block device.\n",
52826846
NB
190 devname);
191 close(dfd);
cd29a5c8 192 } else if (load_super(dfd, &super)) {
64c4757e 193 if (inargv || verbose)
682c7051 194 fprintf( stderr, Name ": no RAID superblock on %s\n",
64c4757e
NB
195 devname);
196 close(dfd);
52826846
NB
197 } else {
198 havesuper =1;
199 uuid_from_super(this_uuid, &super);
200 close(dfd);
64c4757e 201 }
52826846
NB
202
203 if (ident->uuid_set &&
204 (!havesuper || same_uuid(this_uuid, ident->uuid)==0)) {
205 if (inargv || verbose)
206 fprintf(stderr, Name ": %s has wrong uuid.\n",
207 devname);
208 continue;
82b27616 209 }
52826846
NB
210 if (ident->super_minor >= 0 &&
211 (!havesuper || ident->super_minor != super.md_minor)) {
64c4757e 212 if (inargv || verbose)
52826846 213 fprintf(stderr, Name ": %s has wrong super-minor.\n",
64c4757e
NB
214 devname);
215 continue;
216 }
cd29a5c8
NB
217 if (ident->level != -10 &&
218 (!havesuper|| ident->level != super.level)) {
219 if (inargv || verbose)
220 fprintf(stderr, Name ": %s has wrong raid level.\n",
221 devname);
222 continue;
223 }
224 if (ident->raid_disks != -1 &&
225 (!havesuper || ident->raid_disks!= super.raid_disks)) {
226 if (inargv || verbose)
227 fprintf(stderr, Name ": %s requires wrong number of drives.\n",
228 devname);
229 continue;
230 }
52826846
NB
231
232 /* If we are this far, then we are commited to this device.
233 * If the super_block doesn't exist, or doesn't match others,
234 * then we cannot continue
235 */
52826846
NB
236
237 if (!havesuper) {
238 fprintf(stderr, Name ": %s has no superblock - assembly aborted\n",
239 devname);
240 return 1;
241 }
242 if (compare_super(&first_super, &super)) {
243 fprintf(stderr, Name ": superblock on %s doesn't match others - assembly aborted\n",
244 devname);
245 return 1;
64c4757e
NB
246 }
247
64c4757e 248 if (devcnt >= MD_SB_DISKS) {
682c7051 249 fprintf(stderr, Name ": ouch - too many devices appear to be in this array. Ignoring %s\n",
64c4757e
NB
250 devname);
251 continue;
252 }
cd29a5c8
NB
253 if (verbose)
254 fprintf(stderr, Name ": %s is identified as a member of %s, slot %d.\n",
255 devname, mddev, super.this_disk.raid_disk);
64c4757e
NB
256 devices[devcnt].devname = devname;
257 devices[devcnt].major = MAJOR(stb.st_rdev);
258 devices[devcnt].minor = MINOR(stb.st_rdev);
11a3e71d
NB
259 devices[devcnt].oldmajor = super.this_disk.major;
260 devices[devcnt].oldminor = super.this_disk.minor;
64c4757e
NB
261 devices[devcnt].events = md_event(&super);
262 devices[devcnt].utime = super.utime;
52826846 263 devices[devcnt].raid_disk = super.this_disk.raid_disk;
64c4757e
NB
264 devices[devcnt].uptodate = 0;
265 if (most_recent < devcnt) {
266 if (devices[devcnt].events
267 > devices[most_recent].events)
268 most_recent = devcnt;
269 }
52826846
NB
270 i = devices[devcnt].raid_disk;
271 if (i>=0 && i < MD_SB_DISKS)
272 if (best[i] == -1
273 || devices[best[i]].events < devices[devcnt].events)
274 best[i] = devcnt;
275
64c4757e
NB
276 devcnt++;
277 }
278
279 if (devcnt == 0) {
682c7051 280 fprintf(stderr, Name ": no devices found for %s\n",
64c4757e
NB
281 mddev);
282 return 1;
283 }
284 /* now we have some devices that might be suitable.
285 * I wonder how many
286 */
287 okcnt = 0;
52826846
NB
288 sparecnt=0;
289 for (i=0; i< MD_SB_DISKS;i++) {
64c4757e 290 int j = best[i];
cd29a5c8 291 int event_margin = !force;
64c4757e 292 if (j < 0) continue;
cd29a5c8 293 if (devices[j].events+event_margin >=
64c4757e
NB
294 devices[most_recent].events) {
295 devices[j].uptodate = 1;
52826846
NB
296 if (i < first_super.raid_disks)
297 okcnt++;
298 else
299 sparecnt++;
64c4757e
NB
300 }
301 }
302 while (force && !enough(first_super.level, first_super.raid_disks, okcnt)) {
303 /* Choose the newest best drive which is
304 * not up-to-date, update the superblock
305 * and add it.
306 */
52826846 307 int fd;
cd29a5c8 308 chosen_drive = -1;
52826846
NB
309 for (i=0; i<first_super.raid_disks; i++) {
310 int j = best[i];
311 if (j>=0 &&
312 !devices[j].uptodate &&
313 devices[j].events > 0 &&
314 (chosen_drive < 0 ||
315 devices[j].events > devices[chosen_drive].events))
316 chosen_drive = j;
317 }
318 if (chosen_drive < 0)
319 break;
320 fprintf(stderr, Name ": forcing event count in %s(%d) from %d upto %d\n",
321 devices[chosen_drive].devname, devices[chosen_drive].raid_disk,
322 (int)(devices[chosen_drive].events),
323 (int)(devices[most_recent].events));
324 fd = open(devices[chosen_drive].devname, O_RDWR);
325 if (fd < 0) {
326 fprintf(stderr, Name ": Couldn't open %s for write - not updating\n",
327 devices[chosen_drive].devname);
328 devices[chosen_drive].events = 0;
329 continue;
330 }
331 if (load_super(fd, &super)) {
332 close(fd);
333 fprintf(stderr, Name ": RAID superblock disappeared from %s - not updating.\n",
334 devices[chosen_drive].devname);
335 devices[chosen_drive].events = 0;
336 continue;
337 }
338 super.events_hi = (devices[most_recent].events>>32)&0xFFFFFFFF;
339 super.events_lo = (devices[most_recent].events)&0xFFFFFFFF;
340 super.sb_csum = calc_sb_csum(&super);
341/*DRYRUN*/ if (store_super(fd, &super)) {
342 close(fd);
343 fprintf(stderr, Name ": Could not re-write superblock on %s\n",
344 devices[chosen_drive].devname);
345 devices[chosen_drive].events = 0;
346 continue;
347 }
348 close(fd);
349 devices[chosen_drive].events = devices[most_recent].events;
350 devices[chosen_drive].uptodate = 1;
351 okcnt++;
64c4757e 352 }
52826846
NB
353
354 /* Now we want to look at the superblock which the kernel will base things on
355 * and compare the devices that we think are working with the devices that the
356 * superblock thinks are working.
357 * If there are differences and --force is given, then update this chosen
358 * superblock.
359 */
cd29a5c8 360 chosen_drive = -1;
52826846
NB
361 for (i=0; chosen_drive < 0 && i<MD_SB_DISKS; i++) {
362 int j = best[i];
363 int fd;
364 if (j<0)
365 continue;
366 if (!devices[j].uptodate)
367 continue;
368 chosen_drive = j;
369 if ((fd=open(devices[j].devname, O_RDONLY))< 0) {
370 fprintf(stderr, Name ": Cannot open %s: %s\n",
371 devices[j].devname, strerror(errno));
372 return 1;
373 }
374 if (load_super(fd, &super)) {
375 close(fd);
376 fprintf(stderr, Name ": RAID superblock has disappeared from %s\n",
377 devices[j].devname);
378 return 1;
379 }
380 close(fd);
381 }
382
383 for (i=0; i<MD_SB_DISKS; i++) {
384 int j = best[i];
11a3e71d
NB
385 int desired_state;
386
387 if (i < super.raid_disks)
388 desired_state = (1<<MD_DISK_ACTIVE) | (1<<MD_DISK_SYNC);
389 else
390 desired_state = 0;
391
52826846
NB
392 if (j<0)
393 continue;
394 if (!devices[j].uptodate)
395 continue;
11a3e71d
NB
396#if 0
397This doesnt work yet
398 if (devices[j].major != super.disks[i].major ||
399 devices[j].minor != super.disks[i].minor) {
52826846 400 change |= 1;
11a3e71d
NB
401 super.disks[i].major = devices[j].major;
402 super.disks[i].minor = devices[j].minor;
403 }
404#endif
405 if (devices[j].oldmajor != super.disks[i].major ||
406 devices[j].oldminor != super.disks[i].minor) {
407 change |= 2;
408 super.disks[i].major = devices[i].oldmajor;
409 super.disks[i].minor = devices[i].oldminor;
52826846
NB
410 }
411 if (devices[j].uptodate &&
11a3e71d 412 (super.disks[i].state != desired_state)) {
52826846
NB
413 if (force) {
414 fprintf(stderr, Name ": "
cd29a5c8 415 "clearing FAULTY flag for device %d in %s for %s\n",
52826846 416 j, mddev, devices[j].devname);
11a3e71d 417 super.disks[i].state = desired_state;
52826846
NB
418 change |= 2;
419 } else {
420 fprintf(stderr, Name ": "
11a3e71d 421 "device %d in %s has wrong state in superblock, but %s seems ok\n",
52826846
NB
422 i, mddev, devices[j].devname);
423 }
424 }
425 if (!devices[j].uptodate &&
426 !(super.disks[i].state & (1 << MD_DISK_FAULTY))) {
427 fprintf(stderr, Name ": devices %d of %s is not marked FAULTY in superblock, but cannot be found\n",
428 i, mddev);
429 }
430 }
431
432 if ((force && (change & 2))
433 || (old_linux && (change & 1))) {
434 int fd;
435 super.sb_csum = calc_sb_csum(&super);
436 fd = open(devices[chosen_drive].devname, O_RDWR);
437 if (fd < 0) {
438 fprintf(stderr, Name ": Could open %s for write - cannot Assemble array.\n",
439 devices[chosen_drive].devname);
440 return 1;
441 }
442 if (store_super(fd, &super)) {
443 close(fd);
444 fprintf(stderr, Name ": Could not re-write superblock on %s\n",
445 devices[chosen_drive].devname);
446 return 1;
447 }
448 close(fd);
449 change = 0;
450 }
451
64c4757e
NB
452 /* Almost ready to actually *do* something */
453 if (!old_linux) {
454 if (ioctl(mdfd, SET_ARRAY_INFO, NULL) != 0) {
682c7051 455 fprintf(stderr, Name ": SET_ARRAY_INFO failed for %s: %s\n",
64c4757e
NB
456 mddev, strerror(errno));
457 return 1;
458 }
52826846
NB
459 /* First, add the raid disks, but add the chosen one last */
460 for (i=0; i<=MD_SB_DISKS; i++) {
461 int j;
462 if (i < MD_SB_DISKS) {
463 j = best[i];
464 if (j == chosen_drive)
465 continue;
466 } else
467 j = chosen_drive;
468
0db17fcb 469 if (j >= 0 && devices[j].uptodate) {
64c4757e
NB
470 mdu_disk_info_t disk;
471 memset(&disk, 0, sizeof(disk));
472 disk.major = devices[j].major;
473 disk.minor = devices[j].minor;
474 if (ioctl(mdfd, ADD_NEW_DISK, &disk)!=0) {
682c7051 475 fprintf(stderr, Name ": failed to add %s to %s: %s\n",
64c4757e
NB
476 devices[j].devname,
477 mddev,
478 strerror(errno));
52826846
NB
479 if (i < first_super.raid_disks)
480 okcnt--;
481 else
482 sparecnt--;
483 } else if (verbose)
484 fprintf(stderr, Name ": added %s to %s as %d\n",
485 devices[j].devname, mddev, devices[j].raid_disk);
486 } else if (verbose && i < first_super.raid_disks)
682c7051 487 fprintf(stderr, Name ": no uptodate device for slot %d of %s\n",
64c4757e
NB
488 i, mddev);
489 }
52826846 490
64c4757e
NB
491 if (runstop == 1 ||
492 (runstop == 0 &&
cd29a5c8
NB
493 ( first_super.raid_disks == okcnt
494 || start_partial_ok && enough(first_super.level, first_super.raid_disks, okcnt))
495 )) {
82b27616 496 if (ioctl(mdfd, RUN_ARRAY, NULL)==0) {
52826846
NB
497 fprintf(stderr, Name ": %s has been started with %d drive%s",
498 mddev, okcnt, okcnt==1?"":"s");
499 if (sparecnt)
500 fprintf(stderr, " and %d spare%s", sparecnt, sparecnt==1?"":"s");
501 fprintf(stderr, ".\n");
64c4757e 502 return 0;
82b27616 503 }
682c7051 504 fprintf(stderr, Name ": failed to RUN_ARRAY %s: %s\n",
64c4757e
NB
505 mddev, strerror(errno));
506 return 1;
507 }
82b27616 508 if (runstop == -1) {
52826846
NB
509 fprintf(stderr, Name ": %s assembled from %d drive%s, but not started.\n",
510 mddev, okcnt, okcnt==1?"":"s");
64c4757e 511 return 0;
82b27616 512 }
cd29a5c8 513 fprintf(stderr, Name ": %s assembled from %d drive%s - not enough to start it (use --run to insist).\n",
52826846 514 mddev, okcnt, okcnt==1?"":"s");
64c4757e 515 return 1;
82b27616 516 } else {
52826846
NB
517 /* The "chosen_drive" is a good choice, and if necessary, the superblock has
518 * been updated to point to the current locations of devices.
519 * so we can just start the array
82b27616 520 */
cd29a5c8 521 unsigned long dev;
82b27616
NB
522 dev = MKDEV(devices[chosen_drive].major,
523 devices[chosen_drive].minor);
524 if (ioctl(mdfd, START_ARRAY, dev)) {
525 fprintf(stderr, Name ": Cannot start array: %s\n",
526 strerror(errno));
527 }
528
64c4757e
NB
529 }
530}