]> git.ipfire.org Git - thirdparty/mdadm.git/blob - Assemble.c
96e9ebe840b726965b485243328b5cc8a2caaad7
[thirdparty/mdadm.git] / Assemble.c
1 /*
2 * mdadm - manage Linux "md" devices aka RAID arrays.
3 *
4 * Copyright (C) 2001-2002 Neil Brown <neilb@cse.unsw.edu.au>
5 *
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 *
21 * Author: Neil Brown
22 * Email: <neilb@cse.unsw.edu.au>
23 * Paper: Neil Brown
24 * School of Computer Science and Engineering
25 * The University of New South Wales
26 * Sydney, 2052
27 * Australia
28 */
29
30 #include "mdadm.h"
31 #include "md_u.h"
32 #include "md_p.h"
33
34 int Assemble(char *mddev, int mdfd,
35 mddev_ident_t ident, char *conffile,
36 mddev_dev_t devlist,
37 int readonly, int runstop,
38 char *update,
39 int verbose, int force)
40 {
41 /*
42 * The task of Assemble is to find a collection of
43 * devices that should (according to their superblocks)
44 * form an array, and to give this collection to the MD driver.
45 * In Linux-2.4 and later, this involves submitting a
46 * SET_ARRAY_INFO ioctl with no arg - to prepare
47 * the array - and then submit a number of
48 * ADD_NEW_DISK ioctls to add disks into
49 * the array. Finally RUN_ARRAY might
50 * be submitted to start the array.
51 *
52 * Much of the work of Assemble is in finding and/or
53 * checking the disks to make sure they look right.
54 *
55 * If mddev is not set, then scan must be and we
56 * read through the config file for dev+uuid mapping
57 * We recurse, setting mddev, for each device that
58 * - isn't running
59 * - has a valid uuid (or any uuid if !uuidset
60 *
61 * If mddev is set, we try to determine state of md.
62 * check version - must be at least 0.90.0
63 * check kernel version. must be at least 2.4.
64 * If not, we can possibly fall back on START_ARRAY
65 * Try to GET_ARRAY_INFO.
66 * If possible, give up
67 * If not, try to STOP_ARRAY just to make sure
68 *
69 * If !uuidset and scan, look in conf-file for uuid
70 * If not found, give up
71 * If !devlist and scan and uuidset, get list of devs from conf-file
72 *
73 * For each device:
74 * Check superblock - discard if bad
75 * Check uuid (set if we don't have one) - discard if no match
76 * Check superblock similarity if we have a superblock - discard if different
77 * Record events, devicenum, utime
78 * This should give us a list of devices for the array
79 * We should collect the most recent event and utime numbers
80 *
81 * Count disks with recent enough event count
82 * While force && !enough disks
83 * Choose newest rejected disks, update event count
84 * mark clean and rewrite superblock
85 * If recent kernel:
86 * SET_ARRAY_INFO
87 * foreach device with recent events : ADD_NEW_DISK
88 * if runstop == 1 || "enough" disks and runstop==0 -> RUN_ARRAY
89 * If old kernel:
90 * Check the device numbers in superblock are right
91 * update superblock if any changes
92 * START_ARRAY
93 *
94 */
95 int old_linux = 0;
96 int vers;
97 mdu_array_info_t array;
98 mdp_super_t first_super, super;
99 struct {
100 char *devname;
101 int major, minor;
102 int oldmajor, oldminor;
103 long long events;
104 time_t utime;
105 int uptodate;
106 int state;
107 int raid_disk;
108 } *devices;
109 int *best = NULL; /* indexed by raid_disk */
110 int bestcnt = 0;
111 int devcnt = 0, okcnt, sparecnt;
112 int i;
113 int most_recent = 0;
114 int chosen_drive;
115 int change = 0;
116 int inargv = 0;
117 int start_partial_ok = force || devlist==NULL;
118 int num_devs;
119 mddev_dev_t tmpdev;
120
121 vers = md_get_version(mdfd);
122 if (vers <= 0) {
123 fprintf(stderr, Name ": %s appears not to be an md device.\n", mddev);
124 return 1;
125 }
126 if (vers < 9000) {
127 fprintf(stderr, Name ": Assemble requires driver version 0.90.0 or later.\n"
128 " Upgrade your kernel or try --build\n");
129 return 1;
130 }
131 if (get_linux_version() < 2004000)
132 old_linux = 1;
133
134 if (ioctl(mdfd, GET_ARRAY_INFO, &array)>=0) {
135 fprintf(stderr, Name ": device %s already active - cannot assemble it\n",
136 mddev);
137 return 1;
138 }
139 ioctl(mdfd, STOP_ARRAY, NULL); /* just incase it was started but has no content */
140
141 /*
142 * If any subdevs are listed, then any that don't
143 * match ident are discarded. Remainder must all match and
144 * become the array.
145 * If no subdevs, then we scan all devices in the config file, but
146 * there must be something in the identity
147 */
148
149 if (!devlist &&
150 ident->uuid_set == 0 &&
151 ident->super_minor < 0 &&
152 ident->devices == NULL) {
153 fprintf(stderr, Name ": No identity information available for %s - cannot assemble.\n",
154 mddev);
155 return 1;
156 }
157 if (devlist == NULL)
158 devlist = conf_get_devs(conffile);
159 else inargv = 1;
160
161 tmpdev = devlist; num_devs = 0;
162 while (tmpdev) {
163 num_devs++;
164 tmpdev = tmpdev->next;
165 }
166 best = malloc(num_devs * sizeof(*best));
167 devices = malloc(num_devs * sizeof(*devices));
168
169 first_super.md_magic = 0;
170 for (i=0; i<num_devs; i++)
171 best[i] = -1;
172
173 if (verbose)
174 fprintf(stderr, Name ": looking for devices for %s\n",
175 mddev);
176
177 while ( devlist) {
178 char *devname;
179 int this_uuid[4];
180 int dfd;
181 struct stat stb;
182 int havesuper=0;
183
184 devname = devlist->devname;
185 devlist = devlist->next;
186
187 if (ident->devices &&
188 !match_oneof(ident->devices, devname)) {
189 if (inargv || verbose)
190 fprintf(stderr, Name ": %s is not one of %s\n", devname, ident->devices);
191 continue;
192 }
193
194 dfd = open(devname, O_RDONLY, 0);
195 if (dfd < 0) {
196 if (inargv || verbose)
197 fprintf(stderr, Name ": cannot open device %s: %s\n",
198 devname, strerror(errno));
199 } else if (fstat(dfd, &stb)< 0) {
200 /* Impossible! */
201 fprintf(stderr, Name ": fstat failed for %s: %s\n",
202 devname, strerror(errno));
203 close(dfd);
204 } else if ((stb.st_mode & S_IFMT) != S_IFBLK) {
205 fprintf(stderr, Name ": %s is not a block device.\n",
206 devname);
207 close(dfd);
208 } else if (load_super(dfd, &super)) {
209 if (inargv || verbose)
210 fprintf( stderr, Name ": no RAID superblock on %s\n",
211 devname);
212 close(dfd);
213 } else {
214 havesuper =1;
215 uuid_from_super(this_uuid, &super);
216 close(dfd);
217 }
218
219 if (ident->uuid_set &&
220 (!havesuper || same_uuid(this_uuid, ident->uuid)==0)) {
221 if (inargv || verbose)
222 fprintf(stderr, Name ": %s has wrong uuid.\n",
223 devname);
224 continue;
225 }
226 if (ident->super_minor >= 0 &&
227 (!havesuper || ident->super_minor != super.md_minor)) {
228 if (inargv || verbose)
229 fprintf(stderr, Name ": %s has wrong super-minor.\n",
230 devname);
231 continue;
232 }
233 if (ident->level != -10 &&
234 (!havesuper|| ident->level != super.level)) {
235 if (inargv || verbose)
236 fprintf(stderr, Name ": %s has wrong raid level.\n",
237 devname);
238 continue;
239 }
240 if (ident->raid_disks != -1 &&
241 (!havesuper || ident->raid_disks!= super.raid_disks)) {
242 if (inargv || verbose)
243 fprintf(stderr, Name ": %s requires wrong number of drives.\n",
244 devname);
245 continue;
246 }
247
248 /* If we are this far, then we are commited to this device.
249 * If the super_block doesn't exist, or doesn't match others,
250 * then we cannot continue
251 */
252
253 if (!havesuper) {
254 fprintf(stderr, Name ": %s has no superblock - assembly aborted\n",
255 devname);
256 return 1;
257 }
258 if (compare_super(&first_super, &super)) {
259 fprintf(stderr, Name ": superblock on %s doesn't match others - assembly aborted\n",
260 devname);
261 return 1;
262 }
263
264
265 /* this is needed until we get a more relaxed super block format */
266 if (devcnt >= MD_SB_DISKS) {
267 fprintf(stderr, Name ": ouch - too many devices appear to be in this array. Ignoring %s\n",
268 devname);
269 continue;
270 }
271
272 /* looks like a good enough match to update the super block if needed */
273 if (update) {
274 if (strcmp(update, "sparc2.2")==0 ) {
275 /* 2.2 sparc put the events in the wrong place
276 * So we copy the tail of the superblock
277 * up 4 bytes before continuing
278 */
279 __u32 *sb32 = (__u32*)&super;
280 memcpy(sb32+MD_SB_GENERIC_CONSTANT_WORDS+7,
281 sb32+MD_SB_GENERIC_CONSTANT_WORDS+7+1,
282 (MD_SB_WORDS - (MD_SB_GENERIC_CONSTANT_WORDS+7+1))*4);
283 fprintf (stderr, Name ": adjusting superblock of %s for 2.2/sparc compatability.\n",
284 devname);
285 }
286 if (strcmp(update, "super-minor") ==0) {
287 struct stat stb2;
288 fstat(mdfd, &stb2);
289 super.md_minor = MINOR(stb2.st_rdev);
290 if (verbose)
291 fprintf(stderr, Name ": updating superblock of %s with minor number %d\n",
292 devname, super.md_minor);
293 }
294 super.sb_csum = calc_sb_csum(&super);
295 dfd = open(devname, O_RDWR, 0);
296 if (dfd < 0)
297 fprintf(stderr, Name ": Cannot open %s for superblock update\n",
298 devname);
299 else if (store_super(dfd, &super))
300 fprintf(stderr, Name ": Could not re-write superblock on %s.\n",
301 devname);
302 if (dfd >= 0)
303 close(dfd);
304 }
305
306 if (verbose)
307 fprintf(stderr, Name ": %s is identified as a member of %s, slot %d.\n",
308 devname, mddev, super.this_disk.raid_disk);
309 devices[devcnt].devname = devname;
310 devices[devcnt].major = MAJOR(stb.st_rdev);
311 devices[devcnt].minor = MINOR(stb.st_rdev);
312 devices[devcnt].oldmajor = super.this_disk.major;
313 devices[devcnt].oldminor = super.this_disk.minor;
314 devices[devcnt].events = md_event(&super);
315 devices[devcnt].utime = super.utime;
316 devices[devcnt].raid_disk = super.this_disk.raid_disk;
317 devices[devcnt].uptodate = 0;
318 devices[devcnt].state = super.this_disk.state;
319 if (most_recent < devcnt) {
320 if (devices[devcnt].events
321 > devices[most_recent].events)
322 most_recent = devcnt;
323 }
324 if (super.level == -4)
325 /* with multipath, the raid_disk from the superblock is meaningless */
326 i = devcnt;
327 else
328 i = devices[devcnt].raid_disk;
329 if (i>=0 && i < 10000) {
330 if (i >= bestcnt) {
331 int newbestcnt = i+10;
332 int *newbest = malloc(sizeof(int)*newbestcnt);
333 int c;
334 for (c=0; c < newbestcnt; c++)
335 if (c < bestcnt)
336 newbest[c] = best[c];
337 else
338 newbest[c] = -1;
339 if (best)free(best);
340 best = newbest;
341 bestcnt = newbestcnt;
342 }
343 if (best[i] == -1
344 || devices[best[i]].events < devices[devcnt].events)
345 best[i] = devcnt;
346 }
347 devcnt++;
348 }
349
350 if (devcnt == 0) {
351 fprintf(stderr, Name ": no devices found for %s\n",
352 mddev);
353 return 1;
354 }
355 /* now we have some devices that might be suitable.
356 * I wonder how many
357 */
358 okcnt = 0;
359 sparecnt=0;
360 for (i=0; i< bestcnt ;i++) {
361 int j = best[i];
362 int event_margin = !force;
363 if (j < 0) continue;
364 /* note: we ignore error flags in multipath arrays
365 * as they don't make sense
366 */
367 if (first_super.level != -4)
368 if (!(devices[j].state & (1<<MD_DISK_SYNC)))
369 continue;
370 if (devices[j].events+event_margin >=
371 devices[most_recent].events) {
372 devices[j].uptodate = 1;
373 if (i < first_super.raid_disks)
374 okcnt++;
375 else
376 sparecnt++;
377 }
378 }
379 while (force && !enough(first_super.level, first_super.raid_disks, okcnt)) {
380 /* Choose the newest best drive which is
381 * not up-to-date, update the superblock
382 * and add it.
383 */
384 int fd;
385 chosen_drive = -1;
386 for (i=0; i<first_super.raid_disks && i < bestcnt; i++) {
387 int j = best[i];
388 if (j>=0 &&
389 !devices[j].uptodate &&
390 devices[j].events > 0 &&
391 (chosen_drive < 0 ||
392 devices[j].events > devices[chosen_drive].events))
393 chosen_drive = j;
394 }
395 if (chosen_drive < 0)
396 break;
397 fprintf(stderr, Name ": forcing event count in %s(%d) from %d upto %d\n",
398 devices[chosen_drive].devname, devices[chosen_drive].raid_disk,
399 (int)(devices[chosen_drive].events),
400 (int)(devices[most_recent].events));
401 fd = open(devices[chosen_drive].devname, O_RDWR);
402 if (fd < 0) {
403 fprintf(stderr, Name ": Couldn't open %s for write - not updating\n",
404 devices[chosen_drive].devname);
405 devices[chosen_drive].events = 0;
406 continue;
407 }
408 if (load_super(fd, &super)) {
409 close(fd);
410 fprintf(stderr, Name ": RAID superblock disappeared from %s - not updating.\n",
411 devices[chosen_drive].devname);
412 devices[chosen_drive].events = 0;
413 continue;
414 }
415 super.events_hi = (devices[most_recent].events>>32)&0xFFFFFFFF;
416 super.events_lo = (devices[most_recent].events)&0xFFFFFFFF;
417 if (super.level == 5 || super.level == 4) {
418 /* need to force clean */
419 super.state = (1<<MD_SB_CLEAN);
420 }
421 super.sb_csum = calc_sb_csum(&super);
422 /*DRYRUN*/ if (store_super(fd, &super)) {
423 close(fd);
424 fprintf(stderr, Name ": Could not re-write superblock on %s\n",
425 devices[chosen_drive].devname);
426 devices[chosen_drive].events = 0;
427 continue;
428 }
429 close(fd);
430 devices[chosen_drive].events = devices[most_recent].events;
431 devices[chosen_drive].uptodate = 1;
432 okcnt++;
433 }
434
435 /* Now we want to look at the superblock which the kernel will base things on
436 * and compare the devices that we think are working with the devices that the
437 * superblock thinks are working.
438 * If there are differences and --force is given, then update this chosen
439 * superblock.
440 */
441 chosen_drive = -1;
442 for (i=0; chosen_drive < 0 && i<bestcnt; i++) {
443 int j = best[i];
444 int fd;
445 if (j<0)
446 continue;
447 if (!devices[j].uptodate)
448 continue;
449 chosen_drive = j;
450 if ((fd=open(devices[j].devname, O_RDONLY))< 0) {
451 fprintf(stderr, Name ": Cannot open %s: %s\n",
452 devices[j].devname, strerror(errno));
453 return 1;
454 }
455 if (load_super(fd, &super)) {
456 close(fd);
457 fprintf(stderr, Name ": RAID superblock has disappeared from %s\n",
458 devices[j].devname);
459 return 1;
460 }
461 close(fd);
462 }
463
464 for (i=0; i<bestcnt; i++) {
465 int j = best[i];
466 int desired_state;
467
468 if (i < super.raid_disks)
469 desired_state = (1<<MD_DISK_ACTIVE) | (1<<MD_DISK_SYNC);
470 else
471 desired_state = 0;
472
473 if (j<0)
474 continue;
475 if (!devices[j].uptodate)
476 continue;
477 #if 0
478 This doesnt work yet
479 if (devices[j].major != super.disks[i].major ||
480 devices[j].minor != super.disks[i].minor) {
481 change |= 1;
482 super.disks[i].major = devices[j].major;
483 super.disks[i].minor = devices[j].minor;
484 }
485 #endif
486 if (devices[j].oldmajor != super.disks[i].major ||
487 devices[j].oldminor != super.disks[i].minor) {
488 change |= 2;
489 super.disks[i].major = devices[j].oldmajor;
490 super.disks[i].minor = devices[j].oldminor;
491 }
492 if (devices[j].uptodate &&
493 (super.disks[i].state != desired_state)) {
494 if (force) {
495 fprintf(stderr, Name ": "
496 "clearing FAULTY flag for device %d in %s for %s\n",
497 j, mddev, devices[j].devname);
498 super.disks[i].state = desired_state;
499 change |= 2;
500 } else {
501 fprintf(stderr, Name ": "
502 "device %d in %s has wrong state in superblock, but %s seems ok\n",
503 i, mddev, devices[j].devname);
504 }
505 }
506 if (!devices[j].uptodate &&
507 !(super.disks[i].state & (1 << MD_DISK_FAULTY))) {
508 fprintf(stderr, Name ": devices %d of %s is not marked FAULTY in superblock, but cannot be found\n",
509 i, mddev);
510 }
511 }
512 if (force && (super.level == 4 || super.level == 5) &&
513 okcnt == super.raid_disks-1) {
514 super.state = (1<< MD_SB_CLEAN);
515 change |= 2;
516 }
517
518 if ((force && (change & 2))
519 || (old_linux && (change & 1))) {
520 int fd;
521 super.sb_csum = calc_sb_csum(&super);
522 fd = open(devices[chosen_drive].devname, O_RDWR);
523 if (fd < 0) {
524 fprintf(stderr, Name ": Could open %s for write - cannot Assemble array.\n",
525 devices[chosen_drive].devname);
526 return 1;
527 }
528 if (store_super(fd, &super)) {
529 close(fd);
530 fprintf(stderr, Name ": Could not re-write superblock on %s\n",
531 devices[chosen_drive].devname);
532 return 1;
533 }
534 close(fd);
535 change = 0;
536 }
537
538 /* Almost ready to actually *do* something */
539 if (!old_linux) {
540 if (ioctl(mdfd, SET_ARRAY_INFO, NULL) != 0) {
541 fprintf(stderr, Name ": SET_ARRAY_INFO failed for %s: %s\n",
542 mddev, strerror(errno));
543 return 1;
544 }
545 /* First, add the raid disks, but add the chosen one last */
546 for (i=0; i<= bestcnt; i++) {
547 int j;
548 if (i < bestcnt) {
549 j = best[i];
550 if (j == chosen_drive)
551 continue;
552 } else
553 j = chosen_drive;
554
555 if (j >= 0 /* && devices[j].uptodate */) {
556 mdu_disk_info_t disk;
557 memset(&disk, 0, sizeof(disk));
558 disk.major = devices[j].major;
559 disk.minor = devices[j].minor;
560 if (ioctl(mdfd, ADD_NEW_DISK, &disk)!=0) {
561 fprintf(stderr, Name ": failed to add %s to %s: %s\n",
562 devices[j].devname,
563 mddev,
564 strerror(errno));
565 if (i < first_super.raid_disks)
566 okcnt--;
567 else
568 sparecnt--;
569 } else if (verbose)
570 fprintf(stderr, Name ": added %s to %s as %d\n",
571 devices[j].devname, mddev, devices[j].raid_disk);
572 } else if (verbose && i < first_super.raid_disks)
573 fprintf(stderr, Name ": no uptodate device for slot %d of %s\n",
574 i, mddev);
575 }
576
577 if (runstop == 1 ||
578 (runstop == 0 &&
579 ( first_super.raid_disks == okcnt
580 || (start_partial_ok && enough(first_super.level, first_super.raid_disks, okcnt)))
581 )) {
582 if (ioctl(mdfd, RUN_ARRAY, NULL)==0) {
583 fprintf(stderr, Name ": %s has been started with %d drive%s",
584 mddev, okcnt, okcnt==1?"":"s");
585 if (sparecnt)
586 fprintf(stderr, " and %d spare%s", sparecnt, sparecnt==1?"":"s");
587 fprintf(stderr, ".\n");
588 return 0;
589 }
590 fprintf(stderr, Name ": failed to RUN_ARRAY %s: %s\n",
591 mddev, strerror(errno));
592 return 1;
593 }
594 if (runstop == -1) {
595 fprintf(stderr, Name ": %s assembled from %d drive%s, but not started.\n",
596 mddev, okcnt, okcnt==1?"":"s");
597 return 0;
598 }
599 fprintf(stderr, Name ": %s assembled from %d drive%s - not enough to start it (use --run to insist).\n",
600 mddev, okcnt, okcnt==1?"":"s");
601 return 1;
602 } else {
603 /* The "chosen_drive" is a good choice, and if necessary, the superblock has
604 * been updated to point to the current locations of devices.
605 * so we can just start the array
606 */
607 unsigned long dev;
608 dev = MKDEV(devices[chosen_drive].major,
609 devices[chosen_drive].minor);
610 if (ioctl(mdfd, START_ARRAY, dev)) {
611 fprintf(stderr, Name ": Cannot start array: %s\n",
612 strerror(errno));
613 }
614
615 }
616 return 0;
617 }