]> git.ipfire.org Git - thirdparty/mdadm.git/blob - Assemble.c
mdadm-1.3.0
[thirdparty/mdadm.git] / Assemble.c
1 /*
2 * mdadm - manage Linux "md" devices aka RAID arrays.
3 *
4 * Copyright (C) 2001-2002 Neil Brown <neilb@cse.unsw.edu.au>
5 *
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 *
21 * Author: Neil Brown
22 * Email: <neilb@cse.unsw.edu.au>
23 * Paper: Neil Brown
24 * School of Computer Science and Engineering
25 * The University of New South Wales
26 * Sydney, 2052
27 * Australia
28 */
29
30 #include "mdadm.h"
31 #include "md_u.h"
32 #include "md_p.h"
33
34 int Assemble(char *mddev, int mdfd,
35 mddev_ident_t ident, char *conffile,
36 mddev_dev_t devlist,
37 int readonly, int runstop,
38 char *update,
39 int verbose, int force)
40 {
41 /*
42 * The task of Assemble is to find a collection of
43 * devices that should (according to their superblocks)
44 * form an array, and to give this collection to the MD driver.
45 * In Linux-2.4 and later, this involves submitting a
46 * SET_ARRAY_INFO ioctl with no arg - to prepare
47 * the array - and then submit a number of
48 * ADD_NEW_DISK ioctls to add disks into
49 * the array. Finally RUN_ARRAY might
50 * be submitted to start the array.
51 *
52 * Much of the work of Assemble is in finding and/or
53 * checking the disks to make sure they look right.
54 *
55 * If mddev is not set, then scan must be and we
56 * read through the config file for dev+uuid mapping
57 * We recurse, setting mddev, for each device that
58 * - isn't running
59 * - has a valid uuid (or any uuid if !uuidset
60 *
61 * If mddev is set, we try to determine state of md.
62 * check version - must be at least 0.90.0
63 * check kernel version. must be at least 2.4.
64 * If not, we can possibly fall back on START_ARRAY
65 * Try to GET_ARRAY_INFO.
66 * If possible, give up
67 * If not, try to STOP_ARRAY just to make sure
68 *
69 * If !uuidset and scan, look in conf-file for uuid
70 * If not found, give up
71 * If !devlist and scan and uuidset, get list of devs from conf-file
72 *
73 * For each device:
74 * Check superblock - discard if bad
75 * Check uuid (set if we don't have one) - discard if no match
76 * Check superblock similarity if we have a superblock - discard if different
77 * Record events, devicenum, utime
78 * This should give us a list of devices for the array
79 * We should collect the most recent event and utime numbers
80 *
81 * Count disks with recent enough event count
82 * While force && !enough disks
83 * Choose newest rejected disks, update event count
84 * mark clean and rewrite superblock
85 * If recent kernel:
86 * SET_ARRAY_INFO
87 * foreach device with recent events : ADD_NEW_DISK
88 * if runstop == 1 || "enough" disks and runstop==0 -> RUN_ARRAY
89 * If old kernel:
90 * Check the device numbers in superblock are right
91 * update superblock if any changes
92 * START_ARRAY
93 *
94 */
95 int old_linux = 0;
96 int vers;
97 mdu_array_info_t array;
98 mdp_super_t first_super, super;
99 struct {
100 char *devname;
101 int major, minor;
102 int oldmajor, oldminor;
103 long long events;
104 time_t utime;
105 int uptodate;
106 int state;
107 int raid_disk;
108 } *devices;
109 int *best = NULL; /* indexed by raid_disk */
110 int bestcnt = 0;
111 int devcnt = 0, okcnt, sparecnt;
112 int req_cnt;
113 int i;
114 int most_recent = 0;
115 int chosen_drive;
116 int change = 0;
117 int inargv = 0;
118 int start_partial_ok = force || devlist==NULL;
119 int num_devs;
120 mddev_dev_t tmpdev;
121
122 vers = md_get_version(mdfd);
123 if (vers <= 0) {
124 fprintf(stderr, Name ": %s appears not to be an md device.\n", mddev);
125 return 1;
126 }
127 if (vers < 9000) {
128 fprintf(stderr, Name ": Assemble requires driver version 0.90.0 or later.\n"
129 " Upgrade your kernel or try --build\n");
130 return 1;
131 }
132 if (get_linux_version() < 2004000)
133 old_linux = 1;
134
135 if (ioctl(mdfd, GET_ARRAY_INFO, &array)>=0) {
136 fprintf(stderr, Name ": device %s already active - cannot assemble it\n",
137 mddev);
138 return 1;
139 }
140 ioctl(mdfd, STOP_ARRAY, NULL); /* just incase it was started but has no content */
141
142 /*
143 * If any subdevs are listed, then any that don't
144 * match ident are discarded. Remainder must all match and
145 * become the array.
146 * If no subdevs, then we scan all devices in the config file, but
147 * there must be something in the identity
148 */
149
150 if (!devlist &&
151 ident->uuid_set == 0 &&
152 ident->super_minor < 0 &&
153 ident->devices == NULL) {
154 fprintf(stderr, Name ": No identity information available for %s - cannot assemble.\n",
155 mddev);
156 return 1;
157 }
158 if (devlist == NULL)
159 devlist = conf_get_devs(conffile);
160 else inargv = 1;
161
162 tmpdev = devlist; num_devs = 0;
163 while (tmpdev) {
164 num_devs++;
165 tmpdev = tmpdev->next;
166 }
167 best = malloc(num_devs * sizeof(*best));
168 devices = malloc(num_devs * sizeof(*devices));
169
170 first_super.md_magic = 0;
171 for (i=0; i<num_devs; i++)
172 best[i] = -1;
173
174 if (verbose)
175 fprintf(stderr, Name ": looking for devices for %s\n",
176 mddev);
177
178 while ( devlist) {
179 char *devname;
180 int this_uuid[4];
181 int dfd;
182 struct stat stb;
183 int havesuper=0;
184
185 devname = devlist->devname;
186 devlist = devlist->next;
187
188 if (ident->devices &&
189 !match_oneof(ident->devices, devname)) {
190 if (inargv || verbose)
191 fprintf(stderr, Name ": %s is not one of %s\n", devname, ident->devices);
192 continue;
193 }
194
195 dfd = open(devname, O_RDONLY, 0);
196 if (dfd < 0) {
197 if (inargv || verbose)
198 fprintf(stderr, Name ": cannot open device %s: %s\n",
199 devname, strerror(errno));
200 } else if (fstat(dfd, &stb)< 0) {
201 /* Impossible! */
202 fprintf(stderr, Name ": fstat failed for %s: %s\n",
203 devname, strerror(errno));
204 close(dfd);
205 } else if ((stb.st_mode & S_IFMT) != S_IFBLK) {
206 fprintf(stderr, Name ": %s is not a block device.\n",
207 devname);
208 close(dfd);
209 } else if (load_super(dfd, &super)) {
210 if (inargv || verbose)
211 fprintf( stderr, Name ": no RAID superblock on %s\n",
212 devname);
213 close(dfd);
214 } else {
215 havesuper =1;
216 uuid_from_super(this_uuid, &super);
217 close(dfd);
218 }
219
220 if (ident->uuid_set &&
221 (!havesuper || same_uuid(this_uuid, ident->uuid)==0)) {
222 if (inargv || verbose)
223 fprintf(stderr, Name ": %s has wrong uuid.\n",
224 devname);
225 continue;
226 }
227 if (ident->super_minor >= 0 &&
228 (!havesuper || ident->super_minor != super.md_minor)) {
229 if (inargv || verbose)
230 fprintf(stderr, Name ": %s has wrong super-minor.\n",
231 devname);
232 continue;
233 }
234 if (ident->level != -10 &&
235 (!havesuper|| ident->level != super.level)) {
236 if (inargv || verbose)
237 fprintf(stderr, Name ": %s has wrong raid level.\n",
238 devname);
239 continue;
240 }
241 if (ident->raid_disks != -1 &&
242 (!havesuper || ident->raid_disks!= super.raid_disks)) {
243 if (inargv || verbose)
244 fprintf(stderr, Name ": %s requires wrong number of drives.\n",
245 devname);
246 continue;
247 }
248
249 /* If we are this far, then we are commited to this device.
250 * If the super_block doesn't exist, or doesn't match others,
251 * then we cannot continue
252 */
253
254 if (!havesuper) {
255 fprintf(stderr, Name ": %s has no superblock - assembly aborted\n",
256 devname);
257 return 1;
258 }
259 if (compare_super(&first_super, &super)) {
260 fprintf(stderr, Name ": superblock on %s doesn't match others - assembly aborted\n",
261 devname);
262 return 1;
263 }
264
265
266 /* this is needed until we get a more relaxed super block format */
267 if (devcnt >= MD_SB_DISKS) {
268 fprintf(stderr, Name ": ouch - too many devices appear to be in this array. Ignoring %s\n",
269 devname);
270 continue;
271 }
272
273 /* looks like a good enough match to update the super block if needed */
274 if (update) {
275 if (strcmp(update, "sparc2.2")==0 ) {
276 /* 2.2 sparc put the events in the wrong place
277 * So we copy the tail of the superblock
278 * up 4 bytes before continuing
279 */
280 __u32 *sb32 = (__u32*)&super;
281 memcpy(sb32+MD_SB_GENERIC_CONSTANT_WORDS+7,
282 sb32+MD_SB_GENERIC_CONSTANT_WORDS+7+1,
283 (MD_SB_WORDS - (MD_SB_GENERIC_CONSTANT_WORDS+7+1))*4);
284 fprintf (stderr, Name ": adjusting superblock of %s for 2.2/sparc compatability.\n",
285 devname);
286 }
287 if (strcmp(update, "super-minor") ==0) {
288 struct stat stb2;
289 fstat(mdfd, &stb2);
290 super.md_minor = MINOR(stb2.st_rdev);
291 if (verbose)
292 fprintf(stderr, Name ": updating superblock of %s with minor number %d\n",
293 devname, super.md_minor);
294 }
295 super.sb_csum = calc_sb_csum(&super);
296 dfd = open(devname, O_RDWR, 0);
297 if (dfd < 0)
298 fprintf(stderr, Name ": Cannot open %s for superblock update\n",
299 devname);
300 else if (store_super(dfd, &super))
301 fprintf(stderr, Name ": Could not re-write superblock on %s.\n",
302 devname);
303 if (dfd >= 0)
304 close(dfd);
305 }
306
307 if (verbose)
308 fprintf(stderr, Name ": %s is identified as a member of %s, slot %d.\n",
309 devname, mddev, super.this_disk.raid_disk);
310 devices[devcnt].devname = devname;
311 devices[devcnt].major = MAJOR(stb.st_rdev);
312 devices[devcnt].minor = MINOR(stb.st_rdev);
313 devices[devcnt].oldmajor = super.this_disk.major;
314 devices[devcnt].oldminor = super.this_disk.minor;
315 devices[devcnt].events = md_event(&super);
316 devices[devcnt].utime = super.utime;
317 devices[devcnt].raid_disk = super.this_disk.raid_disk;
318 devices[devcnt].uptodate = 0;
319 devices[devcnt].state = super.this_disk.state;
320 if (most_recent < devcnt) {
321 if (devices[devcnt].events
322 > devices[most_recent].events)
323 most_recent = devcnt;
324 }
325 if (super.level == -4)
326 /* with multipath, the raid_disk from the superblock is meaningless */
327 i = devcnt;
328 else
329 i = devices[devcnt].raid_disk;
330 if (i>=0 && i < 10000) {
331 if (i >= bestcnt) {
332 int newbestcnt = i+10;
333 int *newbest = malloc(sizeof(int)*newbestcnt);
334 int c;
335 for (c=0; c < newbestcnt; c++)
336 if (c < bestcnt)
337 newbest[c] = best[c];
338 else
339 newbest[c] = -1;
340 if (best)free(best);
341 best = newbest;
342 bestcnt = newbestcnt;
343 }
344 if (best[i] == -1
345 || devices[best[i]].events < devices[devcnt].events)
346 best[i] = devcnt;
347 }
348 devcnt++;
349 }
350
351 if (devcnt == 0) {
352 fprintf(stderr, Name ": no devices found for %s\n",
353 mddev);
354 return 1;
355 }
356 /* now we have some devices that might be suitable.
357 * I wonder how many
358 */
359 okcnt = 0;
360 sparecnt=0;
361 for (i=0; i< bestcnt ;i++) {
362 int j = best[i];
363 int event_margin = !force;
364 if (j < 0) continue;
365 /* note: we ignore error flags in multipath arrays
366 * as they don't make sense
367 */
368 if (first_super.level != -4)
369 if (!(devices[j].state & (1<<MD_DISK_SYNC))) {
370 if (!(devices[j].state & (1<<MD_DISK_FAULTY)))
371 sparecnt++;
372 continue;
373 }
374 if (devices[j].events+event_margin >=
375 devices[most_recent].events) {
376 devices[j].uptodate = 1;
377 if (i < first_super.raid_disks)
378 okcnt++;
379 else
380 sparecnt++;
381 }
382 }
383 while (force && !enough(first_super.level, first_super.raid_disks, okcnt)) {
384 /* Choose the newest best drive which is
385 * not up-to-date, update the superblock
386 * and add it.
387 */
388 int fd;
389 chosen_drive = -1;
390 for (i=0; i<first_super.raid_disks && i < bestcnt; i++) {
391 int j = best[i];
392 if (j>=0 &&
393 !devices[j].uptodate &&
394 devices[j].events > 0 &&
395 (chosen_drive < 0 ||
396 devices[j].events > devices[chosen_drive].events))
397 chosen_drive = j;
398 }
399 if (chosen_drive < 0)
400 break;
401 fprintf(stderr, Name ": forcing event count in %s(%d) from %d upto %d\n",
402 devices[chosen_drive].devname, devices[chosen_drive].raid_disk,
403 (int)(devices[chosen_drive].events),
404 (int)(devices[most_recent].events));
405 fd = open(devices[chosen_drive].devname, O_RDWR);
406 if (fd < 0) {
407 fprintf(stderr, Name ": Couldn't open %s for write - not updating\n",
408 devices[chosen_drive].devname);
409 devices[chosen_drive].events = 0;
410 continue;
411 }
412 if (load_super(fd, &super)) {
413 close(fd);
414 fprintf(stderr, Name ": RAID superblock disappeared from %s - not updating.\n",
415 devices[chosen_drive].devname);
416 devices[chosen_drive].events = 0;
417 continue;
418 }
419 super.events_hi = (devices[most_recent].events>>32)&0xFFFFFFFF;
420 super.events_lo = (devices[most_recent].events)&0xFFFFFFFF;
421 if (super.level == 5 || super.level == 4) {
422 /* need to force clean */
423 super.state = (1<<MD_SB_CLEAN);
424 }
425 super.sb_csum = calc_sb_csum(&super);
426 /*DRYRUN*/ if (store_super(fd, &super)) {
427 close(fd);
428 fprintf(stderr, Name ": Could not re-write superblock on %s\n",
429 devices[chosen_drive].devname);
430 devices[chosen_drive].events = 0;
431 continue;
432 }
433 close(fd);
434 devices[chosen_drive].events = devices[most_recent].events;
435 devices[chosen_drive].uptodate = 1;
436 okcnt++;
437 }
438
439 /* Now we want to look at the superblock which the kernel will base things on
440 * and compare the devices that we think are working with the devices that the
441 * superblock thinks are working.
442 * If there are differences and --force is given, then update this chosen
443 * superblock.
444 */
445 chosen_drive = -1;
446 for (i=0; chosen_drive < 0 && i<bestcnt; i++) {
447 int j = best[i];
448 int fd;
449 if (j<0)
450 continue;
451 if (!devices[j].uptodate)
452 continue;
453 chosen_drive = j;
454 if ((fd=open(devices[j].devname, O_RDONLY))< 0) {
455 fprintf(stderr, Name ": Cannot open %s: %s\n",
456 devices[j].devname, strerror(errno));
457 return 1;
458 }
459 if (load_super(fd, &super)) {
460 close(fd);
461 fprintf(stderr, Name ": RAID superblock has disappeared from %s\n",
462 devices[j].devname);
463 return 1;
464 }
465 close(fd);
466 }
467
468 for (i=0; i<bestcnt; i++) {
469 int j = best[i];
470 int desired_state;
471
472 if (i < super.raid_disks)
473 desired_state = (1<<MD_DISK_ACTIVE) | (1<<MD_DISK_SYNC);
474 else
475 desired_state = 0;
476
477 if (j<0)
478 continue;
479 if (!devices[j].uptodate)
480 continue;
481 #if 0
482 This doesnt work yet
483 if (devices[j].major != super.disks[i].major ||
484 devices[j].minor != super.disks[i].minor) {
485 change |= 1;
486 super.disks[i].major = devices[j].major;
487 super.disks[i].minor = devices[j].minor;
488 }
489 #endif
490 if (devices[j].oldmajor != super.disks[i].major ||
491 devices[j].oldminor != super.disks[i].minor) {
492 change |= 2;
493 super.disks[i].major = devices[j].oldmajor;
494 super.disks[i].minor = devices[j].oldminor;
495 }
496 if (devices[j].uptodate &&
497 (super.disks[i].state != desired_state)) {
498 if (force) {
499 fprintf(stderr, Name ": "
500 "clearing FAULTY flag for device %d in %s for %s\n",
501 j, mddev, devices[j].devname);
502 super.disks[i].state = desired_state;
503 change |= 2;
504 } else {
505 fprintf(stderr, Name ": "
506 "device %d in %s has wrong state in superblock, but %s seems ok\n",
507 i, mddev, devices[j].devname);
508 }
509 }
510 if (!devices[j].uptodate &&
511 !(super.disks[i].state & (1 << MD_DISK_FAULTY))) {
512 fprintf(stderr, Name ": devices %d of %s is not marked FAULTY in superblock, but cannot be found\n",
513 i, mddev);
514 }
515 }
516 if (force && (super.level == 4 || super.level == 5) &&
517 okcnt == super.raid_disks-1) {
518 super.state = (1<< MD_SB_CLEAN);
519 change |= 2;
520 }
521
522 if ((force && (change & 2))
523 || (old_linux && (change & 1))) {
524 int fd;
525 super.sb_csum = calc_sb_csum(&super);
526 fd = open(devices[chosen_drive].devname, O_RDWR);
527 if (fd < 0) {
528 fprintf(stderr, Name ": Could open %s for write - cannot Assemble array.\n",
529 devices[chosen_drive].devname);
530 return 1;
531 }
532 if (store_super(fd, &super)) {
533 close(fd);
534 fprintf(stderr, Name ": Could not re-write superblock on %s\n",
535 devices[chosen_drive].devname);
536 return 1;
537 }
538 close(fd);
539 change = 0;
540 }
541
542 /* count number of in-sync devices according to the superblock.
543 * We must have this number to start the array without -s or -R
544 */
545 req_cnt = 0;
546 for (i=0; i<MD_SB_DISKS; i++)
547 if ((first_super.disks[i].state & (1<<MD_DISK_SYNC)) &&
548 (first_super.disks[i].state & (1<<MD_DISK_ACTIVE)) &&
549 !(first_super.disks[i].state & (1<<MD_DISK_FAULTY)))
550 req_cnt ++;
551
552
553 /* Almost ready to actually *do* something */
554 if (!old_linux) {
555 if (ioctl(mdfd, SET_ARRAY_INFO, NULL) != 0) {
556 fprintf(stderr, Name ": SET_ARRAY_INFO failed for %s: %s\n",
557 mddev, strerror(errno));
558 return 1;
559 }
560 /* First, add the raid disks, but add the chosen one last */
561 for (i=0; i<= bestcnt; i++) {
562 int j;
563 if (i < bestcnt) {
564 j = best[i];
565 if (j == chosen_drive)
566 continue;
567 } else
568 j = chosen_drive;
569
570 if (j >= 0 /* && devices[j].uptodate */) {
571 mdu_disk_info_t disk;
572 memset(&disk, 0, sizeof(disk));
573 disk.major = devices[j].major;
574 disk.minor = devices[j].minor;
575 if (ioctl(mdfd, ADD_NEW_DISK, &disk)!=0) {
576 fprintf(stderr, Name ": failed to add %s to %s: %s\n",
577 devices[j].devname,
578 mddev,
579 strerror(errno));
580 if (i < first_super.raid_disks)
581 okcnt--;
582 else
583 sparecnt--;
584 } else if (verbose)
585 fprintf(stderr, Name ": added %s to %s as %d\n",
586 devices[j].devname, mddev, devices[j].raid_disk);
587 } else if (verbose && i < first_super.raid_disks)
588 fprintf(stderr, Name ": no uptodate device for slot %d of %s\n",
589 i, mddev);
590 }
591
592 if (runstop == 1 ||
593 (runstop == 0 &&
594 ( enough(first_super.level, first_super.raid_disks, okcnt) &&
595 (okcnt >= req_cnt || start_partial_ok)
596 ))) {
597 if (ioctl(mdfd, RUN_ARRAY, NULL)==0) {
598 fprintf(stderr, Name ": %s has been started with %d drive%s",
599 mddev, okcnt, okcnt==1?"":"s");
600 if (okcnt < first_super.raid_disks)
601 fprintf(stderr, " (out of %d)", first_super.raid_disks);
602 if (sparecnt)
603 fprintf(stderr, " and %d spare%s", sparecnt, sparecnt==1?"":"s");
604 fprintf(stderr, ".\n");
605 return 0;
606 }
607 fprintf(stderr, Name ": failed to RUN_ARRAY %s: %s\n",
608 mddev, strerror(errno));
609 return 1;
610 }
611 if (runstop == -1) {
612 fprintf(stderr, Name ": %s assembled from %d drive%s, but not started.\n",
613 mddev, okcnt, okcnt==1?"":"s");
614 return 0;
615 }
616 fprintf(stderr, Name ": %s assembled from %d drive%s", mddev, okcnt, okcnt==1?"":"s");
617 if (sparecnt)
618 fprintf(stderr, " and %d spare%s", sparecnt, sparecnt==1?"":"s");
619 if (!enough(first_super.level, first_super.raid_disks, okcnt))
620 fprintf(stderr, " - not enough to start the array.\n");
621 else {
622 if (req_cnt == first_super.raid_disks)
623 fprintf(stderr, " - need all %d to start it", req_cnt);
624 else
625 fprintf(stderr, " - need %d of %d to start", req_cnt, first_super.raid_disks);
626 fprintf(stderr, " (use --run to insist).\n");
627 }
628 return 1;
629 } else {
630 /* The "chosen_drive" is a good choice, and if necessary, the superblock has
631 * been updated to point to the current locations of devices.
632 * so we can just start the array
633 */
634 unsigned long dev;
635 dev = MKDEV(devices[chosen_drive].major,
636 devices[chosen_drive].minor);
637 if (ioctl(mdfd, START_ARRAY, dev)) {
638 fprintf(stderr, Name ": Cannot start array: %s\n",
639 strerror(errno));
640 }
641
642 }
643 return 0;
644 }