]> git.ipfire.org Git - thirdparty/mdadm.git/blob - Assemble.c
mdadm-1.1.0
[thirdparty/mdadm.git] / Assemble.c
1 /*
2 * mdadm - manage Linux "md" devices aka RAID arrays.
3 *
4 * Copyright (C) 2001-2002 Neil Brown <neilb@cse.unsw.edu.au>
5 *
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 *
21 * Author: Neil Brown
22 * Email: <neilb@cse.unsw.edu.au>
23 * Paper: Neil Brown
24 * School of Computer Science and Engineering
25 * The University of New South Wales
26 * Sydney, 2052
27 * Australia
28 */
29
30 #include "mdadm.h"
31 #include "md_u.h"
32 #include "md_p.h"
33
34 int Assemble(char *mddev, int mdfd,
35 mddev_ident_t ident, char *conffile,
36 mddev_dev_t devlist,
37 int readonly, int runstop,
38 char *update,
39 int verbose, int force)
40 {
41 /*
42 * The task of Assemble is to find a collection of
43 * devices that should (according to their superblocks)
44 * form an array, and to give this collection to the MD driver.
45 * In Linux-2.4 and later, this involves submitting a
46 * SET_ARRAY_INFO ioctl with no arg - to prepare
47 * the array - and then submit a number of
48 * ADD_NEW_DISK ioctls to add disks into
49 * the array. Finally RUN_ARRAY might
50 * be submitted to start the array.
51 *
52 * Much of the work of Assemble is in finding and/or
53 * checking the disks to make sure they look right.
54 *
55 * If mddev is not set, then scan must be and we
56 * read through the config file for dev+uuid mapping
57 * We recurse, setting mddev, for each device that
58 * - isn't running
59 * - has a valid uuid (or any uuid if !uuidset
60 *
61 * If mddev is set, we try to determine state of md.
62 * check version - must be at least 0.90.0
63 * check kernel version. must be at least 2.4.
64 * If not, we can possibly fall back on START_ARRAY
65 * Try to GET_ARRAY_INFO.
66 * If possible, give up
67 * If not, try to STOP_ARRAY just to make sure
68 *
69 * If !uuidset and scan, look in conf-file for uuid
70 * If not found, give up
71 * If !devlist and scan and uuidset, get list of devs from conf-file
72 *
73 * For each device:
74 * Check superblock - discard if bad
75 * Check uuid (set if we don't have one) - discard if no match
76 * Check superblock similarity if we have a superblock - discard if different
77 * Record events, devicenum, utime
78 * This should give us a list of devices for the array
79 * We should collect the most recent event and utime numbers
80 *
81 * Count disks with recent enough event count
82 * While force && !enough disks
83 * Choose newest rejected disks, update event count
84 * mark clean and rewrite superblock
85 * If recent kernel:
86 * SET_ARRAY_INFO
87 * foreach device with recent events : ADD_NEW_DISK
88 * if runstop == 1 || "enough" disks and runstop==0 -> RUN_ARRAY
89 * If old kernel:
90 * Check the device numbers in superblock are right
91 * update superblock if any changes
92 * START_ARRAY
93 *
94 */
95 int old_linux = 0;
96 int vers;
97 mdu_array_info_t array;
98 mdp_super_t first_super, super;
99 struct {
100 char *devname;
101 int major, minor;
102 int oldmajor, oldminor;
103 long long events;
104 time_t utime;
105 int uptodate;
106 int state;
107 int raid_disk;
108 } *devices;
109 int *best; /* indexed by raid_disk */
110 int devcnt = 0, okcnt, sparecnt;
111 int i;
112 int most_recent = 0;
113 int chosen_drive;
114 int change = 0;
115 int inargv = 0;
116 int start_partial_ok = force || devlist==NULL;
117 int num_devs;
118 mddev_dev_t tmpdev;
119
120 vers = md_get_version(mdfd);
121 if (vers <= 0) {
122 fprintf(stderr, Name ": %s appears not to be an md device.\n", mddev);
123 return 1;
124 }
125 if (vers < 9000) {
126 fprintf(stderr, Name ": Assemble requires driver version 0.90.0 or later.\n"
127 " Upgrade your kernel or try --build\n");
128 return 1;
129 }
130 if (get_linux_version() < 2004000)
131 old_linux = 1;
132
133 if (ioctl(mdfd, GET_ARRAY_INFO, &array)>=0) {
134 fprintf(stderr, Name ": device %s already active - cannot assemble it\n",
135 mddev);
136 return 1;
137 }
138 ioctl(mdfd, STOP_ARRAY, NULL); /* just incase it was started but has no content */
139
140 /*
141 * If any subdevs are listed, then any that don't
142 * match ident are discarded. Remainder must all match and
143 * become the array.
144 * If no subdevs, then we scan all devices in the config file, but
145 * there must be something in the identity
146 */
147
148 if (!devlist &&
149 ident->uuid_set == 0 &&
150 ident->super_minor < 0 &&
151 ident->devices == NULL) {
152 fprintf(stderr, Name ": No identity information available for %s - cannot assemble.\n",
153 mddev);
154 return 1;
155 }
156 if (devlist == NULL)
157 devlist = conf_get_devs(conffile);
158 else inargv = 1;
159
160 tmpdev = devlist; num_devs = 0;
161 while (tmpdev) {
162 num_devs++;
163 tmpdev = tmpdev->next;
164 }
165 best = malloc(num_devs * sizeof(*best));
166 devices = malloc(num_devs * sizeof(*devices));
167
168 first_super.md_magic = 0;
169 for (i=0; i<num_devs; i++)
170 best[i] = -1;
171
172 if (verbose)
173 fprintf(stderr, Name ": looking for devices for %s\n",
174 mddev);
175
176 while ( devlist) {
177 char *devname;
178 int this_uuid[4];
179 int dfd;
180 struct stat stb;
181 int havesuper=0;
182
183 devname = devlist->devname;
184 devlist = devlist->next;
185
186 if (ident->devices &&
187 !match_oneof(ident->devices, devname))
188 continue;
189
190 dfd = open(devname, O_RDONLY, 0);
191 if (dfd < 0) {
192 if (inargv || verbose)
193 fprintf(stderr, Name ": cannot open device %s: %s\n",
194 devname, strerror(errno));
195 } else if (fstat(dfd, &stb)< 0) {
196 /* Impossible! */
197 fprintf(stderr, Name ": fstat failed for %s: %s\n",
198 devname, strerror(errno));
199 close(dfd);
200 } else if ((stb.st_mode & S_IFMT) != S_IFBLK) {
201 fprintf(stderr, Name ": %s is not a block device.\n",
202 devname);
203 close(dfd);
204 } else if (load_super(dfd, &super)) {
205 if (inargv || verbose)
206 fprintf( stderr, Name ": no RAID superblock on %s\n",
207 devname);
208 close(dfd);
209 } else {
210 havesuper =1;
211 uuid_from_super(this_uuid, &super);
212 close(dfd);
213 }
214
215 if (ident->uuid_set &&
216 (!havesuper || same_uuid(this_uuid, ident->uuid)==0)) {
217 if (inargv || verbose)
218 fprintf(stderr, Name ": %s has wrong uuid.\n",
219 devname);
220 continue;
221 }
222 if (ident->super_minor >= 0 &&
223 (!havesuper || ident->super_minor != super.md_minor)) {
224 if (inargv || verbose)
225 fprintf(stderr, Name ": %s has wrong super-minor.\n",
226 devname);
227 continue;
228 }
229 if (ident->level != -10 &&
230 (!havesuper|| ident->level != super.level)) {
231 if (inargv || verbose)
232 fprintf(stderr, Name ": %s has wrong raid level.\n",
233 devname);
234 continue;
235 }
236 if (ident->raid_disks != -1 &&
237 (!havesuper || ident->raid_disks!= super.raid_disks)) {
238 if (inargv || verbose)
239 fprintf(stderr, Name ": %s requires wrong number of drives.\n",
240 devname);
241 continue;
242 }
243
244 /* If we are this far, then we are commited to this device.
245 * If the super_block doesn't exist, or doesn't match others,
246 * then we cannot continue
247 */
248
249 if (!havesuper) {
250 fprintf(stderr, Name ": %s has no superblock - assembly aborted\n",
251 devname);
252 return 1;
253 }
254 if (compare_super(&first_super, &super)) {
255 fprintf(stderr, Name ": superblock on %s doesn't match others - assembly aborted\n",
256 devname);
257 return 1;
258 }
259
260
261 /* this is needed until we get a more relaxed super block format */
262 if (devcnt >= MD_SB_DISKS) {
263 fprintf(stderr, Name ": ouch - too many devices appear to be in this array. Ignoring %s\n",
264 devname);
265 continue;
266 }
267
268 /* looks like a good enough match to update the super block if needed */
269 if (update) {
270 if (strcmp(update, "sparc2.2")==0 ) {
271 /* 2.2 sparc put the events in the wrong place
272 * So we copy the tail of the superblock
273 * up 4 bytes before continuing
274 */
275 __u32 *sb32 = (__u32*)&super;
276 memcpy(sb32+MD_SB_GENERIC_CONSTANT_WORDS+7,
277 sb32+MD_SB_GENERIC_CONSTANT_WORDS+7+1,
278 (MD_SB_WORDS - (MD_SB_GENERIC_CONSTANT_WORDS+7+1))*4);
279 fprintf (stderr, Name ": adjusting superblock of %s for 2.2/sparc compatability.\n",
280 devname);
281 }
282 if (strcmp(update, "super-minor") ==0) {
283 struct stat stb2;
284 fstat(mdfd, &stb2);
285 super.md_minor = MINOR(stb2.st_rdev);
286 if (verbose)
287 fprintf(stderr, Name ": updating superblock of %s with minor number %d\n",
288 devname, super.md_minor);
289 }
290 super.sb_csum = calc_sb_csum(&super);
291 dfd = open(devname, O_RDWR, 0);
292 if (dfd < 0)
293 fprintf(stderr, Name ": Cannot open %s for superblock update\n",
294 devname);
295 else if (store_super(dfd, &super))
296 fprintf(stderr, Name ": Could not re-write superblock on %s.\n",
297 devname);
298 if (dfd >= 0)
299 close(dfd);
300 }
301
302 if (verbose)
303 fprintf(stderr, Name ": %s is identified as a member of %s, slot %d.\n",
304 devname, mddev, super.this_disk.raid_disk);
305 devices[devcnt].devname = devname;
306 devices[devcnt].major = MAJOR(stb.st_rdev);
307 devices[devcnt].minor = MINOR(stb.st_rdev);
308 devices[devcnt].oldmajor = super.this_disk.major;
309 devices[devcnt].oldminor = super.this_disk.minor;
310 devices[devcnt].events = md_event(&super);
311 devices[devcnt].utime = super.utime;
312 devices[devcnt].raid_disk = super.this_disk.raid_disk;
313 devices[devcnt].uptodate = 0;
314 devices[devcnt].state = super.this_disk.state;
315 if (most_recent < devcnt) {
316 if (devices[devcnt].events
317 > devices[most_recent].events)
318 most_recent = devcnt;
319 }
320 if (super.level == -4)
321 /* with multipath, the raid_disk from the superblock is meaningless */
322 i = devcnt;
323 else
324 i = devices[devcnt].raid_disk;
325 if (i>=0 && i < num_devs)
326 if (best[i] == -1
327 || devices[best[i]].events < devices[devcnt].events)
328 best[i] = devcnt;
329
330 devcnt++;
331 }
332
333 if (devcnt == 0) {
334 fprintf(stderr, Name ": no devices found for %s\n",
335 mddev);
336 return 1;
337 }
338 /* now we have some devices that might be suitable.
339 * I wonder how many
340 */
341 okcnt = 0;
342 sparecnt=0;
343 for (i=0; i< num_devs ;i++) {
344 int j = best[i];
345 int event_margin = !force;
346 if (j < 0) continue;
347 /* note: we ignore error flags in multipath arrays
348 * as they don't make sense
349 */
350 if (first_super.level != -4)
351 if (!(devices[j].state & (1<<MD_DISK_SYNC)))
352 continue;
353 if (devices[j].events+event_margin >=
354 devices[most_recent].events) {
355 devices[j].uptodate = 1;
356 if (i < first_super.raid_disks)
357 okcnt++;
358 else
359 sparecnt++;
360 }
361 }
362 while (force && !enough(first_super.level, first_super.raid_disks, okcnt)) {
363 /* Choose the newest best drive which is
364 * not up-to-date, update the superblock
365 * and add it.
366 */
367 int fd;
368 chosen_drive = -1;
369 for (i=0; i<first_super.raid_disks; i++) {
370 int j = best[i];
371 if (j>=0 &&
372 !devices[j].uptodate &&
373 devices[j].events > 0 &&
374 (chosen_drive < 0 ||
375 devices[j].events > devices[chosen_drive].events))
376 chosen_drive = j;
377 }
378 if (chosen_drive < 0)
379 break;
380 fprintf(stderr, Name ": forcing event count in %s(%d) from %d upto %d\n",
381 devices[chosen_drive].devname, devices[chosen_drive].raid_disk,
382 (int)(devices[chosen_drive].events),
383 (int)(devices[most_recent].events));
384 fd = open(devices[chosen_drive].devname, O_RDWR);
385 if (fd < 0) {
386 fprintf(stderr, Name ": Couldn't open %s for write - not updating\n",
387 devices[chosen_drive].devname);
388 devices[chosen_drive].events = 0;
389 continue;
390 }
391 if (load_super(fd, &super)) {
392 close(fd);
393 fprintf(stderr, Name ": RAID superblock disappeared from %s - not updating.\n",
394 devices[chosen_drive].devname);
395 devices[chosen_drive].events = 0;
396 continue;
397 }
398 super.events_hi = (devices[most_recent].events>>32)&0xFFFFFFFF;
399 super.events_lo = (devices[most_recent].events)&0xFFFFFFFF;
400 if (super.level == 5 || super.level == 4) {
401 /* need to force clean */
402 super.state = (1<<MD_SB_CLEAN);
403 }
404 super.sb_csum = calc_sb_csum(&super);
405 /*DRYRUN*/ if (store_super(fd, &super)) {
406 close(fd);
407 fprintf(stderr, Name ": Could not re-write superblock on %s\n",
408 devices[chosen_drive].devname);
409 devices[chosen_drive].events = 0;
410 continue;
411 }
412 close(fd);
413 devices[chosen_drive].events = devices[most_recent].events;
414 devices[chosen_drive].uptodate = 1;
415 okcnt++;
416 }
417
418 /* Now we want to look at the superblock which the kernel will base things on
419 * and compare the devices that we think are working with the devices that the
420 * superblock thinks are working.
421 * If there are differences and --force is given, then update this chosen
422 * superblock.
423 */
424 chosen_drive = -1;
425 for (i=0; chosen_drive < 0 && i<num_devs; i++) {
426 int j = best[i];
427 int fd;
428 if (j<0)
429 continue;
430 if (!devices[j].uptodate)
431 continue;
432 chosen_drive = j;
433 if ((fd=open(devices[j].devname, O_RDONLY))< 0) {
434 fprintf(stderr, Name ": Cannot open %s: %s\n",
435 devices[j].devname, strerror(errno));
436 return 1;
437 }
438 if (load_super(fd, &super)) {
439 close(fd);
440 fprintf(stderr, Name ": RAID superblock has disappeared from %s\n",
441 devices[j].devname);
442 return 1;
443 }
444 close(fd);
445 }
446
447 for (i=0; i<num_devs; i++) {
448 int j = best[i];
449 int desired_state;
450
451 if (i < super.raid_disks)
452 desired_state = (1<<MD_DISK_ACTIVE) | (1<<MD_DISK_SYNC);
453 else
454 desired_state = 0;
455
456 if (j<0)
457 continue;
458 if (!devices[j].uptodate)
459 continue;
460 #if 0
461 This doesnt work yet
462 if (devices[j].major != super.disks[i].major ||
463 devices[j].minor != super.disks[i].minor) {
464 change |= 1;
465 super.disks[i].major = devices[j].major;
466 super.disks[i].minor = devices[j].minor;
467 }
468 #endif
469 if (devices[j].oldmajor != super.disks[i].major ||
470 devices[j].oldminor != super.disks[i].minor) {
471 change |= 2;
472 super.disks[i].major = devices[j].oldmajor;
473 super.disks[i].minor = devices[j].oldminor;
474 }
475 if (devices[j].uptodate &&
476 (super.disks[i].state != desired_state)) {
477 if (force) {
478 fprintf(stderr, Name ": "
479 "clearing FAULTY flag for device %d in %s for %s\n",
480 j, mddev, devices[j].devname);
481 super.disks[i].state = desired_state;
482 change |= 2;
483 } else {
484 fprintf(stderr, Name ": "
485 "device %d in %s has wrong state in superblock, but %s seems ok\n",
486 i, mddev, devices[j].devname);
487 }
488 }
489 if (!devices[j].uptodate &&
490 !(super.disks[i].state & (1 << MD_DISK_FAULTY))) {
491 fprintf(stderr, Name ": devices %d of %s is not marked FAULTY in superblock, but cannot be found\n",
492 i, mddev);
493 }
494 }
495 if (force && (super.level == 4 || super.level == 5) &&
496 okcnt == super.raid_disks-1) {
497 super.state = (1<< MD_SB_CLEAN);
498 change |= 2;
499 }
500
501 if ((force && (change & 2))
502 || (old_linux && (change & 1))) {
503 int fd;
504 super.sb_csum = calc_sb_csum(&super);
505 fd = open(devices[chosen_drive].devname, O_RDWR);
506 if (fd < 0) {
507 fprintf(stderr, Name ": Could open %s for write - cannot Assemble array.\n",
508 devices[chosen_drive].devname);
509 return 1;
510 }
511 if (store_super(fd, &super)) {
512 close(fd);
513 fprintf(stderr, Name ": Could not re-write superblock on %s\n",
514 devices[chosen_drive].devname);
515 return 1;
516 }
517 close(fd);
518 change = 0;
519 }
520
521 /* Almost ready to actually *do* something */
522 if (!old_linux) {
523 if (ioctl(mdfd, SET_ARRAY_INFO, NULL) != 0) {
524 fprintf(stderr, Name ": SET_ARRAY_INFO failed for %s: %s\n",
525 mddev, strerror(errno));
526 return 1;
527 }
528 /* First, add the raid disks, but add the chosen one last */
529 for (i=0; i<= num_devs; i++) {
530 int j;
531 if (i < num_devs) {
532 j = best[i];
533 if (j == chosen_drive)
534 continue;
535 } else
536 j = chosen_drive;
537
538 if (j >= 0 && devices[j].uptodate) {
539 mdu_disk_info_t disk;
540 memset(&disk, 0, sizeof(disk));
541 disk.major = devices[j].major;
542 disk.minor = devices[j].minor;
543 if (ioctl(mdfd, ADD_NEW_DISK, &disk)!=0) {
544 fprintf(stderr, Name ": failed to add %s to %s: %s\n",
545 devices[j].devname,
546 mddev,
547 strerror(errno));
548 if (i < first_super.raid_disks)
549 okcnt--;
550 else
551 sparecnt--;
552 } else if (verbose)
553 fprintf(stderr, Name ": added %s to %s as %d\n",
554 devices[j].devname, mddev, devices[j].raid_disk);
555 } else if (verbose && i < first_super.raid_disks)
556 fprintf(stderr, Name ": no uptodate device for slot %d of %s\n",
557 i, mddev);
558 }
559
560 if (runstop == 1 ||
561 (runstop == 0 &&
562 ( first_super.raid_disks == okcnt
563 || (start_partial_ok && enough(first_super.level, first_super.raid_disks, okcnt)))
564 )) {
565 if (ioctl(mdfd, RUN_ARRAY, NULL)==0) {
566 fprintf(stderr, Name ": %s has been started with %d drive%s",
567 mddev, okcnt, okcnt==1?"":"s");
568 if (sparecnt)
569 fprintf(stderr, " and %d spare%s", sparecnt, sparecnt==1?"":"s");
570 fprintf(stderr, ".\n");
571 return 0;
572 }
573 fprintf(stderr, Name ": failed to RUN_ARRAY %s: %s\n",
574 mddev, strerror(errno));
575 return 1;
576 }
577 if (runstop == -1) {
578 fprintf(stderr, Name ": %s assembled from %d drive%s, but not started.\n",
579 mddev, okcnt, okcnt==1?"":"s");
580 return 0;
581 }
582 fprintf(stderr, Name ": %s assembled from %d drive%s - not enough to start it (use --run to insist).\n",
583 mddev, okcnt, okcnt==1?"":"s");
584 return 1;
585 } else {
586 /* The "chosen_drive" is a good choice, and if necessary, the superblock has
587 * been updated to point to the current locations of devices.
588 * so we can just start the array
589 */
590 unsigned long dev;
591 dev = MKDEV(devices[chosen_drive].major,
592 devices[chosen_drive].minor);
593 if (ioctl(mdfd, START_ARRAY, dev)) {
594 fprintf(stderr, Name ": Cannot start array: %s\n",
595 strerror(errno));
596 }
597
598 }
599 return 0;
600 }