]> git.ipfire.org Git - thirdparty/mdadm.git/blob - Assemble.c
mdadm-1.0.9
[thirdparty/mdadm.git] / Assemble.c
1 /*
2 * mdadm - manage Linux "md" devices aka RAID arrays.
3 *
4 * Copyright (C) 2001-2002 Neil Brown <neilb@cse.unsw.edu.au>
5 *
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 *
21 * Author: Neil Brown
22 * Email: <neilb@cse.unsw.edu.au>
23 * Paper: Neil Brown
24 * School of Computer Science and Engineering
25 * The University of New South Wales
26 * Sydney, 2052
27 * Australia
28 */
29
30 #include "mdadm.h"
31 #include "md_u.h"
32 #include "md_p.h"
33
34 int Assemble(char *mddev, int mdfd,
35 mddev_ident_t ident, char *conffile,
36 mddev_dev_t devlist,
37 int readonly, int runstop,
38 char *update,
39 int verbose, int force)
40 {
41 /*
42 * The task of Assemble is to find a collection of
43 * devices that should (according to their superblocks)
44 * form an array, and to give this collection to the MD driver.
45 * In Linux-2.4 and later, this involves submitting a
46 * SET_ARRAY_INFO ioctl with no arg - to prepare
47 * the array - and then submit a number of
48 * ADD_NEW_DISK ioctls to add disks into
49 * the array. Finally RUN_ARRAY might
50 * be submitted to start the array.
51 *
52 * Much of the work of Assemble is in finding and/or
53 * checking the disks to make sure they look right.
54 *
55 * If mddev is not set, then scan must be and we
56 * read through the config file for dev+uuid mapping
57 * We recurse, setting mddev, for each device that
58 * - isn't running
59 * - has a valid uuid (or any uuid if !uuidset
60 *
61 * If mddev is set, we try to determine state of md.
62 * check version - must be at least 0.90.0
63 * check kernel version. must be at least 2.4.
64 * If not, we can possibly fall back on START_ARRAY
65 * Try to GET_ARRAY_INFO.
66 * If possible, give up
67 * If not, try to STOP_ARRAY just to make sure
68 *
69 * If !uuidset and scan, look in conf-file for uuid
70 * If not found, give up
71 * If !devlist and scan and uuidset, get list of devs from conf-file
72 *
73 * For each device:
74 * Check superblock - discard if bad
75 * Check uuid (set if we don't have one) - discard if no match
76 * Check superblock similarity if we have a superblock - discard if different
77 * Record events, devicenum, utime
78 * This should give us a list of devices for the array
79 * We should collect the most recent event and utime numbers
80 *
81 * Count disks with recent enough event count
82 * While force && !enough disks
83 * Choose newest rejected disks, update event count
84 * mark clean and rewrite superblock
85 * If recent kernel:
86 * SET_ARRAY_INFO
87 * foreach device with recent events : ADD_NEW_DISK
88 * if runstop == 1 || "enough" disks and runstop==0 -> RUN_ARRAY
89 * If old kernel:
90 * Check the device numbers in superblock are right
91 * update superblock if any changes
92 * START_ARRAY
93 *
94 */
95 int old_linux = 0;
96 int vers;
97 mdu_array_info_t array;
98 mdp_super_t first_super, super;
99 struct {
100 char *devname;
101 int major, minor;
102 int oldmajor, oldminor;
103 long long events;
104 time_t utime;
105 int uptodate;
106 int raid_disk;
107 } *devices;
108 int *best; /* indexed by raid_disk */
109 int devcnt = 0, okcnt, sparecnt;
110 int i;
111 int most_recent = 0;
112 int chosen_drive;
113 int change = 0;
114 int inargv = 0;
115 int start_partial_ok = force || devlist==NULL;
116 int num_devs;
117 mddev_dev_t tmpdev;
118
119 vers = md_get_version(mdfd);
120 if (vers <= 0) {
121 fprintf(stderr, Name ": %s appears not to be an md device.\n", mddev);
122 return 1;
123 }
124 if (vers < 9000) {
125 fprintf(stderr, Name ": Assemble requires driver version 0.90.0 or later.\n"
126 " Upgrade your kernel or try --build\n");
127 return 1;
128 }
129 if (get_linux_version() < 2004000)
130 old_linux = 1;
131
132 if (ioctl(mdfd, GET_ARRAY_INFO, &array)>=0) {
133 fprintf(stderr, Name ": device %s already active - cannot assemble it\n",
134 mddev);
135 return 1;
136 }
137 ioctl(mdfd, STOP_ARRAY, NULL); /* just incase it was started but has no content */
138
139 /*
140 * If any subdevs are listed, then any that don't
141 * match ident are discarded. Remainder must all match and
142 * become the array.
143 * If no subdevs, then we scan all devices in the config file, but
144 * there must be something in the identity
145 */
146
147 if (!devlist &&
148 ident->uuid_set == 0 &&
149 ident->super_minor < 0 &&
150 ident->devices == NULL) {
151 fprintf(stderr, Name ": No identity information available for %s - cannot assemble.\n",
152 mddev);
153 return 1;
154 }
155 if (devlist == NULL)
156 devlist = conf_get_devs(conffile);
157 else inargv = 1;
158
159 tmpdev = devlist; num_devs = 0;
160 while (tmpdev) {
161 num_devs++;
162 tmpdev = tmpdev->next;
163 }
164 best = malloc(num_devs * sizeof(*best));
165 devices = malloc(num_devs * sizeof(*devices));
166
167 first_super.md_magic = 0;
168 for (i=0; i<num_devs; i++)
169 best[i] = -1;
170
171 if (verbose)
172 fprintf(stderr, Name ": looking for devices for %s\n",
173 mddev);
174
175 while ( devlist) {
176 char *devname;
177 int this_uuid[4];
178 int dfd;
179 struct stat stb;
180 int havesuper=0;
181
182 devname = devlist->devname;
183 devlist = devlist->next;
184
185 if (ident->devices &&
186 !match_oneof(ident->devices, devname))
187 continue;
188
189 dfd = open(devname, O_RDONLY, 0);
190 if (dfd < 0) {
191 if (inargv || verbose)
192 fprintf(stderr, Name ": cannot open device %s: %s\n",
193 devname, strerror(errno));
194 } else if (fstat(dfd, &stb)< 0) {
195 /* Impossible! */
196 fprintf(stderr, Name ": fstat failed for %s: %s\n",
197 devname, strerror(errno));
198 close(dfd);
199 } else if ((stb.st_mode & S_IFMT) != S_IFBLK) {
200 fprintf(stderr, Name ": %s is not a block device.\n",
201 devname);
202 close(dfd);
203 } else if (load_super(dfd, &super)) {
204 if (inargv || verbose)
205 fprintf( stderr, Name ": no RAID superblock on %s\n",
206 devname);
207 close(dfd);
208 } else {
209 havesuper =1;
210 uuid_from_super(this_uuid, &super);
211 close(dfd);
212 }
213
214 if (ident->uuid_set &&
215 (!havesuper || same_uuid(this_uuid, ident->uuid)==0)) {
216 if (inargv || verbose)
217 fprintf(stderr, Name ": %s has wrong uuid.\n",
218 devname);
219 continue;
220 }
221 if (ident->super_minor >= 0 &&
222 (!havesuper || ident->super_minor != super.md_minor)) {
223 if (inargv || verbose)
224 fprintf(stderr, Name ": %s has wrong super-minor.\n",
225 devname);
226 continue;
227 }
228 if (ident->level != -10 &&
229 (!havesuper|| ident->level != super.level)) {
230 if (inargv || verbose)
231 fprintf(stderr, Name ": %s has wrong raid level.\n",
232 devname);
233 continue;
234 }
235 if (ident->raid_disks != -1 &&
236 (!havesuper || ident->raid_disks!= super.raid_disks)) {
237 if (inargv || verbose)
238 fprintf(stderr, Name ": %s requires wrong number of drives.\n",
239 devname);
240 continue;
241 }
242
243 /* If we are this far, then we are commited to this device.
244 * If the super_block doesn't exist, or doesn't match others,
245 * then we cannot continue
246 */
247
248 if (!havesuper) {
249 fprintf(stderr, Name ": %s has no superblock - assembly aborted\n",
250 devname);
251 return 1;
252 }
253 if (compare_super(&first_super, &super)) {
254 fprintf(stderr, Name ": superblock on %s doesn't match others - assembly aborted\n",
255 devname);
256 return 1;
257 }
258
259
260 /* this is needed until we get a more relaxed super block format */
261 if (devcnt >= MD_SB_DISKS) {
262 fprintf(stderr, Name ": ouch - too many devices appear to be in this array. Ignoring %s\n",
263 devname);
264 continue;
265 }
266
267 /* looks like a good enough match to update the super block if needed */
268 if (update) {
269 if (strcmp(update, "sparc2.2")==0 ) {
270 /* 2.2 sparc put the events in the wrong place
271 * So we copy the tail of the superblock
272 * up 4 bytes before continuing
273 */
274 __u32 *sb32 = (__u32*)&super;
275 memcpy(sb32+MD_SB_GENERIC_CONSTANT_WORDS+7,
276 sb32+MD_SB_GENERIC_CONSTANT_WORDS+7+1,
277 (MD_SB_WORDS - (MD_SB_GENERIC_CONSTANT_WORDS+7+1))*4);
278 fprintf (stderr, Name ": adjusting superblock of %s for 2.2/sparc compatability.\n",
279 devname);
280 }
281 if (strcmp(update, "super-minor") ==0) {
282 struct stat stb2;
283 fstat(mdfd, &stb2);
284 super.md_minor = MINOR(stb2.st_rdev);
285 if (verbose)
286 fprintf(stderr, Name ": updating superblock of %s with minor number %d\n",
287 devname, super.md_minor);
288 }
289 super.sb_csum = calc_sb_csum(&super);
290 dfd = open(devname, O_RDWR, 0);
291 if (dfd < 0)
292 fprintf(stderr, Name ": Cannot open %s for superblock update\n",
293 devname);
294 else if (store_super(dfd, &super))
295 fprintf(stderr, Name ": Could not re-write superblock on %s.\n",
296 devname);
297 if (dfd >= 0)
298 close(dfd);
299 }
300
301 if (verbose)
302 fprintf(stderr, Name ": %s is identified as a member of %s, slot %d.\n",
303 devname, mddev, super.this_disk.raid_disk);
304 devices[devcnt].devname = devname;
305 devices[devcnt].major = MAJOR(stb.st_rdev);
306 devices[devcnt].minor = MINOR(stb.st_rdev);
307 devices[devcnt].oldmajor = super.this_disk.major;
308 devices[devcnt].oldminor = super.this_disk.minor;
309 devices[devcnt].events = md_event(&super);
310 devices[devcnt].utime = super.utime;
311 devices[devcnt].raid_disk = super.this_disk.raid_disk;
312 devices[devcnt].uptodate = 0;
313 if (most_recent < devcnt) {
314 if (devices[devcnt].events
315 > devices[most_recent].events)
316 most_recent = devcnt;
317 }
318 if (super.level == -4)
319 /* with multipath, the raid_disk from the superblock is meaningless */
320 i = devcnt;
321 else
322 i = devices[devcnt].raid_disk;
323 if (i>=0 && i < num_devs)
324 if (best[i] == -1
325 || devices[best[i]].events < devices[devcnt].events)
326 best[i] = devcnt;
327
328 devcnt++;
329 }
330
331 if (devcnt == 0) {
332 fprintf(stderr, Name ": no devices found for %s\n",
333 mddev);
334 return 1;
335 }
336 /* now we have some devices that might be suitable.
337 * I wonder how many
338 */
339 okcnt = 0;
340 sparecnt=0;
341 for (i=0; i< num_devs ;i++) {
342 int j = best[i];
343 int event_margin = !force;
344 if (j < 0) continue;
345 if (devices[j].events+event_margin >=
346 devices[most_recent].events) {
347 devices[j].uptodate = 1;
348 if (i < first_super.raid_disks)
349 okcnt++;
350 else
351 sparecnt++;
352 }
353 }
354 while (force && !enough(first_super.level, first_super.raid_disks, okcnt)) {
355 /* Choose the newest best drive which is
356 * not up-to-date, update the superblock
357 * and add it.
358 */
359 int fd;
360 chosen_drive = -1;
361 for (i=0; i<first_super.raid_disks; i++) {
362 int j = best[i];
363 if (j>=0 &&
364 !devices[j].uptodate &&
365 devices[j].events > 0 &&
366 (chosen_drive < 0 ||
367 devices[j].events > devices[chosen_drive].events))
368 chosen_drive = j;
369 }
370 if (chosen_drive < 0)
371 break;
372 fprintf(stderr, Name ": forcing event count in %s(%d) from %d upto %d\n",
373 devices[chosen_drive].devname, devices[chosen_drive].raid_disk,
374 (int)(devices[chosen_drive].events),
375 (int)(devices[most_recent].events));
376 fd = open(devices[chosen_drive].devname, O_RDWR);
377 if (fd < 0) {
378 fprintf(stderr, Name ": Couldn't open %s for write - not updating\n",
379 devices[chosen_drive].devname);
380 devices[chosen_drive].events = 0;
381 continue;
382 }
383 if (load_super(fd, &super)) {
384 close(fd);
385 fprintf(stderr, Name ": RAID superblock disappeared from %s - not updating.\n",
386 devices[chosen_drive].devname);
387 devices[chosen_drive].events = 0;
388 continue;
389 }
390 super.events_hi = (devices[most_recent].events>>32)&0xFFFFFFFF;
391 super.events_lo = (devices[most_recent].events)&0xFFFFFFFF;
392 if (super.level == 5 || super.level == 4) {
393 /* need to force clean */
394 super.state = 0;
395 }
396 super.sb_csum = calc_sb_csum(&super);
397 /*DRYRUN*/ if (store_super(fd, &super)) {
398 close(fd);
399 fprintf(stderr, Name ": Could not re-write superblock on %s\n",
400 devices[chosen_drive].devname);
401 devices[chosen_drive].events = 0;
402 continue;
403 }
404 close(fd);
405 devices[chosen_drive].events = devices[most_recent].events;
406 devices[chosen_drive].uptodate = 1;
407 okcnt++;
408 }
409
410 /* Now we want to look at the superblock which the kernel will base things on
411 * and compare the devices that we think are working with the devices that the
412 * superblock thinks are working.
413 * If there are differences and --force is given, then update this chosen
414 * superblock.
415 */
416 chosen_drive = -1;
417 for (i=0; chosen_drive < 0 && i<num_devs; i++) {
418 int j = best[i];
419 int fd;
420 if (j<0)
421 continue;
422 if (!devices[j].uptodate)
423 continue;
424 chosen_drive = j;
425 if ((fd=open(devices[j].devname, O_RDONLY))< 0) {
426 fprintf(stderr, Name ": Cannot open %s: %s\n",
427 devices[j].devname, strerror(errno));
428 return 1;
429 }
430 if (load_super(fd, &super)) {
431 close(fd);
432 fprintf(stderr, Name ": RAID superblock has disappeared from %s\n",
433 devices[j].devname);
434 return 1;
435 }
436 close(fd);
437 }
438
439 for (i=0; i<num_devs; i++) {
440 int j = best[i];
441 int desired_state;
442
443 if (i < super.raid_disks)
444 desired_state = (1<<MD_DISK_ACTIVE) | (1<<MD_DISK_SYNC);
445 else
446 desired_state = 0;
447
448 if (j<0)
449 continue;
450 if (!devices[j].uptodate)
451 continue;
452 #if 0
453 This doesnt work yet
454 if (devices[j].major != super.disks[i].major ||
455 devices[j].minor != super.disks[i].minor) {
456 change |= 1;
457 super.disks[i].major = devices[j].major;
458 super.disks[i].minor = devices[j].minor;
459 }
460 #endif
461 if (devices[j].oldmajor != super.disks[i].major ||
462 devices[j].oldminor != super.disks[i].minor) {
463 change |= 2;
464 super.disks[i].major = devices[j].oldmajor;
465 super.disks[i].minor = devices[j].oldminor;
466 }
467 if (devices[j].uptodate &&
468 (super.disks[i].state != desired_state)) {
469 if (force) {
470 fprintf(stderr, Name ": "
471 "clearing FAULTY flag for device %d in %s for %s\n",
472 j, mddev, devices[j].devname);
473 super.disks[i].state = desired_state;
474 change |= 2;
475 } else {
476 fprintf(stderr, Name ": "
477 "device %d in %s has wrong state in superblock, but %s seems ok\n",
478 i, mddev, devices[j].devname);
479 }
480 }
481 if (!devices[j].uptodate &&
482 !(super.disks[i].state & (1 << MD_DISK_FAULTY))) {
483 fprintf(stderr, Name ": devices %d of %s is not marked FAULTY in superblock, but cannot be found\n",
484 i, mddev);
485 }
486 }
487
488 if ((force && (change & 2))
489 || (old_linux && (change & 1))) {
490 int fd;
491 super.sb_csum = calc_sb_csum(&super);
492 fd = open(devices[chosen_drive].devname, O_RDWR);
493 if (fd < 0) {
494 fprintf(stderr, Name ": Could open %s for write - cannot Assemble array.\n",
495 devices[chosen_drive].devname);
496 return 1;
497 }
498 if (store_super(fd, &super)) {
499 close(fd);
500 fprintf(stderr, Name ": Could not re-write superblock on %s\n",
501 devices[chosen_drive].devname);
502 return 1;
503 }
504 close(fd);
505 change = 0;
506 }
507
508 /* Almost ready to actually *do* something */
509 if (!old_linux) {
510 if (ioctl(mdfd, SET_ARRAY_INFO, NULL) != 0) {
511 fprintf(stderr, Name ": SET_ARRAY_INFO failed for %s: %s\n",
512 mddev, strerror(errno));
513 return 1;
514 }
515 /* First, add the raid disks, but add the chosen one last */
516 for (i=0; i<= num_devs; i++) {
517 int j;
518 if (i < num_devs) {
519 j = best[i];
520 if (j == chosen_drive)
521 continue;
522 } else
523 j = chosen_drive;
524
525 if (j >= 0 && devices[j].uptodate) {
526 mdu_disk_info_t disk;
527 memset(&disk, 0, sizeof(disk));
528 disk.major = devices[j].major;
529 disk.minor = devices[j].minor;
530 if (ioctl(mdfd, ADD_NEW_DISK, &disk)!=0) {
531 fprintf(stderr, Name ": failed to add %s to %s: %s\n",
532 devices[j].devname,
533 mddev,
534 strerror(errno));
535 if (i < first_super.raid_disks)
536 okcnt--;
537 else
538 sparecnt--;
539 } else if (verbose)
540 fprintf(stderr, Name ": added %s to %s as %d\n",
541 devices[j].devname, mddev, devices[j].raid_disk);
542 } else if (verbose && i < first_super.raid_disks)
543 fprintf(stderr, Name ": no uptodate device for slot %d of %s\n",
544 i, mddev);
545 }
546
547 if (runstop == 1 ||
548 (runstop == 0 &&
549 ( first_super.raid_disks == okcnt
550 || (start_partial_ok && enough(first_super.level, first_super.raid_disks, okcnt)))
551 )) {
552 if (ioctl(mdfd, RUN_ARRAY, NULL)==0) {
553 fprintf(stderr, Name ": %s has been started with %d drive%s",
554 mddev, okcnt, okcnt==1?"":"s");
555 if (sparecnt)
556 fprintf(stderr, " and %d spare%s", sparecnt, sparecnt==1?"":"s");
557 fprintf(stderr, ".\n");
558 return 0;
559 }
560 fprintf(stderr, Name ": failed to RUN_ARRAY %s: %s\n",
561 mddev, strerror(errno));
562 return 1;
563 }
564 if (runstop == -1) {
565 fprintf(stderr, Name ": %s assembled from %d drive%s, but not started.\n",
566 mddev, okcnt, okcnt==1?"":"s");
567 return 0;
568 }
569 fprintf(stderr, Name ": %s assembled from %d drive%s - not enough to start it (use --run to insist).\n",
570 mddev, okcnt, okcnt==1?"":"s");
571 return 1;
572 } else {
573 /* The "chosen_drive" is a good choice, and if necessary, the superblock has
574 * been updated to point to the current locations of devices.
575 * so we can just start the array
576 */
577 unsigned long dev;
578 dev = MKDEV(devices[chosen_drive].major,
579 devices[chosen_drive].minor);
580 if (ioctl(mdfd, START_ARRAY, dev)) {
581 fprintf(stderr, Name ": Cannot start array: %s\n",
582 strerror(errno));
583 }
584
585 }
586 return 0;
587 }