]> git.ipfire.org Git - thirdparty/mdadm.git/blame - Assemble.c
mdadm-0.7.1
[thirdparty/mdadm.git] / Assemble.c
CommitLineData
/*
 * mdadm - manage Linux "md" devices aka RAID arrays.
 *
 * Copyright (C) 2001-2002 Neil Brown <neilb@cse.unsw.edu.au>
 *
 *
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 *
 *    Author: Neil Brown
 *    Email: <neilb@cse.unsw.edu.au>
 *    Paper: Neil Brown
 *           School of Computer Science and Engineering
 *           The University of New South Wales
 *           Sydney, 2052
 *           Australia
 */
29
9a9dab36 30#include "mdadm.h"
64c4757e 31#include "md_u.h"
52826846 32#include "md_p.h"
64c4757e
NB
33
34int Assemble(char *mddev, int mdfd,
52826846 35 mddev_ident_t ident, char *conffile,
cd29a5c8 36 mddev_dev_t devlist,
64c4757e
NB
37 int readonly, int runstop,
38 int verbose, int force)
39{
40 /*
52826846
NB
41 * The task of Assemble is to find a collection of
42 * devices that should (according to their superblocks)
43 * form an array, and to give this collection to the MD driver.
44 * In Linux-2.4 and later, this involves submitting a
64c4757e
NB
45 * SET_ARRAY_INFO ioctl with no arg - to prepare
46 * the array - and then submit a number of
47 * ADD_NEW_DISK ioctls to add disks into
48 * the array. Finally RUN_ARRAY might
49 * be submitted to start the array.
50 *
51 * Much of the work of Assemble is in finding and/or
52 * checking the disks to make sure they look right.
53 *
54 * If mddev is not set, then scan must be and we
55 * read through the config file for dev+uuid mapping
56 * We recurse, setting mddev, for each device that
57 * - isn't running
58 * - has a valid uuid (or any uuid if !uuidset
59 *
60 * If mddev is set, we try to determine state of md.
61 * check version - must be at least 0.90.0
62 * check kernel version. must be at least 2.4.
63 * If not, we can possibly fall back on START_ARRAY
64 * Try to GET_ARRAY_INFO.
65 * If possible, give up
66 * If not, try to STOP_ARRAY just to make sure
67 *
68 * If !uuidset and scan, look in conf-file for uuid
69 * If not found, give up
cd29a5c8 70 * If !devlist and scan and uuidset, get list of devs from conf-file
64c4757e
NB
71 *
72 * For each device:
73 * Check superblock - discard if bad
74 * Check uuid (set if we don't have one) - discard if no match
75 * Check superblock similarity if we have a superbloc - discard if different
76 * Record events, devicenum, utime
77 * This should give us a list of devices for the array
78 * We should collect the most recent event and utime numbers
79 *
80 * Count disks with recent enough event count
81 * While force && !enough disks
82 * Choose newest rejected disks, update event count
83 * mark clean and rewrite superblock
84 * If recent kernel:
85 * SET_ARRAY_INFO
86 * foreach device with recent events : ADD_NEW_DISK
87 * if runstop == 1 || "enough" disks and runstop==0 -> RUN_ARRAY
88 * If old kernel:
89 * Check the device numbers in superblock are right
90 * update superblock if any changes
91 * START_ARRAY
92 *
93 */
94 int old_linux = 0;
95 int vers;
96 mdu_array_info_t array;
64c4757e
NB
97 mdp_super_t first_super, super;
98 struct {
99 char *devname;
100 int major, minor;
101 long long events;
102 time_t utime;
103 int uptodate;
52826846 104 int raid_disk;
64c4757e
NB
105 } devices[MD_SB_DISKS];
106 int best[MD_SB_DISKS]; /* indexed by raid_disk */
52826846 107 int devcnt = 0, okcnt, sparecnt;
64c4757e
NB
108 int i;
109 int most_recent = 0;
cd29a5c8 110 int chosen_drive;
52826846 111 int change = 0;
cd29a5c8
NB
112 int inargv = 0;
113 int start_partial_ok = force || devlist==NULL;
64c4757e 114
64c4757e
NB
115 vers = md_get_version(mdfd);
116 if (vers <= 0) {
682c7051 117 fprintf(stderr, Name ": %s appears not to be an md device.\n");
64c4757e
NB
118 return 1;
119 }
682c7051
NB
120 if (vers < 9000) {
121 fprintf(stderr, Name ": Assemble requires driver version 0.90.0 or later.\n"
52826846 122 " Upgrade your kernel or try --build\n");
64c4757e
NB
123 return 1;
124 }
682c7051 125 if (get_linux_version() < 2004000)
64c4757e
NB
126 old_linux = 1;
127
128 if (ioctl(mdfd, GET_ARRAY_INFO, &array)>=0) {
682c7051 129 fprintf(stderr, Name ": device %s already active - cannot assemble it\n",
64c4757e
NB
130 mddev);
131 return 1;
132 }
133 ioctl(mdfd, STOP_ARRAY, NULL); /* just incase it was started but has no content */
134
135 /*
52826846
NB
136 * If any subdevs are listed, then any that don't
137 * match ident are discarded. Remainder must all match and
138 * become the array.
139 * If no subdevs, then we scan all devices in the config file, but
140 * there must be something in the identity
64c4757e 141 */
64c4757e 142
cd29a5c8 143 if (!devlist &&
52826846
NB
144 ident->uuid_set == 0 &&
145 ident->super_minor < 0 &&
146 ident->devices == NULL) {
147 fprintf(stderr, Name ": No identity information available for %s - cannot assemble.\n",
148 mddev);
149 return 1;
64c4757e 150 }
cd29a5c8 151 if (devlist == NULL)
64c4757e 152 devlist = conf_get_devs(conffile);
cd29a5c8 153 else inargv = 1;
64c4757e 154
64c4757e
NB
155 first_super.md_magic = 0;
156 for (i=0; i<MD_SB_DISKS; i++)
157 best[i] = -1;
158
82b27616
NB
159 if (verbose)
160 fprintf(stderr, Name ": looking for devices for %s\n",
161 mddev);
162
cd29a5c8 163 while ( devlist) {
64c4757e
NB
164 char *devname;
165 int this_uuid[4];
166 int dfd;
167 struct stat stb;
52826846
NB
168 int havesuper=0;
169
cd29a5c8
NB
170 devname = devlist->devname;
171 devlist = devlist->next;
64c4757e 172
52826846
NB
173 if (ident->devices &&
174 !match_oneof(ident->devices, devname))
175 continue;
176
64c4757e
NB
177 dfd = open(devname, O_RDONLY, 0);
178 if (dfd < 0) {
179 if (inargv || verbose)
682c7051 180 fprintf(stderr, Name ": cannot open device %s: %s\n",
64c4757e 181 devname, strerror(errno));
52826846
NB
182 } else if (fstat(dfd, &stb)< 0) {
183 /* Impossible! */
184 fprintf(stderr, Name ": fstat failed for %s: %s\n",
185 devname, strerror(errno));
186 close(dfd);
cd29a5c8
NB
187 } else if ((stb.st_mode & S_IFMT) != S_IFBLK) {
188 fprintf(stderr, Name ": %s is not a block device.\n",
52826846
NB
189 devname);
190 close(dfd);
cd29a5c8 191 } else if (load_super(dfd, &super)) {
64c4757e 192 if (inargv || verbose)
682c7051 193 fprintf( stderr, Name ": no RAID superblock on %s\n",
64c4757e
NB
194 devname);
195 close(dfd);
52826846
NB
196 } else {
197 havesuper =1;
198 uuid_from_super(this_uuid, &super);
199 close(dfd);
64c4757e 200 }
52826846
NB
201
202 if (ident->uuid_set &&
203 (!havesuper || same_uuid(this_uuid, ident->uuid)==0)) {
204 if (inargv || verbose)
205 fprintf(stderr, Name ": %s has wrong uuid.\n",
206 devname);
207 continue;
82b27616 208 }
52826846
NB
209 if (ident->super_minor >= 0 &&
210 (!havesuper || ident->super_minor != super.md_minor)) {
64c4757e 211 if (inargv || verbose)
52826846 212 fprintf(stderr, Name ": %s has wrong super-minor.\n",
64c4757e
NB
213 devname);
214 continue;
215 }
cd29a5c8
NB
216 if (ident->level != -10 &&
217 (!havesuper|| ident->level != super.level)) {
218 if (inargv || verbose)
219 fprintf(stderr, Name ": %s has wrong raid level.\n",
220 devname);
221 continue;
222 }
223 if (ident->raid_disks != -1 &&
224 (!havesuper || ident->raid_disks!= super.raid_disks)) {
225 if (inargv || verbose)
226 fprintf(stderr, Name ": %s requires wrong number of drives.\n",
227 devname);
228 continue;
229 }
52826846
NB
230
231 /* If we are this far, then we are commited to this device.
232 * If the super_block doesn't exist, or doesn't match others,
233 * then we cannot continue
234 */
52826846
NB
235
236 if (!havesuper) {
237 fprintf(stderr, Name ": %s has no superblock - assembly aborted\n",
238 devname);
239 return 1;
240 }
241 if (compare_super(&first_super, &super)) {
242 fprintf(stderr, Name ": superblock on %s doesn't match others - assembly aborted\n",
243 devname);
244 return 1;
64c4757e
NB
245 }
246
64c4757e 247 if (devcnt >= MD_SB_DISKS) {
682c7051 248 fprintf(stderr, Name ": ouch - too many devices appear to be in this array. Ignoring %s\n",
64c4757e
NB
249 devname);
250 continue;
251 }
cd29a5c8
NB
252 if (verbose)
253 fprintf(stderr, Name ": %s is identified as a member of %s, slot %d.\n",
254 devname, mddev, super.this_disk.raid_disk);
64c4757e
NB
255 devices[devcnt].devname = devname;
256 devices[devcnt].major = MAJOR(stb.st_rdev);
257 devices[devcnt].minor = MINOR(stb.st_rdev);
258 devices[devcnt].events = md_event(&super);
259 devices[devcnt].utime = super.utime;
52826846 260 devices[devcnt].raid_disk = super.this_disk.raid_disk;
64c4757e
NB
261 devices[devcnt].uptodate = 0;
262 if (most_recent < devcnt) {
263 if (devices[devcnt].events
264 > devices[most_recent].events)
265 most_recent = devcnt;
266 }
52826846
NB
267 i = devices[devcnt].raid_disk;
268 if (i>=0 && i < MD_SB_DISKS)
269 if (best[i] == -1
270 || devices[best[i]].events < devices[devcnt].events)
271 best[i] = devcnt;
272
64c4757e
NB
273 devcnt++;
274 }
275
276 if (devcnt == 0) {
682c7051 277 fprintf(stderr, Name ": no devices found for %s\n",
64c4757e
NB
278 mddev);
279 return 1;
280 }
281 /* now we have some devices that might be suitable.
282 * I wonder how many
283 */
284 okcnt = 0;
52826846
NB
285 sparecnt=0;
286 for (i=0; i< MD_SB_DISKS;i++) {
64c4757e 287 int j = best[i];
cd29a5c8 288 int event_margin = !force;
64c4757e 289 if (j < 0) continue;
cd29a5c8 290 if (devices[j].events+event_margin >=
64c4757e
NB
291 devices[most_recent].events) {
292 devices[j].uptodate = 1;
52826846
NB
293 if (i < first_super.raid_disks)
294 okcnt++;
295 else
296 sparecnt++;
64c4757e
NB
297 }
298 }
299 while (force && !enough(first_super.level, first_super.raid_disks, okcnt)) {
300 /* Choose the newest best drive which is
301 * not up-to-date, update the superblock
302 * and add it.
303 */
52826846 304 int fd;
cd29a5c8 305 chosen_drive = -1;
52826846
NB
306 for (i=0; i<first_super.raid_disks; i++) {
307 int j = best[i];
308 if (j>=0 &&
309 !devices[j].uptodate &&
310 devices[j].events > 0 &&
311 (chosen_drive < 0 ||
312 devices[j].events > devices[chosen_drive].events))
313 chosen_drive = j;
314 }
315 if (chosen_drive < 0)
316 break;
317 fprintf(stderr, Name ": forcing event count in %s(%d) from %d upto %d\n",
318 devices[chosen_drive].devname, devices[chosen_drive].raid_disk,
319 (int)(devices[chosen_drive].events),
320 (int)(devices[most_recent].events));
321 fd = open(devices[chosen_drive].devname, O_RDWR);
322 if (fd < 0) {
323 fprintf(stderr, Name ": Couldn't open %s for write - not updating\n",
324 devices[chosen_drive].devname);
325 devices[chosen_drive].events = 0;
326 continue;
327 }
328 if (load_super(fd, &super)) {
329 close(fd);
330 fprintf(stderr, Name ": RAID superblock disappeared from %s - not updating.\n",
331 devices[chosen_drive].devname);
332 devices[chosen_drive].events = 0;
333 continue;
334 }
335 super.events_hi = (devices[most_recent].events>>32)&0xFFFFFFFF;
336 super.events_lo = (devices[most_recent].events)&0xFFFFFFFF;
337 super.sb_csum = calc_sb_csum(&super);
338/*DRYRUN*/ if (store_super(fd, &super)) {
339 close(fd);
340 fprintf(stderr, Name ": Could not re-write superblock on %s\n",
341 devices[chosen_drive].devname);
342 devices[chosen_drive].events = 0;
343 continue;
344 }
345 close(fd);
346 devices[chosen_drive].events = devices[most_recent].events;
347 devices[chosen_drive].uptodate = 1;
348 okcnt++;
64c4757e 349 }
52826846
NB
350
351 /* Now we want to look at the superblock which the kernel will base things on
352 * and compare the devices that we think are working with the devices that the
353 * superblock thinks are working.
354 * If there are differences and --force is given, then update this chosen
355 * superblock.
356 */
cd29a5c8 357 chosen_drive = -1;
52826846
NB
358 for (i=0; chosen_drive < 0 && i<MD_SB_DISKS; i++) {
359 int j = best[i];
360 int fd;
361 if (j<0)
362 continue;
363 if (!devices[j].uptodate)
364 continue;
365 chosen_drive = j;
366 if ((fd=open(devices[j].devname, O_RDONLY))< 0) {
367 fprintf(stderr, Name ": Cannot open %s: %s\n",
368 devices[j].devname, strerror(errno));
369 return 1;
370 }
371 if (load_super(fd, &super)) {
372 close(fd);
373 fprintf(stderr, Name ": RAID superblock has disappeared from %s\n",
374 devices[j].devname);
375 return 1;
376 }
377 close(fd);
378 }
379
380 for (i=0; i<MD_SB_DISKS; i++) {
381 int j = best[i];
cd29a5c8 382 int active_sync = (1<<MD_DISK_ACTIVE) | (1<<MD_DISK_SYNC);
52826846
NB
383 if (j<0)
384 continue;
385 if (!devices[j].uptodate)
386 continue;
387 if (devices[j].major != super.disks[j].major ||
388 devices[j].minor != super.disks[j].minor) {
389 change |= 1;
390 super.disks[j].major = devices[j].major;
391 super.disks[j].minor = devices[j].minor;
392 }
393 if (devices[j].uptodate &&
cd29a5c8 394 (super.disks[i].state != active_sync)) {
52826846
NB
395 if (force) {
396 fprintf(stderr, Name ": "
cd29a5c8 397 "clearing FAULTY flag for device %d in %s for %s\n",
52826846 398 j, mddev, devices[j].devname);
cd29a5c8 399 super.disks[i].state = active_sync;
52826846
NB
400 change |= 2;
401 } else {
402 fprintf(stderr, Name ": "
403 "device %d in %s is marked faulty in superblock, but %s seems ok\n",
404 i, mddev, devices[j].devname);
405 }
406 }
407 if (!devices[j].uptodate &&
408 !(super.disks[i].state & (1 << MD_DISK_FAULTY))) {
409 fprintf(stderr, Name ": devices %d of %s is not marked FAULTY in superblock, but cannot be found\n",
410 i, mddev);
411 }
412 }
413
414 if ((force && (change & 2))
415 || (old_linux && (change & 1))) {
416 int fd;
417 super.sb_csum = calc_sb_csum(&super);
418 fd = open(devices[chosen_drive].devname, O_RDWR);
419 if (fd < 0) {
420 fprintf(stderr, Name ": Could open %s for write - cannot Assemble array.\n",
421 devices[chosen_drive].devname);
422 return 1;
423 }
424 if (store_super(fd, &super)) {
425 close(fd);
426 fprintf(stderr, Name ": Could not re-write superblock on %s\n",
427 devices[chosen_drive].devname);
428 return 1;
429 }
430 close(fd);
431 change = 0;
432 }
433
64c4757e
NB
434 /* Almost ready to actually *do* something */
435 if (!old_linux) {
436 if (ioctl(mdfd, SET_ARRAY_INFO, NULL) != 0) {
682c7051 437 fprintf(stderr, Name ": SET_ARRAY_INFO failed for %s: %s\n",
64c4757e
NB
438 mddev, strerror(errno));
439 return 1;
440 }
52826846
NB
441 /* First, add the raid disks, but add the chosen one last */
442 for (i=0; i<=MD_SB_DISKS; i++) {
443 int j;
444 if (i < MD_SB_DISKS) {
445 j = best[i];
446 if (j == chosen_drive)
447 continue;
448 } else
449 j = chosen_drive;
450
0db17fcb 451 if (j >= 0 && devices[j].uptodate) {
64c4757e
NB
452 mdu_disk_info_t disk;
453 memset(&disk, 0, sizeof(disk));
454 disk.major = devices[j].major;
455 disk.minor = devices[j].minor;
456 if (ioctl(mdfd, ADD_NEW_DISK, &disk)!=0) {
682c7051 457 fprintf(stderr, Name ": failed to add %s to %s: %s\n",
64c4757e
NB
458 devices[j].devname,
459 mddev,
460 strerror(errno));
52826846
NB
461 if (i < first_super.raid_disks)
462 okcnt--;
463 else
464 sparecnt--;
465 } else if (verbose)
466 fprintf(stderr, Name ": added %s to %s as %d\n",
467 devices[j].devname, mddev, devices[j].raid_disk);
468 } else if (verbose && i < first_super.raid_disks)
682c7051 469 fprintf(stderr, Name ": no uptodate device for slot %d of %s\n",
64c4757e
NB
470 i, mddev);
471 }
52826846 472
64c4757e
NB
473 if (runstop == 1 ||
474 (runstop == 0 &&
cd29a5c8
NB
475 ( first_super.raid_disks == okcnt
476 || start_partial_ok && enough(first_super.level, first_super.raid_disks, okcnt))
477 )) {
82b27616 478 if (ioctl(mdfd, RUN_ARRAY, NULL)==0) {
52826846
NB
479 fprintf(stderr, Name ": %s has been started with %d drive%s",
480 mddev, okcnt, okcnt==1?"":"s");
481 if (sparecnt)
482 fprintf(stderr, " and %d spare%s", sparecnt, sparecnt==1?"":"s");
483 fprintf(stderr, ".\n");
64c4757e 484 return 0;
82b27616 485 }
682c7051 486 fprintf(stderr, Name ": failed to RUN_ARRAY %s: %s\n",
64c4757e
NB
487 mddev, strerror(errno));
488 return 1;
489 }
82b27616 490 if (runstop == -1) {
52826846
NB
491 fprintf(stderr, Name ": %s assembled from %d drive%s, but not started.\n",
492 mddev, okcnt, okcnt==1?"":"s");
64c4757e 493 return 0;
82b27616 494 }
cd29a5c8 495 fprintf(stderr, Name ": %s assembled from %d drive%s - not enough to start it (use --run to insist).\n",
52826846 496 mddev, okcnt, okcnt==1?"":"s");
64c4757e 497 return 1;
82b27616 498 } else {
52826846
NB
499 /* The "chosen_drive" is a good choice, and if necessary, the superblock has
500 * been updated to point to the current locations of devices.
501 * so we can just start the array
82b27616 502 */
cd29a5c8 503 unsigned long dev;
82b27616
NB
504 dev = MKDEV(devices[chosen_drive].major,
505 devices[chosen_drive].minor);
506 if (ioctl(mdfd, START_ARRAY, dev)) {
507 fprintf(stderr, Name ": Cannot start array: %s\n",
508 strerror(errno));
509 }
510
64c4757e
NB
511 }
512}