]> git.ipfire.org Git - thirdparty/mdadm.git/blame - Assemble.c
mdctl-0.5
[thirdparty/mdadm.git] / Assemble.c
CommitLineData
64c4757e
NB
1/*
2 * mdctl - manage Linux "md" devices aka RAID arrays.
3 *
4 * Copyright (C) 2001 Neil Brown <neilb@cse.unsw.edu.au>
5 *
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 *
21 * Author: Neil Brown
22 * Email: <neilb@cse.unsw.edu.au>
23 * Paper: Neil Brown
24 * School of Computer Science and Engineering
25 * The University of New South Wales
26 * Sydney, 2052
27 * Australia
28 */
29
30#include "mdctl.h"
64c4757e 31#include "md_u.h"
52826846 32#include "md_p.h"
64c4757e
NB
33
34int Assemble(char *mddev, int mdfd,
52826846 35 mddev_ident_t ident, char *conffile,
64c4757e
NB
36 int subdevs, char **subdev,
37 int readonly, int runstop,
38 int verbose, int force)
39{
40 /*
52826846
NB
41 * The task of Assemble is to find a collection of
42 * devices that should (according to their superblocks)
43 * form an array, and to give this collection to the MD driver.
44 * In Linux-2.4 and later, this involves submitting a
64c4757e
NB
45 * SET_ARRAY_INFO ioctl with no arg - to prepare
46 * the array - and then submit a number of
47 * ADD_NEW_DISK ioctls to add disks into
48 * the array. Finally RUN_ARRAY might
49 * be submitted to start the array.
50 *
51 * Much of the work of Assemble is in finding and/or
52 * checking the disks to make sure they look right.
53 *
54 * If mddev is not set, then scan must be and we
55 * read through the config file for dev+uuid mapping
56 * We recurse, setting mddev, for each device that
57 * - isn't running
58 * - has a valid uuid (or any uuid if !uuidset
59 *
60 * If mddev is set, we try to determine state of md.
61 * check version - must be at least 0.90.0
62 * check kernel version. must be at least 2.4.
63 * If not, we can possibly fall back on START_ARRAY
64 * Try to GET_ARRAY_INFO.
65 * If possible, give up
66 * If not, try to STOP_ARRAY just to make sure
67 *
68 * If !uuidset and scan, look in conf-file for uuid
69 * If not found, give up
70 * If !subdevs and scan and uuidset, get list of devs from conf-file
71 *
72 * For each device:
73 * Check superblock - discard if bad
74 * Check uuid (set if we don't have one) - discard if no match
75 * Check superblock similarity if we have a superbloc - discard if different
76 * Record events, devicenum, utime
77 * This should give us a list of devices for the array
78 * We should collect the most recent event and utime numbers
79 *
80 * Count disks with recent enough event count
81 * While force && !enough disks
82 * Choose newest rejected disks, update event count
83 * mark clean and rewrite superblock
84 * If recent kernel:
85 * SET_ARRAY_INFO
86 * foreach device with recent events : ADD_NEW_DISK
87 * if runstop == 1 || "enough" disks and runstop==0 -> RUN_ARRAY
88 * If old kernel:
89 * Check the device numbers in superblock are right
90 * update superblock if any changes
91 * START_ARRAY
92 *
93 */
94 int old_linux = 0;
95 int vers;
96 mdu_array_info_t array;
97 mddev_dev_t devlist = NULL;
98 mdp_super_t first_super, super;
99 struct {
100 char *devname;
101 int major, minor;
102 long long events;
103 time_t utime;
104 int uptodate;
52826846 105 int raid_disk;
64c4757e
NB
106 } devices[MD_SB_DISKS];
107 int best[MD_SB_DISKS]; /* indexed by raid_disk */
52826846 108 int devcnt = 0, okcnt, sparecnt;
64c4757e
NB
109 int i;
110 int most_recent = 0;
52826846
NB
111 int chosen_drive = -1;
112 int change = 0;
64c4757e 113
64c4757e
NB
114 vers = md_get_version(mdfd);
115 if (vers <= 0) {
682c7051 116 fprintf(stderr, Name ": %s appears not to be an md device.\n");
64c4757e
NB
117 return 1;
118 }
682c7051
NB
119 if (vers < 9000) {
120 fprintf(stderr, Name ": Assemble requires driver version 0.90.0 or later.\n"
52826846 121 " Upgrade your kernel or try --build\n");
64c4757e
NB
122 return 1;
123 }
682c7051 124 if (get_linux_version() < 2004000)
64c4757e
NB
125 old_linux = 1;
126
127 if (ioctl(mdfd, GET_ARRAY_INFO, &array)>=0) {
682c7051 128 fprintf(stderr, Name ": device %s already active - cannot assemble it\n",
64c4757e
NB
129 mddev);
130 return 1;
131 }
132 ioctl(mdfd, STOP_ARRAY, NULL); /* just incase it was started but has no content */
133
134 /*
52826846
NB
135 * If any subdevs are listed, then any that don't
136 * match ident are discarded. Remainder must all match and
137 * become the array.
138 * If no subdevs, then we scan all devices in the config file, but
139 * there must be something in the identity
64c4757e 140 */
64c4757e 141
52826846
NB
142 if (subdevs == 0 &&
143 ident->uuid_set == 0 &&
144 ident->super_minor < 0 &&
145 ident->devices == NULL) {
146 fprintf(stderr, Name ": No identity information available for %s - cannot assemble.\n",
147 mddev);
148 return 1;
64c4757e 149 }
52826846 150 if (subdevs==0)
64c4757e
NB
151 devlist = conf_get_devs(conffile);
152
64c4757e
NB
153 first_super.md_magic = 0;
154 for (i=0; i<MD_SB_DISKS; i++)
155 best[i] = -1;
156
82b27616
NB
157 if (verbose)
158 fprintf(stderr, Name ": looking for devices for %s\n",
159 mddev);
160
64c4757e
NB
161 while (subdevs || devlist) {
162 char *devname;
163 int this_uuid[4];
164 int dfd;
165 struct stat stb;
166 int inargv;
52826846
NB
167 int havesuper=0;
168
64c4757e
NB
169 if (subdevs) {
170 devname = *subdev++;
171 subdevs--;
172 inargv=1;
173 } else {
174 devname = devlist->devname;
175 devlist = devlist->next;
176 inargv=0;
177 }
178
52826846
NB
179 if (ident->devices &&
180 !match_oneof(ident->devices, devname))
181 continue;
182
64c4757e
NB
183 dfd = open(devname, O_RDONLY, 0);
184 if (dfd < 0) {
185 if (inargv || verbose)
682c7051 186 fprintf(stderr, Name ": cannot open device %s: %s\n",
64c4757e 187 devname, strerror(errno));
52826846
NB
188 } else if (fstat(dfd, &stb)< 0) {
189 /* Impossible! */
190 fprintf(stderr, Name ": fstat failed for %s: %s\n",
191 devname, strerror(errno));
192 close(dfd);
193 } if ((stb.st_mode & S_IFMT) != S_IFBLK) {
194 fprintf(stderr, Name ": %d is not a block device.\n",
195 devname);
196 close(dfd);
197 } if (load_super(dfd, &super)) {
64c4757e 198 if (inargv || verbose)
682c7051 199 fprintf( stderr, Name ": no RAID superblock on %s\n",
64c4757e
NB
200 devname);
201 close(dfd);
52826846
NB
202 } else {
203 havesuper =1;
204 uuid_from_super(this_uuid, &super);
205 close(dfd);
64c4757e 206 }
52826846
NB
207
208 if (ident->uuid_set &&
209 (!havesuper || same_uuid(this_uuid, ident->uuid)==0)) {
210 if (inargv || verbose)
211 fprintf(stderr, Name ": %s has wrong uuid.\n",
212 devname);
213 continue;
82b27616 214 }
52826846
NB
215 if (ident->super_minor >= 0 &&
216 (!havesuper || ident->super_minor != super.md_minor)) {
64c4757e 217 if (inargv || verbose)
52826846 218 fprintf(stderr, Name ": %s has wrong super-minor.\n",
64c4757e
NB
219 devname);
220 continue;
221 }
52826846
NB
222
223 /* If we are this far, then we are commited to this device.
224 * If the super_block doesn't exist, or doesn't match others,
225 * then we cannot continue
226 */
227 if (verbose)
228 fprintf(stderr, Name ": %s is identified as a member of %s.\n",
229 devname, mddev);
230
231 if (!havesuper) {
232 fprintf(stderr, Name ": %s has no superblock - assembly aborted\n",
233 devname);
234 return 1;
235 }
236 if (compare_super(&first_super, &super)) {
237 fprintf(stderr, Name ": superblock on %s doesn't match others - assembly aborted\n",
238 devname);
239 return 1;
64c4757e
NB
240 }
241
64c4757e 242 if (devcnt >= MD_SB_DISKS) {
682c7051 243 fprintf(stderr, Name ": ouch - too many devices appear to be in this array. Ignoring %s\n",
64c4757e
NB
244 devname);
245 continue;
246 }
247 devices[devcnt].devname = devname;
248 devices[devcnt].major = MAJOR(stb.st_rdev);
249 devices[devcnt].minor = MINOR(stb.st_rdev);
250 devices[devcnt].events = md_event(&super);
251 devices[devcnt].utime = super.utime;
52826846 252 devices[devcnt].raid_disk = super.this_disk.raid_disk;
64c4757e
NB
253 devices[devcnt].uptodate = 0;
254 if (most_recent < devcnt) {
255 if (devices[devcnt].events
256 > devices[most_recent].events)
257 most_recent = devcnt;
258 }
52826846
NB
259 i = devices[devcnt].raid_disk;
260 if (i>=0 && i < MD_SB_DISKS)
261 if (best[i] == -1
262 || devices[best[i]].events < devices[devcnt].events)
263 best[i] = devcnt;
264
64c4757e
NB
265 devcnt++;
266 }
267
268 if (devcnt == 0) {
682c7051 269 fprintf(stderr, Name ": no devices found for %s\n",
64c4757e
NB
270 mddev);
271 return 1;
272 }
273 /* now we have some devices that might be suitable.
274 * I wonder how many
275 */
276 okcnt = 0;
52826846
NB
277 sparecnt=0;
278 for (i=0; i< MD_SB_DISKS;i++) {
64c4757e
NB
279 int j = best[i];
280 if (j < 0) continue;
281 if (devices[j].events+1 >=
282 devices[most_recent].events) {
283 devices[j].uptodate = 1;
52826846
NB
284 if (i < first_super.raid_disks)
285 okcnt++;
286 else
287 sparecnt++;
64c4757e
NB
288 }
289 }
290 while (force && !enough(first_super.level, first_super.raid_disks, okcnt)) {
291 /* Choose the newest best drive which is
292 * not up-to-date, update the superblock
293 * and add it.
294 */
52826846
NB
295 int fd;
296 for (i=0; i<first_super.raid_disks; i++) {
297 int j = best[i];
298 if (j>=0 &&
299 !devices[j].uptodate &&
300 devices[j].events > 0 &&
301 (chosen_drive < 0 ||
302 devices[j].events > devices[chosen_drive].events))
303 chosen_drive = j;
304 }
305 if (chosen_drive < 0)
306 break;
307 fprintf(stderr, Name ": forcing event count in %s(%d) from %d upto %d\n",
308 devices[chosen_drive].devname, devices[chosen_drive].raid_disk,
309 (int)(devices[chosen_drive].events),
310 (int)(devices[most_recent].events));
311 fd = open(devices[chosen_drive].devname, O_RDWR);
312 if (fd < 0) {
313 fprintf(stderr, Name ": Couldn't open %s for write - not updating\n",
314 devices[chosen_drive].devname);
315 devices[chosen_drive].events = 0;
316 continue;
317 }
318 if (load_super(fd, &super)) {
319 close(fd);
320 fprintf(stderr, Name ": RAID superblock disappeared from %s - not updating.\n",
321 devices[chosen_drive].devname);
322 devices[chosen_drive].events = 0;
323 continue;
324 }
325 super.events_hi = (devices[most_recent].events>>32)&0xFFFFFFFF;
326 super.events_lo = (devices[most_recent].events)&0xFFFFFFFF;
327 super.sb_csum = calc_sb_csum(&super);
328/*DRYRUN*/ if (store_super(fd, &super)) {
329 close(fd);
330 fprintf(stderr, Name ": Could not re-write superblock on %s\n",
331 devices[chosen_drive].devname);
332 devices[chosen_drive].events = 0;
333 continue;
334 }
335 close(fd);
336 devices[chosen_drive].events = devices[most_recent].events;
337 devices[chosen_drive].uptodate = 1;
338 okcnt++;
64c4757e 339 }
52826846
NB
340
341 /* Now we want to look at the superblock which the kernel will base things on
342 * and compare the devices that we think are working with the devices that the
343 * superblock thinks are working.
344 * If there are differences and --force is given, then update this chosen
345 * superblock.
346 */
347 for (i=0; chosen_drive < 0 && i<MD_SB_DISKS; i++) {
348 int j = best[i];
349 int fd;
350 if (j<0)
351 continue;
352 if (!devices[j].uptodate)
353 continue;
354 chosen_drive = j;
355 if ((fd=open(devices[j].devname, O_RDONLY))< 0) {
356 fprintf(stderr, Name ": Cannot open %s: %s\n",
357 devices[j].devname, strerror(errno));
358 return 1;
359 }
360 if (load_super(fd, &super)) {
361 close(fd);
362 fprintf(stderr, Name ": RAID superblock has disappeared from %s\n",
363 devices[j].devname);
364 return 1;
365 }
366 close(fd);
367 }
368
369 for (i=0; i<MD_SB_DISKS; i++) {
370 int j = best[i];
371 if (j<0)
372 continue;
373 if (!devices[j].uptodate)
374 continue;
375 if (devices[j].major != super.disks[j].major ||
376 devices[j].minor != super.disks[j].minor) {
377 change |= 1;
378 super.disks[j].major = devices[j].major;
379 super.disks[j].minor = devices[j].minor;
380 }
381 if (devices[j].uptodate &&
382 (super.disks[i].state & (1 << MD_DISK_FAULTY))) {
383 if (force) {
384 fprintf(stderr, Name ": "
385 "clearing FAULT flag for device %d in %s for %s\n",
386 j, mddev, devices[j].devname);
387 super.disks[i].state &= ~(1<<MD_DISK_FAULTY);
388 change |= 2;
389 } else {
390 fprintf(stderr, Name ": "
391 "device %d in %s is marked faulty in superblock, but %s seems ok\n",
392 i, mddev, devices[j].devname);
393 }
394 }
395 if (!devices[j].uptodate &&
396 !(super.disks[i].state & (1 << MD_DISK_FAULTY))) {
397 fprintf(stderr, Name ": devices %d of %s is not marked FAULTY in superblock, but cannot be found\n",
398 i, mddev);
399 }
400 }
401
402 if ((force && (change & 2))
403 || (old_linux && (change & 1))) {
404 int fd;
405 super.sb_csum = calc_sb_csum(&super);
406 fd = open(devices[chosen_drive].devname, O_RDWR);
407 if (fd < 0) {
408 fprintf(stderr, Name ": Could open %s for write - cannot Assemble array.\n",
409 devices[chosen_drive].devname);
410 return 1;
411 }
412 if (store_super(fd, &super)) {
413 close(fd);
414 fprintf(stderr, Name ": Could not re-write superblock on %s\n",
415 devices[chosen_drive].devname);
416 return 1;
417 }
418 close(fd);
419 change = 0;
420 }
421
64c4757e
NB
422 /* Almost ready to actually *do* something */
423 if (!old_linux) {
424 if (ioctl(mdfd, SET_ARRAY_INFO, NULL) != 0) {
682c7051 425 fprintf(stderr, Name ": SET_ARRAY_INFO failed for %s: %s\n",
64c4757e
NB
426 mddev, strerror(errno));
427 return 1;
428 }
52826846
NB
429 /* First, add the raid disks, but add the chosen one last */
430 for (i=0; i<=MD_SB_DISKS; i++) {
431 int j;
432 if (i < MD_SB_DISKS) {
433 j = best[i];
434 if (j == chosen_drive)
435 continue;
436 } else
437 j = chosen_drive;
438
0db17fcb 439 if (j >= 0 && devices[j].uptodate) {
64c4757e
NB
440 mdu_disk_info_t disk;
441 memset(&disk, 0, sizeof(disk));
442 disk.major = devices[j].major;
443 disk.minor = devices[j].minor;
444 if (ioctl(mdfd, ADD_NEW_DISK, &disk)!=0) {
682c7051 445 fprintf(stderr, Name ": failed to add %s to %s: %s\n",
64c4757e
NB
446 devices[j].devname,
447 mddev,
448 strerror(errno));
52826846
NB
449 if (i < first_super.raid_disks)
450 okcnt--;
451 else
452 sparecnt--;
453 } else if (verbose)
454 fprintf(stderr, Name ": added %s to %s as %d\n",
455 devices[j].devname, mddev, devices[j].raid_disk);
456 } else if (verbose && i < first_super.raid_disks)
682c7051 457 fprintf(stderr, Name ": no uptodate device for slot %d of %s\n",
64c4757e
NB
458 i, mddev);
459 }
52826846 460
64c4757e
NB
461 if (runstop == 1 ||
462 (runstop == 0 &&
463 enough(first_super.level, first_super.raid_disks, okcnt))) {
82b27616 464 if (ioctl(mdfd, RUN_ARRAY, NULL)==0) {
52826846
NB
465 fprintf(stderr, Name ": %s has been started with %d drive%s",
466 mddev, okcnt, okcnt==1?"":"s");
467 if (sparecnt)
468 fprintf(stderr, " and %d spare%s", sparecnt, sparecnt==1?"":"s");
469 fprintf(stderr, ".\n");
64c4757e 470 return 0;
82b27616 471 }
682c7051 472 fprintf(stderr, Name ": failed to RUN_ARRAY %s: %s\n",
64c4757e
NB
473 mddev, strerror(errno));
474 return 1;
475 }
82b27616 476 if (runstop == -1) {
52826846
NB
477 fprintf(stderr, Name ": %s assembled from %d drive%s, but not started.\n",
478 mddev, okcnt, okcnt==1?"":"s");
64c4757e 479 return 0;
82b27616 480 }
52826846
NB
481 fprintf(stderr, Name ": %s assembled from %d drive%s - not enough to start it.\n",
482 mddev, okcnt, okcnt==1?"":"s");
64c4757e 483 return 1;
82b27616 484 } else {
52826846
NB
485 /* The "chosen_drive" is a good choice, and if necessary, the superblock has
486 * been updated to point to the current locations of devices.
487 * so we can just start the array
82b27616 488 */
82b27616 489 int dev;
82b27616
NB
490 dev = MKDEV(devices[chosen_drive].major,
491 devices[chosen_drive].minor);
492 if (ioctl(mdfd, START_ARRAY, dev)) {
493 fprintf(stderr, Name ": Cannot start array: %s\n",
494 strerror(errno));
495 }
496
64c4757e
NB
497 }
498}