]>
Commit | Line | Data |
---|---|---|
64c4757e NB |
1 | /* |
2 | * mdctl - manage Linux "md" devices aka RAID arrays. | |
3 | * | |
4 | * Copyright (C) 2001 Neil Brown <neilb@cse.unsw.edu.au> | |
5 | * | |
6 | * | |
7 | * This program is free software; you can redistribute it and/or modify | |
8 | * it under the terms of the GNU General Public License as published by | |
9 | * the Free Software Foundation; either version 2 of the License, or | |
10 | * (at your option) any later version. | |
11 | * | |
12 | * This program is distributed in the hope that it will be useful, | |
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
15 | * GNU General Public License for more details. | |
16 | * | |
17 | * You should have received a copy of the GNU General Public License | |
18 | * along with this program; if not, write to the Free Software | |
19 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
20 | * | |
21 | * Author: Neil Brown | |
22 | * Email: <neilb@cse.unsw.edu.au> | |
23 | * Paper: Neil Brown | |
24 | * School of Computer Science and Engineering | |
25 | * The University of New South Wales | |
26 | * Sydney, 2052 | |
27 | * Australia | |
28 | */ | |
29 | ||
30 | #include "mdctl.h" | |
64c4757e | 31 | #include "md_u.h" |
52826846 | 32 | #include "md_p.h" |
64c4757e NB |
33 | |
34 | int Assemble(char *mddev, int mdfd, | |
52826846 | 35 | mddev_ident_t ident, char *conffile, |
64c4757e NB |
36 | int subdevs, char **subdev, |
37 | int readonly, int runstop, | |
38 | int verbose, int force) | |
39 | { | |
40 | /* | |
52826846 NB |
41 | * The task of Assemble is to find a collection of |
42 | * devices that should (according to their superblocks) | |
43 | * form an array, and to give this collection to the MD driver. | |
44 | * In Linux-2.4 and later, this involves submitting a | |
64c4757e NB |
45 | * SET_ARRAY_INFO ioctl with no arg - to prepare |
46 | * the array - and then submit a number of | |
47 | * ADD_NEW_DISK ioctls to add disks into | |
48 | * the array. Finally RUN_ARRAY might | |
49 | * be submitted to start the array. | |
50 | * | |
51 | * Much of the work of Assemble is in finding and/or | |
52 | * checking the disks to make sure they look right. | |
53 | * | |
54 | * If mddev is not set, then scan must be and we | |
55 | * read through the config file for dev+uuid mapping | |
56 | * We recurse, setting mddev, for each device that | |
57 | * - isn't running | |
58 | * - has a valid uuid (or any uuid if !uuidset | |
59 | * | |
60 | * If mddev is set, we try to determine state of md. | |
61 | * check version - must be at least 0.90.0 | |
62 | * check kernel version. must be at least 2.4. | |
63 | * If not, we can possibly fall back on START_ARRAY | |
64 | * Try to GET_ARRAY_INFO. | |
65 | * If possible, give up | |
66 | * If not, try to STOP_ARRAY just to make sure | |
67 | * | |
68 | * If !uuidset and scan, look in conf-file for uuid | |
69 | * If not found, give up | |
70 | * If !subdevs and scan and uuidset, get list of devs from conf-file | |
71 | * | |
72 | * For each device: | |
73 | * Check superblock - discard if bad | |
74 | * Check uuid (set if we don't have one) - discard if no match | |
75 | * Check superblock similarity if we have a superbloc - discard if different | |
76 | * Record events, devicenum, utime | |
77 | * This should give us a list of devices for the array | |
78 | * We should collect the most recent event and utime numbers | |
79 | * | |
80 | * Count disks with recent enough event count | |
81 | * While force && !enough disks | |
82 | * Choose newest rejected disks, update event count | |
83 | * mark clean and rewrite superblock | |
84 | * If recent kernel: | |
85 | * SET_ARRAY_INFO | |
86 | * foreach device with recent events : ADD_NEW_DISK | |
87 | * if runstop == 1 || "enough" disks and runstop==0 -> RUN_ARRAY | |
88 | * If old kernel: | |
89 | * Check the device numbers in superblock are right | |
90 | * update superblock if any changes | |
91 | * START_ARRAY | |
92 | * | |
93 | */ | |
94 | int old_linux = 0; | |
95 | int vers; | |
96 | mdu_array_info_t array; | |
97 | mddev_dev_t devlist = NULL; | |
98 | mdp_super_t first_super, super; | |
99 | struct { | |
100 | char *devname; | |
101 | int major, minor; | |
102 | long long events; | |
103 | time_t utime; | |
104 | int uptodate; | |
52826846 | 105 | int raid_disk; |
64c4757e NB |
106 | } devices[MD_SB_DISKS]; |
107 | int best[MD_SB_DISKS]; /* indexed by raid_disk */ | |
52826846 | 108 | int devcnt = 0, okcnt, sparecnt; |
64c4757e NB |
109 | int i; |
110 | int most_recent = 0; | |
52826846 NB |
111 | int chosen_drive = -1; |
112 | int change = 0; | |
64c4757e | 113 | |
64c4757e NB |
114 | vers = md_get_version(mdfd); |
115 | if (vers <= 0) { | |
682c7051 | 116 | fprintf(stderr, Name ": %s appears not to be an md device.\n"); |
64c4757e NB |
117 | return 1; |
118 | } | |
682c7051 NB |
119 | if (vers < 9000) { |
120 | fprintf(stderr, Name ": Assemble requires driver version 0.90.0 or later.\n" | |
52826846 | 121 | " Upgrade your kernel or try --build\n"); |
64c4757e NB |
122 | return 1; |
123 | } | |
682c7051 | 124 | if (get_linux_version() < 2004000) |
64c4757e NB |
125 | old_linux = 1; |
126 | ||
127 | if (ioctl(mdfd, GET_ARRAY_INFO, &array)>=0) { | |
682c7051 | 128 | fprintf(stderr, Name ": device %s already active - cannot assemble it\n", |
64c4757e NB |
129 | mddev); |
130 | return 1; | |
131 | } | |
132 | ioctl(mdfd, STOP_ARRAY, NULL); /* just incase it was started but has no content */ | |
133 | ||
134 | /* | |
52826846 NB |
135 | * If any subdevs are listed, then any that don't |
136 | * match ident are discarded. Remainder must all match and | |
137 | * become the array. | |
138 | * If no subdevs, then we scan all devices in the config file, but | |
139 | * there must be something in the identity | |
64c4757e | 140 | */ |
64c4757e | 141 | |
52826846 NB |
142 | if (subdevs == 0 && |
143 | ident->uuid_set == 0 && | |
144 | ident->super_minor < 0 && | |
145 | ident->devices == NULL) { | |
146 | fprintf(stderr, Name ": No identity information available for %s - cannot assemble.\n", | |
147 | mddev); | |
148 | return 1; | |
64c4757e | 149 | } |
52826846 | 150 | if (subdevs==0) |
64c4757e NB |
151 | devlist = conf_get_devs(conffile); |
152 | ||
64c4757e NB |
153 | first_super.md_magic = 0; |
154 | for (i=0; i<MD_SB_DISKS; i++) | |
155 | best[i] = -1; | |
156 | ||
82b27616 NB |
157 | if (verbose) |
158 | fprintf(stderr, Name ": looking for devices for %s\n", | |
159 | mddev); | |
160 | ||
64c4757e NB |
161 | while (subdevs || devlist) { |
162 | char *devname; | |
163 | int this_uuid[4]; | |
164 | int dfd; | |
165 | struct stat stb; | |
166 | int inargv; | |
52826846 NB |
167 | int havesuper=0; |
168 | ||
64c4757e NB |
169 | if (subdevs) { |
170 | devname = *subdev++; | |
171 | subdevs--; | |
172 | inargv=1; | |
173 | } else { | |
174 | devname = devlist->devname; | |
175 | devlist = devlist->next; | |
176 | inargv=0; | |
177 | } | |
178 | ||
52826846 NB |
179 | if (ident->devices && |
180 | !match_oneof(ident->devices, devname)) | |
181 | continue; | |
182 | ||
64c4757e NB |
183 | dfd = open(devname, O_RDONLY, 0); |
184 | if (dfd < 0) { | |
185 | if (inargv || verbose) | |
682c7051 | 186 | fprintf(stderr, Name ": cannot open device %s: %s\n", |
64c4757e | 187 | devname, strerror(errno)); |
52826846 NB |
188 | } else if (fstat(dfd, &stb)< 0) { |
189 | /* Impossible! */ | |
190 | fprintf(stderr, Name ": fstat failed for %s: %s\n", | |
191 | devname, strerror(errno)); | |
192 | close(dfd); | |
193 | } if ((stb.st_mode & S_IFMT) != S_IFBLK) { | |
194 | fprintf(stderr, Name ": %d is not a block device.\n", | |
195 | devname); | |
196 | close(dfd); | |
197 | } if (load_super(dfd, &super)) { | |
64c4757e | 198 | if (inargv || verbose) |
682c7051 | 199 | fprintf( stderr, Name ": no RAID superblock on %s\n", |
64c4757e NB |
200 | devname); |
201 | close(dfd); | |
52826846 NB |
202 | } else { |
203 | havesuper =1; | |
204 | uuid_from_super(this_uuid, &super); | |
205 | close(dfd); | |
64c4757e | 206 | } |
52826846 NB |
207 | |
208 | if (ident->uuid_set && | |
209 | (!havesuper || same_uuid(this_uuid, ident->uuid)==0)) { | |
210 | if (inargv || verbose) | |
211 | fprintf(stderr, Name ": %s has wrong uuid.\n", | |
212 | devname); | |
213 | continue; | |
82b27616 | 214 | } |
52826846 NB |
215 | if (ident->super_minor >= 0 && |
216 | (!havesuper || ident->super_minor != super.md_minor)) { | |
64c4757e | 217 | if (inargv || verbose) |
52826846 | 218 | fprintf(stderr, Name ": %s has wrong super-minor.\n", |
64c4757e NB |
219 | devname); |
220 | continue; | |
221 | } | |
52826846 NB |
222 | |
223 | /* If we are this far, then we are commited to this device. | |
224 | * If the super_block doesn't exist, or doesn't match others, | |
225 | * then we cannot continue | |
226 | */ | |
227 | if (verbose) | |
228 | fprintf(stderr, Name ": %s is identified as a member of %s.\n", | |
229 | devname, mddev); | |
230 | ||
231 | if (!havesuper) { | |
232 | fprintf(stderr, Name ": %s has no superblock - assembly aborted\n", | |
233 | devname); | |
234 | return 1; | |
235 | } | |
236 | if (compare_super(&first_super, &super)) { | |
237 | fprintf(stderr, Name ": superblock on %s doesn't match others - assembly aborted\n", | |
238 | devname); | |
239 | return 1; | |
64c4757e NB |
240 | } |
241 | ||
64c4757e | 242 | if (devcnt >= MD_SB_DISKS) { |
682c7051 | 243 | fprintf(stderr, Name ": ouch - too many devices appear to be in this array. Ignoring %s\n", |
64c4757e NB |
244 | devname); |
245 | continue; | |
246 | } | |
247 | devices[devcnt].devname = devname; | |
248 | devices[devcnt].major = MAJOR(stb.st_rdev); | |
249 | devices[devcnt].minor = MINOR(stb.st_rdev); | |
250 | devices[devcnt].events = md_event(&super); | |
251 | devices[devcnt].utime = super.utime; | |
52826846 | 252 | devices[devcnt].raid_disk = super.this_disk.raid_disk; |
64c4757e NB |
253 | devices[devcnt].uptodate = 0; |
254 | if (most_recent < devcnt) { | |
255 | if (devices[devcnt].events | |
256 | > devices[most_recent].events) | |
257 | most_recent = devcnt; | |
258 | } | |
52826846 NB |
259 | i = devices[devcnt].raid_disk; |
260 | if (i>=0 && i < MD_SB_DISKS) | |
261 | if (best[i] == -1 | |
262 | || devices[best[i]].events < devices[devcnt].events) | |
263 | best[i] = devcnt; | |
264 | ||
64c4757e NB |
265 | devcnt++; |
266 | } | |
267 | ||
268 | if (devcnt == 0) { | |
682c7051 | 269 | fprintf(stderr, Name ": no devices found for %s\n", |
64c4757e NB |
270 | mddev); |
271 | return 1; | |
272 | } | |
273 | /* now we have some devices that might be suitable. | |
274 | * I wonder how many | |
275 | */ | |
276 | okcnt = 0; | |
52826846 NB |
277 | sparecnt=0; |
278 | for (i=0; i< MD_SB_DISKS;i++) { | |
64c4757e NB |
279 | int j = best[i]; |
280 | if (j < 0) continue; | |
281 | if (devices[j].events+1 >= | |
282 | devices[most_recent].events) { | |
283 | devices[j].uptodate = 1; | |
52826846 NB |
284 | if (i < first_super.raid_disks) |
285 | okcnt++; | |
286 | else | |
287 | sparecnt++; | |
64c4757e NB |
288 | } |
289 | } | |
290 | while (force && !enough(first_super.level, first_super.raid_disks, okcnt)) { | |
291 | /* Choose the newest best drive which is | |
292 | * not up-to-date, update the superblock | |
293 | * and add it. | |
294 | */ | |
52826846 NB |
295 | int fd; |
296 | for (i=0; i<first_super.raid_disks; i++) { | |
297 | int j = best[i]; | |
298 | if (j>=0 && | |
299 | !devices[j].uptodate && | |
300 | devices[j].events > 0 && | |
301 | (chosen_drive < 0 || | |
302 | devices[j].events > devices[chosen_drive].events)) | |
303 | chosen_drive = j; | |
304 | } | |
305 | if (chosen_drive < 0) | |
306 | break; | |
307 | fprintf(stderr, Name ": forcing event count in %s(%d) from %d upto %d\n", | |
308 | devices[chosen_drive].devname, devices[chosen_drive].raid_disk, | |
309 | (int)(devices[chosen_drive].events), | |
310 | (int)(devices[most_recent].events)); | |
311 | fd = open(devices[chosen_drive].devname, O_RDWR); | |
312 | if (fd < 0) { | |
313 | fprintf(stderr, Name ": Couldn't open %s for write - not updating\n", | |
314 | devices[chosen_drive].devname); | |
315 | devices[chosen_drive].events = 0; | |
316 | continue; | |
317 | } | |
318 | if (load_super(fd, &super)) { | |
319 | close(fd); | |
320 | fprintf(stderr, Name ": RAID superblock disappeared from %s - not updating.\n", | |
321 | devices[chosen_drive].devname); | |
322 | devices[chosen_drive].events = 0; | |
323 | continue; | |
324 | } | |
325 | super.events_hi = (devices[most_recent].events>>32)&0xFFFFFFFF; | |
326 | super.events_lo = (devices[most_recent].events)&0xFFFFFFFF; | |
327 | super.sb_csum = calc_sb_csum(&super); | |
328 | /*DRYRUN*/ if (store_super(fd, &super)) { | |
329 | close(fd); | |
330 | fprintf(stderr, Name ": Could not re-write superblock on %s\n", | |
331 | devices[chosen_drive].devname); | |
332 | devices[chosen_drive].events = 0; | |
333 | continue; | |
334 | } | |
335 | close(fd); | |
336 | devices[chosen_drive].events = devices[most_recent].events; | |
337 | devices[chosen_drive].uptodate = 1; | |
338 | okcnt++; | |
64c4757e | 339 | } |
52826846 NB |
340 | |
341 | /* Now we want to look at the superblock which the kernel will base things on | |
342 | * and compare the devices that we think are working with the devices that the | |
343 | * superblock thinks are working. | |
344 | * If there are differences and --force is given, then update this chosen | |
345 | * superblock. | |
346 | */ | |
347 | for (i=0; chosen_drive < 0 && i<MD_SB_DISKS; i++) { | |
348 | int j = best[i]; | |
349 | int fd; | |
350 | if (j<0) | |
351 | continue; | |
352 | if (!devices[j].uptodate) | |
353 | continue; | |
354 | chosen_drive = j; | |
355 | if ((fd=open(devices[j].devname, O_RDONLY))< 0) { | |
356 | fprintf(stderr, Name ": Cannot open %s: %s\n", | |
357 | devices[j].devname, strerror(errno)); | |
358 | return 1; | |
359 | } | |
360 | if (load_super(fd, &super)) { | |
361 | close(fd); | |
362 | fprintf(stderr, Name ": RAID superblock has disappeared from %s\n", | |
363 | devices[j].devname); | |
364 | return 1; | |
365 | } | |
366 | close(fd); | |
367 | } | |
368 | ||
369 | for (i=0; i<MD_SB_DISKS; i++) { | |
370 | int j = best[i]; | |
371 | if (j<0) | |
372 | continue; | |
373 | if (!devices[j].uptodate) | |
374 | continue; | |
375 | if (devices[j].major != super.disks[j].major || | |
376 | devices[j].minor != super.disks[j].minor) { | |
377 | change |= 1; | |
378 | super.disks[j].major = devices[j].major; | |
379 | super.disks[j].minor = devices[j].minor; | |
380 | } | |
381 | if (devices[j].uptodate && | |
382 | (super.disks[i].state & (1 << MD_DISK_FAULTY))) { | |
383 | if (force) { | |
384 | fprintf(stderr, Name ": " | |
385 | "clearing FAULT flag for device %d in %s for %s\n", | |
386 | j, mddev, devices[j].devname); | |
387 | super.disks[i].state &= ~(1<<MD_DISK_FAULTY); | |
388 | change |= 2; | |
389 | } else { | |
390 | fprintf(stderr, Name ": " | |
391 | "device %d in %s is marked faulty in superblock, but %s seems ok\n", | |
392 | i, mddev, devices[j].devname); | |
393 | } | |
394 | } | |
395 | if (!devices[j].uptodate && | |
396 | !(super.disks[i].state & (1 << MD_DISK_FAULTY))) { | |
397 | fprintf(stderr, Name ": devices %d of %s is not marked FAULTY in superblock, but cannot be found\n", | |
398 | i, mddev); | |
399 | } | |
400 | } | |
401 | ||
402 | if ((force && (change & 2)) | |
403 | || (old_linux && (change & 1))) { | |
404 | int fd; | |
405 | super.sb_csum = calc_sb_csum(&super); | |
406 | fd = open(devices[chosen_drive].devname, O_RDWR); | |
407 | if (fd < 0) { | |
408 | fprintf(stderr, Name ": Could open %s for write - cannot Assemble array.\n", | |
409 | devices[chosen_drive].devname); | |
410 | return 1; | |
411 | } | |
412 | if (store_super(fd, &super)) { | |
413 | close(fd); | |
414 | fprintf(stderr, Name ": Could not re-write superblock on %s\n", | |
415 | devices[chosen_drive].devname); | |
416 | return 1; | |
417 | } | |
418 | close(fd); | |
419 | change = 0; | |
420 | } | |
421 | ||
64c4757e NB |
422 | /* Almost ready to actually *do* something */ |
423 | if (!old_linux) { | |
424 | if (ioctl(mdfd, SET_ARRAY_INFO, NULL) != 0) { | |
682c7051 | 425 | fprintf(stderr, Name ": SET_ARRAY_INFO failed for %s: %s\n", |
64c4757e NB |
426 | mddev, strerror(errno)); |
427 | return 1; | |
428 | } | |
52826846 NB |
429 | /* First, add the raid disks, but add the chosen one last */ |
430 | for (i=0; i<=MD_SB_DISKS; i++) { | |
431 | int j; | |
432 | if (i < MD_SB_DISKS) { | |
433 | j = best[i]; | |
434 | if (j == chosen_drive) | |
435 | continue; | |
436 | } else | |
437 | j = chosen_drive; | |
438 | ||
0db17fcb | 439 | if (j >= 0 && devices[j].uptodate) { |
64c4757e NB |
440 | mdu_disk_info_t disk; |
441 | memset(&disk, 0, sizeof(disk)); | |
442 | disk.major = devices[j].major; | |
443 | disk.minor = devices[j].minor; | |
444 | if (ioctl(mdfd, ADD_NEW_DISK, &disk)!=0) { | |
682c7051 | 445 | fprintf(stderr, Name ": failed to add %s to %s: %s\n", |
64c4757e NB |
446 | devices[j].devname, |
447 | mddev, | |
448 | strerror(errno)); | |
52826846 NB |
449 | if (i < first_super.raid_disks) |
450 | okcnt--; | |
451 | else | |
452 | sparecnt--; | |
453 | } else if (verbose) | |
454 | fprintf(stderr, Name ": added %s to %s as %d\n", | |
455 | devices[j].devname, mddev, devices[j].raid_disk); | |
456 | } else if (verbose && i < first_super.raid_disks) | |
682c7051 | 457 | fprintf(stderr, Name ": no uptodate device for slot %d of %s\n", |
64c4757e NB |
458 | i, mddev); |
459 | } | |
52826846 | 460 | |
64c4757e NB |
461 | if (runstop == 1 || |
462 | (runstop == 0 && | |
463 | enough(first_super.level, first_super.raid_disks, okcnt))) { | |
82b27616 | 464 | if (ioctl(mdfd, RUN_ARRAY, NULL)==0) { |
52826846 NB |
465 | fprintf(stderr, Name ": %s has been started with %d drive%s", |
466 | mddev, okcnt, okcnt==1?"":"s"); | |
467 | if (sparecnt) | |
468 | fprintf(stderr, " and %d spare%s", sparecnt, sparecnt==1?"":"s"); | |
469 | fprintf(stderr, ".\n"); | |
64c4757e | 470 | return 0; |
82b27616 | 471 | } |
682c7051 | 472 | fprintf(stderr, Name ": failed to RUN_ARRAY %s: %s\n", |
64c4757e NB |
473 | mddev, strerror(errno)); |
474 | return 1; | |
475 | } | |
82b27616 | 476 | if (runstop == -1) { |
52826846 NB |
477 | fprintf(stderr, Name ": %s assembled from %d drive%s, but not started.\n", |
478 | mddev, okcnt, okcnt==1?"":"s"); | |
64c4757e | 479 | return 0; |
82b27616 | 480 | } |
52826846 NB |
481 | fprintf(stderr, Name ": %s assembled from %d drive%s - not enough to start it.\n", |
482 | mddev, okcnt, okcnt==1?"":"s"); | |
64c4757e | 483 | return 1; |
82b27616 | 484 | } else { |
52826846 NB |
485 | /* The "chosen_drive" is a good choice, and if necessary, the superblock has |
486 | * been updated to point to the current locations of devices. | |
487 | * so we can just start the array | |
82b27616 | 488 | */ |
82b27616 | 489 | int dev; |
82b27616 NB |
490 | dev = MKDEV(devices[chosen_drive].major, |
491 | devices[chosen_drive].minor); | |
492 | if (ioctl(mdfd, START_ARRAY, dev)) { | |
493 | fprintf(stderr, Name ": Cannot start array: %s\n", | |
494 | strerror(errno)); | |
495 | } | |
496 | ||
64c4757e NB |
497 | } |
498 | } |