]>
Commit | Line | Data |
---|---|---|
1 | /* | |
2 | * mdadm - manage Linux "md" devices aka RAID arrays. | |
3 | * | |
4 | * Copyright (C) 2001-2002 Neil Brown <neilb@cse.unsw.edu.au> | |
5 | * | |
6 | * | |
7 | * This program is free software; you can redistribute it and/or modify | |
8 | * it under the terms of the GNU General Public License as published by | |
9 | * the Free Software Foundation; either version 2 of the License, or | |
10 | * (at your option) any later version. | |
11 | * | |
12 | * This program is distributed in the hope that it will be useful, | |
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
15 | * GNU General Public License for more details. | |
16 | * | |
17 | * You should have received a copy of the GNU General Public License | |
18 | * along with this program; if not, write to the Free Software | |
19 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
20 | * | |
21 | * Author: Neil Brown | |
22 | * Email: <neilb@cse.unsw.edu.au> | |
23 | * Paper: Neil Brown | |
24 | * School of Computer Science and Engineering | |
25 | * The University of New South Wales | |
26 | * Sydney, 2052 | |
27 | * Australia | |
28 | */ | |
29 | ||
30 | #include "mdadm.h" | |
31 | #include "md_u.h" | |
32 | #include "md_p.h" | |
33 | ||
34 | int Assemble(char *mddev, int mdfd, | |
35 | mddev_ident_t ident, char *conffile, | |
36 | mddev_dev_t devlist, | |
37 | int readonly, int runstop, | |
38 | int verbose, int force) | |
39 | { | |
40 | /* | |
41 | * The task of Assemble is to find a collection of | |
42 | * devices that should (according to their superblocks) | |
43 | * form an array, and to give this collection to the MD driver. | |
44 | * In Linux-2.4 and later, this involves submitting a | |
45 | * SET_ARRAY_INFO ioctl with no arg - to prepare | |
46 | * the array - and then submit a number of | |
47 | * ADD_NEW_DISK ioctls to add disks into | |
48 | * the array. Finally RUN_ARRAY might | |
49 | * be submitted to start the array. | |
50 | * | |
51 | * Much of the work of Assemble is in finding and/or | |
52 | * checking the disks to make sure they look right. | |
53 | * | |
54 | * If mddev is not set, then scan must be and we | |
55 | * read through the config file for dev+uuid mapping | |
56 | * We recurse, setting mddev, for each device that | |
57 | * - isn't running | |
58 | * - has a valid uuid (or any uuid if !uuidset | |
59 | * | |
60 | * If mddev is set, we try to determine state of md. | |
61 | * check version - must be at least 0.90.0 | |
62 | * check kernel version. must be at least 2.4. | |
63 | * If not, we can possibly fall back on START_ARRAY | |
64 | * Try to GET_ARRAY_INFO. | |
65 | * If possible, give up | |
66 | * If not, try to STOP_ARRAY just to make sure | |
67 | * | |
68 | * If !uuidset and scan, look in conf-file for uuid | |
69 | * If not found, give up | |
70 | * If !devlist and scan and uuidset, get list of devs from conf-file | |
71 | * | |
72 | * For each device: | |
73 | * Check superblock - discard if bad | |
74 | * Check uuid (set if we don't have one) - discard if no match | |
75 | * Check superblock similarity if we have a superbloc - discard if different | |
76 | * Record events, devicenum, utime | |
77 | * This should give us a list of devices for the array | |
78 | * We should collect the most recent event and utime numbers | |
79 | * | |
80 | * Count disks with recent enough event count | |
81 | * While force && !enough disks | |
82 | * Choose newest rejected disks, update event count | |
83 | * mark clean and rewrite superblock | |
84 | * If recent kernel: | |
85 | * SET_ARRAY_INFO | |
86 | * foreach device with recent events : ADD_NEW_DISK | |
87 | * if runstop == 1 || "enough" disks and runstop==0 -> RUN_ARRAY | |
88 | * If old kernel: | |
89 | * Check the device numbers in superblock are right | |
90 | * update superblock if any changes | |
91 | * START_ARRAY | |
92 | * | |
93 | */ | |
94 | int old_linux = 0; | |
95 | int vers; | |
96 | mdu_array_info_t array; | |
97 | mdp_super_t first_super, super; | |
98 | struct { | |
99 | char *devname; | |
100 | int major, minor; | |
101 | long long events; | |
102 | time_t utime; | |
103 | int uptodate; | |
104 | int raid_disk; | |
105 | } devices[MD_SB_DISKS]; | |
106 | int best[MD_SB_DISKS]; /* indexed by raid_disk */ | |
107 | int devcnt = 0, okcnt, sparecnt; | |
108 | int i; | |
109 | int most_recent = 0; | |
110 | int chosen_drive; | |
111 | int change = 0; | |
112 | int inargv = 0; | |
113 | int start_partial_ok = force || devlist==NULL; | |
114 | ||
115 | vers = md_get_version(mdfd); | |
116 | if (vers <= 0) { | |
117 | fprintf(stderr, Name ": %s appears not to be an md device.\n"); | |
118 | return 1; | |
119 | } | |
120 | if (vers < 9000) { | |
121 | fprintf(stderr, Name ": Assemble requires driver version 0.90.0 or later.\n" | |
122 | " Upgrade your kernel or try --build\n"); | |
123 | return 1; | |
124 | } | |
125 | if (get_linux_version() < 2004000) | |
126 | old_linux = 1; | |
127 | ||
128 | if (ioctl(mdfd, GET_ARRAY_INFO, &array)>=0) { | |
129 | fprintf(stderr, Name ": device %s already active - cannot assemble it\n", | |
130 | mddev); | |
131 | return 1; | |
132 | } | |
133 | ioctl(mdfd, STOP_ARRAY, NULL); /* just incase it was started but has no content */ | |
134 | ||
135 | /* | |
136 | * If any subdevs are listed, then any that don't | |
137 | * match ident are discarded. Remainder must all match and | |
138 | * become the array. | |
139 | * If no subdevs, then we scan all devices in the config file, but | |
140 | * there must be something in the identity | |
141 | */ | |
142 | ||
143 | if (!devlist && | |
144 | ident->uuid_set == 0 && | |
145 | ident->super_minor < 0 && | |
146 | ident->devices == NULL) { | |
147 | fprintf(stderr, Name ": No identity information available for %s - cannot assemble.\n", | |
148 | mddev); | |
149 | return 1; | |
150 | } | |
151 | if (devlist == NULL) | |
152 | devlist = conf_get_devs(conffile); | |
153 | else inargv = 1; | |
154 | ||
155 | first_super.md_magic = 0; | |
156 | for (i=0; i<MD_SB_DISKS; i++) | |
157 | best[i] = -1; | |
158 | ||
159 | if (verbose) | |
160 | fprintf(stderr, Name ": looking for devices for %s\n", | |
161 | mddev); | |
162 | ||
163 | while ( devlist) { | |
164 | char *devname; | |
165 | int this_uuid[4]; | |
166 | int dfd; | |
167 | struct stat stb; | |
168 | int havesuper=0; | |
169 | ||
170 | devname = devlist->devname; | |
171 | devlist = devlist->next; | |
172 | ||
173 | if (ident->devices && | |
174 | !match_oneof(ident->devices, devname)) | |
175 | continue; | |
176 | ||
177 | dfd = open(devname, O_RDONLY, 0); | |
178 | if (dfd < 0) { | |
179 | if (inargv || verbose) | |
180 | fprintf(stderr, Name ": cannot open device %s: %s\n", | |
181 | devname, strerror(errno)); | |
182 | } else if (fstat(dfd, &stb)< 0) { | |
183 | /* Impossible! */ | |
184 | fprintf(stderr, Name ": fstat failed for %s: %s\n", | |
185 | devname, strerror(errno)); | |
186 | close(dfd); | |
187 | } else if ((stb.st_mode & S_IFMT) != S_IFBLK) { | |
188 | fprintf(stderr, Name ": %s is not a block device.\n", | |
189 | devname); | |
190 | close(dfd); | |
191 | } else if (load_super(dfd, &super)) { | |
192 | if (inargv || verbose) | |
193 | fprintf( stderr, Name ": no RAID superblock on %s\n", | |
194 | devname); | |
195 | close(dfd); | |
196 | } else { | |
197 | havesuper =1; | |
198 | uuid_from_super(this_uuid, &super); | |
199 | close(dfd); | |
200 | } | |
201 | ||
202 | if (ident->uuid_set && | |
203 | (!havesuper || same_uuid(this_uuid, ident->uuid)==0)) { | |
204 | if (inargv || verbose) | |
205 | fprintf(stderr, Name ": %s has wrong uuid.\n", | |
206 | devname); | |
207 | continue; | |
208 | } | |
209 | if (ident->super_minor >= 0 && | |
210 | (!havesuper || ident->super_minor != super.md_minor)) { | |
211 | if (inargv || verbose) | |
212 | fprintf(stderr, Name ": %s has wrong super-minor.\n", | |
213 | devname); | |
214 | continue; | |
215 | } | |
216 | if (ident->level != -10 && | |
217 | (!havesuper|| ident->level != super.level)) { | |
218 | if (inargv || verbose) | |
219 | fprintf(stderr, Name ": %s has wrong raid level.\n", | |
220 | devname); | |
221 | continue; | |
222 | } | |
223 | if (ident->raid_disks != -1 && | |
224 | (!havesuper || ident->raid_disks!= super.raid_disks)) { | |
225 | if (inargv || verbose) | |
226 | fprintf(stderr, Name ": %s requires wrong number of drives.\n", | |
227 | devname); | |
228 | continue; | |
229 | } | |
230 | ||
231 | /* If we are this far, then we are commited to this device. | |
232 | * If the super_block doesn't exist, or doesn't match others, | |
233 | * then we cannot continue | |
234 | */ | |
235 | ||
236 | if (!havesuper) { | |
237 | fprintf(stderr, Name ": %s has no superblock - assembly aborted\n", | |
238 | devname); | |
239 | return 1; | |
240 | } | |
241 | if (compare_super(&first_super, &super)) { | |
242 | fprintf(stderr, Name ": superblock on %s doesn't match others - assembly aborted\n", | |
243 | devname); | |
244 | return 1; | |
245 | } | |
246 | ||
247 | if (devcnt >= MD_SB_DISKS) { | |
248 | fprintf(stderr, Name ": ouch - too many devices appear to be in this array. Ignoring %s\n", | |
249 | devname); | |
250 | continue; | |
251 | } | |
252 | if (verbose) | |
253 | fprintf(stderr, Name ": %s is identified as a member of %s, slot %d.\n", | |
254 | devname, mddev, super.this_disk.raid_disk); | |
255 | devices[devcnt].devname = devname; | |
256 | devices[devcnt].major = MAJOR(stb.st_rdev); | |
257 | devices[devcnt].minor = MINOR(stb.st_rdev); | |
258 | devices[devcnt].events = md_event(&super); | |
259 | devices[devcnt].utime = super.utime; | |
260 | devices[devcnt].raid_disk = super.this_disk.raid_disk; | |
261 | devices[devcnt].uptodate = 0; | |
262 | if (most_recent < devcnt) { | |
263 | if (devices[devcnt].events | |
264 | > devices[most_recent].events) | |
265 | most_recent = devcnt; | |
266 | } | |
267 | i = devices[devcnt].raid_disk; | |
268 | if (i>=0 && i < MD_SB_DISKS) | |
269 | if (best[i] == -1 | |
270 | || devices[best[i]].events < devices[devcnt].events) | |
271 | best[i] = devcnt; | |
272 | ||
273 | devcnt++; | |
274 | } | |
275 | ||
276 | if (devcnt == 0) { | |
277 | fprintf(stderr, Name ": no devices found for %s\n", | |
278 | mddev); | |
279 | return 1; | |
280 | } | |
281 | /* now we have some devices that might be suitable. | |
282 | * I wonder how many | |
283 | */ | |
284 | okcnt = 0; | |
285 | sparecnt=0; | |
286 | for (i=0; i< MD_SB_DISKS;i++) { | |
287 | int j = best[i]; | |
288 | int event_margin = !force; | |
289 | if (j < 0) continue; | |
290 | if (devices[j].events+event_margin >= | |
291 | devices[most_recent].events) { | |
292 | devices[j].uptodate = 1; | |
293 | if (i < first_super.raid_disks) | |
294 | okcnt++; | |
295 | else | |
296 | sparecnt++; | |
297 | } | |
298 | } | |
299 | while (force && !enough(first_super.level, first_super.raid_disks, okcnt)) { | |
300 | /* Choose the newest best drive which is | |
301 | * not up-to-date, update the superblock | |
302 | * and add it. | |
303 | */ | |
304 | int fd; | |
305 | chosen_drive = -1; | |
306 | for (i=0; i<first_super.raid_disks; i++) { | |
307 | int j = best[i]; | |
308 | if (j>=0 && | |
309 | !devices[j].uptodate && | |
310 | devices[j].events > 0 && | |
311 | (chosen_drive < 0 || | |
312 | devices[j].events > devices[chosen_drive].events)) | |
313 | chosen_drive = j; | |
314 | } | |
315 | if (chosen_drive < 0) | |
316 | break; | |
317 | fprintf(stderr, Name ": forcing event count in %s(%d) from %d upto %d\n", | |
318 | devices[chosen_drive].devname, devices[chosen_drive].raid_disk, | |
319 | (int)(devices[chosen_drive].events), | |
320 | (int)(devices[most_recent].events)); | |
321 | fd = open(devices[chosen_drive].devname, O_RDWR); | |
322 | if (fd < 0) { | |
323 | fprintf(stderr, Name ": Couldn't open %s for write - not updating\n", | |
324 | devices[chosen_drive].devname); | |
325 | devices[chosen_drive].events = 0; | |
326 | continue; | |
327 | } | |
328 | if (load_super(fd, &super)) { | |
329 | close(fd); | |
330 | fprintf(stderr, Name ": RAID superblock disappeared from %s - not updating.\n", | |
331 | devices[chosen_drive].devname); | |
332 | devices[chosen_drive].events = 0; | |
333 | continue; | |
334 | } | |
335 | super.events_hi = (devices[most_recent].events>>32)&0xFFFFFFFF; | |
336 | super.events_lo = (devices[most_recent].events)&0xFFFFFFFF; | |
337 | super.sb_csum = calc_sb_csum(&super); | |
338 | /*DRYRUN*/ if (store_super(fd, &super)) { | |
339 | close(fd); | |
340 | fprintf(stderr, Name ": Could not re-write superblock on %s\n", | |
341 | devices[chosen_drive].devname); | |
342 | devices[chosen_drive].events = 0; | |
343 | continue; | |
344 | } | |
345 | close(fd); | |
346 | devices[chosen_drive].events = devices[most_recent].events; | |
347 | devices[chosen_drive].uptodate = 1; | |
348 | okcnt++; | |
349 | } | |
350 | ||
351 | /* Now we want to look at the superblock which the kernel will base things on | |
352 | * and compare the devices that we think are working with the devices that the | |
353 | * superblock thinks are working. | |
354 | * If there are differences and --force is given, then update this chosen | |
355 | * superblock. | |
356 | */ | |
357 | chosen_drive = -1; | |
358 | for (i=0; chosen_drive < 0 && i<MD_SB_DISKS; i++) { | |
359 | int j = best[i]; | |
360 | int fd; | |
361 | if (j<0) | |
362 | continue; | |
363 | if (!devices[j].uptodate) | |
364 | continue; | |
365 | chosen_drive = j; | |
366 | if ((fd=open(devices[j].devname, O_RDONLY))< 0) { | |
367 | fprintf(stderr, Name ": Cannot open %s: %s\n", | |
368 | devices[j].devname, strerror(errno)); | |
369 | return 1; | |
370 | } | |
371 | if (load_super(fd, &super)) { | |
372 | close(fd); | |
373 | fprintf(stderr, Name ": RAID superblock has disappeared from %s\n", | |
374 | devices[j].devname); | |
375 | return 1; | |
376 | } | |
377 | close(fd); | |
378 | } | |
379 | ||
380 | for (i=0; i<MD_SB_DISKS; i++) { | |
381 | int j = best[i]; | |
382 | int active_sync = (1<<MD_DISK_ACTIVE) | (1<<MD_DISK_SYNC); | |
383 | if (j<0) | |
384 | continue; | |
385 | if (!devices[j].uptodate) | |
386 | continue; | |
387 | if (devices[j].major != super.disks[j].major || | |
388 | devices[j].minor != super.disks[j].minor) { | |
389 | change |= 1; | |
390 | super.disks[j].major = devices[j].major; | |
391 | super.disks[j].minor = devices[j].minor; | |
392 | } | |
393 | if (devices[j].uptodate && | |
394 | (super.disks[i].state != active_sync)) { | |
395 | if (force) { | |
396 | fprintf(stderr, Name ": " | |
397 | "clearing FAULTY flag for device %d in %s for %s\n", | |
398 | j, mddev, devices[j].devname); | |
399 | super.disks[i].state = active_sync; | |
400 | change |= 2; | |
401 | } else { | |
402 | fprintf(stderr, Name ": " | |
403 | "device %d in %s is marked faulty in superblock, but %s seems ok\n", | |
404 | i, mddev, devices[j].devname); | |
405 | } | |
406 | } | |
407 | if (!devices[j].uptodate && | |
408 | !(super.disks[i].state & (1 << MD_DISK_FAULTY))) { | |
409 | fprintf(stderr, Name ": devices %d of %s is not marked FAULTY in superblock, but cannot be found\n", | |
410 | i, mddev); | |
411 | } | |
412 | } | |
413 | ||
414 | if ((force && (change & 2)) | |
415 | || (old_linux && (change & 1))) { | |
416 | int fd; | |
417 | super.sb_csum = calc_sb_csum(&super); | |
418 | fd = open(devices[chosen_drive].devname, O_RDWR); | |
419 | if (fd < 0) { | |
420 | fprintf(stderr, Name ": Could open %s for write - cannot Assemble array.\n", | |
421 | devices[chosen_drive].devname); | |
422 | return 1; | |
423 | } | |
424 | if (store_super(fd, &super)) { | |
425 | close(fd); | |
426 | fprintf(stderr, Name ": Could not re-write superblock on %s\n", | |
427 | devices[chosen_drive].devname); | |
428 | return 1; | |
429 | } | |
430 | close(fd); | |
431 | change = 0; | |
432 | } | |
433 | ||
434 | /* Almost ready to actually *do* something */ | |
435 | if (!old_linux) { | |
436 | if (ioctl(mdfd, SET_ARRAY_INFO, NULL) != 0) { | |
437 | fprintf(stderr, Name ": SET_ARRAY_INFO failed for %s: %s\n", | |
438 | mddev, strerror(errno)); | |
439 | return 1; | |
440 | } | |
441 | /* First, add the raid disks, but add the chosen one last */ | |
442 | for (i=0; i<=MD_SB_DISKS; i++) { | |
443 | int j; | |
444 | if (i < MD_SB_DISKS) { | |
445 | j = best[i]; | |
446 | if (j == chosen_drive) | |
447 | continue; | |
448 | } else | |
449 | j = chosen_drive; | |
450 | ||
451 | if (j >= 0 && devices[j].uptodate) { | |
452 | mdu_disk_info_t disk; | |
453 | memset(&disk, 0, sizeof(disk)); | |
454 | disk.major = devices[j].major; | |
455 | disk.minor = devices[j].minor; | |
456 | if (ioctl(mdfd, ADD_NEW_DISK, &disk)!=0) { | |
457 | fprintf(stderr, Name ": failed to add %s to %s: %s\n", | |
458 | devices[j].devname, | |
459 | mddev, | |
460 | strerror(errno)); | |
461 | if (i < first_super.raid_disks) | |
462 | okcnt--; | |
463 | else | |
464 | sparecnt--; | |
465 | } else if (verbose) | |
466 | fprintf(stderr, Name ": added %s to %s as %d\n", | |
467 | devices[j].devname, mddev, devices[j].raid_disk); | |
468 | } else if (verbose && i < first_super.raid_disks) | |
469 | fprintf(stderr, Name ": no uptodate device for slot %d of %s\n", | |
470 | i, mddev); | |
471 | } | |
472 | ||
473 | if (runstop == 1 || | |
474 | (runstop == 0 && | |
475 | ( first_super.raid_disks == okcnt | |
476 | || start_partial_ok && enough(first_super.level, first_super.raid_disks, okcnt)) | |
477 | )) { | |
478 | if (ioctl(mdfd, RUN_ARRAY, NULL)==0) { | |
479 | fprintf(stderr, Name ": %s has been started with %d drive%s", | |
480 | mddev, okcnt, okcnt==1?"":"s"); | |
481 | if (sparecnt) | |
482 | fprintf(stderr, " and %d spare%s", sparecnt, sparecnt==1?"":"s"); | |
483 | fprintf(stderr, ".\n"); | |
484 | return 0; | |
485 | } | |
486 | fprintf(stderr, Name ": failed to RUN_ARRAY %s: %s\n", | |
487 | mddev, strerror(errno)); | |
488 | return 1; | |
489 | } | |
490 | if (runstop == -1) { | |
491 | fprintf(stderr, Name ": %s assembled from %d drive%s, but not started.\n", | |
492 | mddev, okcnt, okcnt==1?"":"s"); | |
493 | return 0; | |
494 | } | |
495 | fprintf(stderr, Name ": %s assembled from %d drive%s - not enough to start it (use --run to insist).\n", | |
496 | mddev, okcnt, okcnt==1?"":"s"); | |
497 | return 1; | |
498 | } else { | |
499 | /* The "chosen_drive" is a good choice, and if necessary, the superblock has | |
500 | * been updated to point to the current locations of devices. | |
501 | * so we can just start the array | |
502 | */ | |
503 | unsigned long dev; | |
504 | dev = MKDEV(devices[chosen_drive].major, | |
505 | devices[chosen_drive].minor); | |
506 | if (ioctl(mdfd, START_ARRAY, dev)) { | |
507 | fprintf(stderr, Name ": Cannot start array: %s\n", | |
508 | strerror(errno)); | |
509 | } | |
510 | ||
511 | } | |
512 | } |