]>
Commit | Line | Data |
---|---|---|
64c4757e | 1 | /* |
9a9dab36 | 2 | * mdadm - manage Linux "md" devices aka RAID arrays. |
64c4757e | 3 | * |
cd29a5c8 | 4 | * Copyright (C) 2001-2002 Neil Brown <neilb@cse.unsw.edu.au> |
64c4757e NB |
5 | * |
6 | * | |
7 | * This program is free software; you can redistribute it and/or modify | |
8 | * it under the terms of the GNU General Public License as published by | |
9 | * the Free Software Foundation; either version 2 of the License, or | |
10 | * (at your option) any later version. | |
11 | * | |
12 | * This program is distributed in the hope that it will be useful, | |
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
15 | * GNU General Public License for more details. | |
16 | * | |
17 | * You should have received a copy of the GNU General Public License | |
18 | * along with this program; if not, write to the Free Software | |
19 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
20 | * | |
21 | * Author: Neil Brown | |
22 | * Email: <neilb@cse.unsw.edu.au> | |
23 | * Paper: Neil Brown | |
24 | * School of Computer Science and Engineering | |
25 | * The University of New South Wales | |
26 | * Sydney, 2052 | |
27 | * Australia | |
28 | */ | |
29 | ||
9a9dab36 | 30 | #include "mdadm.h" |
64c4757e | 31 | #include "md_u.h" |
52826846 | 32 | #include "md_p.h" |
64c4757e NB |
33 | |
34 | int Assemble(char *mddev, int mdfd, | |
52826846 | 35 | mddev_ident_t ident, char *conffile, |
cd29a5c8 | 36 | mddev_dev_t devlist, |
64c4757e NB |
37 | int readonly, int runstop, |
38 | int verbose, int force) | |
39 | { | |
40 | /* | |
52826846 NB |
41 | * The task of Assemble is to find a collection of |
42 | * devices that should (according to their superblocks) | |
43 | * form an array, and to give this collection to the MD driver. | |
44 | * In Linux-2.4 and later, this involves submitting a | |
64c4757e NB |
45 | * SET_ARRAY_INFO ioctl with no arg - to prepare |
46 | * the array - and then submit a number of | |
47 | * ADD_NEW_DISK ioctls to add disks into | |
48 | * the array. Finally RUN_ARRAY might | |
49 | * be submitted to start the array. | |
50 | * | |
51 | * Much of the work of Assemble is in finding and/or | |
52 | * checking the disks to make sure they look right. | |
53 | * | |
54 | * If mddev is not set, then scan must be and we | |
55 | * read through the config file for dev+uuid mapping | |
56 | * We recurse, setting mddev, for each device that | |
57 | * - isn't running | |
58 | * - has a valid uuid (or any uuid if !uuidset | |
59 | * | |
60 | * If mddev is set, we try to determine state of md. | |
61 | * check version - must be at least 0.90.0 | |
62 | * check kernel version. must be at least 2.4. | |
63 | * If not, we can possibly fall back on START_ARRAY | |
64 | * Try to GET_ARRAY_INFO. | |
65 | * If possible, give up | |
66 | * If not, try to STOP_ARRAY just to make sure | |
67 | * | |
68 | * If !uuidset and scan, look in conf-file for uuid | |
69 | * If not found, give up | |
cd29a5c8 | 70 | * If !devlist and scan and uuidset, get list of devs from conf-file |
64c4757e NB |
71 | * |
72 | * For each device: | |
73 | * Check superblock - discard if bad | |
74 | * Check uuid (set if we don't have one) - discard if no match | |
75 | * Check superblock similarity if we have a superbloc - discard if different | |
76 | * Record events, devicenum, utime | |
77 | * This should give us a list of devices for the array | |
78 | * We should collect the most recent event and utime numbers | |
79 | * | |
80 | * Count disks with recent enough event count | |
81 | * While force && !enough disks | |
82 | * Choose newest rejected disks, update event count | |
83 | * mark clean and rewrite superblock | |
84 | * If recent kernel: | |
85 | * SET_ARRAY_INFO | |
86 | * foreach device with recent events : ADD_NEW_DISK | |
87 | * if runstop == 1 || "enough" disks and runstop==0 -> RUN_ARRAY | |
88 | * If old kernel: | |
89 | * Check the device numbers in superblock are right | |
90 | * update superblock if any changes | |
91 | * START_ARRAY | |
92 | * | |
93 | */ | |
94 | int old_linux = 0; | |
95 | int vers; | |
96 | mdu_array_info_t array; | |
64c4757e NB |
97 | mdp_super_t first_super, super; |
98 | struct { | |
99 | char *devname; | |
100 | int major, minor; | |
101 | long long events; | |
102 | time_t utime; | |
103 | int uptodate; | |
52826846 | 104 | int raid_disk; |
64c4757e NB |
105 | } devices[MD_SB_DISKS]; |
106 | int best[MD_SB_DISKS]; /* indexed by raid_disk */ | |
52826846 | 107 | int devcnt = 0, okcnt, sparecnt; |
64c4757e NB |
108 | int i; |
109 | int most_recent = 0; | |
cd29a5c8 | 110 | int chosen_drive; |
52826846 | 111 | int change = 0; |
cd29a5c8 NB |
112 | int inargv = 0; |
113 | int start_partial_ok = force || devlist==NULL; | |
64c4757e | 114 | |
64c4757e NB |
115 | vers = md_get_version(mdfd); |
116 | if (vers <= 0) { | |
682c7051 | 117 | fprintf(stderr, Name ": %s appears not to be an md device.\n"); |
64c4757e NB |
118 | return 1; |
119 | } | |
682c7051 NB |
120 | if (vers < 9000) { |
121 | fprintf(stderr, Name ": Assemble requires driver version 0.90.0 or later.\n" | |
52826846 | 122 | " Upgrade your kernel or try --build\n"); |
64c4757e NB |
123 | return 1; |
124 | } | |
682c7051 | 125 | if (get_linux_version() < 2004000) |
64c4757e NB |
126 | old_linux = 1; |
127 | ||
128 | if (ioctl(mdfd, GET_ARRAY_INFO, &array)>=0) { | |
682c7051 | 129 | fprintf(stderr, Name ": device %s already active - cannot assemble it\n", |
64c4757e NB |
130 | mddev); |
131 | return 1; | |
132 | } | |
133 | ioctl(mdfd, STOP_ARRAY, NULL); /* just incase it was started but has no content */ | |
134 | ||
135 | /* | |
52826846 NB |
136 | * If any subdevs are listed, then any that don't |
137 | * match ident are discarded. Remainder must all match and | |
138 | * become the array. | |
139 | * If no subdevs, then we scan all devices in the config file, but | |
140 | * there must be something in the identity | |
64c4757e | 141 | */ |
64c4757e | 142 | |
cd29a5c8 | 143 | if (!devlist && |
52826846 NB |
144 | ident->uuid_set == 0 && |
145 | ident->super_minor < 0 && | |
146 | ident->devices == NULL) { | |
147 | fprintf(stderr, Name ": No identity information available for %s - cannot assemble.\n", | |
148 | mddev); | |
149 | return 1; | |
64c4757e | 150 | } |
cd29a5c8 | 151 | if (devlist == NULL) |
64c4757e | 152 | devlist = conf_get_devs(conffile); |
cd29a5c8 | 153 | else inargv = 1; |
64c4757e | 154 | |
64c4757e NB |
155 | first_super.md_magic = 0; |
156 | for (i=0; i<MD_SB_DISKS; i++) | |
157 | best[i] = -1; | |
158 | ||
82b27616 NB |
159 | if (verbose) |
160 | fprintf(stderr, Name ": looking for devices for %s\n", | |
161 | mddev); | |
162 | ||
cd29a5c8 | 163 | while ( devlist) { |
64c4757e NB |
164 | char *devname; |
165 | int this_uuid[4]; | |
166 | int dfd; | |
167 | struct stat stb; | |
52826846 NB |
168 | int havesuper=0; |
169 | ||
cd29a5c8 NB |
170 | devname = devlist->devname; |
171 | devlist = devlist->next; | |
64c4757e | 172 | |
52826846 NB |
173 | if (ident->devices && |
174 | !match_oneof(ident->devices, devname)) | |
175 | continue; | |
176 | ||
64c4757e NB |
177 | dfd = open(devname, O_RDONLY, 0); |
178 | if (dfd < 0) { | |
179 | if (inargv || verbose) | |
682c7051 | 180 | fprintf(stderr, Name ": cannot open device %s: %s\n", |
64c4757e | 181 | devname, strerror(errno)); |
52826846 NB |
182 | } else if (fstat(dfd, &stb)< 0) { |
183 | /* Impossible! */ | |
184 | fprintf(stderr, Name ": fstat failed for %s: %s\n", | |
185 | devname, strerror(errno)); | |
186 | close(dfd); | |
cd29a5c8 NB |
187 | } else if ((stb.st_mode & S_IFMT) != S_IFBLK) { |
188 | fprintf(stderr, Name ": %s is not a block device.\n", | |
52826846 NB |
189 | devname); |
190 | close(dfd); | |
cd29a5c8 | 191 | } else if (load_super(dfd, &super)) { |
64c4757e | 192 | if (inargv || verbose) |
682c7051 | 193 | fprintf( stderr, Name ": no RAID superblock on %s\n", |
64c4757e NB |
194 | devname); |
195 | close(dfd); | |
52826846 NB |
196 | } else { |
197 | havesuper =1; | |
198 | uuid_from_super(this_uuid, &super); | |
199 | close(dfd); | |
64c4757e | 200 | } |
52826846 NB |
201 | |
202 | if (ident->uuid_set && | |
203 | (!havesuper || same_uuid(this_uuid, ident->uuid)==0)) { | |
204 | if (inargv || verbose) | |
205 | fprintf(stderr, Name ": %s has wrong uuid.\n", | |
206 | devname); | |
207 | continue; | |
82b27616 | 208 | } |
52826846 NB |
209 | if (ident->super_minor >= 0 && |
210 | (!havesuper || ident->super_minor != super.md_minor)) { | |
64c4757e | 211 | if (inargv || verbose) |
52826846 | 212 | fprintf(stderr, Name ": %s has wrong super-minor.\n", |
64c4757e NB |
213 | devname); |
214 | continue; | |
215 | } | |
cd29a5c8 NB |
216 | if (ident->level != -10 && |
217 | (!havesuper|| ident->level != super.level)) { | |
218 | if (inargv || verbose) | |
219 | fprintf(stderr, Name ": %s has wrong raid level.\n", | |
220 | devname); | |
221 | continue; | |
222 | } | |
223 | if (ident->raid_disks != -1 && | |
224 | (!havesuper || ident->raid_disks!= super.raid_disks)) { | |
225 | if (inargv || verbose) | |
226 | fprintf(stderr, Name ": %s requires wrong number of drives.\n", | |
227 | devname); | |
228 | continue; | |
229 | } | |
52826846 NB |
230 | |
231 | /* If we are this far, then we are commited to this device. | |
232 | * If the super_block doesn't exist, or doesn't match others, | |
233 | * then we cannot continue | |
234 | */ | |
52826846 NB |
235 | |
236 | if (!havesuper) { | |
237 | fprintf(stderr, Name ": %s has no superblock - assembly aborted\n", | |
238 | devname); | |
239 | return 1; | |
240 | } | |
241 | if (compare_super(&first_super, &super)) { | |
242 | fprintf(stderr, Name ": superblock on %s doesn't match others - assembly aborted\n", | |
243 | devname); | |
244 | return 1; | |
64c4757e NB |
245 | } |
246 | ||
64c4757e | 247 | if (devcnt >= MD_SB_DISKS) { |
682c7051 | 248 | fprintf(stderr, Name ": ouch - too many devices appear to be in this array. Ignoring %s\n", |
64c4757e NB |
249 | devname); |
250 | continue; | |
251 | } | |
cd29a5c8 NB |
252 | if (verbose) |
253 | fprintf(stderr, Name ": %s is identified as a member of %s, slot %d.\n", | |
254 | devname, mddev, super.this_disk.raid_disk); | |
64c4757e NB |
255 | devices[devcnt].devname = devname; |
256 | devices[devcnt].major = MAJOR(stb.st_rdev); | |
257 | devices[devcnt].minor = MINOR(stb.st_rdev); | |
258 | devices[devcnt].events = md_event(&super); | |
259 | devices[devcnt].utime = super.utime; | |
52826846 | 260 | devices[devcnt].raid_disk = super.this_disk.raid_disk; |
64c4757e NB |
261 | devices[devcnt].uptodate = 0; |
262 | if (most_recent < devcnt) { | |
263 | if (devices[devcnt].events | |
264 | > devices[most_recent].events) | |
265 | most_recent = devcnt; | |
266 | } | |
52826846 NB |
267 | i = devices[devcnt].raid_disk; |
268 | if (i>=0 && i < MD_SB_DISKS) | |
269 | if (best[i] == -1 | |
270 | || devices[best[i]].events < devices[devcnt].events) | |
271 | best[i] = devcnt; | |
272 | ||
64c4757e NB |
273 | devcnt++; |
274 | } | |
275 | ||
276 | if (devcnt == 0) { | |
682c7051 | 277 | fprintf(stderr, Name ": no devices found for %s\n", |
64c4757e NB |
278 | mddev); |
279 | return 1; | |
280 | } | |
281 | /* now we have some devices that might be suitable. | |
282 | * I wonder how many | |
283 | */ | |
284 | okcnt = 0; | |
52826846 NB |
285 | sparecnt=0; |
286 | for (i=0; i< MD_SB_DISKS;i++) { | |
64c4757e | 287 | int j = best[i]; |
cd29a5c8 | 288 | int event_margin = !force; |
64c4757e | 289 | if (j < 0) continue; |
cd29a5c8 | 290 | if (devices[j].events+event_margin >= |
64c4757e NB |
291 | devices[most_recent].events) { |
292 | devices[j].uptodate = 1; | |
52826846 NB |
293 | if (i < first_super.raid_disks) |
294 | okcnt++; | |
295 | else | |
296 | sparecnt++; | |
64c4757e NB |
297 | } |
298 | } | |
299 | while (force && !enough(first_super.level, first_super.raid_disks, okcnt)) { | |
300 | /* Choose the newest best drive which is | |
301 | * not up-to-date, update the superblock | |
302 | * and add it. | |
303 | */ | |
52826846 | 304 | int fd; |
cd29a5c8 | 305 | chosen_drive = -1; |
52826846 NB |
306 | for (i=0; i<first_super.raid_disks; i++) { |
307 | int j = best[i]; | |
308 | if (j>=0 && | |
309 | !devices[j].uptodate && | |
310 | devices[j].events > 0 && | |
311 | (chosen_drive < 0 || | |
312 | devices[j].events > devices[chosen_drive].events)) | |
313 | chosen_drive = j; | |
314 | } | |
315 | if (chosen_drive < 0) | |
316 | break; | |
317 | fprintf(stderr, Name ": forcing event count in %s(%d) from %d upto %d\n", | |
318 | devices[chosen_drive].devname, devices[chosen_drive].raid_disk, | |
319 | (int)(devices[chosen_drive].events), | |
320 | (int)(devices[most_recent].events)); | |
321 | fd = open(devices[chosen_drive].devname, O_RDWR); | |
322 | if (fd < 0) { | |
323 | fprintf(stderr, Name ": Couldn't open %s for write - not updating\n", | |
324 | devices[chosen_drive].devname); | |
325 | devices[chosen_drive].events = 0; | |
326 | continue; | |
327 | } | |
328 | if (load_super(fd, &super)) { | |
329 | close(fd); | |
330 | fprintf(stderr, Name ": RAID superblock disappeared from %s - not updating.\n", | |
331 | devices[chosen_drive].devname); | |
332 | devices[chosen_drive].events = 0; | |
333 | continue; | |
334 | } | |
335 | super.events_hi = (devices[most_recent].events>>32)&0xFFFFFFFF; | |
336 | super.events_lo = (devices[most_recent].events)&0xFFFFFFFF; | |
337 | super.sb_csum = calc_sb_csum(&super); | |
338 | /*DRYRUN*/ if (store_super(fd, &super)) { | |
339 | close(fd); | |
340 | fprintf(stderr, Name ": Could not re-write superblock on %s\n", | |
341 | devices[chosen_drive].devname); | |
342 | devices[chosen_drive].events = 0; | |
343 | continue; | |
344 | } | |
345 | close(fd); | |
346 | devices[chosen_drive].events = devices[most_recent].events; | |
347 | devices[chosen_drive].uptodate = 1; | |
348 | okcnt++; | |
64c4757e | 349 | } |
52826846 NB |
350 | |
351 | /* Now we want to look at the superblock which the kernel will base things on | |
352 | * and compare the devices that we think are working with the devices that the | |
353 | * superblock thinks are working. | |
354 | * If there are differences and --force is given, then update this chosen | |
355 | * superblock. | |
356 | */ | |
cd29a5c8 | 357 | chosen_drive = -1; |
52826846 NB |
358 | for (i=0; chosen_drive < 0 && i<MD_SB_DISKS; i++) { |
359 | int j = best[i]; | |
360 | int fd; | |
361 | if (j<0) | |
362 | continue; | |
363 | if (!devices[j].uptodate) | |
364 | continue; | |
365 | chosen_drive = j; | |
366 | if ((fd=open(devices[j].devname, O_RDONLY))< 0) { | |
367 | fprintf(stderr, Name ": Cannot open %s: %s\n", | |
368 | devices[j].devname, strerror(errno)); | |
369 | return 1; | |
370 | } | |
371 | if (load_super(fd, &super)) { | |
372 | close(fd); | |
373 | fprintf(stderr, Name ": RAID superblock has disappeared from %s\n", | |
374 | devices[j].devname); | |
375 | return 1; | |
376 | } | |
377 | close(fd); | |
378 | } | |
379 | ||
380 | for (i=0; i<MD_SB_DISKS; i++) { | |
381 | int j = best[i]; | |
cd29a5c8 | 382 | int active_sync = (1<<MD_DISK_ACTIVE) | (1<<MD_DISK_SYNC); |
52826846 NB |
383 | if (j<0) |
384 | continue; | |
385 | if (!devices[j].uptodate) | |
386 | continue; | |
387 | if (devices[j].major != super.disks[j].major || | |
388 | devices[j].minor != super.disks[j].minor) { | |
389 | change |= 1; | |
390 | super.disks[j].major = devices[j].major; | |
391 | super.disks[j].minor = devices[j].minor; | |
392 | } | |
393 | if (devices[j].uptodate && | |
cd29a5c8 | 394 | (super.disks[i].state != active_sync)) { |
52826846 NB |
395 | if (force) { |
396 | fprintf(stderr, Name ": " | |
cd29a5c8 | 397 | "clearing FAULTY flag for device %d in %s for %s\n", |
52826846 | 398 | j, mddev, devices[j].devname); |
cd29a5c8 | 399 | super.disks[i].state = active_sync; |
52826846 NB |
400 | change |= 2; |
401 | } else { | |
402 | fprintf(stderr, Name ": " | |
403 | "device %d in %s is marked faulty in superblock, but %s seems ok\n", | |
404 | i, mddev, devices[j].devname); | |
405 | } | |
406 | } | |
407 | if (!devices[j].uptodate && | |
408 | !(super.disks[i].state & (1 << MD_DISK_FAULTY))) { | |
409 | fprintf(stderr, Name ": devices %d of %s is not marked FAULTY in superblock, but cannot be found\n", | |
410 | i, mddev); | |
411 | } | |
412 | } | |
413 | ||
414 | if ((force && (change & 2)) | |
415 | || (old_linux && (change & 1))) { | |
416 | int fd; | |
417 | super.sb_csum = calc_sb_csum(&super); | |
418 | fd = open(devices[chosen_drive].devname, O_RDWR); | |
419 | if (fd < 0) { | |
420 | fprintf(stderr, Name ": Could open %s for write - cannot Assemble array.\n", | |
421 | devices[chosen_drive].devname); | |
422 | return 1; | |
423 | } | |
424 | if (store_super(fd, &super)) { | |
425 | close(fd); | |
426 | fprintf(stderr, Name ": Could not re-write superblock on %s\n", | |
427 | devices[chosen_drive].devname); | |
428 | return 1; | |
429 | } | |
430 | close(fd); | |
431 | change = 0; | |
432 | } | |
433 | ||
64c4757e NB |
434 | /* Almost ready to actually *do* something */ |
435 | if (!old_linux) { | |
436 | if (ioctl(mdfd, SET_ARRAY_INFO, NULL) != 0) { | |
682c7051 | 437 | fprintf(stderr, Name ": SET_ARRAY_INFO failed for %s: %s\n", |
64c4757e NB |
438 | mddev, strerror(errno)); |
439 | return 1; | |
440 | } | |
52826846 NB |
441 | /* First, add the raid disks, but add the chosen one last */ |
442 | for (i=0; i<=MD_SB_DISKS; i++) { | |
443 | int j; | |
444 | if (i < MD_SB_DISKS) { | |
445 | j = best[i]; | |
446 | if (j == chosen_drive) | |
447 | continue; | |
448 | } else | |
449 | j = chosen_drive; | |
450 | ||
0db17fcb | 451 | if (j >= 0 && devices[j].uptodate) { |
64c4757e NB |
452 | mdu_disk_info_t disk; |
453 | memset(&disk, 0, sizeof(disk)); | |
454 | disk.major = devices[j].major; | |
455 | disk.minor = devices[j].minor; | |
456 | if (ioctl(mdfd, ADD_NEW_DISK, &disk)!=0) { | |
682c7051 | 457 | fprintf(stderr, Name ": failed to add %s to %s: %s\n", |
64c4757e NB |
458 | devices[j].devname, |
459 | mddev, | |
460 | strerror(errno)); | |
52826846 NB |
461 | if (i < first_super.raid_disks) |
462 | okcnt--; | |
463 | else | |
464 | sparecnt--; | |
465 | } else if (verbose) | |
466 | fprintf(stderr, Name ": added %s to %s as %d\n", | |
467 | devices[j].devname, mddev, devices[j].raid_disk); | |
468 | } else if (verbose && i < first_super.raid_disks) | |
682c7051 | 469 | fprintf(stderr, Name ": no uptodate device for slot %d of %s\n", |
64c4757e NB |
470 | i, mddev); |
471 | } | |
52826846 | 472 | |
64c4757e NB |
473 | if (runstop == 1 || |
474 | (runstop == 0 && | |
cd29a5c8 NB |
475 | ( first_super.raid_disks == okcnt |
476 | || start_partial_ok && enough(first_super.level, first_super.raid_disks, okcnt)) | |
477 | )) { | |
82b27616 | 478 | if (ioctl(mdfd, RUN_ARRAY, NULL)==0) { |
52826846 NB |
479 | fprintf(stderr, Name ": %s has been started with %d drive%s", |
480 | mddev, okcnt, okcnt==1?"":"s"); | |
481 | if (sparecnt) | |
482 | fprintf(stderr, " and %d spare%s", sparecnt, sparecnt==1?"":"s"); | |
483 | fprintf(stderr, ".\n"); | |
64c4757e | 484 | return 0; |
82b27616 | 485 | } |
682c7051 | 486 | fprintf(stderr, Name ": failed to RUN_ARRAY %s: %s\n", |
64c4757e NB |
487 | mddev, strerror(errno)); |
488 | return 1; | |
489 | } | |
82b27616 | 490 | if (runstop == -1) { |
52826846 NB |
491 | fprintf(stderr, Name ": %s assembled from %d drive%s, but not started.\n", |
492 | mddev, okcnt, okcnt==1?"":"s"); | |
64c4757e | 493 | return 0; |
82b27616 | 494 | } |
cd29a5c8 | 495 | fprintf(stderr, Name ": %s assembled from %d drive%s - not enough to start it (use --run to insist).\n", |
52826846 | 496 | mddev, okcnt, okcnt==1?"":"s"); |
64c4757e | 497 | return 1; |
82b27616 | 498 | } else { |
52826846 NB |
499 | /* The "chosen_drive" is a good choice, and if necessary, the superblock has |
500 | * been updated to point to the current locations of devices. | |
501 | * so we can just start the array | |
82b27616 | 502 | */ |
cd29a5c8 | 503 | unsigned long dev; |
82b27616 NB |
504 | dev = MKDEV(devices[chosen_drive].major, |
505 | devices[chosen_drive].minor); | |
506 | if (ioctl(mdfd, START_ARRAY, dev)) { | |
507 | fprintf(stderr, Name ": Cannot start array: %s\n", | |
508 | strerror(errno)); | |
509 | } | |
510 | ||
64c4757e NB |
511 | } |
512 | } |