]> git.ipfire.org Git - thirdparty/mdadm.git/blob - raid6check.c
Discard devnum in favour of devnm
[thirdparty/mdadm.git] / raid6check.c
1 /*
2 * raid6check - extended consistency check for RAID-6
3 *
4 * Copyright (C) 2011 Piergiorgio Sartor
5 *
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 *
21 * Author: Piergiorgio Sartor
22 * Based on "restripe.c" from "mdadm" codebase
23 */
24
#include "mdadm.h"
#include <errno.h>
#include <signal.h>
#include <stdint.h>
#include <sys/mman.h>
29
30 int geo_map(int block, unsigned long long stripe, int raid_disks,
31 int level, int layout);
32 void qsyndrome(uint8_t *p, uint8_t *q, uint8_t **sources, int disks, int size);
33 void make_tables(void);
34 void ensure_zero_has_size(int chunk_size);
35 void raid6_datap_recov(int disks, size_t bytes, int faila, uint8_t **ptrs);
36 void raid6_2data_recov(int disks, size_t bytes, int faila, int failb,
37 uint8_t **ptrs);
38 void xor_blocks(char *target, char **sources, int disks, int size);
39
40
/*
 * Collect per stripe consistency information.
 *
 * For each of the chunk_size byte positions, the stored parity bytes
 * (chunkP, chunkQ) are XORed with the freshly computed ones (p, q) and a
 * verdict is written to results[]:
 *
 *   -255     stored P and Q both match the computed values
 *   -1       only P differs
 *   -2       only Q differs
 *   0..254   both differ; the value is
 *            (raid6_gflog[Qx] - raid6_gflog[Px]) reduced modulo 255
 *
 * results must have room for chunk_size entries.
 */
void raid6_collect(int chunk_size, uint8_t *p, uint8_t *q,
		   char *chunkP, char *chunkQ, int *results)
{
	extern uint8_t raid6_gflog[];
	int pos;

	for (pos = 0; pos < chunk_size; pos++) {
		uint8_t p_delta = (uint8_t)chunkP[pos] ^ p[pos];
		uint8_t q_delta = (uint8_t)chunkQ[pos] ^ q[pos];
		int verdict;

		if (p_delta == 0 && q_delta == 0) {
			verdict = -255;
		} else if (q_delta == 0) {
			verdict = -1;
		} else if (p_delta == 0) {
			verdict = -2;
		} else {
			/* difference of the logarithms, wrapped into 0..254 */
			verdict = raid6_gflog[q_delta] - raid6_gflog[p_delta];
			if (verdict < 0)
				verdict += 255;
		}

		results[pos] = verdict;
	}
}
70
/*
 * Try to find out if a specific disk has problems.
 *
 * results holds chunk_size per-byte verdicts as produced by raid6_collect():
 * -255 means "this byte is consistent", any other value identifies the
 * suspected block (-1, -2 or a non-negative index).
 *
 * Returns:
 *   -255     every byte is consistent (also for chunk_size <= 0)
 *   -65535   the verdicts are contradictory: two different ids were seen,
 *            or an id is >= raid_disks
 *   else     the single id shared by all inconsistent bytes
 *
 * A conflict is reported as -65535 no matter where in the chunk it shows
 * up, including on the very last byte.
 *
 * NOTE(review): ids are compared against raid_disks, not the number of
 * data disks; whether that bound is intended should be confirmed against
 * the caller's use of geo_map().
 */
int raid6_stats(int *results, int raid_disks, int chunk_size)
{
	int i;
	int broken_disk = -255;

	for (i = 0; i < chunk_size; i++) {
		int id = results[i];

		if (id == -255)
			continue;		/* consistent byte, no information */

		if (id >= raid_disks)
			return -65535;		/* points at a non-existing disk */

		if (broken_disk == -255)
			broken_disk = id;	/* first suspect seen */
		else if (id != broken_disk)
			return -65535;		/* a second, different suspect */
	}

	return broken_disk;
}
109
/*
 * Prepare for touching one stripe: lock the process memory with
 * mlockall(), set SIGTERM, SIGINT and SIGQUIT to SIG_IGN (the previous
 * handlers are stored in sig[0..2], in that order) and write
 * start * chunk_size * data_disks to the "suspend_lo" sysfs attribute and
 * (start + 1) * chunk_size * data_disks to "suspend_hi".
 *
 * Returns 2 if mlockall() fails (signals and sysfs are then untouched),
 * otherwise the OR of both sysfs_set_num() results multiplied by 256,
 * which is 0 when both writes succeeded.
 */
int lock_stripe(struct mdinfo *info, unsigned long long start,
		int chunk_size, int data_disks, sighandler_t *sig)
{
	unsigned long long lo;
	unsigned long long hi;
	int status;

	if (mlockall(MCL_CURRENT | MCL_FUTURE) != 0)
		return 2;

	sig[0] = signal(SIGTERM, SIG_IGN);
	sig[1] = signal(SIGINT, SIG_IGN);
	sig[2] = signal(SIGQUIT, SIG_IGN);

	lo = start * chunk_size * data_disks;
	hi = (start + 1) * chunk_size * data_disks;

	/* both attributes are always written, even if the first one fails */
	status = sysfs_set_num(info, NULL, "suspend_lo", lo);
	status |= sysfs_set_num(info, NULL, "suspend_hi", hi);

	return status * 256;
}
125
/*
 * Undo lock_stripe(): write 0x7FFFFFFFFFFFFFFF to "suspend_lo", then 0 to
 * "suspend_hi" and 0 to "suspend_lo", reinstall the signal handlers saved
 * in sig[0..2] (SIGTERM, SIGINT, SIGQUIT) and release the memory lock.
 *
 * Returns 3 if munlockall() fails, otherwise the OR of the three
 * sysfs_set_num() results multiplied by 256 (0 on full success).
 */
int unlock_all_stripes(struct mdinfo *info, sighandler_t *sig)
{
	int status = sysfs_set_num(info, NULL, "suspend_lo", 0x7FFFFFFFFFFFFFFFULL);

	status |= sysfs_set_num(info, NULL, "suspend_hi", 0);
	status |= sysfs_set_num(info, NULL, "suspend_lo", 0);

	/* handlers are restored in the reverse order they were replaced */
	signal(SIGQUIT, sig[2]);
	signal(SIGINT, sig[1]);
	signal(SIGTERM, sig[0]);

	return (munlockall() != 0) ? 3 : status * 256;
}
140
141
/* Seek fd to byte offset pos and read exactly chunk_size bytes into buf.
 * Returns 0 on success, -1 on seek error, read error or short read. */
static int r6c_read_chunk(int fd, unsigned long long pos, char *buf,
			  int chunk_size)
{
	if (lseek64(fd, pos, SEEK_SET) < 0)
		return -1;
	return (read(fd, buf, chunk_size) == (ssize_t)chunk_size) ? 0 : -1;
}

/* Seek fd to byte offset pos and write exactly chunk_size bytes from buf.
 * Returns 0 on success, -1 on seek error, write error or short write. */
static int r6c_write_chunk(int fd, unsigned long long pos, const char *buf,
			   int chunk_size)
{
	if (lseek64(fd, pos, SEEK_SET) < 0)
		return -1;
	return (write(fd, buf, chunk_size) == (ssize_t)chunk_size) ? 0 : -1;
}

/* Rebuild one data or P chunk into target by XORing the data blocks, with
 * the block at failed_block_index replaced by the P chunk.  When
 * failed_block_index is data_disks (i.e. P itself) all data blocks are
 * used unchanged. */
static void r6c_xor_rebuild(char *target, char **blocks, char *chunkP,
			    int failed_block_index, int data_disks,
			    int chunk_size)
{
	char *sources[data_disks];
	int i;

	for (i = 0; i < data_disks; i++)
		sources[i] = (i == failed_block_index) ? chunkP : blocks[i];

	xor_blocks(target, sources, data_disks, chunk_size);
}

/* Lock the stripe, write the in-memory chunks of the given slots back to
 * their devices and unlock again.  Returns 0 on success, the lock/unlock
 * error code if locking or unlocking failed, or 1 if a write failed. */
static int r6c_write_back(struct mdinfo *info, int *source,
			  unsigned long long *offsets, char **stripes,
			  unsigned long long start, int chunk_size,
			  int data_disks, sighandler_t *sig,
			  const int *slots, int nslots)
{
	int write_failed = 0;
	int err;
	int n;

	err = lock_stripe(info, start, chunk_size, data_disks, sig);
	if (err != 0) {
		/* 2 means mlockall() failed and nothing was suspended */
		if (err != 2)
			unlock_all_stripes(info, sig);
		return err;
	}

	for (n = 0; n < nslots; n++) {
		int slot = slots[n];

		if (r6c_write_chunk(source[slot],
				    offsets[slot] + start * chunk_size,
				    stripes[slot], chunk_size) != 0)
			write_failed = 1;
	}

	err = unlock_all_stripes(info, sig);
	if (err == 0 && write_failed) {
		/* report only after the unlock call has returned */
		fprintf(stderr, "write error at stripe %llu\n", start);
		err = 1;
	}
	return err;
}

/*
 * Check (and optionally repair) `length` stripes beginning at stripe `start`.
 *
 * info        array description used for the suspend_lo/suspend_hi sysfs calls
 * source      open file descriptor per slot
 * offsets     data offset in bytes per slot
 * raid_disks  number of slots; data_disks is raid_disks - 2
 * chunk_size  chunk size in bytes
 * level,
 * layout      passed through to geo_map()
 * name        device name per slot, used for messages only
 * repair      0: check only
 *             1: rebuild slots failed_disk1 and failed_disk2 and write them
 *             2: rewrite the single slot identified as broken, if any
 *
 * For every stripe all chunks are read between lock_stripe() and
 * unlock_all_stripes(), P and Q are recomputed from the data blocks and
 * compared with the stored ones, and raid6_collect()/raid6_stats() are
 * used to name a suspect slot.
 *
 * Returns 0 on success, 1 on a read/write failure, 2 if mlockall() failed,
 * 3 if munlockall() failed, or another non-zero value propagated from
 * lock_stripe()/unlock_all_stripes().
 */
int check_stripes(struct mdinfo *info, int *source, unsigned long long *offsets,
		  int raid_disks, int chunk_size, int level, int layout,
		  unsigned long long start, unsigned long long length, char *name[],
		  int repair, int failed_disk1, int failed_disk2)
{
	/* read the data and p and q blocks, and check we got them right */
	char *stripe_buf = xmalloc(raid_disks * chunk_size);
	char **stripes = xmalloc(raid_disks * sizeof(char*));
	char **blocks = xmalloc(raid_disks * sizeof(char*));
	int *block_index_for_slot = xmalloc(raid_disks * sizeof(int));
	uint8_t *p = xmalloc(chunk_size);
	uint8_t *q = xmalloc(chunk_size);
	int *results = xmalloc(chunk_size * sizeof(int));
	sighandler_t *sig = xmalloc(3 * sizeof(sighandler_t));

	int i;
	int diskP, diskQ;
	int data_disks = raid_disks - 2;
	int err = 0;

	extern int tables_ready;

	if (!tables_ready)
		make_tables();

	for (i = 0; i < raid_disks; i++)
		stripes[i] = stripe_buf + i * chunk_size;

	while (length > 0) {
		int disk;
		int bad_read_slot = -1;

		printf("pos --> %llu\n", start);

		err = lock_stripe(info, start, chunk_size, data_disks, sig);
		if (err != 0) {
			if (err != 2)
				unlock_all_stripes(info, sig);
			goto exitCheck;
		}
		for (i = 0; i < raid_disks; i++) {
			if (r6c_read_chunk(source[i],
					   offsets[i] + start * chunk_size,
					   stripes[i], chunk_size) != 0) {
				bad_read_slot = i;
				break;
			}
		}
		err = unlock_all_stripes(info, sig);
		if (err == 0 && bad_read_slot >= 0) {
			/* never analyse (or repair from) a partially read stripe */
			fprintf(stderr, "read error on slot %d (%s) at stripe %llu\n",
				bad_read_slot, name[bad_read_slot], start);
			err = 1;
		}
		if (err != 0)
			goto exitCheck;

		for (i = 0; i < data_disks; i++) {
			int slot = geo_map(i, start, raid_disks, level, layout);

			blocks[i] = stripes[slot];
			block_index_for_slot[slot] = i;
			printf("%d->%d\n", i, slot);
		}

		qsyndrome(p, q, (uint8_t**)blocks, data_disks, chunk_size);
		diskP = geo_map(-1, start, raid_disks, level, layout);
		diskQ = geo_map(-2, start, raid_disks, level, layout);
		blocks[data_disks] = stripes[diskP];
		block_index_for_slot[diskP] = data_disks;
		blocks[data_disks+1] = stripes[diskQ];
		block_index_for_slot[diskQ] = data_disks+1;

		if (memcmp(p, stripes[diskP], chunk_size) != 0) {
			printf("P(%d) wrong at %llu\n", diskP, start);
		}
		if (memcmp(q, stripes[diskQ], chunk_size) != 0) {
			printf("Q(%d) wrong at %llu\n", diskQ, start);
		}
		raid6_collect(chunk_size, p, q, stripes[diskP], stripes[diskQ], results);
		disk = raid6_stats(results, raid_disks, chunk_size);

		/* translate a block id (data index, -1 = P, -2 = Q) into a slot */
		if (disk >= -2) {
			disk = geo_map(disk, start, raid_disks, level, layout);
		}
		if (disk >= 0) {
			printf("Error detected at %llu: possible failed disk slot: %d --> %s\n",
			       start, disk, name[disk]);
		}
		if (disk == -65535) {
			printf("Error detected at %llu: disk slot unknown\n", start);
		}

		if (repair == 1) {
			int slots[2];

			slots[0] = failed_disk1;
			slots[1] = failed_disk2;

			printf("Repairing stripe %llu\n", start);
			printf("Assuming slots %d (%s) and %d (%s) are incorrect\n",
			       failed_disk1, name[failed_disk1],
			       failed_disk2, name[failed_disk2]);

			if (failed_disk1 == diskQ || failed_disk2 == diskQ) {
				int failed_data_or_p;

				if (failed_disk1 == diskQ)
					failed_data_or_p = failed_disk2;
				else
					failed_data_or_p = failed_disk1;
				printf("Repairing D/P(%d) and Q\n", failed_data_or_p);
				r6c_xor_rebuild(stripes[failed_data_or_p], blocks,
						stripes[diskP],
						block_index_for_slot[failed_data_or_p],
						data_disks, chunk_size);
				/* Q is recomputed from the now repaired data blocks */
				qsyndrome(p, (uint8_t*)stripes[diskQ], (uint8_t**)blocks, data_disks, chunk_size);
			} else {
				ensure_zero_has_size(chunk_size);
				if (failed_disk1 == diskP || failed_disk2 == diskP) {
					int failed_data, failed_block_index;

					if (failed_disk1 == diskP)
						failed_data = failed_disk2;
					else
						failed_data = failed_disk1;
					failed_block_index = block_index_for_slot[failed_data];
					printf("Repairing D(%d) and P\n", failed_data);
					raid6_datap_recov(raid_disks, chunk_size, failed_block_index, (uint8_t**)blocks);
				} else {
					int failed_block_index1;
					int failed_block_index2;

					printf("Repairing D and D\n");
					failed_block_index1 = block_index_for_slot[failed_disk1];
					failed_block_index2 = block_index_for_slot[failed_disk2];
					/* pass the two indexes in ascending order */
					if (failed_block_index1 > failed_block_index2) {
						int t = failed_block_index1;

						failed_block_index1 = failed_block_index2;
						failed_block_index2 = t;
					}
					raid6_2data_recov(raid_disks, chunk_size, failed_block_index1, failed_block_index2, (uint8_t**)blocks);
				}
			}

			err = r6c_write_back(info, source, offsets, stripes, start,
					     chunk_size, data_disks, sig, slots, 2);
			if (err != 0)
				goto exitCheck;
		} else if (disk >= 0 && repair == 2) {
			printf("Auto-repairing slot %d (%s)\n", disk, name[disk]);
			if (disk == diskQ) {
				qsyndrome(p, (uint8_t*)stripes[diskQ], (uint8_t**)blocks, data_disks, chunk_size);
			} else {
				r6c_xor_rebuild(stripes[disk], blocks, stripes[diskP],
						block_index_for_slot[disk],
						data_disks, chunk_size);
			}

			err = r6c_write_back(info, source, offsets, stripes, start,
					     chunk_size, data_disks, sig, &disk, 1);
			if (err != 0)
				goto exitCheck;
		}

		length--;
		start++;
	}

exitCheck:

	free(stripe_buf);
	free(stripes);
	free(blocks);
	free(block_index_for_slot);
	free(p);
	free(q);
	free(results);
	free(sig);

	return err;
}
334
/*
 * Parse str as a non-negative decimal number.
 *
 * On success the value is returned and *err is left untouched.  If str is
 * empty, has trailing characters, does not fit in an unsigned long long, or
 * is negative, *err is set to str and 0 is returned.  *err is never
 * cleared, so a caller can parse several arguments and test it once.
 */
unsigned long long getnum(char *str, char **err)
{
	char *end;
	unsigned long long value;

	errno = 0;
	value = strtoull(str, &end, 10);

	/*
	 * strtoull() accepts a leading '-' and returns the negated value as
	 * an unsigned number; a '-' anywhere else already fails the
	 * trailing-character test, so rejecting any '-' only rejects
	 * negative input.
	 */
	if (end == str || *end != '\0' || errno == ERANGE ||
	    strchr(str, '-') != NULL) {
		*err = str;
		return 0;
	}
	return value;
}
345
346 int main(int argc, char *argv[])
347 {
348 /* md_device start length */
349 int *fds = NULL;
350 char *buf = NULL;
351 char **disk_name = NULL;
352 unsigned long long *offsets = NULL;
353 int raid_disks = 0;
354 int active_disks;
355 int chunk_size = 0;
356 int layout = -1;
357 int level = 6;
358 int repair = 0;
359 int failed_disk1, failed_disk2;
360 unsigned long long start, length;
361 int i;
362 int mdfd;
363 struct mdinfo *info = NULL, *comp = NULL;
364 char *err = NULL;
365 int exit_err = 0;
366 int close_flag = 0;
367 char *prg = strrchr(argv[0], '/');
368
369 if (prg == NULL)
370 prg = argv[0];
371 else
372 prg++;
373
374 if (argc < 4) {
375 fprintf(stderr, "Usage: %s md_device start_stripe length_stripes [autorepair]\n", prg);
376 fprintf(stderr, " or: %s md_device repair stripe failed_slot_1 failed_slot_2\n", prg);
377 exit_err = 1;
378 goto exitHere;
379 }
380
381 mdfd = open(argv[1], O_RDONLY);
382 if(mdfd < 0) {
383 perror(argv[1]);
384 fprintf(stderr, "%s: cannot open %s\n", prg, argv[1]);
385 exit_err = 2;
386 goto exitHere;
387 }
388
389 info = sysfs_read(mdfd, -1,
390 GET_LEVEL|
391 GET_LAYOUT|
392 GET_DISKS|
393 GET_DEGRADED |
394 GET_COMPONENT|
395 GET_CHUNK|
396 GET_DEVS|
397 GET_OFFSET|
398 GET_SIZE);
399
400 if(info == NULL) {
401 fprintf(stderr, "%s: Error reading sysfs information of %s\n", prg, argv[1]);
402 exit_err = 9;
403 goto exitHere;
404 }
405
406 if(info->array.level != level) {
407 fprintf(stderr, "%s: %s not a RAID-6\n", prg, argv[1]);
408 exit_err = 3;
409 goto exitHere;
410 }
411
412 if(info->array.failed_disks > 0) {
413 fprintf(stderr, "%s: %s degraded array\n", prg, argv[1]);
414 exit_err = 8;
415 goto exitHere;
416 }
417
418 printf("layout: %d\n", info->array.layout);
419 printf("disks: %d\n", info->array.raid_disks);
420 printf("component size: %llu\n", info->component_size * 512);
421 printf("total stripes: %llu\n", (info->component_size * 512) / info->array.chunk_size);
422 printf("chunk size: %d\n", info->array.chunk_size);
423 printf("\n");
424
425 comp = info->devs;
426 for(i = 0, active_disks = 0; active_disks < info->array.raid_disks; i++) {
427 printf("disk: %d - offset: %llu - size: %llu - name: %s - slot: %d\n",
428 i, comp->data_offset * 512, comp->component_size * 512,
429 map_dev(comp->disk.major, comp->disk.minor, 0),
430 comp->disk.raid_disk);
431 if(comp->disk.raid_disk >= 0)
432 active_disks++;
433 comp = comp->next;
434 }
435 printf("\n");
436
437 close(mdfd);
438
439 raid_disks = info->array.raid_disks;
440 chunk_size = info->array.chunk_size;
441 layout = info->array.layout;
442 if (strcmp(argv[2], "repair")==0) {
443 if (argc < 6) {
444 fprintf(stderr, "For repair mode, call %s md_device repair stripe failed_slot_1 failed_slot_2\n", prg);
445 exit_err = 1;
446 goto exitHere;
447 }
448 repair = 1;
449 start = getnum(argv[3], &err);
450 length = 1;
451 failed_disk1 = getnum(argv[4], &err);
452 failed_disk2 = getnum(argv[5], &err);
453
454 if(failed_disk1 >= info->array.raid_disks) {
455 fprintf(stderr, "%s: failed_slot_1 index is higher than number of devices in raid\n", prg);
456 exit_err = 4;
457 goto exitHere;
458 }
459 if(failed_disk2 >= info->array.raid_disks) {
460 fprintf(stderr, "%s: failed_slot_2 index is higher than number of devices in raid\n", prg);
461 exit_err = 4;
462 goto exitHere;
463 }
464 if(failed_disk1 == failed_disk2) {
465 fprintf(stderr, "%s: failed_slot_1 and failed_slot_2 are the same\n", prg);
466 exit_err = 4;
467 goto exitHere;
468 }
469 }
470 else {
471 start = getnum(argv[2], &err);
472 length = getnum(argv[3], &err);
473 if (argc >= 5 && strcmp(argv[4], "autorepair")==0)
474 repair = 2;
475 }
476
477 if (err) {
478 fprintf(stderr, "%s: Bad number: %s\n", prg, err);
479 exit_err = 4;
480 goto exitHere;
481 }
482
483 if(start > ((info->component_size * 512) / chunk_size)) {
484 start = (info->component_size * 512) / chunk_size;
485 fprintf(stderr, "%s: start beyond disks size\n", prg);
486 }
487
488 if((length == 0) ||
489 ((length + start) > ((info->component_size * 512) / chunk_size))) {
490 length = (info->component_size * 512) / chunk_size - start;
491 }
492
493 disk_name = xmalloc(raid_disks * sizeof(*disk_name));
494 fds = xmalloc(raid_disks * sizeof(*fds));
495 offsets = xcalloc(raid_disks, sizeof(*offsets));
496 buf = xmalloc(raid_disks * chunk_size);
497
498 for(i=0; i<raid_disks; i++) {
499 fds[i] = -1;
500 }
501 close_flag = 1;
502
503 comp = info->devs;
504 for (i=0, active_disks=0; active_disks<raid_disks; i++) {
505 int disk_slot = comp->disk.raid_disk;
506 if(disk_slot >= 0) {
507 disk_name[disk_slot] = map_dev(comp->disk.major, comp->disk.minor, 0);
508 offsets[disk_slot] = comp->data_offset * 512;
509 fds[disk_slot] = open(disk_name[disk_slot], O_RDWR);
510 if (fds[disk_slot] < 0) {
511 perror(disk_name[disk_slot]);
512 fprintf(stderr,"%s: cannot open %s\n", prg, disk_name[disk_slot]);
513 exit_err = 6;
514 goto exitHere;
515 }
516 active_disks++;
517 }
518 comp = comp->next;
519 }
520
521 int rv = check_stripes(info, fds, offsets,
522 raid_disks, chunk_size, level, layout,
523 start, length, disk_name, repair, failed_disk1, failed_disk2);
524 if (rv != 0) {
525 fprintf(stderr,
526 "%s: check_stripes returned %d\n", prg, rv);
527 exit_err = 7;
528 goto exitHere;
529 }
530
531 exitHere:
532
533 if (close_flag)
534 for(i = 0; i < raid_disks; i++)
535 close(fds[i]);
536
537 free(disk_name);
538 free(fds);
539 free(offsets);
540 free(buf);
541
542 exit(exit_err);
543 }