]> git.ipfire.org Git - thirdparty/mdadm.git/blob - raid6check.c
raid6check.c: reduce verbosity
[thirdparty/mdadm.git] / raid6check.c
1 /*
2 * raid6check - extended consistency check for RAID-6
3 *
4 * Copyright (C) 2011 Piergiorgio Sartor
5 *
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 *
21 * Author: Piergiorgio Sartor
22 * Based on "restripe.c" from "mdadm" codebase
23 */
24
#include "mdadm.h"
#include <errno.h>
#include <signal.h>
#include <stdint.h>
#include <sys/mman.h>
29
/*
 * Consistency results are evaluated in pages of CHECK_PAGE_SIZE bytes;
 * CHECK_PAGE_BITS is the base-2 logarithm of that page size.
 */
#define CHECK_PAGE_BITS 12
#define CHECK_PAGE_SIZE (1 << (CHECK_PAGE_BITS))
32
/* What to do with a stripe once it has been checked. */
enum repair {
	NO_REPAIR     = 0,	/* only report inconsistencies */
	MANUAL_REPAIR = 1,	/* rebuild the two slots named by the caller */
	AUTO_REPAIR   = 2	/* rebuild pages whose failed slot was identified */
};
38
/*
 * Helpers that are defined outside this file (presumably in restripe.c,
 * which this tool is based on -- confirm against the build).
 */
int geo_map(int block, unsigned long long stripe,
	    int raid_disks, int level, int layout);
void qsyndrome(uint8_t *p, uint8_t *q, uint8_t **sources,
	       int disks, int size);
void make_tables(void);
void ensure_zero_has_size(int chunk_size);
void raid6_datap_recov(int disks, size_t bytes, int faila,
		       uint8_t **ptrs);
void raid6_2data_recov(int disks, size_t bytes, int faila,
		       int failb, uint8_t **ptrs);
void xor_blocks(char *target, char **sources, int disks, int size);
48
/*
 * Collect per stripe consistency information.
 *
 * For every byte position of the chunk, the computed parities (p, q) are
 * compared with the ones read from disk (chunkP, chunkQ) and a verdict is
 * stored in results[]:
 *   -255  both P and Q match
 *   -1    only P differs
 *   -2    only Q differs
 *   0..254  both differ; the value is the difference of the raid6_gflog
 *           entries of the Q and P deltas, taken modulo 255
 */
void raid6_collect(int chunk_size, uint8_t *p, uint8_t *q,
		   char *chunkP, char *chunkQ, int *results)
{
	extern uint8_t raid6_gflog[];
	int pos;

	for (pos = 0; pos < chunk_size; pos++) {
		uint8_t p_delta = (uint8_t)chunkP[pos] ^ p[pos];
		uint8_t q_delta = (uint8_t)chunkQ[pos] ^ q[pos];
		int verdict;

		if (p_delta == 0 && q_delta == 0) {
			verdict = -255;
		} else if (q_delta == 0) {
			verdict = -1;
		} else if (p_delta == 0) {
			verdict = -2;
		} else {
			verdict = raid6_gflog[q_delta] - raid6_gflog[p_delta];
			if (verdict < 0)
				verdict += 255;
		}

		results[pos] = verdict;
	}
}
78
79 /* Try to find out if a specific disk has problems in a CHECK_PAGE_SIZE page size */
80 int raid6_stats_blk(int *results, int raid_disks)
81 {
82 int i;
83 int curr_broken_disk = -255;
84 int prev_broken_disk = -255;
85 int broken_status = 0;
86
87 for(i = 0; i < CHECK_PAGE_SIZE; i++) {
88
89 if(results[i] != -255)
90 curr_broken_disk = results[i];
91
92 if(curr_broken_disk >= raid_disks)
93 broken_status = 2;
94
95 switch(broken_status) {
96 case 0:
97 if(curr_broken_disk != -255) {
98 prev_broken_disk = curr_broken_disk;
99 broken_status = 1;
100 }
101 break;
102
103 case 1:
104 if(curr_broken_disk != prev_broken_disk)
105 broken_status = 2;
106 break;
107
108 case 2:
109 default:
110 curr_broken_disk = prev_broken_disk = -65535;
111 break;
112 }
113 }
114
115 return curr_broken_disk;
116 }
117
118 /* Collect disks status for a strip in CHECK_PAGE_SIZE page size blocks */
119 void raid6_stats(int *disk, int *results, int raid_disks, int chunk_size)
120 {
121 int i, j;
122
123 for(i = 0, j = 0; i < chunk_size; i += CHECK_PAGE_SIZE, j++) {
124 disk[j] = raid6_stats_blk(&results[i], raid_disks);
125 }
126 }
127
128 int lock_stripe(struct mdinfo *info, unsigned long long start,
129 int chunk_size, int data_disks, sighandler_t *sig) {
130 int rv;
131 if(mlockall(MCL_CURRENT | MCL_FUTURE) != 0) {
132 return 2;
133 }
134
135 sig[0] = signal(SIGTERM, SIG_IGN);
136 sig[1] = signal(SIGINT, SIG_IGN);
137 sig[2] = signal(SIGQUIT, SIG_IGN);
138
139 rv = sysfs_set_num(info, NULL, "suspend_lo", start * chunk_size * data_disks);
140 rv |= sysfs_set_num(info, NULL, "suspend_hi", (start + 1) * chunk_size * data_disks);
141 return rv * 256;
142 }
143
144 int unlock_all_stripes(struct mdinfo *info, sighandler_t *sig) {
145 int rv;
146 rv = sysfs_set_num(info, NULL, "suspend_lo", 0x7FFFFFFFFFFFFFFFULL);
147 rv |= sysfs_set_num(info, NULL, "suspend_hi", 0);
148 rv |= sysfs_set_num(info, NULL, "suspend_lo", 0);
149
150 signal(SIGQUIT, sig[2]);
151 signal(SIGINT, sig[1]);
152 signal(SIGTERM, sig[0]);
153
154 if(munlockall() != 0)
155 return 3;
156 return rv * 256;
157 }
158
159 int check_stripes(struct mdinfo *info, int *source, unsigned long long *offsets,
160 int raid_disks, int chunk_size, int level, int layout,
161 unsigned long long start, unsigned long long length, char *name[],
162 enum repair repair, int failed_disk1, int failed_disk2)
163 {
164 /* read the data and p and q blocks, and check we got them right */
165 char *stripe_buf = xmalloc(raid_disks * chunk_size);
166 char **stripes = xmalloc(raid_disks * sizeof(char*));
167 char **blocks = xmalloc(raid_disks * sizeof(char*));
168 char **blocks_page = xmalloc(raid_disks * sizeof(char*));
169 int *block_index_for_slot = xmalloc(raid_disks * sizeof(int));
170 uint8_t *p = xmalloc(chunk_size);
171 uint8_t *q = xmalloc(chunk_size);
172 int *results = xmalloc(chunk_size * sizeof(int));
173 sighandler_t *sig = xmalloc(3 * sizeof(sighandler_t));
174
175 int i, j;
176 int diskP, diskQ;
177 int data_disks = raid_disks - 2;
178 int err = 0;
179
180 extern int tables_ready;
181
182 if (!tables_ready)
183 make_tables();
184
185 for ( i = 0 ; i < raid_disks ; i++)
186 stripes[i] = stripe_buf + i * chunk_size;
187
188 while (length > 0) {
189 int disk[chunk_size >> CHECK_PAGE_BITS];
190
191 err = lock_stripe(info, start, chunk_size, data_disks, sig);
192 if(err != 0) {
193 if (err != 2)
194 unlock_all_stripes(info, sig);
195 goto exitCheck;
196 }
197 for (i = 0 ; i < raid_disks ; i++) {
198 off64_t seek_res = lseek64(source[i], offsets[i] + start * chunk_size,
199 SEEK_SET);
200 if (seek_res < 0) {
201 fprintf(stderr, "lseek to source %d failed\n", i);
202 unlock_all_stripes(info, sig);
203 err = -1;
204 goto exitCheck;
205 }
206 int read_res = read(source[i], stripes[i], chunk_size);
207 if (read_res < chunk_size) {
208 fprintf(stderr, "Failed to read complete chunk disk %d, aborting\n", i);
209 unlock_all_stripes(info, sig);
210 err = -1;
211 goto exitCheck;
212 }
213 }
214 err = unlock_all_stripes(info, sig);
215 if(err != 0)
216 goto exitCheck;
217
218 for (i = 0 ; i < data_disks ; i++) {
219 int disk = geo_map(i, start, raid_disks, level, layout);
220 blocks[i] = stripes[disk];
221 block_index_for_slot[disk] = i;
222 }
223
224 qsyndrome(p, q, (uint8_t**)blocks, data_disks, chunk_size);
225 diskP = geo_map(-1, start, raid_disks, level, layout);
226 diskQ = geo_map(-2, start, raid_disks, level, layout);
227 blocks[data_disks] = stripes[diskP];
228 block_index_for_slot[diskP] = data_disks;
229 blocks[data_disks+1] = stripes[diskQ];
230 block_index_for_slot[diskQ] = data_disks+1;
231 /* Do we really need the code below? */
232 #if 0
233 if (memcmp(p, stripes[diskP], chunk_size) != 0) {
234 printf("P(%d) wrong at %llu\n", diskP, start);
235 }
236 if (memcmp(q, stripes[diskQ], chunk_size) != 0) {
237 printf("Q(%d) wrong at %llu\n", diskQ, start);
238 }
239 #endif
240 raid6_collect(chunk_size, p, q, stripes[diskP], stripes[diskQ], results);
241 raid6_stats(disk, results, raid_disks, chunk_size);
242
243 for(j = 0; j < (chunk_size >> CHECK_PAGE_BITS); j++) {
244 if(disk[j] >= -2) {
245 disk[j] = geo_map(disk[j], start, raid_disks, level, layout);
246 }
247 if(disk[j] >= 0) {
248 printf("Error detected at %llu, page %d: possible failed disk slot: %d --> %s\n",
249 start, j, disk[j], name[disk[j]]);
250 }
251 if(disk[j] == -65535) {
252 printf("Error detected at %llu, page %d: disk slot unknown\n", start, j);
253 }
254 }
255
256 if(repair == MANUAL_REPAIR) {
257 printf("Repairing stripe %llu\n", start);
258 printf("Assuming slots %d (%s) and %d (%s) are incorrect\n",
259 failed_disk1, name[failed_disk1],
260 failed_disk2, name[failed_disk2]);
261
262 if (failed_disk1 == diskQ || failed_disk2 == diskQ) {
263 char *all_but_failed_blocks[data_disks];
264 int failed_data_or_p;
265 int failed_block_index;
266
267 if (failed_disk1 == diskQ)
268 failed_data_or_p = failed_disk2;
269 else
270 failed_data_or_p = failed_disk1;
271 printf("Repairing D/P(%d) and Q\n", failed_data_or_p);
272 failed_block_index = block_index_for_slot[failed_data_or_p];
273 for (i=0; i < data_disks; i++)
274 if (failed_block_index == i)
275 all_but_failed_blocks[i] = stripes[diskP];
276 else
277 all_but_failed_blocks[i] = blocks[i];
278 xor_blocks(stripes[failed_data_or_p],
279 all_but_failed_blocks, data_disks, chunk_size);
280 qsyndrome(p, (uint8_t*)stripes[diskQ], (uint8_t**)blocks, data_disks, chunk_size);
281 } else {
282 ensure_zero_has_size(chunk_size);
283 if (failed_disk1 == diskP || failed_disk2 == diskP) {
284 int failed_data, failed_block_index;
285 if (failed_disk1 == diskP)
286 failed_data = failed_disk2;
287 else
288 failed_data = failed_disk1;
289 failed_block_index = block_index_for_slot[failed_data];
290 printf("Repairing D(%d) and P\n", failed_data);
291 raid6_datap_recov(raid_disks, chunk_size, failed_block_index, (uint8_t**)blocks);
292 } else {
293 printf("Repairing D and D\n");
294 int failed_block_index1 = block_index_for_slot[failed_disk1];
295 int failed_block_index2 = block_index_for_slot[failed_disk2];
296 if (failed_block_index1 > failed_block_index2) {
297 int t = failed_block_index1;
298 failed_block_index1 = failed_block_index2;
299 failed_block_index2 = t;
300 }
301 raid6_2data_recov(raid_disks, chunk_size, failed_block_index1, failed_block_index2, (uint8_t**)blocks);
302 }
303 }
304
305 err = lock_stripe(info, start, chunk_size, data_disks, sig);
306 if(err != 0) {
307 if (err != 2)
308 unlock_all_stripes(info, sig);
309 goto exitCheck;
310 }
311
312 int write_res1, write_res2;
313 off64_t seek_res;
314
315 seek_res = lseek64(source[failed_disk1],
316 offsets[failed_disk1] + start * chunk_size, SEEK_SET);
317 if (seek_res < 0) {
318 fprintf(stderr, "lseek failed for failed_disk1\n");
319 unlock_all_stripes(info, sig);
320 err = -1;
321 goto exitCheck;
322 }
323 write_res1 = write(source[failed_disk1], stripes[failed_disk1], chunk_size);
324
325 seek_res = lseek64(source[failed_disk2],
326 offsets[failed_disk2] + start * chunk_size, SEEK_SET);
327 if (seek_res < 0) {
328 fprintf(stderr, "lseek failed for failed_disk1\n");
329 unlock_all_stripes(info, sig);
330 err = -1;
331 goto exitCheck;
332 }
333 write_res2 = write(source[failed_disk2], stripes[failed_disk2], chunk_size);
334
335 err = unlock_all_stripes(info, sig);
336 if(err != 0)
337 goto exitCheck;
338
339 if (write_res1 != chunk_size || write_res2 != chunk_size) {
340 fprintf(stderr, "Failed to write a complete chunk.\n");
341 goto exitCheck;
342 }
343
344 }
345
346 int pages_to_write_count = 0;
347 int page_to_write[chunk_size >> CHECK_PAGE_BITS];
348 for(j = 0; j < (chunk_size >> CHECK_PAGE_BITS); j++) {
349 if (disk[j] >= 0 && repair == AUTO_REPAIR) {
350 printf("Auto-repairing slot %d (%s)\n", disk[j], name[disk[j]]);
351 pages_to_write_count++;
352 page_to_write[j] = 1;
353 for(i = 0; i < raid_disks; i++) {
354 blocks_page[i] = blocks[i] + j * CHECK_PAGE_SIZE;
355 }
356 if (disk[j] == diskQ) {
357 qsyndrome(p, (uint8_t*)stripes[diskQ], (uint8_t**)blocks_page, data_disks, CHECK_PAGE_SIZE);
358 } else {
359 char *all_but_failed_blocks[data_disks];
360 int failed_block_index = block_index_for_slot[disk[j]];
361 for (i=0; i < data_disks; i++)
362 if (failed_block_index == i)
363 all_but_failed_blocks[i] = stripes[diskP] + j * CHECK_PAGE_SIZE;
364 else
365 all_but_failed_blocks[i] = blocks_page[i];
366 xor_blocks(stripes[disk[j]] + j * CHECK_PAGE_SIZE,
367 all_but_failed_blocks, data_disks, CHECK_PAGE_SIZE);
368 }
369 } else {
370 page_to_write[j] = 0;
371 }
372 }
373
374 if(pages_to_write_count > 0) {
375
376 err = lock_stripe(info, start, chunk_size, data_disks, sig);
377 if(err != 0) {
378 if (err != 2)
379 unlock_all_stripes(info, sig);
380 goto exitCheck;
381 }
382
383 int write_res = 0;
384 for(j = 0; j < (chunk_size >> CHECK_PAGE_BITS); j++) {
385 if(page_to_write[j] == 1) {
386 lseek64(source[disk[j]], offsets[disk[j]] + start * chunk_size + j * CHECK_PAGE_SIZE, 0);
387 write_res += write(source[disk[j]], stripes[disk[j]] + j * CHECK_PAGE_SIZE, CHECK_PAGE_SIZE);
388 }
389 }
390
391 err = unlock_all_stripes(info, sig);
392 if (err != 0 || write_res != (CHECK_PAGE_SIZE * pages_to_write_count))
393 goto exitCheck;
394
395 if (write_res != (CHECK_PAGE_SIZE * pages_to_write_count)) {
396 fprintf(stderr, "Failed to write a full chunk.\n");
397 goto exitCheck;
398 }
399 }
400
401 length--;
402 start++;
403 }
404
405 exitCheck:
406
407 free(stripe_buf);
408 free(stripes);
409 free(blocks);
410 free(blocks_page);
411 free(block_index_for_slot);
412 free(p);
413 free(q);
414 free(results);
415 free(sig);
416
417 return err;
418 }
419
/*
 * Parse str as a non-negative decimal number.
 *
 * Returns the parsed value.  If str is empty, has trailing characters,
 * carries a minus sign or does not fit in an unsigned long long, *err is
 * set to str and 0 is returned; *err is left untouched on success so that
 * several calls can share one error pointer.
 */
unsigned long long getnum(char *str, char **err)
{
	char *end;
	unsigned long long value;

	errno = 0;
	value = strtoull(str, &end, 10);

	/*
	 * strtoull() accepts "-1" and returns the negated value wrapped to a
	 * huge number, and reports overflow only through errno; neither is a
	 * usable stripe or slot number.
	 */
	if (end == str || *end != '\0' || errno == ERANGE ||
	    strchr(str, '-') != NULL) {
		*err = str;
		return 0;
	}
	return value;
}
430
431 int main(int argc, char *argv[])
432 {
433 /* md_device start length */
434 int *fds = NULL;
435 char *buf = NULL;
436 char **disk_name = NULL;
437 unsigned long long *offsets = NULL;
438 int raid_disks = 0;
439 int active_disks;
440 int chunk_size = 0;
441 int layout = -1;
442 int level = 6;
443 enum repair repair = NO_REPAIR;
444 int failed_disk1 = -1;
445 int failed_disk2 = -1;
446 unsigned long long start, length;
447 int i;
448 int mdfd;
449 struct mdinfo *info = NULL, *comp = NULL;
450 char *err = NULL;
451 int exit_err = 0;
452 int close_flag = 0;
453 char *prg = strrchr(argv[0], '/');
454
455 if (prg == NULL)
456 prg = argv[0];
457 else
458 prg++;
459
460 if (argc < 4) {
461 fprintf(stderr, "Usage: %s md_device start_stripe length_stripes [autorepair]\n", prg);
462 fprintf(stderr, " or: %s md_device repair stripe failed_slot_1 failed_slot_2\n", prg);
463 exit_err = 1;
464 goto exitHere;
465 }
466
467 mdfd = open(argv[1], O_RDONLY);
468 if(mdfd < 0) {
469 perror(argv[1]);
470 fprintf(stderr, "%s: cannot open %s\n", prg, argv[1]);
471 exit_err = 2;
472 goto exitHere;
473 }
474
475 info = sysfs_read(mdfd, NULL,
476 GET_LEVEL|
477 GET_LAYOUT|
478 GET_DISKS|
479 GET_DEGRADED |
480 GET_COMPONENT|
481 GET_CHUNK|
482 GET_DEVS|
483 GET_OFFSET|
484 GET_SIZE);
485
486 if(info == NULL) {
487 fprintf(stderr, "%s: Error reading sysfs information of %s\n", prg, argv[1]);
488 exit_err = 9;
489 goto exitHere;
490 }
491
492 if(info->array.level != level) {
493 fprintf(stderr, "%s: %s not a RAID-6\n", prg, argv[1]);
494 exit_err = 3;
495 goto exitHere;
496 }
497
498 if(info->array.failed_disks > 0) {
499 fprintf(stderr, "%s: %s degraded array\n", prg, argv[1]);
500 exit_err = 8;
501 goto exitHere;
502 }
503
504 printf("layout: %d\n", info->array.layout);
505 printf("disks: %d\n", info->array.raid_disks);
506 printf("component size: %llu\n", info->component_size * 512);
507 printf("total stripes: %llu\n", (info->component_size * 512) / info->array.chunk_size);
508 printf("chunk size: %d\n", info->array.chunk_size);
509 printf("\n");
510
511 comp = info->devs;
512 for(i = 0, active_disks = 0; active_disks < info->array.raid_disks; i++) {
513 printf("disk: %d - offset: %llu - size: %llu - name: %s - slot: %d\n",
514 i, comp->data_offset * 512, comp->component_size * 512,
515 map_dev(comp->disk.major, comp->disk.minor, 0),
516 comp->disk.raid_disk);
517 if(comp->disk.raid_disk >= 0)
518 active_disks++;
519 comp = comp->next;
520 }
521 printf("\n");
522
523 close(mdfd);
524
525 raid_disks = info->array.raid_disks;
526 chunk_size = info->array.chunk_size;
527 layout = info->array.layout;
528 if (strcmp(argv[2], "repair")==0) {
529 if (argc < 6) {
530 fprintf(stderr, "For repair mode, call %s md_device repair stripe failed_slot_1 failed_slot_2\n", prg);
531 exit_err = 1;
532 goto exitHere;
533 }
534 repair = MANUAL_REPAIR;
535 start = getnum(argv[3], &err);
536 length = 1;
537 failed_disk1 = getnum(argv[4], &err);
538 failed_disk2 = getnum(argv[5], &err);
539
540 if(failed_disk1 >= info->array.raid_disks) {
541 fprintf(stderr, "%s: failed_slot_1 index is higher than number of devices in raid\n", prg);
542 exit_err = 4;
543 goto exitHere;
544 }
545 if(failed_disk2 >= info->array.raid_disks) {
546 fprintf(stderr, "%s: failed_slot_2 index is higher than number of devices in raid\n", prg);
547 exit_err = 4;
548 goto exitHere;
549 }
550 if(failed_disk1 == failed_disk2) {
551 fprintf(stderr, "%s: failed_slot_1 and failed_slot_2 are the same\n", prg);
552 exit_err = 4;
553 goto exitHere;
554 }
555 }
556 else {
557 start = getnum(argv[2], &err);
558 length = getnum(argv[3], &err);
559 if (argc >= 5 && strcmp(argv[4], "autorepair")==0)
560 repair = AUTO_REPAIR;
561 }
562
563 if (err) {
564 fprintf(stderr, "%s: Bad number: %s\n", prg, err);
565 exit_err = 4;
566 goto exitHere;
567 }
568
569 if(start > ((info->component_size * 512) / chunk_size)) {
570 start = (info->component_size * 512) / chunk_size;
571 fprintf(stderr, "%s: start beyond disks size\n", prg);
572 }
573
574 if((length == 0) ||
575 ((length + start) > ((info->component_size * 512) / chunk_size))) {
576 length = (info->component_size * 512) / chunk_size - start;
577 }
578
579 disk_name = xmalloc(raid_disks * sizeof(*disk_name));
580 fds = xmalloc(raid_disks * sizeof(*fds));
581 offsets = xcalloc(raid_disks, sizeof(*offsets));
582 buf = xmalloc(raid_disks * chunk_size);
583
584 for(i=0; i<raid_disks; i++) {
585 fds[i] = -1;
586 }
587 close_flag = 1;
588
589 comp = info->devs;
590 for (i=0, active_disks=0; active_disks<raid_disks; i++) {
591 int disk_slot = comp->disk.raid_disk;
592 if(disk_slot >= 0) {
593 disk_name[disk_slot] = map_dev(comp->disk.major, comp->disk.minor, 0);
594 offsets[disk_slot] = comp->data_offset * 512;
595 fds[disk_slot] = open(disk_name[disk_slot], O_RDWR);
596 if (fds[disk_slot] < 0) {
597 perror(disk_name[disk_slot]);
598 fprintf(stderr,"%s: cannot open %s\n", prg, disk_name[disk_slot]);
599 exit_err = 6;
600 goto exitHere;
601 }
602 active_disks++;
603 }
604 comp = comp->next;
605 }
606
607 int rv = check_stripes(info, fds, offsets,
608 raid_disks, chunk_size, level, layout,
609 start, length, disk_name, repair, failed_disk1, failed_disk2);
610 if (rv != 0) {
611 fprintf(stderr,
612 "%s: check_stripes returned %d\n", prg, rv);
613 exit_err = 7;
614 goto exitHere;
615 }
616
617 exitHere:
618
619 if (close_flag)
620 for(i = 0; i < raid_disks; i++)
621 close(fds[i]);
622
623 free(disk_name);
624 free(fds);
625 free(offsets);
626 free(buf);
627
628 exit(exit_err);
629 }