]> git.ipfire.org Git - thirdparty/mdadm.git/blob - raid6check.c
raid6check.c: reduce verbosity
[thirdparty/mdadm.git] / raid6check.c
1 /*
2 * raid6check - extended consistency check for RAID-6
3 *
4 * Copyright (C) 2011 Piergiorgio Sartor
5 *
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 *
21 * Author: Piergiorgio Sartor
22 * Based on "restripe.c" from "mdadm" codebase
23 */
24
25 #include "mdadm.h"
26 #include <stdint.h>
27 #include <signal.h>
28 #include <sys/mman.h>
29
/*
 * Consistency results are evaluated per "page" inside a chunk:
 * a page is 2^CHECK_PAGE_BITS bytes.
 */
#define CHECK_PAGE_BITS (12)
#define CHECK_PAGE_SIZE (1 << CHECK_PAGE_BITS)
32
/* What check_stripes() does with a stripe once it has been examined */
enum repair {
	NO_REPAIR = 0,	/* only report inconsistencies */
	MANUAL_REPAIR,	/* rewrite the two slots named by the caller */
	AUTO_REPAIR	/* rewrite the pages of a slot identified as faulty */
};
38
/*
 * Helpers used by this file but not defined in the code shown here
 * (presumably provided by restripe.c, which this file is based on --
 * confirm against the build).
 */

/* stripe geometry */
int geo_map(int block, unsigned long long stripe, int raid_disks,
	    int level, int layout);

/* parity computation */
void make_tables(void);
void qsyndrome(uint8_t *p, uint8_t *q, uint8_t **sources, int disks, int size);
void xor_blocks(char *target, char **sources, int disks, int size);

/* recovery of two failed blocks */
void ensure_zero_has_size(int chunk_size);
void raid6_datap_recov(int disks, size_t bytes, int faila, uint8_t **ptrs);
void raid6_2data_recov(int disks, size_t bytes, int faila, int failb,
		       uint8_t **ptrs);
48
/*
 * Collect per stripe consistency information.
 *
 * For every byte position below chunk_size the computed parity (p, q) is
 * compared with the parity read from disk (chunkP, chunkQ) and one verdict
 * is stored in results[]:
 *   -255  P and Q both match
 *   -1    only P differs
 *   -2    only Q differs
 *   >= 0  both differ; the value is the difference of the GF logarithms of
 *         the Q and P deltas, wrapped into 0..254 (later used by the caller
 *         as a data block index)
 */
void raid6_collect(int chunk_size, uint8_t *p, uint8_t *q,
		   char *chunkP, char *chunkQ, int *results)
{
	extern uint8_t raid6_gflog[];
	int pos;

	for (pos = 0; pos < chunk_size; pos++) {
		uint8_t p_delta = (uint8_t)chunkP[pos] ^ (uint8_t)p[pos];
		uint8_t q_delta = (uint8_t)chunkQ[pos] ^ (uint8_t)q[pos];
		int verdict;

		if (p_delta == 0 && q_delta == 0) {
			verdict = -255;
		} else if (q_delta == 0) {
			verdict = -1;
		} else if (p_delta == 0) {
			verdict = -2;
		} else {
			verdict = raid6_gflog[q_delta] - raid6_gflog[p_delta];
			if (verdict < 0)
				verdict += 255;
		}

		results[pos] = verdict;
	}
}
78
79 /* Try to find out if a specific disk has problems in a CHECK_PAGE_SIZE page size */
80 int raid6_stats_blk(int *results, int raid_disks)
81 {
82 int i;
83 int curr_broken_disk = -255;
84 int prev_broken_disk = -255;
85 int broken_status = 0;
86
87 for(i = 0; i < CHECK_PAGE_SIZE; i++) {
88
89 if(results[i] != -255)
90 curr_broken_disk = results[i];
91
92 if(curr_broken_disk >= raid_disks)
93 broken_status = 2;
94
95 switch(broken_status) {
96 case 0:
97 if(curr_broken_disk != -255) {
98 prev_broken_disk = curr_broken_disk;
99 broken_status = 1;
100 }
101 break;
102
103 case 1:
104 if(curr_broken_disk != prev_broken_disk)
105 broken_status = 2;
106 break;
107
108 case 2:
109 default:
110 curr_broken_disk = prev_broken_disk = -65535;
111 break;
112 }
113 }
114
115 return curr_broken_disk;
116 }
117
118 /* Collect disks status for a strip in CHECK_PAGE_SIZE page size blocks */
119 void raid6_stats(int *disk, int *results, int raid_disks, int chunk_size)
120 {
121 int i, j;
122
123 for(i = 0, j = 0; i < chunk_size; i += CHECK_PAGE_SIZE, j++) {
124 disk[j] = raid6_stats_blk(&results[i], raid_disks);
125 }
126 }
127
128 int lock_stripe(struct mdinfo *info, unsigned long long start,
129 int chunk_size, int data_disks, sighandler_t *sig) {
130 int rv;
131 if(mlockall(MCL_CURRENT | MCL_FUTURE) != 0) {
132 return 2;
133 }
134
135 sig[0] = signal(SIGTERM, SIG_IGN);
136 sig[1] = signal(SIGINT, SIG_IGN);
137 sig[2] = signal(SIGQUIT, SIG_IGN);
138
139 rv = sysfs_set_num(info, NULL, "suspend_lo", start * chunk_size * data_disks);
140 rv |= sysfs_set_num(info, NULL, "suspend_hi", (start + 1) * chunk_size * data_disks);
141 return rv * 256;
142 }
143
144 int unlock_all_stripes(struct mdinfo *info, sighandler_t *sig) {
145 int rv;
146 rv = sysfs_set_num(info, NULL, "suspend_lo", 0x7FFFFFFFFFFFFFFFULL);
147 rv |= sysfs_set_num(info, NULL, "suspend_hi", 0);
148 rv |= sysfs_set_num(info, NULL, "suspend_lo", 0);
149
150 signal(SIGQUIT, sig[2]);
151 signal(SIGINT, sig[1]);
152 signal(SIGTERM, sig[0]);
153
154 if(munlockall() != 0)
155 return 3;
156 return rv * 256;
157 }
158
159 int check_stripes(struct mdinfo *info, int *source, unsigned long long *offsets,
160 int raid_disks, int chunk_size, int level, int layout,
161 unsigned long long start, unsigned long long length, char *name[],
162 enum repair repair, int failed_disk1, int failed_disk2)
163 {
164 /* read the data and p and q blocks, and check we got them right */
165 char *stripe_buf = xmalloc(raid_disks * chunk_size);
166 char **stripes = xmalloc(raid_disks * sizeof(char*));
167 char **blocks = xmalloc(raid_disks * sizeof(char*));
168 char **blocks_page = xmalloc(raid_disks * sizeof(char*));
169 int *block_index_for_slot = xmalloc(raid_disks * sizeof(int));
170 uint8_t *p = xmalloc(chunk_size);
171 uint8_t *q = xmalloc(chunk_size);
172 int *results = xmalloc(chunk_size * sizeof(int));
173 sighandler_t *sig = xmalloc(3 * sizeof(sighandler_t));
174
175 int i, j;
176 int diskP, diskQ;
177 int data_disks = raid_disks - 2;
178 int err = 0;
179
180 extern int tables_ready;
181
182 if (!tables_ready)
183 make_tables();
184
185 for ( i = 0 ; i < raid_disks ; i++)
186 stripes[i] = stripe_buf + i * chunk_size;
187
188 while (length > 0) {
189 int disk[chunk_size >> CHECK_PAGE_BITS];
190
191 err = lock_stripe(info, start, chunk_size, data_disks, sig);
192 if(err != 0) {
193 if (err != 2)
194 unlock_all_stripes(info, sig);
195 goto exitCheck;
196 }
197 for (i = 0 ; i < raid_disks ; i++) {
198 off64_t seek_res = lseek64(source[i], offsets[i] + start * chunk_size,
199 SEEK_SET);
200 if (seek_res < 0) {
201 fprintf(stderr, "lseek to source %d failed\n", i);
202 unlock_all_stripes(info, sig);
203 err = -1;
204 goto exitCheck;
205 }
206 int read_res = read(source[i], stripes[i], chunk_size);
207 if (read_res < chunk_size) {
208 fprintf(stderr, "Failed to read complete chunk disk %d, aborting\n", i);
209 unlock_all_stripes(info, sig);
210 err = -1;
211 goto exitCheck;
212 }
213 }
214 err = unlock_all_stripes(info, sig);
215 if(err != 0)
216 goto exitCheck;
217
218 for (i = 0 ; i < data_disks ; i++) {
219 int disk = geo_map(i, start, raid_disks, level, layout);
220 blocks[i] = stripes[disk];
221 block_index_for_slot[disk] = i;
222 }
223
224 qsyndrome(p, q, (uint8_t**)blocks, data_disks, chunk_size);
225 diskP = geo_map(-1, start, raid_disks, level, layout);
226 diskQ = geo_map(-2, start, raid_disks, level, layout);
227 blocks[data_disks] = stripes[diskP];
228 block_index_for_slot[diskP] = data_disks;
229 blocks[data_disks+1] = stripes[diskQ];
230 block_index_for_slot[diskQ] = data_disks+1;
231
232 raid6_collect(chunk_size, p, q, stripes[diskP], stripes[diskQ], results);
233 raid6_stats(disk, results, raid_disks, chunk_size);
234
235 for(j = 0; j < (chunk_size >> CHECK_PAGE_BITS); j++) {
236 if(disk[j] >= -2) {
237 disk[j] = geo_map(disk[j], start, raid_disks, level, layout);
238 }
239 if(disk[j] >= 0) {
240 printf("Error detected at stripe %llu, page %d: possible failed disk slot: %d --> %s\n",
241 start, j, disk[j], name[disk[j]]);
242 }
243 if(disk[j] == -65535) {
244 printf("Error detected at stripe %llu, page %d: disk slot unknown\n", start, j);
245 }
246 }
247
248 if(repair == MANUAL_REPAIR) {
249 printf("Repairing stripe %llu\n", start);
250 printf("Assuming slots %d (%s) and %d (%s) are incorrect\n",
251 failed_disk1, name[failed_disk1],
252 failed_disk2, name[failed_disk2]);
253
254 if (failed_disk1 == diskQ || failed_disk2 == diskQ) {
255 char *all_but_failed_blocks[data_disks];
256 int failed_data_or_p;
257 int failed_block_index;
258
259 if (failed_disk1 == diskQ)
260 failed_data_or_p = failed_disk2;
261 else
262 failed_data_or_p = failed_disk1;
263 printf("Repairing D/P(%d) and Q\n", failed_data_or_p);
264 failed_block_index = block_index_for_slot[failed_data_or_p];
265 for (i=0; i < data_disks; i++)
266 if (failed_block_index == i)
267 all_but_failed_blocks[i] = stripes[diskP];
268 else
269 all_but_failed_blocks[i] = blocks[i];
270 xor_blocks(stripes[failed_data_or_p],
271 all_but_failed_blocks, data_disks, chunk_size);
272 qsyndrome(p, (uint8_t*)stripes[diskQ], (uint8_t**)blocks, data_disks, chunk_size);
273 } else {
274 ensure_zero_has_size(chunk_size);
275 if (failed_disk1 == diskP || failed_disk2 == diskP) {
276 int failed_data, failed_block_index;
277 if (failed_disk1 == diskP)
278 failed_data = failed_disk2;
279 else
280 failed_data = failed_disk1;
281 failed_block_index = block_index_for_slot[failed_data];
282 printf("Repairing D(%d) and P\n", failed_data);
283 raid6_datap_recov(raid_disks, chunk_size, failed_block_index, (uint8_t**)blocks);
284 } else {
285 printf("Repairing D and D\n");
286 int failed_block_index1 = block_index_for_slot[failed_disk1];
287 int failed_block_index2 = block_index_for_slot[failed_disk2];
288 if (failed_block_index1 > failed_block_index2) {
289 int t = failed_block_index1;
290 failed_block_index1 = failed_block_index2;
291 failed_block_index2 = t;
292 }
293 raid6_2data_recov(raid_disks, chunk_size, failed_block_index1, failed_block_index2, (uint8_t**)blocks);
294 }
295 }
296
297 err = lock_stripe(info, start, chunk_size, data_disks, sig);
298 if(err != 0) {
299 if (err != 2)
300 unlock_all_stripes(info, sig);
301 goto exitCheck;
302 }
303
304 int write_res1, write_res2;
305 off64_t seek_res;
306
307 seek_res = lseek64(source[failed_disk1],
308 offsets[failed_disk1] + start * chunk_size, SEEK_SET);
309 if (seek_res < 0) {
310 fprintf(stderr, "lseek failed for failed_disk1\n");
311 unlock_all_stripes(info, sig);
312 err = -1;
313 goto exitCheck;
314 }
315 write_res1 = write(source[failed_disk1], stripes[failed_disk1], chunk_size);
316
317 seek_res = lseek64(source[failed_disk2],
318 offsets[failed_disk2] + start * chunk_size, SEEK_SET);
319 if (seek_res < 0) {
320 fprintf(stderr, "lseek failed for failed_disk1\n");
321 unlock_all_stripes(info, sig);
322 err = -1;
323 goto exitCheck;
324 }
325 write_res2 = write(source[failed_disk2], stripes[failed_disk2], chunk_size);
326
327 err = unlock_all_stripes(info, sig);
328 if(err != 0)
329 goto exitCheck;
330
331 if (write_res1 != chunk_size || write_res2 != chunk_size) {
332 fprintf(stderr, "Failed to write a complete chunk.\n");
333 goto exitCheck;
334 }
335
336 }
337
338 int pages_to_write_count = 0;
339 int page_to_write[chunk_size >> CHECK_PAGE_BITS];
340 for(j = 0; j < (chunk_size >> CHECK_PAGE_BITS); j++) {
341 if (disk[j] >= 0 && repair == AUTO_REPAIR) {
342 printf("Auto-repairing slot %d (%s)\n", disk[j], name[disk[j]]);
343 pages_to_write_count++;
344 page_to_write[j] = 1;
345 for(i = 0; i < raid_disks; i++) {
346 blocks_page[i] = blocks[i] + j * CHECK_PAGE_SIZE;
347 }
348 if (disk[j] == diskQ) {
349 qsyndrome(p, (uint8_t*)stripes[diskQ] + j * CHECK_PAGE_SIZE, (uint8_t**)blocks_page, data_disks, CHECK_PAGE_SIZE);
350 } else {
351 char *all_but_failed_blocks[data_disks];
352 int failed_block_index = block_index_for_slot[disk[j]];
353 for (i=0; i < data_disks; i++)
354 if (failed_block_index == i)
355 all_but_failed_blocks[i] = stripes[diskP] + j * CHECK_PAGE_SIZE;
356 else
357 all_but_failed_blocks[i] = blocks_page[i];
358 xor_blocks(stripes[disk[j]] + j * CHECK_PAGE_SIZE,
359 all_but_failed_blocks, data_disks, CHECK_PAGE_SIZE);
360 }
361 } else {
362 page_to_write[j] = 0;
363 }
364 }
365
366 if(pages_to_write_count > 0) {
367
368 err = lock_stripe(info, start, chunk_size, data_disks, sig);
369 if(err != 0) {
370 if (err != 2)
371 unlock_all_stripes(info, sig);
372 goto exitCheck;
373 }
374
375 int write_res = 0;
376 for(j = 0; j < (chunk_size >> CHECK_PAGE_BITS); j++) {
377 if(page_to_write[j] == 1) {
378 lseek64(source[disk[j]], offsets[disk[j]] + start * chunk_size + j * CHECK_PAGE_SIZE, 0);
379 write_res += write(source[disk[j]], stripes[disk[j]] + j * CHECK_PAGE_SIZE, CHECK_PAGE_SIZE);
380 }
381 }
382
383 err = unlock_all_stripes(info, sig);
384 if (err != 0 || write_res != (CHECK_PAGE_SIZE * pages_to_write_count))
385 goto exitCheck;
386
387 if (write_res != (CHECK_PAGE_SIZE * pages_to_write_count)) {
388 fprintf(stderr, "Failed to write a full chunk.\n");
389 goto exitCheck;
390 }
391 }
392
393 length--;
394 start++;
395 }
396
397 exitCheck:
398
399 free(stripe_buf);
400 free(stripes);
401 free(blocks);
402 free(blocks_page);
403 free(block_index_for_slot);
404 free(p);
405 free(q);
406 free(results);
407 free(sig);
408
409 return err;
410 }
411
/*
 * Parse a non-negative decimal number.
 *
 * str  text to convert; the whole string must be consumed by the number
 * err  on failure *err is set to str; it is left untouched on success, so
 *      one variable can accumulate the result of several calls
 *
 * Returns the parsed value, or 0 on failure.
 */
unsigned long long getnum(char *str, char **err)
{
	char *end;
	char *c;
	unsigned long long rv = strtoull(str, &end, 10);

	if (end == str || *end != '\0') {
		*err = str;
		return 0;
	}

	/*
	 * strtoull() accepts a leading '-' and returns the negated value
	 * wrapped into the unsigned range, so "-1" would come back as a huge
	 * number.  Everything in [str, end) was consumed as part of the
	 * number, so a '-' in there can only be that sign.
	 */
	for (c = str; c < end; c++) {
		if (*c == '-') {
			*err = str;
			return 0;
		}
	}

	return rv;
}
422
423 int main(int argc, char *argv[])
424 {
425 /* md_device start length */
426 int *fds = NULL;
427 char *buf = NULL;
428 char **disk_name = NULL;
429 unsigned long long *offsets = NULL;
430 int raid_disks = 0;
431 int active_disks;
432 int chunk_size = 0;
433 int layout = -1;
434 int level = 6;
435 enum repair repair = NO_REPAIR;
436 int failed_disk1 = -1;
437 int failed_disk2 = -1;
438 unsigned long long start, length;
439 int i;
440 int mdfd;
441 struct mdinfo *info = NULL, *comp = NULL;
442 char *err = NULL;
443 int exit_err = 0;
444 int close_flag = 0;
445 char *prg = strrchr(argv[0], '/');
446
447 if (prg == NULL)
448 prg = argv[0];
449 else
450 prg++;
451
452 if (argc < 4) {
453 fprintf(stderr, "Usage: %s md_device start_stripe length_stripes [autorepair]\n", prg);
454 fprintf(stderr, " or: %s md_device repair stripe failed_slot_1 failed_slot_2\n", prg);
455 exit_err = 1;
456 goto exitHere;
457 }
458
459 mdfd = open(argv[1], O_RDONLY);
460 if(mdfd < 0) {
461 perror(argv[1]);
462 fprintf(stderr, "%s: cannot open %s\n", prg, argv[1]);
463 exit_err = 2;
464 goto exitHere;
465 }
466
467 info = sysfs_read(mdfd, NULL,
468 GET_LEVEL|
469 GET_LAYOUT|
470 GET_DISKS|
471 GET_DEGRADED |
472 GET_COMPONENT|
473 GET_CHUNK|
474 GET_DEVS|
475 GET_OFFSET|
476 GET_SIZE);
477
478 if(info == NULL) {
479 fprintf(stderr, "%s: Error reading sysfs information of %s\n", prg, argv[1]);
480 exit_err = 9;
481 goto exitHere;
482 }
483
484 if(info->array.level != level) {
485 fprintf(stderr, "%s: %s not a RAID-6\n", prg, argv[1]);
486 exit_err = 3;
487 goto exitHere;
488 }
489
490 if(info->array.failed_disks > 0) {
491 fprintf(stderr, "%s: %s degraded array\n", prg, argv[1]);
492 exit_err = 8;
493 goto exitHere;
494 }
495
496 printf("layout: %d\n", info->array.layout);
497 printf("disks: %d\n", info->array.raid_disks);
498 printf("component size: %llu\n", info->component_size * 512);
499 printf("total stripes: %llu\n", (info->component_size * 512) / info->array.chunk_size);
500 printf("chunk size: %d\n", info->array.chunk_size);
501 printf("\n");
502
503 comp = info->devs;
504 for(i = 0, active_disks = 0; active_disks < info->array.raid_disks; i++) {
505 printf("disk: %d - offset: %llu - size: %llu - name: %s - slot: %d\n",
506 i, comp->data_offset * 512, comp->component_size * 512,
507 map_dev(comp->disk.major, comp->disk.minor, 0),
508 comp->disk.raid_disk);
509 if(comp->disk.raid_disk >= 0)
510 active_disks++;
511 comp = comp->next;
512 }
513 printf("\n");
514
515 close(mdfd);
516
517 raid_disks = info->array.raid_disks;
518 chunk_size = info->array.chunk_size;
519 layout = info->array.layout;
520 if (strcmp(argv[2], "repair")==0) {
521 if (argc < 6) {
522 fprintf(stderr, "For repair mode, call %s md_device repair stripe failed_slot_1 failed_slot_2\n", prg);
523 exit_err = 1;
524 goto exitHere;
525 }
526 repair = MANUAL_REPAIR;
527 start = getnum(argv[3], &err);
528 length = 1;
529 failed_disk1 = getnum(argv[4], &err);
530 failed_disk2 = getnum(argv[5], &err);
531
532 if(failed_disk1 >= info->array.raid_disks) {
533 fprintf(stderr, "%s: failed_slot_1 index is higher than number of devices in raid\n", prg);
534 exit_err = 4;
535 goto exitHere;
536 }
537 if(failed_disk2 >= info->array.raid_disks) {
538 fprintf(stderr, "%s: failed_slot_2 index is higher than number of devices in raid\n", prg);
539 exit_err = 4;
540 goto exitHere;
541 }
542 if(failed_disk1 == failed_disk2) {
543 fprintf(stderr, "%s: failed_slot_1 and failed_slot_2 are the same\n", prg);
544 exit_err = 4;
545 goto exitHere;
546 }
547 }
548 else {
549 start = getnum(argv[2], &err);
550 length = getnum(argv[3], &err);
551 if (argc >= 5 && strcmp(argv[4], "autorepair")==0)
552 repair = AUTO_REPAIR;
553 }
554
555 if (err) {
556 fprintf(stderr, "%s: Bad number: %s\n", prg, err);
557 exit_err = 4;
558 goto exitHere;
559 }
560
561 if(start > ((info->component_size * 512) / chunk_size)) {
562 start = (info->component_size * 512) / chunk_size;
563 fprintf(stderr, "%s: start beyond disks size\n", prg);
564 }
565
566 if((length == 0) ||
567 ((length + start) > ((info->component_size * 512) / chunk_size))) {
568 length = (info->component_size * 512) / chunk_size - start;
569 }
570
571 disk_name = xmalloc(raid_disks * sizeof(*disk_name));
572 fds = xmalloc(raid_disks * sizeof(*fds));
573 offsets = xcalloc(raid_disks, sizeof(*offsets));
574 buf = xmalloc(raid_disks * chunk_size);
575
576 for(i=0; i<raid_disks; i++) {
577 fds[i] = -1;
578 }
579 close_flag = 1;
580
581 comp = info->devs;
582 for (i=0, active_disks=0; active_disks<raid_disks; i++) {
583 int disk_slot = comp->disk.raid_disk;
584 if(disk_slot >= 0) {
585 disk_name[disk_slot] = map_dev(comp->disk.major, comp->disk.minor, 0);
586 offsets[disk_slot] = comp->data_offset * 512;
587 fds[disk_slot] = open(disk_name[disk_slot], O_RDWR | O_SYNC);
588 if (fds[disk_slot] < 0) {
589 perror(disk_name[disk_slot]);
590 fprintf(stderr,"%s: cannot open %s\n", prg, disk_name[disk_slot]);
591 exit_err = 6;
592 goto exitHere;
593 }
594 active_disks++;
595 }
596 comp = comp->next;
597 }
598
599 int rv = check_stripes(info, fds, offsets,
600 raid_disks, chunk_size, level, layout,
601 start, length, disk_name, repair, failed_disk1, failed_disk2);
602 if (rv != 0) {
603 fprintf(stderr,
604 "%s: check_stripes returned %d\n", prg, rv);
605 exit_err = 7;
606 goto exitHere;
607 }
608
609 exitHere:
610
611 if (close_flag)
612 for(i = 0; i < raid_disks; i++)
613 close(fds[i]);
614
615 free(disk_name);
616 free(fds);
617 free(offsets);
618 free(buf);
619
620 exit(exit_err);
621 }