]> git.ipfire.org Git - thirdparty/mdadm.git/blob - raid6check.c
Don't break long strings onto multiple lines.
[thirdparty/mdadm.git] / raid6check.c
1 /*
2 * raid6check - extended consistency check for RAID-6
3 *
4 * Copyright (C) 2011 Piergiorgio Sartor
5 *
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 *
21 * Author: Piergiorgio Sartor
22 * Based on "restripe.c" from "mdadm" codebase
23 */
24
#include "mdadm.h"
#include <errno.h>
#include <signal.h>
#include <stdint.h>
#include <sys/mman.h>
29
/* Consistency results are grouped into pages of (1 << CHECK_PAGE_BITS) bytes */
#define CHECK_PAGE_BITS (12)
#define CHECK_PAGE_SIZE (1 << (CHECK_PAGE_BITS))

/* Repair mode selected on the command line */
enum repair {
	NO_REPAIR     = 0,	/* default: only report */
	MANUAL_REPAIR = 1,	/* "repair" argument: rebuild two named slots */
	AUTO_REPAIR   = 2	/* "autorepair" argument: rebuild the detected slot */
};
38
/* Routines used below whose definitions are not part of this file */
extern int geo_map(int block, unsigned long long stripe, int raid_disks, int level, int layout);
extern void qsyndrome(uint8_t *p, uint8_t *q, uint8_t **sources, int disks, int size);
extern void make_tables(void);
extern void ensure_zero_has_size(int chunk_size);
extern void raid6_datap_recov(int disks, size_t bytes, int faila, uint8_t **ptrs);
extern void raid6_2data_recov(int disks, size_t bytes, int faila, int failb, uint8_t **ptrs);
extern void xor_blocks(char *target, char **sources, int disks, int size);
48
/*
 * Collect per stripe consistency information.
 *
 * For every byte position of the chunk the computed parities (p, q) are
 * compared with the parities read from disk (chunkP, chunkQ) and one code
 * is stored in results[]:
 *   -255  P and Q both match
 *   -1    only P differs
 *   -2    only Q differs
 *   >= 0  both differ; the value is log(Q delta) - log(P delta) taken
 *         from raid6_gflog[], wrapped into 0..254 (the "data_id")
 */
void raid6_collect(int chunk_size, uint8_t *p, uint8_t *q,
		   char *chunkP, char *chunkQ, int *results)
{
	extern uint8_t raid6_gflog[];
	int pos;

	for (pos = 0; pos < chunk_size; pos++) {
		uint8_t delta_p = (uint8_t)chunkP[pos] ^ p[pos];
		uint8_t delta_q = (uint8_t)chunkQ[pos] ^ q[pos];

		if (delta_p == 0 && delta_q == 0) {
			results[pos] = -255;
		} else if (delta_q == 0) {
			results[pos] = -1;
		} else if (delta_p == 0) {
			results[pos] = -2;
		} else {
			int log_diff = raid6_gflog[delta_q] - raid6_gflog[delta_p];

			results[pos] = (log_diff < 0) ? log_diff + 255 : log_diff;
		}
	}
}
78
79 /* Try to find out if a specific disk has problems in a CHECK_PAGE_SIZE page size */
80 int raid6_stats_blk(int *results, int raid_disks)
81 {
82 int i;
83 int curr_broken_disk = -255;
84 int prev_broken_disk = -255;
85 int broken_status = 0;
86
87 for(i = 0; i < CHECK_PAGE_SIZE; i++) {
88
89 if(results[i] != -255)
90 curr_broken_disk = results[i];
91
92 if(curr_broken_disk >= raid_disks)
93 broken_status = 2;
94
95 switch(broken_status) {
96 case 0:
97 if(curr_broken_disk != -255) {
98 prev_broken_disk = curr_broken_disk;
99 broken_status = 1;
100 }
101 break;
102
103 case 1:
104 if(curr_broken_disk != prev_broken_disk)
105 broken_status = 2;
106 break;
107
108 case 2:
109 default:
110 curr_broken_disk = prev_broken_disk = -65535;
111 break;
112 }
113 }
114
115 return curr_broken_disk;
116 }
117
118 /* Collect disks status for a strip in CHECK_PAGE_SIZE page size blocks */
119 void raid6_stats(int *disk, int *results, int raid_disks, int chunk_size)
120 {
121 int i, j;
122
123 for(i = 0, j = 0; i < chunk_size; i += CHECK_PAGE_SIZE, j++) {
124 disk[j] = raid6_stats_blk(&results[i], raid_disks);
125 }
126 }
127
128 int lock_stripe(struct mdinfo *info, unsigned long long start,
129 int chunk_size, int data_disks, sighandler_t *sig) {
130 int rv;
131 if(mlockall(MCL_CURRENT | MCL_FUTURE) != 0) {
132 return 2;
133 }
134
135 sig[0] = signal(SIGTERM, SIG_IGN);
136 sig[1] = signal(SIGINT, SIG_IGN);
137 sig[2] = signal(SIGQUIT, SIG_IGN);
138
139 rv = sysfs_set_num(info, NULL, "suspend_lo", start * chunk_size * data_disks);
140 rv |= sysfs_set_num(info, NULL, "suspend_hi", (start + 1) * chunk_size * data_disks);
141 return rv * 256;
142 }
143
144 int unlock_all_stripes(struct mdinfo *info, sighandler_t *sig) {
145 int rv;
146 rv = sysfs_set_num(info, NULL, "suspend_lo", 0x7FFFFFFFFFFFFFFFULL);
147 rv |= sysfs_set_num(info, NULL, "suspend_hi", 0);
148 rv |= sysfs_set_num(info, NULL, "suspend_lo", 0);
149
150 signal(SIGQUIT, sig[2]);
151 signal(SIGINT, sig[1]);
152 signal(SIGTERM, sig[0]);
153
154 if(munlockall() != 0)
155 return 3;
156 return rv * 256;
157 }
158
159 /* Autorepair */
160 int autorepair(int *disk, int diskP, int diskQ, unsigned long long start, int chunk_size,
161 char *name[], int raid_disks, int data_disks, char **blocks_page,
162 char **blocks, uint8_t *p, char **stripes, int *block_index_for_slot,
163 int *source, unsigned long long *offsets)
164 {
165 int i, j;
166 int pages_to_write_count = 0;
167 int page_to_write[chunk_size >> CHECK_PAGE_BITS];
168 for(j = 0; j < (chunk_size >> CHECK_PAGE_BITS); j++) {
169 if (disk[j] >= 0) {
170 printf("Auto-repairing slot %d (%s)\n", disk[j], name[disk[j]]);
171 pages_to_write_count++;
172 page_to_write[j] = 1;
173 for(i = 0; i < raid_disks; i++) {
174 blocks_page[i] = blocks[i] + j * CHECK_PAGE_SIZE;
175 }
176 if (disk[j] == diskQ) {
177 qsyndrome(p, (uint8_t*)stripes[diskQ] + j * CHECK_PAGE_SIZE, (uint8_t**)blocks_page, data_disks, CHECK_PAGE_SIZE);
178 }
179 else {
180 char *all_but_failed_blocks[data_disks];
181 int failed_block_index = block_index_for_slot[disk[j]];
182 for(i = 0; i < data_disks; i++) {
183 if (failed_block_index == i) {
184 all_but_failed_blocks[i] = stripes[diskP] + j * CHECK_PAGE_SIZE;
185 }
186 else {
187 all_but_failed_blocks[i] = blocks_page[i];
188 }
189 }
190 xor_blocks(stripes[disk[j]] + j * CHECK_PAGE_SIZE,
191 all_but_failed_blocks, data_disks, CHECK_PAGE_SIZE);
192 }
193 }
194 else {
195 page_to_write[j] = 0;
196 }
197 }
198
199 if(pages_to_write_count > 0) {
200 int write_res = 0;
201 for(j = 0; j < (chunk_size >> CHECK_PAGE_BITS); j++) {
202 if(page_to_write[j] == 1) {
203 lseek64(source[disk[j]], offsets[disk[j]] + start * chunk_size + j * CHECK_PAGE_SIZE, SEEK_SET);
204 write_res += write(source[disk[j]], stripes[disk[j]] + j * CHECK_PAGE_SIZE, CHECK_PAGE_SIZE);
205 }
206 }
207
208 if (write_res != (CHECK_PAGE_SIZE * pages_to_write_count)) {
209 fprintf(stderr, "Failed to write a full chunk.\n");
210 return -1;
211 }
212 }
213
214 return 0;
215 }
216
217 /* Manual repair */
218 int manual_repair(int diskP, int diskQ, int chunk_size, int raid_disks, int data_disks,
219 int failed_disk1, int failed_disk2, unsigned long long start, int *block_index_for_slot,
220 char *name[], char **stripes, char **blocks, uint8_t *p, struct mdinfo *info, sighandler_t *sig,
221 int *source, unsigned long long *offsets)
222 {
223 int err = 0;
224 int i;
225 printf("Repairing stripe %llu\n", start);
226 printf("Assuming slots %d (%s) and %d (%s) are incorrect\n",
227 failed_disk1, name[failed_disk1],
228 failed_disk2, name[failed_disk2]);
229
230 if (failed_disk1 == diskQ || failed_disk2 == diskQ) {
231 char *all_but_failed_blocks[data_disks];
232 int failed_data_or_p;
233 int failed_block_index;
234
235 if (failed_disk1 == diskQ) {
236 failed_data_or_p = failed_disk2;
237 }
238 else {
239 failed_data_or_p = failed_disk1;
240 }
241 printf("Repairing D/P(%d) and Q\n", failed_data_or_p);
242 failed_block_index = block_index_for_slot[failed_data_or_p];
243 for (i = 0; i < data_disks; i++) {
244 if (failed_block_index == i) {
245 all_but_failed_blocks[i] = stripes[diskP];
246 }
247 else {
248 all_but_failed_blocks[i] = blocks[i];
249 }
250 }
251 xor_blocks(stripes[failed_data_or_p],
252 all_but_failed_blocks, data_disks, chunk_size);
253 qsyndrome(p, (uint8_t*)stripes[diskQ], (uint8_t**)blocks, data_disks, chunk_size);
254 }
255 else {
256 ensure_zero_has_size(chunk_size);
257 if (failed_disk1 == diskP || failed_disk2 == diskP) {
258 int failed_data, failed_block_index;
259 if (failed_disk1 == diskP) {
260 failed_data = failed_disk2;
261 }
262 else {
263 failed_data = failed_disk1;
264 }
265 failed_block_index = block_index_for_slot[failed_data];
266 printf("Repairing D(%d) and P\n", failed_data);
267 raid6_datap_recov(raid_disks, chunk_size, failed_block_index, (uint8_t**)blocks);
268 }
269 else {
270 printf("Repairing D and D\n");
271 int failed_block_index1 = block_index_for_slot[failed_disk1];
272 int failed_block_index2 = block_index_for_slot[failed_disk2];
273 if (failed_block_index1 > failed_block_index2) {
274 int t = failed_block_index1;
275 failed_block_index1 = failed_block_index2;
276 failed_block_index2 = t;
277 }
278 raid6_2data_recov(raid_disks, chunk_size, failed_block_index1, failed_block_index2, (uint8_t**)blocks);
279 }
280 }
281
282 err = lock_stripe(info, start, chunk_size, data_disks, sig);
283 if(err != 0) {
284 if (err != 2) {
285 return -1;
286 }
287 return -2;;
288 }
289
290 int write_res1, write_res2;
291 off64_t seek_res;
292
293 seek_res = lseek64(source[failed_disk1],
294 offsets[failed_disk1] + start * chunk_size, SEEK_SET);
295 if (seek_res < 0) {
296 fprintf(stderr, "lseek failed for failed_disk1\n");
297 return -1;
298 }
299 write_res1 = write(source[failed_disk1], stripes[failed_disk1], chunk_size);
300
301 seek_res = lseek64(source[failed_disk2],
302 offsets[failed_disk2] + start * chunk_size, SEEK_SET);
303 if (seek_res < 0) {
304 fprintf(stderr, "lseek failed for failed_disk1\n");
305 return -1;
306 }
307 write_res2 = write(source[failed_disk2], stripes[failed_disk2], chunk_size);
308
309 err = unlock_all_stripes(info, sig);
310 if(err != 0) {
311 return -2;
312 }
313
314 if (write_res1 != chunk_size || write_res2 != chunk_size) {
315 fprintf(stderr, "Failed to write a complete chunk.\n");
316 return -2;
317 }
318
319 return 0;
320 }
321
322 int check_stripes(struct mdinfo *info, int *source, unsigned long long *offsets,
323 int raid_disks, int chunk_size, int level, int layout,
324 unsigned long long start, unsigned long long length, char *name[],
325 enum repair repair, int failed_disk1, int failed_disk2)
326 {
327 /* read the data and p and q blocks, and check we got them right */
328 char *stripe_buf = xmalloc(raid_disks * chunk_size);
329 char **stripes = xmalloc(raid_disks * sizeof(char*));
330 char **blocks = xmalloc(raid_disks * sizeof(char*));
331 char **blocks_page = xmalloc(raid_disks * sizeof(char*));
332 int *block_index_for_slot = xmalloc(raid_disks * sizeof(int));
333 uint8_t *p = xmalloc(chunk_size);
334 uint8_t *q = xmalloc(chunk_size);
335 int *results = xmalloc(chunk_size * sizeof(int));
336 sighandler_t *sig = xmalloc(3 * sizeof(sighandler_t));
337
338 int i, j;
339 int diskP, diskQ;
340 int data_disks = raid_disks - 2;
341 int err = 0;
342
343 extern int tables_ready;
344
345 if (!tables_ready)
346 make_tables();
347
348 for ( i = 0 ; i < raid_disks ; i++)
349 stripes[i] = stripe_buf + i * chunk_size;
350
351 while (length > 0) {
352 int disk[chunk_size >> CHECK_PAGE_BITS];
353
354 err = lock_stripe(info, start, chunk_size, data_disks, sig);
355 if(err != 0) {
356 if (err != 2)
357 unlock_all_stripes(info, sig);
358 goto exitCheck;
359 }
360 for (i = 0 ; i < raid_disks ; i++) {
361 off64_t seek_res = lseek64(source[i], offsets[i] + start * chunk_size,
362 SEEK_SET);
363 if (seek_res < 0) {
364 fprintf(stderr, "lseek to source %d failed\n", i);
365 unlock_all_stripes(info, sig);
366 err = -1;
367 goto exitCheck;
368 }
369 int read_res = read(source[i], stripes[i], chunk_size);
370 if (read_res < chunk_size) {
371 fprintf(stderr, "Failed to read complete chunk disk %d, aborting\n", i);
372 unlock_all_stripes(info, sig);
373 err = -1;
374 goto exitCheck;
375 }
376 }
377
378 for (i = 0 ; i < data_disks ; i++) {
379 int disk = geo_map(i, start, raid_disks, level, layout);
380 blocks[i] = stripes[disk];
381 block_index_for_slot[disk] = i;
382 }
383
384 qsyndrome(p, q, (uint8_t**)blocks, data_disks, chunk_size);
385 diskP = geo_map(-1, start, raid_disks, level, layout);
386 diskQ = geo_map(-2, start, raid_disks, level, layout);
387 blocks[data_disks] = stripes[diskP];
388 block_index_for_slot[diskP] = data_disks;
389 blocks[data_disks+1] = stripes[diskQ];
390 block_index_for_slot[diskQ] = data_disks+1;
391
392 raid6_collect(chunk_size, p, q, stripes[diskP], stripes[diskQ], results);
393 raid6_stats(disk, results, raid_disks, chunk_size);
394
395 for(j = 0; j < (chunk_size >> CHECK_PAGE_BITS); j++) {
396 if(disk[j] >= -2) {
397 disk[j] = geo_map(disk[j], start, raid_disks, level, layout);
398 }
399 if(disk[j] >= 0) {
400 printf("Error detected at stripe %llu, page %d: possible failed disk slot: %d --> %s\n",
401 start, j, disk[j], name[disk[j]]);
402 }
403 if(disk[j] == -65535) {
404 printf("Error detected at stripe %llu, page %d: disk slot unknown\n", start, j);
405 }
406 }
407
408 if(repair == AUTO_REPAIR) {
409 err = autorepair(disk, diskP, diskQ, start, chunk_size,
410 name, raid_disks, data_disks, blocks_page,
411 blocks, p, stripes, block_index_for_slot,
412 source, offsets);
413 if(err != 0) {
414 unlock_all_stripes(info, sig);
415 goto exitCheck;
416 }
417 }
418
419 err = unlock_all_stripes(info, sig);
420 if(err != 0) {
421 goto exitCheck;
422 }
423
424 if(repair == MANUAL_REPAIR) {
425 err = manual_repair(diskP, diskQ, chunk_size, raid_disks, data_disks,
426 failed_disk1, failed_disk2, start, block_index_for_slot,
427 name, stripes, blocks, p, info, sig,
428 source, offsets);
429 if(err == -1) {
430 unlock_all_stripes(info, sig);
431 goto exitCheck;
432 }
433 }
434
435 length--;
436 start++;
437 }
438
439 exitCheck:
440
441 free(stripe_buf);
442 free(stripes);
443 free(blocks);
444 free(blocks_page);
445 free(block_index_for_slot);
446 free(p);
447 free(q);
448 free(results);
449 free(sig);
450
451 return err;
452 }
453
/*
 * Parse str as an unsigned decimal number.
 *
 * Returns the value on success and leaves *err untouched.  On failure
 * returns 0 and stores str in *err.  Failure means: no digits, trailing
 * characters, a value that does not fit (ERANGE), or a minus sign, which
 * strtoull() would otherwise accept and silently wrap to a huge value.
 */
unsigned long long getnum(char *str, char **err)
{
	char *e;
	unsigned long long rv;

	errno = 0;
	rv = strtoull(str, &e, 10);

	/* once the whole string parsed, a '-' can only be the sign */
	if (e == str || *e != '\0' || errno == ERANGE || strchr(str, '-') != NULL) {
		*err = str;
		return 0;
	}
	return rv;
}
464
465 int main(int argc, char *argv[])
466 {
467 /* md_device start length */
468 int *fds = NULL;
469 char *buf = NULL;
470 char **disk_name = NULL;
471 unsigned long long *offsets = NULL;
472 int raid_disks = 0;
473 int active_disks;
474 int chunk_size = 0;
475 int layout = -1;
476 int level = 6;
477 enum repair repair = NO_REPAIR;
478 int failed_disk1 = -1;
479 int failed_disk2 = -1;
480 unsigned long long start, length;
481 int i;
482 int mdfd;
483 struct mdinfo *info = NULL, *comp = NULL;
484 char *err = NULL;
485 int exit_err = 0;
486 int close_flag = 0;
487 char *prg = strrchr(argv[0], '/');
488
489 if (prg == NULL)
490 prg = argv[0];
491 else
492 prg++;
493
494 if (argc < 4) {
495 fprintf(stderr, "Usage: %s md_device start_stripe length_stripes [autorepair]\n", prg);
496 fprintf(stderr, " or: %s md_device repair stripe failed_slot_1 failed_slot_2\n", prg);
497 exit_err = 1;
498 goto exitHere;
499 }
500
501 mdfd = open(argv[1], O_RDONLY);
502 if(mdfd < 0) {
503 perror(argv[1]);
504 fprintf(stderr, "%s: cannot open %s\n", prg, argv[1]);
505 exit_err = 2;
506 goto exitHere;
507 }
508
509 info = sysfs_read(mdfd, NULL,
510 GET_LEVEL|
511 GET_LAYOUT|
512 GET_DISKS|
513 GET_DEGRADED |
514 GET_COMPONENT|
515 GET_CHUNK|
516 GET_DEVS|
517 GET_OFFSET|
518 GET_SIZE);
519
520 if(info == NULL) {
521 fprintf(stderr, "%s: Error reading sysfs information of %s\n", prg, argv[1]);
522 exit_err = 9;
523 goto exitHere;
524 }
525
526 if(info->array.level != level) {
527 fprintf(stderr, "%s: %s not a RAID-6\n", prg, argv[1]);
528 exit_err = 3;
529 goto exitHere;
530 }
531
532 if(info->array.failed_disks > 0) {
533 fprintf(stderr, "%s: %s degraded array\n", prg, argv[1]);
534 exit_err = 8;
535 goto exitHere;
536 }
537
538 printf("layout: %d\n", info->array.layout);
539 printf("disks: %d\n", info->array.raid_disks);
540 printf("component size: %llu\n", info->component_size * 512);
541 printf("total stripes: %llu\n", (info->component_size * 512) / info->array.chunk_size);
542 printf("chunk size: %d\n", info->array.chunk_size);
543 printf("\n");
544
545 comp = info->devs;
546 for(i = 0, active_disks = 0; active_disks < info->array.raid_disks; i++) {
547 printf("disk: %d - offset: %llu - size: %llu - name: %s - slot: %d\n",
548 i, comp->data_offset * 512, comp->component_size * 512,
549 map_dev(comp->disk.major, comp->disk.minor, 0),
550 comp->disk.raid_disk);
551 if(comp->disk.raid_disk >= 0)
552 active_disks++;
553 comp = comp->next;
554 }
555 printf("\n");
556
557 close(mdfd);
558
559 raid_disks = info->array.raid_disks;
560 chunk_size = info->array.chunk_size;
561 layout = info->array.layout;
562 if (strcmp(argv[2], "repair")==0) {
563 if (argc < 6) {
564 fprintf(stderr, "For repair mode, call %s md_device repair stripe failed_slot_1 failed_slot_2\n", prg);
565 exit_err = 1;
566 goto exitHere;
567 }
568 repair = MANUAL_REPAIR;
569 start = getnum(argv[3], &err);
570 length = 1;
571 failed_disk1 = getnum(argv[4], &err);
572 failed_disk2 = getnum(argv[5], &err);
573
574 if(failed_disk1 >= info->array.raid_disks) {
575 fprintf(stderr, "%s: failed_slot_1 index is higher than number of devices in raid\n", prg);
576 exit_err = 4;
577 goto exitHere;
578 }
579 if(failed_disk2 >= info->array.raid_disks) {
580 fprintf(stderr, "%s: failed_slot_2 index is higher than number of devices in raid\n", prg);
581 exit_err = 4;
582 goto exitHere;
583 }
584 if(failed_disk1 == failed_disk2) {
585 fprintf(stderr, "%s: failed_slot_1 and failed_slot_2 are the same\n", prg);
586 exit_err = 4;
587 goto exitHere;
588 }
589 }
590 else {
591 start = getnum(argv[2], &err);
592 length = getnum(argv[3], &err);
593 if (argc >= 5 && strcmp(argv[4], "autorepair")==0)
594 repair = AUTO_REPAIR;
595 }
596
597 if (err) {
598 fprintf(stderr, "%s: Bad number: %s\n", prg, err);
599 exit_err = 4;
600 goto exitHere;
601 }
602
603 if(start > ((info->component_size * 512) / chunk_size)) {
604 start = (info->component_size * 512) / chunk_size;
605 fprintf(stderr, "%s: start beyond disks size\n", prg);
606 }
607
608 if((length == 0) ||
609 ((length + start) > ((info->component_size * 512) / chunk_size))) {
610 length = (info->component_size * 512) / chunk_size - start;
611 }
612
613 disk_name = xmalloc(raid_disks * sizeof(*disk_name));
614 fds = xmalloc(raid_disks * sizeof(*fds));
615 offsets = xcalloc(raid_disks, sizeof(*offsets));
616 buf = xmalloc(raid_disks * chunk_size);
617
618 for(i=0; i<raid_disks; i++) {
619 fds[i] = -1;
620 }
621 close_flag = 1;
622
623 comp = info->devs;
624 for (i=0, active_disks=0; active_disks<raid_disks; i++) {
625 int disk_slot = comp->disk.raid_disk;
626 if(disk_slot >= 0) {
627 disk_name[disk_slot] = map_dev(comp->disk.major, comp->disk.minor, 0);
628 offsets[disk_slot] = comp->data_offset * 512;
629 fds[disk_slot] = open(disk_name[disk_slot], O_RDWR | O_SYNC);
630 if (fds[disk_slot] < 0) {
631 perror(disk_name[disk_slot]);
632 fprintf(stderr,"%s: cannot open %s\n", prg, disk_name[disk_slot]);
633 exit_err = 6;
634 goto exitHere;
635 }
636 active_disks++;
637 }
638 comp = comp->next;
639 }
640
641 int rv = check_stripes(info, fds, offsets,
642 raid_disks, chunk_size, level, layout,
643 start, length, disk_name, repair, failed_disk1, failed_disk2);
644 if (rv != 0) {
645 fprintf(stderr, "%s: check_stripes returned %d\n", prg, rv);
646 exit_err = 7;
647 goto exitHere;
648 }
649
650 exitHere:
651
652 if (close_flag)
653 for(i = 0; i < raid_disks; i++)
654 close(fds[i]);
655
656 free(disk_name);
657 free(fds);
658 free(offsets);
659 free(buf);
660
661 exit(exit_err);
662 }