]> git.ipfire.org Git - thirdparty/mdadm.git/blob - raid6check.c
aa6ce234e37805b7fa34a2174872d23e8f4a296b
[thirdparty/mdadm.git] / raid6check.c
1 /*
2 * raid6check - extended consistency check for RAID-6
3 *
4 * Copyright (C) 2011 Piergiorgio Sartor
5 *
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 *
21 * Author: Piergiorgio Sartor
22 * Based on "restripe.c" from "mdadm" codebase
23 */
24
#include "mdadm.h"
#include <errno.h>
#include <signal.h>
#include <stdint.h>
#include <string.h>
#include <sys/mman.h>
29
/* What check_stripes() does with a stripe once it has been examined. */
enum repair {
	NO_REPAIR     = 0,	/* only report what was found */
	MANUAL_REPAIR = 1,	/* rebuild the two slots named by the caller */
	AUTO_REPAIR   = 2	/* rebuild the single slot the check points at */
};
35
/*
 * Helpers that are only declared in this file; their definitions live
 * elsewhere (the header comment says this tool is based on "restripe.c").
 */

/* stripe geometry */
int geo_map(int block, unsigned long long stripe,
	    int raid_disks, int level, int layout);

/* one-time / lazy set-up */
void make_tables(void);
void ensure_zero_has_size(int chunk_size);

/* parity computation and recovery */
void xor_blocks(char *target, char **sources, int disks, int size);
void qsyndrome(uint8_t *p, uint8_t *q, uint8_t **sources,
	       int disks, int size);
void raid6_datap_recov(int disks, size_t bytes, int faila,
		       uint8_t **ptrs);
void raid6_2data_recov(int disks, size_t bytes, int faila, int failb,
		       uint8_t **ptrs);

45
46
/*
 * Collect per stripe consistency information.
 *
 * For every byte position in [0, chunk_size) the stored parity bytes
 * (chunkP, chunkQ) are XORed with the freshly computed ones (p, q) and the
 * outcome is written to results[]:
 *   -255  P and Q both match
 *   -1    only P differs
 *   -2    only Q differs
 *   >= 0  both differ: raid6_gflog[Q diff] - raid6_gflog[P diff], with 255
 *         added when that difference is negative
 */
void raid6_collect(int chunk_size, uint8_t *p, uint8_t *q,
		   char *chunkP, char *chunkQ, int *results)
{
	extern uint8_t raid6_gflog[];
	int pos;

	for (pos = 0; pos < chunk_size; pos++) {
		uint8_t p_diff = (uint8_t)chunkP[pos] ^ p[pos];
		uint8_t q_diff = (uint8_t)chunkQ[pos] ^ q[pos];
		int verdict;

		if (p_diff == 0) {
			verdict = (q_diff == 0) ? -255 : -2;
		} else if (q_diff == 0) {
			verdict = -1;
		} else {
			verdict = raid6_gflog[q_diff] - raid6_gflog[p_diff];
			if (verdict < 0)
				verdict += 255;
		}

		results[pos] = verdict;
	}
}
76
/*
 * Try to find out if a specific disk has problems.
 *
 * results[] holds one verdict per byte of the chunk, in the encoding
 * written by raid6_collect(): -255 means "no error at this byte", any
 * other value names the suspected block.
 *
 * Returns
 *   -255    when no byte reported an error,
 *   -65535  when the bytes that reported an error do not all agree, or when
 *           one of them names an index >= raid_disks,
 *   the common verdict otherwise.
 *
 * Every disagreement yields -65535, including one that only shows up on
 * the last byte examined.
 */
int raid6_stats(int *results, int raid_disks, int chunk_size)
{
	int i;
	int broken_disk = -255;		/* -255: nothing suspicious seen yet */

	for (i = 0; i < chunk_size; i++) {
		int verdict = results[i];

		if (verdict == -255)
			continue;

		/* an index that cannot belong to this array */
		if (verdict >= raid_disks)
			return -65535;

		if (broken_disk == -255)
			broken_disk = verdict;
		else if (verdict != broken_disk)
			return -65535;	/* two bytes blame different blocks */
	}

	return broken_disk;
}
115
116 int lock_stripe(struct mdinfo *info, unsigned long long start,
117 int chunk_size, int data_disks, sighandler_t *sig) {
118 int rv;
119 if(mlockall(MCL_CURRENT | MCL_FUTURE) != 0) {
120 return 2;
121 }
122
123 sig[0] = signal(SIGTERM, SIG_IGN);
124 sig[1] = signal(SIGINT, SIG_IGN);
125 sig[2] = signal(SIGQUIT, SIG_IGN);
126
127 rv = sysfs_set_num(info, NULL, "suspend_lo", start * chunk_size * data_disks);
128 rv |= sysfs_set_num(info, NULL, "suspend_hi", (start + 1) * chunk_size * data_disks);
129 return rv * 256;
130 }
131
132 int unlock_all_stripes(struct mdinfo *info, sighandler_t *sig) {
133 int rv;
134 rv = sysfs_set_num(info, NULL, "suspend_lo", 0x7FFFFFFFFFFFFFFFULL);
135 rv |= sysfs_set_num(info, NULL, "suspend_hi", 0);
136 rv |= sysfs_set_num(info, NULL, "suspend_lo", 0);
137
138 signal(SIGQUIT, sig[2]);
139 signal(SIGINT, sig[1]);
140 signal(SIGTERM, sig[0]);
141
142 if(munlockall() != 0)
143 return 3;
144 return rv * 256;
145 }
146
147
148 int check_stripes(struct mdinfo *info, int *source, unsigned long long *offsets,
149 int raid_disks, int chunk_size, int level, int layout,
150 unsigned long long start, unsigned long long length, char *name[],
151 enum repair repair, int failed_disk1, int failed_disk2)
152 {
153 /* read the data and p and q blocks, and check we got them right */
154 char *stripe_buf = xmalloc(raid_disks * chunk_size);
155 char **stripes = xmalloc(raid_disks * sizeof(char*));
156 char **blocks = xmalloc(raid_disks * sizeof(char*));
157 int *block_index_for_slot = xmalloc(raid_disks * sizeof(int));
158 uint8_t *p = xmalloc(chunk_size);
159 uint8_t *q = xmalloc(chunk_size);
160 int *results = xmalloc(chunk_size * sizeof(int));
161 sighandler_t *sig = xmalloc(3 * sizeof(sighandler_t));
162
163 int i;
164 int diskP, diskQ;
165 int data_disks = raid_disks - 2;
166 int err = 0;
167
168 extern int tables_ready;
169
170 if (!tables_ready)
171 make_tables();
172
173 for ( i = 0 ; i < raid_disks ; i++)
174 stripes[i] = stripe_buf + i * chunk_size;
175
176 while (length > 0) {
177 int disk;
178
179 printf("pos --> %llu\n", start);
180
181 err = lock_stripe(info, start, chunk_size, data_disks, sig);
182 if(err != 0) {
183 if (err != 2)
184 unlock_all_stripes(info, sig);
185 goto exitCheck;
186 }
187 for (i = 0 ; i < raid_disks ; i++) {
188 off64_t seek_res = lseek64(source[i], offsets[i] + start * chunk_size,
189 SEEK_SET);
190 if (seek_res < 0) {
191 fprintf(stderr, "lseek to source %d failed\n", i);
192 unlock_all_stripes(info, sig);
193 err = -1;
194 goto exitCheck;
195 }
196 int read_res = read(source[i], stripes[i], chunk_size);
197 if (read_res < chunk_size) {
198 fprintf(stderr, "Failed to read complete chunk disk %d, aborting\n", i);
199 unlock_all_stripes(info, sig);
200 err = -1;
201 goto exitCheck;
202 }
203 }
204 err = unlock_all_stripes(info, sig);
205 if(err != 0)
206 goto exitCheck;
207
208 for (i = 0 ; i < data_disks ; i++) {
209 int disk = geo_map(i, start, raid_disks, level, layout);
210 blocks[i] = stripes[disk];
211 block_index_for_slot[disk] = i;
212 printf("%d->%d\n", i, disk);
213 }
214
215 qsyndrome(p, q, (uint8_t**)blocks, data_disks, chunk_size);
216 diskP = geo_map(-1, start, raid_disks, level, layout);
217 diskQ = geo_map(-2, start, raid_disks, level, layout);
218 blocks[data_disks] = stripes[diskP];
219 block_index_for_slot[diskP] = data_disks;
220 blocks[data_disks+1] = stripes[diskQ];
221 block_index_for_slot[diskQ] = data_disks+1;
222
223 if (memcmp(p, stripes[diskP], chunk_size) != 0) {
224 printf("P(%d) wrong at %llu\n", diskP, start);
225 }
226 if (memcmp(q, stripes[diskQ], chunk_size) != 0) {
227 printf("Q(%d) wrong at %llu\n", diskQ, start);
228 }
229 raid6_collect(chunk_size, p, q, stripes[diskP], stripes[diskQ], results);
230 disk = raid6_stats(results, raid_disks, chunk_size);
231
232 if(disk >= -2) {
233 disk = geo_map(disk, start, raid_disks, level, layout);
234 }
235 if(disk >= 0) {
236 printf("Error detected at %llu: possible failed disk slot: %d --> %s\n",
237 start, disk, name[disk]);
238 }
239 if(disk == -65535) {
240 printf("Error detected at %llu: disk slot unknown\n", start);
241 }
242 if(repair == MANUAL_REPAIR) {
243 printf("Repairing stripe %llu\n", start);
244 printf("Assuming slots %d (%s) and %d (%s) are incorrect\n",
245 failed_disk1, name[failed_disk1],
246 failed_disk2, name[failed_disk2]);
247
248 if (failed_disk1 == diskQ || failed_disk2 == diskQ) {
249 char *all_but_failed_blocks[data_disks];
250 int failed_data_or_p;
251 int failed_block_index;
252
253 if (failed_disk1 == diskQ)
254 failed_data_or_p = failed_disk2;
255 else
256 failed_data_or_p = failed_disk1;
257 printf("Repairing D/P(%d) and Q\n", failed_data_or_p);
258 failed_block_index = block_index_for_slot[failed_data_or_p];
259 for (i=0; i < data_disks; i++)
260 if (failed_block_index == i)
261 all_but_failed_blocks[i] = stripes[diskP];
262 else
263 all_but_failed_blocks[i] = blocks[i];
264 xor_blocks(stripes[failed_data_or_p],
265 all_but_failed_blocks, data_disks, chunk_size);
266 qsyndrome(p, (uint8_t*)stripes[diskQ], (uint8_t**)blocks, data_disks, chunk_size);
267 } else {
268 ensure_zero_has_size(chunk_size);
269 if (failed_disk1 == diskP || failed_disk2 == diskP) {
270 int failed_data, failed_block_index;
271 if (failed_disk1 == diskP)
272 failed_data = failed_disk2;
273 else
274 failed_data = failed_disk1;
275 failed_block_index = block_index_for_slot[failed_data];
276 printf("Repairing D(%d) and P\n", failed_data);
277 raid6_datap_recov(raid_disks, chunk_size, failed_block_index, (uint8_t**)blocks);
278 } else {
279 printf("Repairing D and D\n");
280 int failed_block_index1 = block_index_for_slot[failed_disk1];
281 int failed_block_index2 = block_index_for_slot[failed_disk2];
282 if (failed_block_index1 > failed_block_index2) {
283 int t = failed_block_index1;
284 failed_block_index1 = failed_block_index2;
285 failed_block_index2 = t;
286 }
287 raid6_2data_recov(raid_disks, chunk_size, failed_block_index1, failed_block_index2, (uint8_t**)blocks);
288 }
289 }
290
291 err = lock_stripe(info, start, chunk_size, data_disks, sig);
292 if(err != 0) {
293 if (err != 2)
294 unlock_all_stripes(info, sig);
295 goto exitCheck;
296 }
297
298 int write_res1, write_res2;
299 off64_t seek_res;
300
301 seek_res = lseek64(source[failed_disk1],
302 offsets[failed_disk1] + start * chunk_size, SEEK_SET);
303 if (seek_res < 0) {
304 fprintf(stderr, "lseek failed for failed_disk1\n");
305 unlock_all_stripes(info, sig);
306 err = -1;
307 goto exitCheck;
308 }
309 write_res1 = write(source[failed_disk1], stripes[failed_disk1], chunk_size);
310
311
312 seek_res = lseek64(source[failed_disk2],
313 offsets[failed_disk2] + start * chunk_size, SEEK_SET);
314 if (seek_res < 0) {
315 fprintf(stderr, "lseek failed for failed_disk1\n");
316 unlock_all_stripes(info, sig);
317 err = -1;
318 goto exitCheck;
319 }
320 write_res2 = write(source[failed_disk2], stripes[failed_disk2], chunk_size);
321
322 err = unlock_all_stripes(info, sig);
323 if(err != 0)
324 goto exitCheck;
325
326 if (write_res1 != chunk_size || write_res2 != chunk_size) {
327 fprintf(stderr, "Failed to write a complete chunk.\n");
328 goto exitCheck;
329 }
330
331 } else if (disk >= 0 && repair == AUTO_REPAIR) {
332 printf("Auto-repairing slot %d (%s)\n", disk, name[disk]);
333 if (disk == diskQ) {
334 qsyndrome(p, (uint8_t*)stripes[diskQ], (uint8_t**)blocks, data_disks, chunk_size);
335 } else {
336 char *all_but_failed_blocks[data_disks];
337 int failed_block_index = block_index_for_slot[disk];
338 for (i=0; i < data_disks; i++)
339 if (failed_block_index == i)
340 all_but_failed_blocks[i] = stripes[diskP];
341 else
342 all_but_failed_blocks[i] = blocks[i];
343 xor_blocks(stripes[disk],
344 all_but_failed_blocks, data_disks, chunk_size);
345 }
346
347 err = lock_stripe(info, start, chunk_size, data_disks, sig);
348 if(err != 0) {
349 if (err != 2)
350 unlock_all_stripes(info, sig);
351 goto exitCheck;
352 }
353
354 lseek64(source[disk], offsets[disk] + start * chunk_size, 0);
355 int write_res = write(source[disk], stripes[disk], chunk_size);
356
357 err = unlock_all_stripes(info, sig);
358 if(err != 0 || write_res != chunk_size)
359 goto exitCheck;
360
361 if (write_res != chunk_size) {
362 fprintf(stderr, "Failed to write a full chunk.\n");
363 goto exitCheck;
364 }
365 }
366
367
368 length--;
369 start++;
370 }
371
372 exitCheck:
373
374 free(stripe_buf);
375 free(stripes);
376 free(blocks);
377 free(block_index_for_slot);
378 free(p);
379 free(q);
380 free(results);
381 free(sig);
382
383 return err;
384 }
385
/*
 * Parse str as a non-negative decimal number.
 *
 * On success the value is returned and *err is left untouched.  When str
 * is empty, has trailing characters, carries a minus sign or does not fit
 * in an unsigned long long, *err is set to str and 0 is returned.
 */
unsigned long long getnum(char *str, char **err)
{
	char *end;
	unsigned long long value;

	/* strtoull() reports overflow only through errno */
	errno = 0;
	value = strtoull(str, &end, 10);

	/*
	 * strtoull() accepts "-1" and wraps it to a huge value; a fully
	 * consumed string can only contain '-' as that sign, so any '-'
	 * marks the input as invalid.
	 */
	if (end == str || *end != '\0' || errno == ERANGE ||
	    strchr(str, '-') != NULL) {
		*err = str;
		return 0;
	}
	return value;
}
396
397 int main(int argc, char *argv[])
398 {
399 /* md_device start length */
400 int *fds = NULL;
401 char *buf = NULL;
402 char **disk_name = NULL;
403 unsigned long long *offsets = NULL;
404 int raid_disks = 0;
405 int active_disks;
406 int chunk_size = 0;
407 int layout = -1;
408 int level = 6;
409 enum repair repair = NO_REPAIR;
410 int failed_disk1 = -1;
411 int failed_disk2 = -1;
412 unsigned long long start, length;
413 int i;
414 int mdfd;
415 struct mdinfo *info = NULL, *comp = NULL;
416 char *err = NULL;
417 int exit_err = 0;
418 int close_flag = 0;
419 char *prg = strrchr(argv[0], '/');
420
421 if (prg == NULL)
422 prg = argv[0];
423 else
424 prg++;
425
426 if (argc < 4) {
427 fprintf(stderr, "Usage: %s md_device start_stripe length_stripes [autorepair]\n", prg);
428 fprintf(stderr, " or: %s md_device repair stripe failed_slot_1 failed_slot_2\n", prg);
429 exit_err = 1;
430 goto exitHere;
431 }
432
433 mdfd = open(argv[1], O_RDONLY);
434 if(mdfd < 0) {
435 perror(argv[1]);
436 fprintf(stderr, "%s: cannot open %s\n", prg, argv[1]);
437 exit_err = 2;
438 goto exitHere;
439 }
440
441 info = sysfs_read(mdfd, NULL,
442 GET_LEVEL|
443 GET_LAYOUT|
444 GET_DISKS|
445 GET_DEGRADED |
446 GET_COMPONENT|
447 GET_CHUNK|
448 GET_DEVS|
449 GET_OFFSET|
450 GET_SIZE);
451
452 if(info == NULL) {
453 fprintf(stderr, "%s: Error reading sysfs information of %s\n", prg, argv[1]);
454 exit_err = 9;
455 goto exitHere;
456 }
457
458 if(info->array.level != level) {
459 fprintf(stderr, "%s: %s not a RAID-6\n", prg, argv[1]);
460 exit_err = 3;
461 goto exitHere;
462 }
463
464 if(info->array.failed_disks > 0) {
465 fprintf(stderr, "%s: %s degraded array\n", prg, argv[1]);
466 exit_err = 8;
467 goto exitHere;
468 }
469
470 printf("layout: %d\n", info->array.layout);
471 printf("disks: %d\n", info->array.raid_disks);
472 printf("component size: %llu\n", info->component_size * 512);
473 printf("total stripes: %llu\n", (info->component_size * 512) / info->array.chunk_size);
474 printf("chunk size: %d\n", info->array.chunk_size);
475 printf("\n");
476
477 comp = info->devs;
478 for(i = 0, active_disks = 0; active_disks < info->array.raid_disks; i++) {
479 printf("disk: %d - offset: %llu - size: %llu - name: %s - slot: %d\n",
480 i, comp->data_offset * 512, comp->component_size * 512,
481 map_dev(comp->disk.major, comp->disk.minor, 0),
482 comp->disk.raid_disk);
483 if(comp->disk.raid_disk >= 0)
484 active_disks++;
485 comp = comp->next;
486 }
487 printf("\n");
488
489 close(mdfd);
490
491 raid_disks = info->array.raid_disks;
492 chunk_size = info->array.chunk_size;
493 layout = info->array.layout;
494 if (strcmp(argv[2], "repair")==0) {
495 if (argc < 6) {
496 fprintf(stderr, "For repair mode, call %s md_device repair stripe failed_slot_1 failed_slot_2\n", prg);
497 exit_err = 1;
498 goto exitHere;
499 }
500 repair = MANUAL_REPAIR;
501 start = getnum(argv[3], &err);
502 length = 1;
503 failed_disk1 = getnum(argv[4], &err);
504 failed_disk2 = getnum(argv[5], &err);
505
506 if(failed_disk1 >= info->array.raid_disks) {
507 fprintf(stderr, "%s: failed_slot_1 index is higher than number of devices in raid\n", prg);
508 exit_err = 4;
509 goto exitHere;
510 }
511 if(failed_disk2 >= info->array.raid_disks) {
512 fprintf(stderr, "%s: failed_slot_2 index is higher than number of devices in raid\n", prg);
513 exit_err = 4;
514 goto exitHere;
515 }
516 if(failed_disk1 == failed_disk2) {
517 fprintf(stderr, "%s: failed_slot_1 and failed_slot_2 are the same\n", prg);
518 exit_err = 4;
519 goto exitHere;
520 }
521 }
522 else {
523 start = getnum(argv[2], &err);
524 length = getnum(argv[3], &err);
525 if (argc >= 5 && strcmp(argv[4], "autorepair")==0)
526 repair = AUTO_REPAIR;
527 }
528
529 if (err) {
530 fprintf(stderr, "%s: Bad number: %s\n", prg, err);
531 exit_err = 4;
532 goto exitHere;
533 }
534
535 if(start > ((info->component_size * 512) / chunk_size)) {
536 start = (info->component_size * 512) / chunk_size;
537 fprintf(stderr, "%s: start beyond disks size\n", prg);
538 }
539
540 if((length == 0) ||
541 ((length + start) > ((info->component_size * 512) / chunk_size))) {
542 length = (info->component_size * 512) / chunk_size - start;
543 }
544
545 disk_name = xmalloc(raid_disks * sizeof(*disk_name));
546 fds = xmalloc(raid_disks * sizeof(*fds));
547 offsets = xcalloc(raid_disks, sizeof(*offsets));
548 buf = xmalloc(raid_disks * chunk_size);
549
550 for(i=0; i<raid_disks; i++) {
551 fds[i] = -1;
552 }
553 close_flag = 1;
554
555 comp = info->devs;
556 for (i=0, active_disks=0; active_disks<raid_disks; i++) {
557 int disk_slot = comp->disk.raid_disk;
558 if(disk_slot >= 0) {
559 disk_name[disk_slot] = map_dev(comp->disk.major, comp->disk.minor, 0);
560 offsets[disk_slot] = comp->data_offset * 512;
561 fds[disk_slot] = open(disk_name[disk_slot], O_RDWR);
562 if (fds[disk_slot] < 0) {
563 perror(disk_name[disk_slot]);
564 fprintf(stderr,"%s: cannot open %s\n", prg, disk_name[disk_slot]);
565 exit_err = 6;
566 goto exitHere;
567 }
568 active_disks++;
569 }
570 comp = comp->next;
571 }
572
573 int rv = check_stripes(info, fds, offsets,
574 raid_disks, chunk_size, level, layout,
575 start, length, disk_name, repair, failed_disk1, failed_disk2);
576 if (rv != 0) {
577 fprintf(stderr,
578 "%s: check_stripes returned %d\n", prg, rv);
579 exit_err = 7;
580 goto exitHere;
581 }
582
583 exitHere:
584
585 if (close_flag)
586 for(i = 0; i < raid_disks; i++)
587 close(fds[i]);
588
589 free(disk_name);
590 free(fds);
591 free(offsets);
592 free(buf);
593
594 exit(exit_err);
595 }