]> git.ipfire.org Git - thirdparty/mdadm.git/blame - raid6check.c
platform-intel - cache 'intel_devices' for a few seconds.
[thirdparty/mdadm.git] / raid6check.c
CommitLineData
979afcb8
PS
/*
 * raid6check - extended consistency check for RAID-6
 *
 * Copyright (C) 2011 Piergiorgio Sartor
 *
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *
 * Author: Piergiorgio Sartor
 * Based on "restripe.c" from "mdadm" codebase
 */
24
#include "mdadm.h"
#include <errno.h>
#include <signal.h>
#include <stdint.h>
#include <sys/mman.h>
979afcb8
PS
29
/*
 * Helpers implemented outside this file.  The file header says this tool
 * is based on restripe.c - presumably that is where they are defined.
 */

/* Slot lookup for one block of a stripe; check_stripes() passes -1 / -2
 * as 'block' to obtain the P / Q slot. */
int geo_map(int block, unsigned long long stripe,
	    int raid_disks, int level, int layout);

/* Parity computation and lookup-table setup used by check_stripes(). */
void make_tables(void);
void qsyndrome(uint8_t *p, uint8_t *q, uint8_t **sources,
	       int disks, int size);
void xor_blocks(char *target, char **sources, int disks, int size);

/* Recovery helpers used by the manual repair mode of check_stripes(). */
void ensure_zero_has_size(int chunk_size);
void raid6_datap_recov(int disks, size_t bytes, int faila,
		       uint8_t **ptrs);
void raid6_2data_recov(int disks, size_t bytes, int faila, int failb,
		       uint8_t **ptrs);
39
979afcb8
PS
40
/*
 * Collect per-byte consistency information for one stripe.
 *
 * p, q           parity computed from the data blocks
 * chunkP, chunkQ parity as read from the P and Q devices
 * results        output, one entry per byte of the chunk:
 *                  -255    P and Q both match
 *                  -1      only P differs
 *                  -2      only Q differs
 *                  0..254  both differ; the value is
 *                          (gflog[Qdiff] - gflog[Pdiff]) mod 255, which the
 *                          caller treats as the index of the suspect data
 *                          block
 */
void raid6_collect(int chunk_size, uint8_t *p, uint8_t *q,
		   char *chunkP, char *chunkQ, int *results)
{
	extern uint8_t raid6_gflog[];
	int pos;

	for (pos = 0; pos < chunk_size; pos++) {
		uint8_t p_diff = (uint8_t)chunkP[pos] ^ p[pos];
		uint8_t q_diff = (uint8_t)chunkQ[pos] ^ q[pos];
		int verdict;

		if (p_diff == 0 && q_diff == 0) {
			verdict = -255;
		} else if (q_diff == 0) {
			verdict = -1;
		} else if (p_diff == 0) {
			verdict = -2;
		} else {
			verdict = raid6_gflog[q_diff] - raid6_gflog[p_diff];
			if (verdict < 0)
				verdict += 255;
		}

		results[pos] = verdict;
	}
}
70
/*
 * Try to find out if a specific disk has problems.
 *
 * results    per-byte verdicts as filled in by raid6_collect()
 * raid_disks any verdict >= raid_disks cannot name a real device
 * chunk_size number of entries in results
 *
 * Returns:
 *   -255    no byte reported a mismatch
 *   -65535  mismatches exist but do not agree on a single suspect (or a
 *           verdict is out of range)
 *   other   the one verdict shared by every mismatching byte
 *           (-1 = P, -2 = Q, >= 0 = data block index)
 *
 * The previous state machine only replaced the suspect with -65535 on the
 * iteration *after* a disagreement was seen, so a disagreement on the very
 * last byte leaked a concrete suspect to the caller.  Returning as soon as
 * the result is known to be ambiguous removes that window.
 */
int raid6_stats(int *results, int raid_disks, int chunk_size)
{
	enum { NO_MISMATCH = -255, UNKNOWN_DISK = -65535 };
	int suspect = NO_MISMATCH;
	int i;

	for (i = 0; i < chunk_size; i++) {
		int verdict = results[i];

		if (verdict == NO_MISMATCH)
			continue;

		if (verdict >= raid_disks)
			return UNKNOWN_DISK;

		if (suspect == NO_MISMATCH)
			suspect = verdict;
		else if (verdict != suspect)
			return UNKNOWN_DISK;
	}

	return suspect;
}
109
351d7680
RB
110int lock_stripe(struct mdinfo *info, unsigned long long start,
111 int chunk_size, int data_disks, sighandler_t *sig) {
112 int rv;
113 if(mlockall(MCL_CURRENT | MCL_FUTURE) != 0) {
114 return 2;
115 }
116
117 sig[0] = signal(SIGTERM, SIG_IGN);
118 sig[1] = signal(SIGINT, SIG_IGN);
119 sig[2] = signal(SIGQUIT, SIG_IGN);
120
121 rv = sysfs_set_num(info, NULL, "suspend_lo", start * chunk_size * data_disks);
122 rv |= sysfs_set_num(info, NULL, "suspend_hi", (start + 1) * chunk_size * data_disks);
123 return rv * 256;
124}
125
126int unlock_all_stripes(struct mdinfo *info, sighandler_t *sig) {
127 int rv;
128 rv = sysfs_set_num(info, NULL, "suspend_lo", 0x7FFFFFFFFFFFFFFFULL);
129 rv |= sysfs_set_num(info, NULL, "suspend_hi", 0);
130 rv |= sysfs_set_num(info, NULL, "suspend_lo", 0);
131
132 signal(SIGQUIT, sig[2]);
133 signal(SIGINT, sig[1]);
134 signal(SIGTERM, sig[0]);
135
136 if(munlockall() != 0)
137 return 3;
138 return rv * 256;
139}
140
141
8d8ab389 142int check_stripes(struct mdinfo *info, int *source, unsigned long long *offsets,
979afcb8 143 int raid_disks, int chunk_size, int level, int layout,
f2e29ad6
RB
144 unsigned long long start, unsigned long long length, char *name[],
145 int repair, int failed_disk1, int failed_disk2)
979afcb8
PS
146{
147 /* read the data and p and q blocks, and check we got them right */
503975b9
N
148 char *stripe_buf = xmalloc(raid_disks * chunk_size);
149 char **stripes = xmalloc(raid_disks * sizeof(char*));
150 char **blocks = xmalloc(raid_disks * sizeof(char*));
696e95a1 151 int *block_index_for_slot = xmalloc(raid_disks * sizeof(int));
503975b9
N
152 uint8_t *p = xmalloc(chunk_size);
153 uint8_t *q = xmalloc(chunk_size);
154 int *results = xmalloc(chunk_size * sizeof(int));
351d7680 155 sighandler_t *sig = xmalloc(3 * sizeof(sighandler_t));
979afcb8
PS
156
157 int i;
158 int diskP, diskQ;
159 int data_disks = raid_disks - 2;
af3c3750 160 int err = 0;
979afcb8
PS
161
162 extern int tables_ready;
163
164 if (!tables_ready)
165 make_tables();
166
167 for ( i = 0 ; i < raid_disks ; i++)
168 stripes[i] = stripe_buf + i * chunk_size;
169
170 while (length > 0) {
171 int disk;
172
af3c3750
PS
173 printf("pos --> %llu\n", start);
174
351d7680
RB
175 err = lock_stripe(info, start, chunk_size, data_disks, sig);
176 if(err != 0) {
177 if (err != 2)
178 unlock_all_stripes(info, sig);
8d8ab389
PS
179 goto exitCheck;
180 }
979afcb8 181 for (i = 0 ; i < raid_disks ; i++) {
af3c3750 182 lseek64(source[i], offsets[i] + start * chunk_size, 0);
979afcb8
PS
183 read(source[i], stripes[i], chunk_size);
184 }
351d7680
RB
185 err = unlock_all_stripes(info, sig);
186 if(err != 0)
8d8ab389 187 goto exitCheck;
8d8ab389 188
979afcb8 189 for (i = 0 ; i < data_disks ; i++) {
af3c3750 190 int disk = geo_map(i, start, raid_disks, level, layout);
979afcb8 191 blocks[i] = stripes[disk];
696e95a1 192 block_index_for_slot[disk] = i;
979afcb8
PS
193 printf("%d->%d\n", i, disk);
194 }
195
196 qsyndrome(p, q, (uint8_t**)blocks, data_disks, chunk_size);
af3c3750 197 diskP = geo_map(-1, start, raid_disks, level, layout);
f2e29ad6
RB
198 diskQ = geo_map(-2, start, raid_disks, level, layout);
199 blocks[data_disks] = stripes[diskP];
696e95a1 200 block_index_for_slot[diskP] = data_disks;
f2e29ad6 201 blocks[data_disks+1] = stripes[diskQ];
696e95a1 202 block_index_for_slot[diskQ] = data_disks+1;
f2e29ad6 203
979afcb8 204 if (memcmp(p, stripes[diskP], chunk_size) != 0) {
af3c3750 205 printf("P(%d) wrong at %llu\n", diskP, start);
979afcb8 206 }
979afcb8 207 if (memcmp(q, stripes[diskQ], chunk_size) != 0) {
af3c3750 208 printf("Q(%d) wrong at %llu\n", diskQ, start);
979afcb8 209 }
af3c3750 210 raid6_collect(chunk_size, p, q, stripes[diskP], stripes[diskQ], results);
979afcb8
PS
211 disk = raid6_stats(results, raid_disks, chunk_size);
212
213 if(disk >= -2) {
af3c3750 214 disk = geo_map(disk, start, raid_disks, level, layout);
979afcb8
PS
215 }
216 if(disk >= 0) {
af3c3750
PS
217 printf("Error detected at %llu: possible failed disk slot: %d --> %s\n",
218 start, disk, name[disk]);
979afcb8
PS
219 }
220 if(disk == -65535) {
af3c3750 221 printf("Error detected at %llu: disk slot unknown\n", start);
979afcb8 222 }
f2e29ad6
RB
223 if(repair == 1) {
224 printf("Repairing stripe %llu\n", start);
225 printf("Assuming slots %d (%s) and %d (%s) are incorrect\n",
226 failed_disk1, name[failed_disk1],
227 failed_disk2, name[failed_disk2]);
228
229 if (failed_disk1 == diskQ || failed_disk2 == diskQ) {
230 char *all_but_failed_blocks[data_disks];
696e95a1 231 int failed_data_or_p;
f2e29ad6
RB
232 int failed_block_index;
233
234 if (failed_disk1 == diskQ)
696e95a1 235 failed_data_or_p = failed_disk2;
f2e29ad6 236 else
696e95a1
RB
237 failed_data_or_p = failed_disk1;
238 printf("Repairing D/P(%d) and Q\n", failed_data_or_p);
239 failed_block_index = block_index_for_slot[failed_data_or_p];
f2e29ad6
RB
240 for (i=0; i < data_disks; i++)
241 if (failed_block_index == i)
242 all_but_failed_blocks[i] = stripes[diskP];
243 else
244 all_but_failed_blocks[i] = blocks[i];
696e95a1 245 xor_blocks(stripes[failed_data_or_p],
f2e29ad6
RB
246 all_but_failed_blocks, data_disks, chunk_size);
247 qsyndrome(p, (uint8_t*)stripes[diskQ], (uint8_t**)blocks, data_disks, chunk_size);
248 } else {
249 ensure_zero_has_size(chunk_size);
250 if (failed_disk1 == diskP || failed_disk2 == diskP) {
251 int failed_data, failed_block_index;
252 if (failed_disk1 == diskP)
253 failed_data = failed_disk2;
254 else
255 failed_data = failed_disk1;
696e95a1 256 failed_block_index = block_index_for_slot[failed_data];
f2e29ad6
RB
257 printf("Repairing D(%d) and P\n", failed_data);
258 raid6_datap_recov(raid_disks, chunk_size, failed_block_index, (uint8_t**)blocks);
259 } else {
260 printf("Repairing D and D\n");
696e95a1
RB
261 int failed_block_index1 = block_index_for_slot[failed_disk1];
262 int failed_block_index2 = block_index_for_slot[failed_disk2];
f2e29ad6
RB
263 if (failed_block_index1 > failed_block_index2) {
264 int t = failed_block_index1;
265 failed_block_index1 = failed_block_index2;
266 failed_block_index2 = t;
267 }
268 raid6_2data_recov(raid_disks, chunk_size, failed_block_index1, failed_block_index2, (uint8_t**)blocks);
269 }
270 }
351d7680
RB
271
272 err = lock_stripe(info, start, chunk_size, data_disks, sig);
273 if(err != 0) {
274 if (err != 2)
275 unlock_all_stripes(info, sig);
f2e29ad6
RB
276 goto exitCheck;
277 }
351d7680 278
f2e29ad6
RB
279 lseek64(source[failed_disk1], offsets[failed_disk1] + start * chunk_size, 0);
280 write(source[failed_disk1], stripes[failed_disk1], chunk_size);
281 lseek64(source[failed_disk2], offsets[failed_disk2] + start * chunk_size, 0);
282 write(source[failed_disk2], stripes[failed_disk2], chunk_size);
f2e29ad6 283
8a63c731
RB
284 err = unlock_all_stripes(info, sig);
285 if(err != 0)
286 goto exitCheck;
287 } else if (disk >= 0 && repair == 2) {
288 printf("Auto-repairing slot %d (%s)\n", disk, name[disk]);
289 if (disk == diskQ) {
290 qsyndrome(p, (uint8_t*)stripes[diskQ], (uint8_t**)blocks, data_disks, chunk_size);
291 } else {
292 char *all_but_failed_blocks[data_disks];
293 int failed_block_index = block_index_for_slot[disk];
294 for (i=0; i < data_disks; i++)
295 if (failed_block_index == i)
296 all_but_failed_blocks[i] = stripes[diskP];
297 else
298 all_but_failed_blocks[i] = blocks[i];
299 xor_blocks(stripes[disk],
300 all_but_failed_blocks, data_disks, chunk_size);
301 }
302
303 err = lock_stripe(info, start, chunk_size, data_disks, sig);
304 if(err != 0) {
305 if (err != 2)
306 unlock_all_stripes(info, sig);
307 goto exitCheck;
308 }
309
310 lseek64(source[disk], offsets[disk] + start * chunk_size, 0);
311 write(source[disk], stripes[disk], chunk_size);
312
351d7680
RB
313 err = unlock_all_stripes(info, sig);
314 if(err != 0)
f2e29ad6 315 goto exitCheck;
f2e29ad6
RB
316 }
317
979afcb8 318
af3c3750
PS
319 length--;
320 start++;
979afcb8
PS
321 }
322
af3c3750
PS
323exitCheck:
324
979afcb8
PS
325 free(stripe_buf);
326 free(stripes);
327 free(blocks);
328 free(p);
329 free(q);
330 free(results);
331
af3c3750 332 return err;
979afcb8
PS
333}
334
/*
 * Parse 'str' as an unsigned decimal number.
 *
 * On success the value is returned and *err is left untouched, so a
 * caller can chain several calls and test *err once afterwards.
 * On failure *err is set to 'str' and 0 is returned.  Failure means: no
 * digits, trailing characters, a value that does not fit in unsigned long
 * long, or a minus sign (strtoull() would otherwise accept "-1" and return
 * a huge wrapped value).
 */
unsigned long long getnum(char *str, char **err)
{
	char *end;
	unsigned long long value;

	errno = 0;
	value = strtoull(str, &end, 10);

	if (end == str || *end != '\0' || errno == ERANGE ||
	    strchr(str, '-') != NULL) {
		*err = str;
		return 0;
	}
	return value;
}
345
346int main(int argc, char *argv[])
347{
a9c2c6c6 348 /* md_device start length */
af3c3750
PS
349 int *fds = NULL;
350 char *buf = NULL;
351 char **disk_name = NULL;
352 unsigned long long *offsets = NULL;
353 int raid_disks = 0;
2cf31121 354 int active_disks;
af3c3750
PS
355 int chunk_size = 0;
356 int layout = -1;
979afcb8 357 int level = 6;
f2e29ad6
RB
358 int repair = 0;
359 int failed_disk1, failed_disk2;
979afcb8
PS
360 unsigned long long start, length;
361 int i;
a9c2c6c6 362 int mdfd;
8d8ab389 363 struct mdinfo *info = NULL, *comp = NULL;
979afcb8 364 char *err = NULL;
af3c3750
PS
365 int exit_err = 0;
366 int close_flag = 0;
367 char *prg = strrchr(argv[0], '/');
368
369 if (prg == NULL)
370 prg = argv[0];
371 else
372 prg++;
373
374 if (argc < 4) {
8a63c731 375 fprintf(stderr, "Usage: %s md_device start_stripe length_stripes [autorepair]\n", prg);
f2e29ad6 376 fprintf(stderr, " or: %s md_device repair stripe failed_slot_1 failed_slot_2\n", prg);
af3c3750
PS
377 exit_err = 1;
378 goto exitHere;
979afcb8
PS
379 }
380
a9c2c6c6
PS
381 mdfd = open(argv[1], O_RDONLY);
382 if(mdfd < 0) {
383 perror(argv[1]);
e7b84f9d 384 fprintf(stderr, "%s: cannot open %s\n", prg, argv[1]);
af3c3750
PS
385 exit_err = 2;
386 goto exitHere;
a9c2c6c6
PS
387 }
388
389 info = sysfs_read(mdfd, -1,
390 GET_LEVEL|
391 GET_LAYOUT|
392 GET_DISKS|
2cf31121 393 GET_DEGRADED |
a9c2c6c6
PS
394 GET_COMPONENT|
395 GET_CHUNK|
396 GET_DEVS|
397 GET_OFFSET|
398 GET_SIZE);
399
8d8ab389
PS
400 if(info == NULL) {
401 fprintf(stderr, "%s: Error reading sysfs information of %s\n", prg, argv[1]);
402 exit_err = 9;
403 goto exitHere;
404 }
405
a9c2c6c6
PS
406 if(info->array.level != level) {
407 fprintf(stderr, "%s: %s not a RAID-6\n", prg, argv[1]);
af3c3750
PS
408 exit_err = 3;
409 goto exitHere;
a9c2c6c6
PS
410 }
411
2cf31121
PS
412 if(info->array.failed_disks > 0) {
413 fprintf(stderr, "%s: %s degraded array\n", prg, argv[1]);
414 exit_err = 8;
415 goto exitHere;
416 }
417
a9c2c6c6
PS
418 printf("layout: %d\n", info->array.layout);
419 printf("disks: %d\n", info->array.raid_disks);
af3c3750
PS
420 printf("component size: %llu\n", info->component_size * 512);
421 printf("total stripes: %llu\n", (info->component_size * 512) / info->array.chunk_size);
a9c2c6c6
PS
422 printf("chunk size: %d\n", info->array.chunk_size);
423 printf("\n");
424
425 comp = info->devs;
2cf31121 426 for(i = 0, active_disks = 0; active_disks < info->array.raid_disks; i++) {
a9c2c6c6 427 printf("disk: %d - offset: %llu - size: %llu - name: %s - slot: %d\n",
af3c3750 428 i, comp->data_offset * 512, comp->component_size * 512,
a9c2c6c6
PS
429 map_dev(comp->disk.major, comp->disk.minor, 0),
430 comp->disk.raid_disk);
2cf31121
PS
431 if(comp->disk.raid_disk >= 0)
432 active_disks++;
a9c2c6c6
PS
433 comp = comp->next;
434 }
435 printf("\n");
436
437 close(mdfd);
438
439 raid_disks = info->array.raid_disks;
440 chunk_size = info->array.chunk_size;
441 layout = info->array.layout;
f2e29ad6
RB
442 if (strcmp(argv[2], "repair")==0) {
443 if (argc < 6) {
444 fprintf(stderr, "For repair mode, call %s md_device repair stripe failed_slot_1 failed_slot_2\n", prg);
445 exit_err = 1;
446 goto exitHere;
447 }
448 repair = 1;
449 start = getnum(argv[3], &err);
450 length = 1;
451 failed_disk1 = getnum(argv[4], &err);
452 failed_disk2 = getnum(argv[5], &err);
453
b67e45b8 454 if(failed_disk1 >= info->array.raid_disks) {
f2e29ad6
RB
455 fprintf(stderr, "%s: failed_slot_1 index is higher than number of devices in raid\n", prg);
456 exit_err = 4;
457 goto exitHere;
458 }
b67e45b8 459 if(failed_disk2 >= info->array.raid_disks) {
f2e29ad6
RB
460 fprintf(stderr, "%s: failed_slot_2 index is higher than number of devices in raid\n", prg);
461 exit_err = 4;
462 goto exitHere;
463 }
464 if(failed_disk1 == failed_disk2) {
465 fprintf(stderr, "%s: failed_slot_1 and failed_slot_2 are the same\n", prg);
466 exit_err = 4;
467 goto exitHere;
468 }
469 }
470 else {
471 start = getnum(argv[2], &err);
472 length = getnum(argv[3], &err);
8a63c731
RB
473 if (argc >= 5 && strcmp(argv[4], "autorepair")==0)
474 repair = 2;
f2e29ad6 475 }
a9c2c6c6 476
979afcb8 477 if (err) {
a9c2c6c6 478 fprintf(stderr, "%s: Bad number: %s\n", prg, err);
af3c3750
PS
479 exit_err = 4;
480 goto exitHere;
979afcb8 481 }
a9c2c6c6 482
af3c3750
PS
483 if(start > ((info->component_size * 512) / chunk_size)) {
484 start = (info->component_size * 512) / chunk_size;
485 fprintf(stderr, "%s: start beyond disks size\n", prg);
486 }
a9c2c6c6 487
af3c3750
PS
488 if((length == 0) ||
489 ((length + start) > ((info->component_size * 512) / chunk_size))) {
490 length = (info->component_size * 512) / chunk_size - start;
979afcb8 491 }
a9c2c6c6 492
503975b9
N
493 disk_name = xmalloc(raid_disks * sizeof(*disk_name));
494 fds = xmalloc(raid_disks * sizeof(*fds));
495 offsets = xcalloc(raid_disks, sizeof(*offsets));
496 buf = xmalloc(raid_disks * chunk_size);
af3c3750 497
af3c3750
PS
498 for(i=0; i<raid_disks; i++) {
499 fds[i] = -1;
500 }
501 close_flag = 1;
979afcb8 502
a9c2c6c6 503 comp = info->devs;
2cf31121 504 for (i=0, active_disks=0; active_disks<raid_disks; i++) {
a9c2c6c6 505 int disk_slot = comp->disk.raid_disk;
2cf31121
PS
506 if(disk_slot >= 0) {
507 disk_name[disk_slot] = map_dev(comp->disk.major, comp->disk.minor, 0);
508 offsets[disk_slot] = comp->data_offset * 512;
509 fds[disk_slot] = open(disk_name[disk_slot], O_RDWR);
510 if (fds[disk_slot] < 0) {
511 perror(disk_name[disk_slot]);
512 fprintf(stderr,"%s: cannot open %s\n", prg, disk_name[disk_slot]);
513 exit_err = 6;
514 goto exitHere;
515 }
516 active_disks++;
979afcb8 517 }
a9c2c6c6 518 comp = comp->next;
979afcb8
PS
519 }
520
8d8ab389 521 int rv = check_stripes(info, fds, offsets,
979afcb8 522 raid_disks, chunk_size, level, layout,
f2e29ad6 523 start, length, disk_name, repair, failed_disk1, failed_disk2);
979afcb8
PS
524 if (rv != 0) {
525 fprintf(stderr,
a9c2c6c6 526 "%s: check_stripes returned %d\n", prg, rv);
af3c3750
PS
527 exit_err = 7;
528 goto exitHere;
979afcb8
PS
529 }
530
af3c3750
PS
531exitHere:
532
533 if (close_flag)
534 for(i = 0; i < raid_disks; i++)
535 close(fds[i]);
536
a9c2c6c6 537 free(disk_name);
979afcb8
PS
538 free(fds);
539 free(offsets);
540 free(buf);
541
af3c3750 542 exit(exit_err);
979afcb8 543}