]> git.ipfire.org Git - thirdparty/mdadm.git/blob - raid6check.c
raid6check: Repair mode used geo_map incorrectly
[thirdparty/mdadm.git] / raid6check.c
1 /*
2 * raid6check - extended consistency check for RAID-6
3 *
4 * Copyright (C) 2011 Piergiorgio Sartor
5 *
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 *
21 * Author: Piergiorgio Sartor
22 * Based on "restripe.c" from "mdadm" codebase
23 */
24
#include "mdadm.h"
#include <errno.h>
#include <signal.h>
#include <stdint.h>
#include <sys/mman.h>
#include <unistd.h>
29
/*
 * Helpers implemented outside this file (presumably restripe.c, which the
 * file header names as the basis of this tool -- confirm).  As used below:
 *  - geo_map() returns the device slot holding a given block of a stripe;
 *    the callers here pass 0..data_disks-1 for data, -1 for P and -2 for Q.
 *  - qsyndrome() fills p and q from the data blocks; the results are
 *    compared against the P and Q chunks read from disk.
 *  - make_tables() is called once when tables_ready is zero.
 *  - ensure_zero_has_size() is called before either raid6_*_recov() call.
 *  - xor_blocks(), raid6_datap_recov() and raid6_2data_recov() are used by
 *    repair mode to rebuild the two chunks assumed to be wrong.
 */
30 int geo_map(int block, unsigned long long stripe, int raid_disks,
31 int level, int layout);
32 void qsyndrome(uint8_t *p, uint8_t *q, uint8_t **sources, int disks, int size);
33 void make_tables(void);
34 void ensure_zero_has_size(int chunk_size);
35 void raid6_datap_recov(int disks, size_t bytes, int faila, uint8_t **ptrs);
36 void raid6_2data_recov(int disks, size_t bytes, int faila, int failb,
37 uint8_t **ptrs);
38 void xor_blocks(char *target, char **sources, int disks, int size);
39
40
/*
 * Classify every byte position of one stripe.
 *
 * p/q are the freshly computed syndromes, chunkP/chunkQ the parity chunks
 * as read from disk.  For each of the chunk_size positions results[] gets:
 *   -255  both P and Q match
 *   -1    only P differs
 *   -2    only Q differs
 *   >= 0  both differ: gflog(Q delta) - gflog(P delta), wrapped into
 *         0..254 (the caller treats this as a data block index)
 */
void raid6_collect(int chunk_size, uint8_t *p, uint8_t *q,
		   char *chunkP, char *chunkQ, int *results)
{
	extern uint8_t raid6_gflog[];
	int pos;

	for (pos = 0; pos < chunk_size; pos++) {
		uint8_t p_delta = (uint8_t)chunkP[pos] ^ p[pos];
		uint8_t q_delta = (uint8_t)chunkQ[pos] ^ q[pos];
		int verdict;

		if (p_delta == 0 && q_delta == 0) {
			verdict = -255;
		} else if (q_delta == 0) {
			verdict = -1;
		} else if (p_delta == 0) {
			verdict = -2;
		} else {
			verdict = raid6_gflog[q_delta] - raid6_gflog[p_delta];
			if (verdict < 0)
				verdict += 255;
		}

		results[pos] = verdict;
	}
}
70
/*
 * Try to find out if a specific disk has problems.
 *
 * results:    per-byte classification as produced by raid6_collect()
 *             (-255 means "no error at this position").
 * raid_disks: upper bound for a plausible index; any value >= raid_disks
 *             makes the whole chunk inconclusive.
 * chunk_size: number of entries in results[].
 *
 * Returns -255 if no position reported an error, the common value if every
 * erroneous position agrees on it (-1 = P, -2 = Q, >= 0 = data block), and
 * -65535 if the positions disagree or an index is out of range.
 *
 * Disagreement is reported as soon as it is seen.  Previously a mismatch
 * first detected on the very last position returned that position's value
 * instead of -65535.
 *
 * NOTE(review): only 0..raid_disks-3 are data block indices, so the bound
 * could arguably be raid_disks - 2 -- confirm against geo_map().
 */
int raid6_stats(int *results, int raid_disks, int chunk_size)
{
	int suspect = -255;
	int i;

	for (i = 0; i < chunk_size; i++) {
		int r = results[i];

		if (r == -255)
			continue;	/* this byte is consistent */

		if (r >= raid_disks)
			return -65535;	/* index cannot name a device */

		if (suspect == -255)
			suspect = r;	/* first erroneous byte sets the candidate */
		else if (r != suspect)
			return -65535;	/* bytes blame different devices */
	}

	return suspect;
}
109
110 int check_stripes(struct mdinfo *info, int *source, unsigned long long *offsets,
111 int raid_disks, int chunk_size, int level, int layout,
112 unsigned long long start, unsigned long long length, char *name[],
113 int repair, int failed_disk1, int failed_disk2)
114 {
115 /* read the data and p and q blocks, and check we got them right */
116 char *stripe_buf = xmalloc(raid_disks * chunk_size);
117 char **stripes = xmalloc(raid_disks * sizeof(char*));
118 char **blocks = xmalloc(raid_disks * sizeof(char*));
119 int *block_index_for_slot = xmalloc(raid_disks * sizeof(int));
120 uint8_t *p = xmalloc(chunk_size);
121 uint8_t *q = xmalloc(chunk_size);
122 int *results = xmalloc(chunk_size * sizeof(int));
123
124 int i;
125 int diskP, diskQ;
126 int data_disks = raid_disks - 2;
127 int err = 0;
128 sighandler_t sig[3];
129 int rv;
130
131 extern int tables_ready;
132
133 if (!tables_ready)
134 make_tables();
135
136 for ( i = 0 ; i < raid_disks ; i++)
137 stripes[i] = stripe_buf + i * chunk_size;
138
139 while (length > 0) {
140 int disk;
141
142 printf("pos --> %llu\n", start);
143
144 if(mlockall(MCL_CURRENT | MCL_FUTURE) != 0) {
145 err = 2;
146 goto exitCheck;
147 }
148 sig[0] = signal(SIGTERM, SIG_IGN);
149 sig[1] = signal(SIGINT, SIG_IGN);
150 sig[2] = signal(SIGQUIT, SIG_IGN);
151 rv = sysfs_set_num(info, NULL, "suspend_lo", start * chunk_size * data_disks);
152 rv |= sysfs_set_num(info, NULL, "suspend_hi", (start + 1) * chunk_size * data_disks);
153 for (i = 0 ; i < raid_disks ; i++) {
154 lseek64(source[i], offsets[i] + start * chunk_size, 0);
155 read(source[i], stripes[i], chunk_size);
156 }
157 rv |= sysfs_set_num(info, NULL, "suspend_lo", 0x7FFFFFFFFFFFFFFFULL);
158 rv |= sysfs_set_num(info, NULL, "suspend_hi", 0);
159 rv |= sysfs_set_num(info, NULL, "suspend_lo", 0);
160 signal(SIGQUIT, sig[2]);
161 signal(SIGINT, sig[1]);
162 signal(SIGTERM, sig[0]);
163 if(munlockall() != 0) {
164 err = 3;
165 goto exitCheck;
166 }
167
168 if(rv != 0) {
169 err = rv * 256;
170 goto exitCheck;
171 }
172
173 for (i = 0 ; i < data_disks ; i++) {
174 int disk = geo_map(i, start, raid_disks, level, layout);
175 blocks[i] = stripes[disk];
176 block_index_for_slot[disk] = i;
177 printf("%d->%d\n", i, disk);
178 }
179
180 qsyndrome(p, q, (uint8_t**)blocks, data_disks, chunk_size);
181 diskP = geo_map(-1, start, raid_disks, level, layout);
182 diskQ = geo_map(-2, start, raid_disks, level, layout);
183 blocks[data_disks] = stripes[diskP];
184 block_index_for_slot[diskP] = data_disks;
185 blocks[data_disks+1] = stripes[diskQ];
186 block_index_for_slot[diskQ] = data_disks+1;
187
188 if (memcmp(p, stripes[diskP], chunk_size) != 0) {
189 printf("P(%d) wrong at %llu\n", diskP, start);
190 }
191 if (memcmp(q, stripes[diskQ], chunk_size) != 0) {
192 printf("Q(%d) wrong at %llu\n", diskQ, start);
193 }
194 raid6_collect(chunk_size, p, q, stripes[diskP], stripes[diskQ], results);
195 disk = raid6_stats(results, raid_disks, chunk_size);
196
197 if(disk >= -2) {
198 disk = geo_map(disk, start, raid_disks, level, layout);
199 }
200 if(disk >= 0) {
201 printf("Error detected at %llu: possible failed disk slot: %d --> %s\n",
202 start, disk, name[disk]);
203 }
204 if(disk == -65535) {
205 printf("Error detected at %llu: disk slot unknown\n", start);
206 }
207 if(repair == 1) {
208 printf("Repairing stripe %llu\n", start);
209 printf("Assuming slots %d (%s) and %d (%s) are incorrect\n",
210 failed_disk1, name[failed_disk1],
211 failed_disk2, name[failed_disk2]);
212
213 if (failed_disk1 == diskQ || failed_disk2 == diskQ) {
214 char *all_but_failed_blocks[data_disks];
215 int failed_data_or_p;
216 int failed_block_index;
217
218 if (failed_disk1 == diskQ)
219 failed_data_or_p = failed_disk2;
220 else
221 failed_data_or_p = failed_disk1;
222 printf("Repairing D/P(%d) and Q\n", failed_data_or_p);
223 failed_block_index = block_index_for_slot[failed_data_or_p];
224 for (i=0; i < data_disks; i++)
225 if (failed_block_index == i)
226 all_but_failed_blocks[i] = stripes[diskP];
227 else
228 all_but_failed_blocks[i] = blocks[i];
229 xor_blocks(stripes[failed_data_or_p],
230 all_but_failed_blocks, data_disks, chunk_size);
231 qsyndrome(p, (uint8_t*)stripes[diskQ], (uint8_t**)blocks, data_disks, chunk_size);
232 } else {
233 ensure_zero_has_size(chunk_size);
234 if (failed_disk1 == diskP || failed_disk2 == diskP) {
235 int failed_data, failed_block_index;
236 if (failed_disk1 == diskP)
237 failed_data = failed_disk2;
238 else
239 failed_data = failed_disk1;
240 failed_block_index = block_index_for_slot[failed_data];
241 printf("Repairing D(%d) and P\n", failed_data);
242 raid6_datap_recov(raid_disks, chunk_size, failed_block_index, (uint8_t**)blocks);
243 } else {
244 printf("Repairing D and D\n");
245 int failed_block_index1 = block_index_for_slot[failed_disk1];
246 int failed_block_index2 = block_index_for_slot[failed_disk2];
247 if (failed_block_index1 > failed_block_index2) {
248 int t = failed_block_index1;
249 failed_block_index1 = failed_block_index2;
250 failed_block_index2 = t;
251 }
252 raid6_2data_recov(raid_disks, chunk_size, failed_block_index1, failed_block_index2, (uint8_t**)blocks);
253 }
254 }
255 if(mlockall(MCL_CURRENT | MCL_FUTURE) != 0) {
256 err = 2;
257 goto exitCheck;
258 }
259 sig[0] = signal(SIGTERM, SIG_IGN);
260 sig[1] = signal(SIGINT, SIG_IGN);
261 sig[2] = signal(SIGQUIT, SIG_IGN);
262 rv = sysfs_set_num(info, NULL, "suspend_lo", start * chunk_size * data_disks);
263 rv |= sysfs_set_num(info, NULL, "suspend_hi", (start + 1) * chunk_size * data_disks);
264 lseek64(source[failed_disk1], offsets[failed_disk1] + start * chunk_size, 0);
265 write(source[failed_disk1], stripes[failed_disk1], chunk_size);
266 lseek64(source[failed_disk2], offsets[failed_disk2] + start * chunk_size, 0);
267 write(source[failed_disk2], stripes[failed_disk2], chunk_size);
268 rv |= sysfs_set_num(info, NULL, "suspend_lo", 0x7FFFFFFFFFFFFFFFULL);
269 rv |= sysfs_set_num(info, NULL, "suspend_hi", 0);
270 rv |= sysfs_set_num(info, NULL, "suspend_lo", 0);
271 signal(SIGQUIT, sig[2]);
272 signal(SIGINT, sig[1]);
273 signal(SIGTERM, sig[0]);
274 if(munlockall() != 0) {
275 err = 3;
276 goto exitCheck;
277 }
278
279 if(rv != 0) {
280 err = rv * 256;
281 goto exitCheck;
282 }
283 }
284
285
286 length--;
287 start++;
288 }
289
290 exitCheck:
291
292 free(stripe_buf);
293 free(stripes);
294 free(blocks);
295 free(p);
296 free(q);
297 free(results);
298
299 return err;
300 }
301
/*
 * Parse a non-negative decimal number.
 *
 * On success the value is returned and *err is left untouched.  If str is
 * empty, has trailing characters, contains a minus sign (strtoull() would
 * silently wrap a negative number to a huge value) or does not fit in an
 * unsigned long long, *err is set to str and 0 is returned.
 */
unsigned long long getnum(char *str, char **err)
{
	char *end;
	unsigned long long value;

	errno = 0;
	value = strtoull(str, &end, 10);
	if (end == str || *end != '\0' || errno == ERANGE ||
	    strchr(str, '-') != NULL) {
		*err = str;
		return 0;
	}
	return value;
}
312
313 int main(int argc, char *argv[])
314 {
315 /* md_device start length */
316 int *fds = NULL;
317 char *buf = NULL;
318 char **disk_name = NULL;
319 unsigned long long *offsets = NULL;
320 int raid_disks = 0;
321 int active_disks;
322 int chunk_size = 0;
323 int layout = -1;
324 int level = 6;
325 int repair = 0;
326 int failed_disk1, failed_disk2;
327 unsigned long long start, length;
328 int i;
329 int mdfd;
330 struct mdinfo *info = NULL, *comp = NULL;
331 char *err = NULL;
332 int exit_err = 0;
333 int close_flag = 0;
334 char *prg = strrchr(argv[0], '/');
335
336 if (prg == NULL)
337 prg = argv[0];
338 else
339 prg++;
340
341 if (argc < 4) {
342 fprintf(stderr, "Usage: %s md_device start_stripe length_stripes\n", prg);
343 fprintf(stderr, " or: %s md_device repair stripe failed_slot_1 failed_slot_2\n", prg);
344 exit_err = 1;
345 goto exitHere;
346 }
347
348 mdfd = open(argv[1], O_RDONLY);
349 if(mdfd < 0) {
350 perror(argv[1]);
351 fprintf(stderr, "%s: cannot open %s\n", prg, argv[1]);
352 exit_err = 2;
353 goto exitHere;
354 }
355
356 info = sysfs_read(mdfd, -1,
357 GET_LEVEL|
358 GET_LAYOUT|
359 GET_DISKS|
360 GET_DEGRADED |
361 GET_COMPONENT|
362 GET_CHUNK|
363 GET_DEVS|
364 GET_OFFSET|
365 GET_SIZE);
366
367 if(info == NULL) {
368 fprintf(stderr, "%s: Error reading sysfs information of %s\n", prg, argv[1]);
369 exit_err = 9;
370 goto exitHere;
371 }
372
373 if(info->array.level != level) {
374 fprintf(stderr, "%s: %s not a RAID-6\n", prg, argv[1]);
375 exit_err = 3;
376 goto exitHere;
377 }
378
379 if(info->array.failed_disks > 0) {
380 fprintf(stderr, "%s: %s degraded array\n", prg, argv[1]);
381 exit_err = 8;
382 goto exitHere;
383 }
384
385 printf("layout: %d\n", info->array.layout);
386 printf("disks: %d\n", info->array.raid_disks);
387 printf("component size: %llu\n", info->component_size * 512);
388 printf("total stripes: %llu\n", (info->component_size * 512) / info->array.chunk_size);
389 printf("chunk size: %d\n", info->array.chunk_size);
390 printf("\n");
391
392 comp = info->devs;
393 for(i = 0, active_disks = 0; active_disks < info->array.raid_disks; i++) {
394 printf("disk: %d - offset: %llu - size: %llu - name: %s - slot: %d\n",
395 i, comp->data_offset * 512, comp->component_size * 512,
396 map_dev(comp->disk.major, comp->disk.minor, 0),
397 comp->disk.raid_disk);
398 if(comp->disk.raid_disk >= 0)
399 active_disks++;
400 comp = comp->next;
401 }
402 printf("\n");
403
404 close(mdfd);
405
406 raid_disks = info->array.raid_disks;
407 chunk_size = info->array.chunk_size;
408 layout = info->array.layout;
409 if (strcmp(argv[2], "repair")==0) {
410 if (argc < 6) {
411 fprintf(stderr, "For repair mode, call %s md_device repair stripe failed_slot_1 failed_slot_2\n", prg);
412 exit_err = 1;
413 goto exitHere;
414 }
415 repair = 1;
416 start = getnum(argv[3], &err);
417 length = 1;
418 failed_disk1 = getnum(argv[4], &err);
419 failed_disk2 = getnum(argv[5], &err);
420
421 if(failed_disk1 >= info->array.raid_disks) {
422 fprintf(stderr, "%s: failed_slot_1 index is higher than number of devices in raid\n", prg);
423 exit_err = 4;
424 goto exitHere;
425 }
426 if(failed_disk2 >= info->array.raid_disks) {
427 fprintf(stderr, "%s: failed_slot_2 index is higher than number of devices in raid\n", prg);
428 exit_err = 4;
429 goto exitHere;
430 }
431 if(failed_disk1 == failed_disk2) {
432 fprintf(stderr, "%s: failed_slot_1 and failed_slot_2 are the same\n", prg);
433 exit_err = 4;
434 goto exitHere;
435 }
436 }
437 else {
438 start = getnum(argv[2], &err);
439 length = getnum(argv[3], &err);
440 }
441
442 if (err) {
443 fprintf(stderr, "%s: Bad number: %s\n", prg, err);
444 exit_err = 4;
445 goto exitHere;
446 }
447
448 if(start > ((info->component_size * 512) / chunk_size)) {
449 start = (info->component_size * 512) / chunk_size;
450 fprintf(stderr, "%s: start beyond disks size\n", prg);
451 }
452
453 if((length == 0) ||
454 ((length + start) > ((info->component_size * 512) / chunk_size))) {
455 length = (info->component_size * 512) / chunk_size - start;
456 }
457
458 disk_name = xmalloc(raid_disks * sizeof(*disk_name));
459 fds = xmalloc(raid_disks * sizeof(*fds));
460 offsets = xcalloc(raid_disks, sizeof(*offsets));
461 buf = xmalloc(raid_disks * chunk_size);
462
463 for(i=0; i<raid_disks; i++) {
464 fds[i] = -1;
465 }
466 close_flag = 1;
467
468 comp = info->devs;
469 for (i=0, active_disks=0; active_disks<raid_disks; i++) {
470 int disk_slot = comp->disk.raid_disk;
471 if(disk_slot >= 0) {
472 disk_name[disk_slot] = map_dev(comp->disk.major, comp->disk.minor, 0);
473 offsets[disk_slot] = comp->data_offset * 512;
474 fds[disk_slot] = open(disk_name[disk_slot], O_RDWR);
475 if (fds[disk_slot] < 0) {
476 perror(disk_name[disk_slot]);
477 fprintf(stderr,"%s: cannot open %s\n", prg, disk_name[disk_slot]);
478 exit_err = 6;
479 goto exitHere;
480 }
481 active_disks++;
482 }
483 comp = comp->next;
484 }
485
486 int rv = check_stripes(info, fds, offsets,
487 raid_disks, chunk_size, level, layout,
488 start, length, disk_name, repair, failed_disk1, failed_disk2);
489 if (rv != 0) {
490 fprintf(stderr,
491 "%s: check_stripes returned %d\n", prg, rv);
492 exit_err = 7;
493 goto exitHere;
494 }
495
496 exitHere:
497
498 if (close_flag)
499 for(i = 0; i < raid_disks; i++)
500 close(fds[i]);
501
502 free(disk_name);
503 free(fds);
504 free(offsets);
505 free(buf);
506
507 exit(exit_err);
508 }