]> git.ipfire.org Git - thirdparty/mdadm.git/blame - raid6check.c
raid6check: Fix off-by-one in argument check
[thirdparty/mdadm.git] / raid6check.c
CommitLineData
979afcb8
PS
/*
 * raid6check - extended consistency check for RAID-6
 *
 * Copyright (C) 2011 Piergiorgio Sartor
 *
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *
 * Author: Piergiorgio Sartor
 * Based on "restripe.c" from "mdadm" codebase
 */
24
#include "mdadm.h"
#include <errno.h>
#include <stdint.h>
#include <signal.h>
#include <sys/mman.h>
979afcb8
PS
29
/*
 * Helpers that are only declared in this file; their definitions live
 * elsewhere (presumably restripe.c, which this tool is based on - confirm).
 */

/*
 * Map a block of a stripe to the disk slot that holds it.  In this file it
 * is called with data block indexes 0..data_disks-1, with -1 for the P
 * block and with -2 for the Q block.
 */
int geo_map(int block, unsigned long long stripe, int raid_disks,
	    int level, int layout);

/*
 * Compute the P and Q syndromes over 'disks' source chunks of 'size' bytes.
 * check_stripes() compares the results with the on-disk P and Q chunks.
 */
void qsyndrome(uint8_t *p, uint8_t *q, uint8_t **sources, int disks, int size);

/* Called once before the first syndrome computation, while tables_ready is 0. */
void make_tables(void);

/* Called before the raid6_*_recov() routines; NOTE(review): semantics not visible here. */
void ensure_zero_has_size(int chunk_size);

/*
 * Recovery routines used in repair mode.  'ptrs' is passed as the data
 * chunks in block order followed by the P and Q chunks; 'faila'/'failb'
 * are passed as block indexes into that array.
 */
void raid6_datap_recov(int disks, size_t bytes, int faila, uint8_t **ptrs);
void raid6_2data_recov(int disks, size_t bytes, int faila, int failb,
		       uint8_t **ptrs);

/* Used in repair mode to rebuild one chunk from 'disks' source chunks. */
void xor_blocks(char *target, char **sources, int disks, int size);
39
979afcb8
PS
40
/*
 * Collect per stripe consistency information.
 *
 * For every byte position i of a chunk, compare the computed syndromes
 * (p, q) with the chunks read from disk (chunkP, chunkQ) and store a
 * verdict in results[i]:
 *   -255     both P and Q match (no error at this byte)
 *   -1       only P differs
 *   -2       only Q differs
 *   0..254   both differ; the value is the difference of the raid6_gflog
 *            entries of the two error bytes (mod 255), which the caller
 *            treats as the index of the suspect data block
 *
 * chunk_size is the number of bytes to examine; all five arrays must hold
 * at least that many elements.
 */
void raid6_collect(int chunk_size, uint8_t *p, uint8_t *q,
		   char *chunkP, char *chunkQ, int *results)
{
	int i;
	extern uint8_t raid6_gflog[];

	for (i = 0; i < chunk_size; i++) {
		/* XOR of stored and computed byte: zero means they agree */
		uint8_t Px = (uint8_t)chunkP[i] ^ p[i];
		uint8_t Qx = (uint8_t)chunkQ[i] ^ q[i];

		if (Px == 0 && Qx == 0) {
			results[i] = -255;
		} else if (Qx == 0) {
			results[i] = -1;
		} else if (Px == 0) {
			results[i] = -2;
		} else {
			int data_id = raid6_gflog[Qx] - raid6_gflog[Px];

			/* keep the difference in the range 0..254 */
			if (data_id < 0)
				data_id += 255;
			results[i] = data_id;
		}
	}
}
70
/*
 * Try to find out if a specific disk has problems.
 *
 * results holds chunk_size per-byte verdicts as produced by
 * raid6_collect(): -255 for a clean byte, otherwise the id of the suspect
 * block (-1, -2 or a non-negative index).
 *
 * Returns:
 *   -255    every byte is clean
 *   id      all faulty bytes point at the same id, and id < raid_disks
 *   -65535  the faulty bytes disagree, or an id is >= raid_disks, so no
 *           single disk can be blamed
 *
 * A disagreement found on the very last byte is also reported as -65535
 * (it used to leak the last id seen instead).
 */
int raid6_stats(int *results, int raid_disks, int chunk_size)
{
	enum { BYTE_CLEAN = -255, DISK_UNKNOWN = -65535 };
	int suspect = BYTE_CLEAN;
	int i;

	for (i = 0; i < chunk_size; i++) {
		int id = results[i];

		if (id == BYTE_CLEAN)
			continue;

		/* an id beyond the array cannot be a real block */
		if (id >= raid_disks)
			return DISK_UNKNOWN;

		if (suspect == BYTE_CLEAN)
			suspect = id;		/* first faulty byte */
		else if (id != suspect)
			return DISK_UNKNOWN;	/* conflicting evidence */
	}

	return suspect;
}
109
8d8ab389 110int check_stripes(struct mdinfo *info, int *source, unsigned long long *offsets,
979afcb8 111 int raid_disks, int chunk_size, int level, int layout,
f2e29ad6
RB
112 unsigned long long start, unsigned long long length, char *name[],
113 int repair, int failed_disk1, int failed_disk2)
979afcb8
PS
114{
115 /* read the data and p and q blocks, and check we got them right */
503975b9
N
116 char *stripe_buf = xmalloc(raid_disks * chunk_size);
117 char **stripes = xmalloc(raid_disks * sizeof(char*));
118 char **blocks = xmalloc(raid_disks * sizeof(char*));
119 uint8_t *p = xmalloc(chunk_size);
120 uint8_t *q = xmalloc(chunk_size);
121 int *results = xmalloc(chunk_size * sizeof(int));
979afcb8
PS
122
123 int i;
124 int diskP, diskQ;
125 int data_disks = raid_disks - 2;
af3c3750 126 int err = 0;
8d8ab389
PS
127 sighandler_t sig[3];
128 int rv;
979afcb8
PS
129
130 extern int tables_ready;
131
132 if (!tables_ready)
133 make_tables();
134
135 for ( i = 0 ; i < raid_disks ; i++)
136 stripes[i] = stripe_buf + i * chunk_size;
137
138 while (length > 0) {
139 int disk;
140
af3c3750
PS
141 printf("pos --> %llu\n", start);
142
8d8ab389
PS
143 if(mlockall(MCL_CURRENT | MCL_FUTURE) != 0) {
144 err = 2;
145 goto exitCheck;
146 }
147 sig[0] = signal(SIGTERM, SIG_IGN);
148 sig[1] = signal(SIGINT, SIG_IGN);
149 sig[2] = signal(SIGQUIT, SIG_IGN);
150 rv = sysfs_set_num(info, NULL, "suspend_lo", start * chunk_size * data_disks);
151 rv |= sysfs_set_num(info, NULL, "suspend_hi", (start + 1) * chunk_size * data_disks);
979afcb8 152 for (i = 0 ; i < raid_disks ; i++) {
af3c3750 153 lseek64(source[i], offsets[i] + start * chunk_size, 0);
979afcb8
PS
154 read(source[i], stripes[i], chunk_size);
155 }
8d8ab389
PS
156 rv |= sysfs_set_num(info, NULL, "suspend_lo", 0x7FFFFFFFFFFFFFFFULL);
157 rv |= sysfs_set_num(info, NULL, "suspend_hi", 0);
158 rv |= sysfs_set_num(info, NULL, "suspend_lo", 0);
159 signal(SIGQUIT, sig[2]);
160 signal(SIGINT, sig[1]);
161 signal(SIGTERM, sig[0]);
162 if(munlockall() != 0) {
163 err = 3;
164 goto exitCheck;
165 }
166
167 if(rv != 0) {
168 err = rv * 256;
169 goto exitCheck;
170 }
171
979afcb8 172 for (i = 0 ; i < data_disks ; i++) {
af3c3750 173 int disk = geo_map(i, start, raid_disks, level, layout);
979afcb8
PS
174 blocks[i] = stripes[disk];
175 printf("%d->%d\n", i, disk);
176 }
177
178 qsyndrome(p, q, (uint8_t**)blocks, data_disks, chunk_size);
af3c3750 179 diskP = geo_map(-1, start, raid_disks, level, layout);
f2e29ad6
RB
180 diskQ = geo_map(-2, start, raid_disks, level, layout);
181 blocks[data_disks] = stripes[diskP];
182 blocks[data_disks+1] = stripes[diskQ];
183
979afcb8 184 if (memcmp(p, stripes[diskP], chunk_size) != 0) {
af3c3750 185 printf("P(%d) wrong at %llu\n", diskP, start);
979afcb8 186 }
979afcb8 187 if (memcmp(q, stripes[diskQ], chunk_size) != 0) {
af3c3750 188 printf("Q(%d) wrong at %llu\n", diskQ, start);
979afcb8 189 }
af3c3750 190 raid6_collect(chunk_size, p, q, stripes[diskP], stripes[diskQ], results);
979afcb8
PS
191 disk = raid6_stats(results, raid_disks, chunk_size);
192
193 if(disk >= -2) {
af3c3750 194 disk = geo_map(disk, start, raid_disks, level, layout);
979afcb8
PS
195 }
196 if(disk >= 0) {
af3c3750
PS
197 printf("Error detected at %llu: possible failed disk slot: %d --> %s\n",
198 start, disk, name[disk]);
979afcb8
PS
199 }
200 if(disk == -65535) {
af3c3750 201 printf("Error detected at %llu: disk slot unknown\n", start);
979afcb8 202 }
f2e29ad6
RB
203 if(repair == 1) {
204 printf("Repairing stripe %llu\n", start);
205 printf("Assuming slots %d (%s) and %d (%s) are incorrect\n",
206 failed_disk1, name[failed_disk1],
207 failed_disk2, name[failed_disk2]);
208
209 if (failed_disk1 == diskQ || failed_disk2 == diskQ) {
210 char *all_but_failed_blocks[data_disks];
211 int failed_data;
212 int failed_block_index;
213
214 if (failed_disk1 == diskQ)
215 failed_data = failed_disk2;
216 else
217 failed_data = failed_disk1;
218 printf("Repairing D/P(%d) and Q\n", failed_data);
219 failed_block_index = geo_map(
220 failed_data, start, raid_disks,
221 level, layout);
222 for (i=0; i < data_disks; i++)
223 if (failed_block_index == i)
224 all_but_failed_blocks[i] = stripes[diskP];
225 else
226 all_but_failed_blocks[i] = blocks[i];
227 xor_blocks(stripes[failed_data],
228 all_but_failed_blocks, data_disks, chunk_size);
229 qsyndrome(p, (uint8_t*)stripes[diskQ], (uint8_t**)blocks, data_disks, chunk_size);
230 } else {
231 ensure_zero_has_size(chunk_size);
232 if (failed_disk1 == diskP || failed_disk2 == diskP) {
233 int failed_data, failed_block_index;
234 if (failed_disk1 == diskP)
235 failed_data = failed_disk2;
236 else
237 failed_data = failed_disk1;
238 failed_block_index = geo_map(failed_data, start, raid_disks, level, layout);
239 printf("Repairing D(%d) and P\n", failed_data);
240 raid6_datap_recov(raid_disks, chunk_size, failed_block_index, (uint8_t**)blocks);
241 } else {
242 printf("Repairing D and D\n");
243 int failed_block_index1 = geo_map(failed_disk1, start, raid_disks, level, layout);
244 int failed_block_index2 = geo_map(failed_disk2, start, raid_disks, level, layout);
245 if (failed_block_index1 > failed_block_index2) {
246 int t = failed_block_index1;
247 failed_block_index1 = failed_block_index2;
248 failed_block_index2 = t;
249 }
250 raid6_2data_recov(raid_disks, chunk_size, failed_block_index1, failed_block_index2, (uint8_t**)blocks);
251 }
252 }
253 if(mlockall(MCL_CURRENT | MCL_FUTURE) != 0) {
254 err = 2;
255 goto exitCheck;
256 }
257 sig[0] = signal(SIGTERM, SIG_IGN);
258 sig[1] = signal(SIGINT, SIG_IGN);
259 sig[2] = signal(SIGQUIT, SIG_IGN);
260 rv = sysfs_set_num(info, NULL, "suspend_lo", start * chunk_size * data_disks);
261 rv |= sysfs_set_num(info, NULL, "suspend_hi", (start + 1) * chunk_size * data_disks);
262 lseek64(source[failed_disk1], offsets[failed_disk1] + start * chunk_size, 0);
263 write(source[failed_disk1], stripes[failed_disk1], chunk_size);
264 lseek64(source[failed_disk2], offsets[failed_disk2] + start * chunk_size, 0);
265 write(source[failed_disk2], stripes[failed_disk2], chunk_size);
266 rv |= sysfs_set_num(info, NULL, "suspend_lo", 0x7FFFFFFFFFFFFFFFULL);
267 rv |= sysfs_set_num(info, NULL, "suspend_hi", 0);
268 rv |= sysfs_set_num(info, NULL, "suspend_lo", 0);
269 signal(SIGQUIT, sig[2]);
270 signal(SIGINT, sig[1]);
271 signal(SIGTERM, sig[0]);
272 if(munlockall() != 0) {
273 err = 3;
274 goto exitCheck;
275 }
276
277 if(rv != 0) {
278 err = rv * 256;
279 goto exitCheck;
280 }
281 }
282
979afcb8 283
af3c3750
PS
284 length--;
285 start++;
979afcb8
PS
286 }
287
af3c3750
PS
288exitCheck:
289
979afcb8
PS
290 free(stripe_buf);
291 free(stripes);
292 free(blocks);
293 free(p);
294 free(q);
295 free(results);
296
af3c3750 297 return err;
979afcb8
PS
298}
299
/*
 * Parse a non-negative decimal number.
 *
 * On success the value is returned and *err is left untouched.  If str is
 * empty, has trailing characters, carries a minus sign or does not fit in
 * an unsigned long long, *err is set to str and 0 is returned.
 */
unsigned long long getnum(char *str, char **err)
{
	char *e;
	unsigned long long rv;

	errno = 0;	/* strtoull() only reports overflow through errno */
	rv = strtoull(str, &e, 10);

	/*
	 * strtoull() accepts "-N" and returns the negated value converted to
	 * unsigned; a '-' anywhere in the input is therefore rejected.
	 */
	if (e == str || *e || errno == ERANGE || strchr(str, '-') != NULL) {
		*err = str;
		return 0;
	}
	return rv;
}
310
311int main(int argc, char *argv[])
312{
a9c2c6c6 313 /* md_device start length */
af3c3750
PS
314 int *fds = NULL;
315 char *buf = NULL;
316 char **disk_name = NULL;
317 unsigned long long *offsets = NULL;
318 int raid_disks = 0;
2cf31121 319 int active_disks;
af3c3750
PS
320 int chunk_size = 0;
321 int layout = -1;
979afcb8 322 int level = 6;
f2e29ad6
RB
323 int repair = 0;
324 int failed_disk1, failed_disk2;
979afcb8
PS
325 unsigned long long start, length;
326 int i;
a9c2c6c6 327 int mdfd;
8d8ab389 328 struct mdinfo *info = NULL, *comp = NULL;
979afcb8 329 char *err = NULL;
af3c3750
PS
330 int exit_err = 0;
331 int close_flag = 0;
332 char *prg = strrchr(argv[0], '/');
333
334 if (prg == NULL)
335 prg = argv[0];
336 else
337 prg++;
338
339 if (argc < 4) {
340 fprintf(stderr, "Usage: %s md_device start_stripe length_stripes\n", prg);
f2e29ad6 341 fprintf(stderr, " or: %s md_device repair stripe failed_slot_1 failed_slot_2\n", prg);
af3c3750
PS
342 exit_err = 1;
343 goto exitHere;
979afcb8
PS
344 }
345
a9c2c6c6
PS
346 mdfd = open(argv[1], O_RDONLY);
347 if(mdfd < 0) {
348 perror(argv[1]);
e7b84f9d 349 fprintf(stderr, "%s: cannot open %s\n", prg, argv[1]);
af3c3750
PS
350 exit_err = 2;
351 goto exitHere;
a9c2c6c6
PS
352 }
353
354 info = sysfs_read(mdfd, -1,
355 GET_LEVEL|
356 GET_LAYOUT|
357 GET_DISKS|
2cf31121 358 GET_DEGRADED |
a9c2c6c6
PS
359 GET_COMPONENT|
360 GET_CHUNK|
361 GET_DEVS|
362 GET_OFFSET|
363 GET_SIZE);
364
8d8ab389
PS
365 if(info == NULL) {
366 fprintf(stderr, "%s: Error reading sysfs information of %s\n", prg, argv[1]);
367 exit_err = 9;
368 goto exitHere;
369 }
370
a9c2c6c6
PS
371 if(info->array.level != level) {
372 fprintf(stderr, "%s: %s not a RAID-6\n", prg, argv[1]);
af3c3750
PS
373 exit_err = 3;
374 goto exitHere;
a9c2c6c6
PS
375 }
376
2cf31121
PS
377 if(info->array.failed_disks > 0) {
378 fprintf(stderr, "%s: %s degraded array\n", prg, argv[1]);
379 exit_err = 8;
380 goto exitHere;
381 }
382
a9c2c6c6
PS
383 printf("layout: %d\n", info->array.layout);
384 printf("disks: %d\n", info->array.raid_disks);
af3c3750
PS
385 printf("component size: %llu\n", info->component_size * 512);
386 printf("total stripes: %llu\n", (info->component_size * 512) / info->array.chunk_size);
a9c2c6c6
PS
387 printf("chunk size: %d\n", info->array.chunk_size);
388 printf("\n");
389
390 comp = info->devs;
2cf31121 391 for(i = 0, active_disks = 0; active_disks < info->array.raid_disks; i++) {
a9c2c6c6 392 printf("disk: %d - offset: %llu - size: %llu - name: %s - slot: %d\n",
af3c3750 393 i, comp->data_offset * 512, comp->component_size * 512,
a9c2c6c6
PS
394 map_dev(comp->disk.major, comp->disk.minor, 0),
395 comp->disk.raid_disk);
2cf31121
PS
396 if(comp->disk.raid_disk >= 0)
397 active_disks++;
a9c2c6c6
PS
398 comp = comp->next;
399 }
400 printf("\n");
401
402 close(mdfd);
403
404 raid_disks = info->array.raid_disks;
405 chunk_size = info->array.chunk_size;
406 layout = info->array.layout;
f2e29ad6
RB
407 if (strcmp(argv[2], "repair")==0) {
408 if (argc < 6) {
409 fprintf(stderr, "For repair mode, call %s md_device repair stripe failed_slot_1 failed_slot_2\n", prg);
410 exit_err = 1;
411 goto exitHere;
412 }
413 repair = 1;
414 start = getnum(argv[3], &err);
415 length = 1;
416 failed_disk1 = getnum(argv[4], &err);
417 failed_disk2 = getnum(argv[5], &err);
418
b67e45b8 419 if(failed_disk1 >= info->array.raid_disks) {
f2e29ad6
RB
420 fprintf(stderr, "%s: failed_slot_1 index is higher than number of devices in raid\n", prg);
421 exit_err = 4;
422 goto exitHere;
423 }
b67e45b8 424 if(failed_disk2 >= info->array.raid_disks) {
f2e29ad6
RB
425 fprintf(stderr, "%s: failed_slot_2 index is higher than number of devices in raid\n", prg);
426 exit_err = 4;
427 goto exitHere;
428 }
429 if(failed_disk1 == failed_disk2) {
430 fprintf(stderr, "%s: failed_slot_1 and failed_slot_2 are the same\n", prg);
431 exit_err = 4;
432 goto exitHere;
433 }
434 }
435 else {
436 start = getnum(argv[2], &err);
437 length = getnum(argv[3], &err);
438 }
a9c2c6c6 439
979afcb8 440 if (err) {
a9c2c6c6 441 fprintf(stderr, "%s: Bad number: %s\n", prg, err);
af3c3750
PS
442 exit_err = 4;
443 goto exitHere;
979afcb8 444 }
a9c2c6c6 445
af3c3750
PS
446 if(start > ((info->component_size * 512) / chunk_size)) {
447 start = (info->component_size * 512) / chunk_size;
448 fprintf(stderr, "%s: start beyond disks size\n", prg);
449 }
a9c2c6c6 450
af3c3750
PS
451 if((length == 0) ||
452 ((length + start) > ((info->component_size * 512) / chunk_size))) {
453 length = (info->component_size * 512) / chunk_size - start;
979afcb8 454 }
a9c2c6c6 455
503975b9
N
456 disk_name = xmalloc(raid_disks * sizeof(*disk_name));
457 fds = xmalloc(raid_disks * sizeof(*fds));
458 offsets = xcalloc(raid_disks, sizeof(*offsets));
459 buf = xmalloc(raid_disks * chunk_size);
af3c3750 460
af3c3750
PS
461 for(i=0; i<raid_disks; i++) {
462 fds[i] = -1;
463 }
464 close_flag = 1;
979afcb8 465
a9c2c6c6 466 comp = info->devs;
2cf31121 467 for (i=0, active_disks=0; active_disks<raid_disks; i++) {
a9c2c6c6 468 int disk_slot = comp->disk.raid_disk;
2cf31121
PS
469 if(disk_slot >= 0) {
470 disk_name[disk_slot] = map_dev(comp->disk.major, comp->disk.minor, 0);
471 offsets[disk_slot] = comp->data_offset * 512;
472 fds[disk_slot] = open(disk_name[disk_slot], O_RDWR);
473 if (fds[disk_slot] < 0) {
474 perror(disk_name[disk_slot]);
475 fprintf(stderr,"%s: cannot open %s\n", prg, disk_name[disk_slot]);
476 exit_err = 6;
477 goto exitHere;
478 }
479 active_disks++;
979afcb8 480 }
a9c2c6c6 481 comp = comp->next;
979afcb8
PS
482 }
483
8d8ab389 484 int rv = check_stripes(info, fds, offsets,
979afcb8 485 raid_disks, chunk_size, level, layout,
f2e29ad6 486 start, length, disk_name, repair, failed_disk1, failed_disk2);
979afcb8
PS
487 if (rv != 0) {
488 fprintf(stderr,
a9c2c6c6 489 "%s: check_stripes returned %d\n", prg, rv);
af3c3750
PS
490 exit_err = 7;
491 goto exitHere;
979afcb8
PS
492 }
493
af3c3750
PS
494exitHere:
495
496 if (close_flag)
497 for(i = 0; i < raid_disks; i++)
498 close(fds[i]);
499
a9c2c6c6 500 free(disk_name);
979afcb8
PS
501 free(fds);
502 free(offsets);
503 free(buf);
504
af3c3750 505 exit(exit_err);
979afcb8 506}