]> git.ipfire.org Git - thirdparty/mdadm.git/blob - raid6check.c
Move xmalloc et al into their own file
[thirdparty/mdadm.git] / raid6check.c
1 /*
2 * raid6check - extended consistency check for RAID-6
3 *
4 * Copyright (C) 2011 Piergiorgio Sartor
5 *
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 *
21 * Author: Piergiorgio Sartor
22 * Based on "restripe.c" from "mdadm" codebase
23 */
24
#include "mdadm.h"
#include <errno.h>
#include <stdint.h>
#include <signal.h>
#include <sys/mman.h>
29
30 int geo_map(int block, unsigned long long stripe, int raid_disks,
31 int level, int layout);
32 void qsyndrome(uint8_t *p, uint8_t *q, uint8_t **sources, int disks, int size);
33 void make_tables(void);
34 void ensure_zero_has_size(int chunk_size);
35 void raid6_datap_recov(int disks, size_t bytes, int faila, uint8_t **ptrs);
36 void raid6_2data_recov(int disks, size_t bytes, int faila, int failb,
37 uint8_t **ptrs);
38 void xor_blocks(char *target, char **sources, int disks, int size);
39
40
/*
 * Collect per stripe consistency information.
 *
 * For every byte position of a chunk, compare the parity computed from the
 * data (p, q) with the parity read from disk (chunkP, chunkQ) and store a
 * verdict in results[]:
 *
 *   -255  P and Q both match
 *   -1    only P differs
 *   -2    only Q differs
 *   >= 0  both differ; the value is log(Qdiff) - log(Pdiff), taken from
 *         raid6_gflog[] and wrapped into 0..254
 *
 * chunk_size: number of bytes to examine in each buffer
 * results:    output array with room for chunk_size entries
 */
void raid6_collect(int chunk_size, uint8_t *p, uint8_t *q,
		   char *chunkP, char *chunkQ, int *results)
{
	extern uint8_t raid6_gflog[];
	int pos;

	for (pos = 0; pos < chunk_size; pos++) {
		const uint8_t p_diff = (uint8_t)chunkP[pos] ^ p[pos];
		const uint8_t q_diff = (uint8_t)chunkQ[pos] ^ q[pos];
		int verdict;

		if (p_diff == 0 && q_diff == 0) {
			verdict = -255;
		} else if (q_diff == 0) {
			verdict = -1;
		} else if (p_diff == 0) {
			verdict = -2;
		} else {
			/* logarithm difference, kept non-negative */
			verdict = raid6_gflog[q_diff] - raid6_gflog[p_diff];
			if (verdict < 0)
				verdict += 255;
		}

		results[pos] = verdict;
	}
}
70
/*
 * Try to find out if a specific disk has problems.
 *
 * results:    per-byte verdicts as produced by raid6_collect()
 *             (-255 = byte is consistent, anything else names a suspect)
 * raid_disks: number of disks in the array; a verdict >= raid_disks cannot
 *             name a real disk
 * chunk_size: number of entries in results[]
 *
 * Returns:
 *   -255    every byte is consistent (also when chunk_size is 0)
 *   -65535  the verdicts disagree with each other, or one of them is out of
 *           range, so no single disk can be blamed
 *   other   the one verdict shared by all inconsistent bytes
 */
int raid6_stats(int *results, int raid_disks, int chunk_size)
{
	int suspect = -255;	/* -255: no inconsistent byte seen yet */
	int i;

	for (i = 0; i < chunk_size; i++) {
		int verdict = results[i];

		if (verdict == -255)
			continue;

		/* not a valid disk: nothing sensible can be reported */
		if (verdict >= raid_disks)
			return -65535;

		if (suspect == -255)
			suspect = verdict;
		else if (verdict != suspect)
			/*
			 * Two different suspects.  Return right away so that a
			 * disagreement found on the very last byte is reported
			 * as "unknown" too, instead of blaming that last byte's
			 * disk.
			 */
			return -65535;
	}

	return suspect;
}
109
/*
 * Prepare for raw access to one stripe: lock the process memory, ignore
 * SIGTERM/SIGINT/SIGQUIT (previous handlers are saved in sig[0..2]) and set
 * the array's suspend_lo/suspend_hi window around the stripe.
 *
 * Returns 2 if mlockall() fails (nothing else has been changed then),
 * otherwise 0.  The OR-ed result of the sysfs writes is stored in *sysfs_rv
 * and must be handed to r6c_resume_array().
 */
static int r6c_suspend_stripe(struct mdinfo *info, unsigned long long start,
			      int chunk_size, int data_disks,
			      sighandler_t *sig, int *sysfs_rv)
{
	if (mlockall(MCL_CURRENT | MCL_FUTURE) != 0)
		return 2;

	sig[0] = signal(SIGTERM, SIG_IGN);
	sig[1] = signal(SIGINT, SIG_IGN);
	sig[2] = signal(SIGQUIT, SIG_IGN);

	*sysfs_rv = sysfs_set_num(info, NULL, "suspend_lo",
				  start * chunk_size * data_disks);
	*sysfs_rv |= sysfs_set_num(info, NULL, "suspend_hi",
				   (start + 1) * chunk_size * data_disks);
	return 0;
}

/*
 * Undo r6c_suspend_stripe(): reset the suspend window, restore the signal
 * handlers saved in sig[0..2] and unlock the process memory.
 *
 * Returns 3 if munlockall() fails, otherwise 256 times the OR-ed sysfs
 * result (sysfs_rv from the suspend step plus the writes done here), which
 * is 0 when every sysfs write succeeded.
 */
static int r6c_resume_array(struct mdinfo *info, sighandler_t *sig, int sysfs_rv)
{
	sysfs_rv |= sysfs_set_num(info, NULL, "suspend_lo", 0x7FFFFFFFFFFFFFFFULL);
	sysfs_rv |= sysfs_set_num(info, NULL, "suspend_hi", 0);
	sysfs_rv |= sysfs_set_num(info, NULL, "suspend_lo", 0);

	signal(SIGQUIT, sig[2]);
	signal(SIGINT, sig[1]);
	signal(SIGTERM, sig[0]);

	if (munlockall() != 0)
		return 3;

	return sysfs_rv * 256;
}

/* Read exactly chunk_size bytes at byte position pos; 0 on success, -1 otherwise. */
static int r6c_read_chunk(int fd, unsigned long long pos, char *buf, int chunk_size)
{
	if (lseek64(fd, pos, SEEK_SET) < 0)
		return -1;
	return read(fd, buf, chunk_size) == chunk_size ? 0 : -1;
}

/* Write exactly chunk_size bytes at byte position pos; 0 on success, -1 otherwise. */
static int r6c_write_chunk(int fd, unsigned long long pos, char *buf, int chunk_size)
{
	if (lseek64(fd, pos, SEEK_SET) < 0)
		return -1;
	return write(fd, buf, chunk_size) == chunk_size ? 0 : -1;
}

/*
 * Check, and optionally repair, `length` stripes beginning at stripe `start`.
 *
 * info:        sysfs handle of the array, used to set the suspend window
 * source:      one open file descriptor per disk slot
 * offsets:     byte offset of the data area on each slot
 * raid_disks, chunk_size, level, layout: array geometry
 * name:        device name per slot, used in the report
 * repair:      1 to recompute and rewrite the chunks of failed_disk1 and
 *              failed_disk2 (slot numbers) from the remaining chunks;
 *              the two slots are ignored otherwise
 *
 * For every stripe all chunks are read while the stripe is suspended, P and Q
 * are recomputed from the data chunks and compared with the stored ones, and
 * the per-byte verdicts are reduced to a suspect disk that is printed.
 *
 * Returns 0 on success, or
 *   1        repair requested with a slot outside 0..raid_disks-1
 *   2        mlockall() failed
 *   3        munlockall() failed
 *   4        reading a chunk failed
 *   5        writing a repaired chunk failed
 *   rv*256   writing suspend_lo/suspend_hi failed (rv = OR-ed sysfs results)
 */
int check_stripes(struct mdinfo *info, int *source, unsigned long long *offsets,
		  int raid_disks, int chunk_size, int level, int layout,
		  unsigned long long start, unsigned long long length, char *name[],
		  int repair, int failed_disk1, int failed_disk2)
{
	/* read the data and p and q blocks, and check we got them right */
	char *stripe_buf = xmalloc((size_t)raid_disks * chunk_size);
	char **stripes = xmalloc(raid_disks * sizeof(char*));
	char **blocks = xmalloc(raid_disks * sizeof(char*));
	char **xor_sources = xmalloc(raid_disks * sizeof(char*));
	int *block_index_for_slot = xmalloc(raid_disks * sizeof(int));
	uint8_t *p = xmalloc(chunk_size);
	uint8_t *q = xmalloc(chunk_size);
	int *results = xmalloc(chunk_size * sizeof(int));

	int i;
	int diskP, diskQ;
	int data_disks = raid_disks - 2;
	int err = 0;
	sighandler_t sig[3];
	int rv;

	extern int tables_ready;

	/* the failed slots index name[], source[], offsets[] and stripes[] */
	if (repair == 1 &&
	    (failed_disk1 < 0 || failed_disk1 >= raid_disks ||
	     failed_disk2 < 0 || failed_disk2 >= raid_disks)) {
		err = 1;
		goto exitCheck;
	}

	if (!tables_ready)
		make_tables();

	for (i = 0; i < raid_disks; i++)
		stripes[i] = stripe_buf + (size_t)i * chunk_size;

	while (length > 0) {
		int disk;
		int io_failed = 0;

		printf("pos --> %llu\n", start);

		/* read every chunk of the stripe while the array leaves it alone */
		err = r6c_suspend_stripe(info, start, chunk_size, data_disks, sig, &rv);
		if (err != 0)
			goto exitCheck;
		for (i = 0; i < raid_disks; i++) {
			if (r6c_read_chunk(source[i], offsets[i] + start * chunk_size,
					   stripes[i], chunk_size) != 0)
				io_failed = 1;
		}
		err = r6c_resume_array(info, sig, rv);
		if (err != 0)
			goto exitCheck;
		if (io_failed) {
			/* the buffers are incomplete, any verdict would be bogus */
			err = 4;
			goto exitCheck;
		}

		/*
		 * geo_map() turns a block index (0..data_disks-1 for data,
		 * -1 for P, -2 for Q) into the slot holding it on this stripe.
		 * Remember the reverse direction as well; repair needs it.
		 */
		for (i = 0; i < data_disks; i++) {
			int slot = geo_map(i, start, raid_disks, level, layout);
			blocks[i] = stripes[slot];
			block_index_for_slot[slot] = i;
			printf("%d->%d\n", i, slot);
		}

		qsyndrome(p, q, (uint8_t**)blocks, data_disks, chunk_size);
		diskP = geo_map(-1, start, raid_disks, level, layout);
		diskQ = geo_map(-2, start, raid_disks, level, layout);
		blocks[data_disks] = stripes[diskP];
		block_index_for_slot[diskP] = data_disks;
		blocks[data_disks+1] = stripes[diskQ];
		block_index_for_slot[diskQ] = data_disks + 1;

		if (memcmp(p, stripes[diskP], chunk_size) != 0) {
			printf("P(%d) wrong at %llu\n", diskP, start);
		}
		if (memcmp(q, stripes[diskQ], chunk_size) != 0) {
			printf("Q(%d) wrong at %llu\n", diskQ, start);
		}
		raid6_collect(chunk_size, p, q, stripes[diskP], stripes[diskQ], results);
		disk = raid6_stats(results, raid_disks, chunk_size);

		/* -1, -2 and data indexes are block numbers: translate to a slot */
		if (disk >= -2) {
			disk = geo_map(disk, start, raid_disks, level, layout);
		}
		if (disk >= 0) {
			printf("Error detected at %llu: possible failed disk slot: %d --> %s\n",
			       start, disk, name[disk]);
		}
		if (disk == -65535) {
			printf("Error detected at %llu: disk slot unknown\n", start);
		}
		if (repair == 1) {
			int write_failed = 0;

			printf("Repairing stripe %llu\n", start);
			printf("Assuming slots %d (%s) and %d (%s) are incorrect\n",
			       failed_disk1, name[failed_disk1],
			       failed_disk2, name[failed_disk2]);

			if (failed_disk1 == diskQ || failed_disk2 == diskQ) {
				int failed_data;
				int failed_block_index;

				if (failed_disk1 == diskQ)
					failed_data = failed_disk2;
				else
					failed_data = failed_disk1;
				printf("Repairing D/P(%d) and Q\n", failed_data);
				failed_block_index = block_index_for_slot[failed_data];
				/*
				 * XOR of all other data chunks and P gives the
				 * failed data chunk.  When the failed one is P
				 * itself its index is data_disks, nothing is
				 * substituted and the XOR yields P.
				 */
				for (i = 0; i < data_disks; i++)
					if (failed_block_index == i)
						xor_sources[i] = stripes[diskP];
					else
						xor_sources[i] = blocks[i];
				xor_blocks(stripes[failed_data],
					   xor_sources, data_disks, chunk_size);
				qsyndrome(p, (uint8_t*)stripes[diskQ], (uint8_t**)blocks, data_disks, chunk_size);
			} else {
				ensure_zero_has_size(chunk_size);
				if (failed_disk1 == diskP || failed_disk2 == diskP) {
					int failed_data, failed_block_index;
					if (failed_disk1 == diskP)
						failed_data = failed_disk2;
					else
						failed_data = failed_disk1;
					failed_block_index = block_index_for_slot[failed_data];
					printf("Repairing D(%d) and P\n", failed_data);
					raid6_datap_recov(raid_disks, chunk_size, failed_block_index, (uint8_t**)blocks);
				} else {
					printf("Repairing D and D\n");
					int failed_block_index1 = block_index_for_slot[failed_disk1];
					int failed_block_index2 = block_index_for_slot[failed_disk2];
					/* the lower block index is passed first */
					if (failed_block_index1 > failed_block_index2) {
						int t = failed_block_index1;
						failed_block_index1 = failed_block_index2;
						failed_block_index2 = t;
					}
					raid6_2data_recov(raid_disks, chunk_size, failed_block_index1, failed_block_index2, (uint8_t**)blocks);
				}
			}

			err = r6c_suspend_stripe(info, start, chunk_size, data_disks, sig, &rv);
			if (err != 0)
				goto exitCheck;
			/* never touch the component devices unless the stripe is suspended */
			if (rv == 0) {
				if (r6c_write_chunk(source[failed_disk1],
						    offsets[failed_disk1] + start * chunk_size,
						    stripes[failed_disk1], chunk_size) != 0)
					write_failed = 1;
				if (r6c_write_chunk(source[failed_disk2],
						    offsets[failed_disk2] + start * chunk_size,
						    stripes[failed_disk2], chunk_size) != 0)
					write_failed = 1;
			}
			err = r6c_resume_array(info, sig, rv);
			if (err != 0)
				goto exitCheck;
			if (write_failed) {
				err = 5;
				goto exitCheck;
			}
		}


		length--;
		start++;
	}

exitCheck:

	free(stripe_buf);
	free(stripes);
	free(blocks);
	free(xor_sources);
	free(block_index_for_slot);
	free(p);
	free(q);
	free(results);

	return err;
}
299
/*
 * Parse str as a non-negative decimal number.
 *
 * On success the value is returned and *err is left untouched, so several
 * calls can share one error pointer.  On failure (empty string, trailing
 * characters, a minus sign, or a value that does not fit in an unsigned
 * long long) *err is set to str and 0 is returned.
 */
unsigned long long getnum(char *str, char **err)
{
	char *e;
	unsigned long long rv;

	/*
	 * strtoull() accepts "-N" and returns the negated value converted to
	 * unsigned, i.e. a huge number.  A '-' anywhere else is trailing
	 * garbage anyway, so any occurrence makes the string invalid.
	 */
	if (strchr(str, '-') != NULL) {
		*err = str;
		return 0;
	}

	errno = 0;
	rv = strtoull(str, &e, 10);
	if (e == str || *e || errno == ERANGE) {
		*err = str;
		return 0;
	}
	return rv;
}
310
311 int main(int argc, char *argv[])
312 {
313 /* md_device start length */
314 int *fds = NULL;
315 char *buf = NULL;
316 char **disk_name = NULL;
317 unsigned long long *offsets = NULL;
318 int raid_disks = 0;
319 int active_disks;
320 int chunk_size = 0;
321 int layout = -1;
322 int level = 6;
323 int repair = 0;
324 int failed_disk1, failed_disk2;
325 unsigned long long start, length;
326 int i;
327 int mdfd;
328 struct mdinfo *info = NULL, *comp = NULL;
329 char *err = NULL;
330 int exit_err = 0;
331 int close_flag = 0;
332 char *prg = strrchr(argv[0], '/');
333
334 if (prg == NULL)
335 prg = argv[0];
336 else
337 prg++;
338
339 if (argc < 4) {
340 fprintf(stderr, "Usage: %s md_device start_stripe length_stripes\n", prg);
341 fprintf(stderr, " or: %s md_device repair stripe failed_slot_1 failed_slot_2\n", prg);
342 exit_err = 1;
343 goto exitHere;
344 }
345
346 mdfd = open(argv[1], O_RDONLY);
347 if(mdfd < 0) {
348 perror(argv[1]);
349 fprintf(stderr, "%s: cannot open %s\n", prg, argv[1]);
350 exit_err = 2;
351 goto exitHere;
352 }
353
354 info = sysfs_read(mdfd, -1,
355 GET_LEVEL|
356 GET_LAYOUT|
357 GET_DISKS|
358 GET_DEGRADED |
359 GET_COMPONENT|
360 GET_CHUNK|
361 GET_DEVS|
362 GET_OFFSET|
363 GET_SIZE);
364
365 if(info == NULL) {
366 fprintf(stderr, "%s: Error reading sysfs information of %s\n", prg, argv[1]);
367 exit_err = 9;
368 goto exitHere;
369 }
370
371 if(info->array.level != level) {
372 fprintf(stderr, "%s: %s not a RAID-6\n", prg, argv[1]);
373 exit_err = 3;
374 goto exitHere;
375 }
376
377 if(info->array.failed_disks > 0) {
378 fprintf(stderr, "%s: %s degraded array\n", prg, argv[1]);
379 exit_err = 8;
380 goto exitHere;
381 }
382
383 printf("layout: %d\n", info->array.layout);
384 printf("disks: %d\n", info->array.raid_disks);
385 printf("component size: %llu\n", info->component_size * 512);
386 printf("total stripes: %llu\n", (info->component_size * 512) / info->array.chunk_size);
387 printf("chunk size: %d\n", info->array.chunk_size);
388 printf("\n");
389
390 comp = info->devs;
391 for(i = 0, active_disks = 0; active_disks < info->array.raid_disks; i++) {
392 printf("disk: %d - offset: %llu - size: %llu - name: %s - slot: %d\n",
393 i, comp->data_offset * 512, comp->component_size * 512,
394 map_dev(comp->disk.major, comp->disk.minor, 0),
395 comp->disk.raid_disk);
396 if(comp->disk.raid_disk >= 0)
397 active_disks++;
398 comp = comp->next;
399 }
400 printf("\n");
401
402 close(mdfd);
403
404 raid_disks = info->array.raid_disks;
405 chunk_size = info->array.chunk_size;
406 layout = info->array.layout;
407 if (strcmp(argv[2], "repair")==0) {
408 if (argc < 6) {
409 fprintf(stderr, "For repair mode, call %s md_device repair stripe failed_slot_1 failed_slot_2\n", prg);
410 exit_err = 1;
411 goto exitHere;
412 }
413 repair = 1;
414 start = getnum(argv[3], &err);
415 length = 1;
416 failed_disk1 = getnum(argv[4], &err);
417 failed_disk2 = getnum(argv[5], &err);
418
419 if(failed_disk1 > info->array.raid_disks) {
420 fprintf(stderr, "%s: failed_slot_1 index is higher than number of devices in raid\n", prg);
421 exit_err = 4;
422 goto exitHere;
423 }
424 if(failed_disk2 > info->array.raid_disks) {
425 fprintf(stderr, "%s: failed_slot_2 index is higher than number of devices in raid\n", prg);
426 exit_err = 4;
427 goto exitHere;
428 }
429 if(failed_disk1 == failed_disk2) {
430 fprintf(stderr, "%s: failed_slot_1 and failed_slot_2 are the same\n", prg);
431 exit_err = 4;
432 goto exitHere;
433 }
434 }
435 else {
436 start = getnum(argv[2], &err);
437 length = getnum(argv[3], &err);
438 }
439
440 if (err) {
441 fprintf(stderr, "%s: Bad number: %s\n", prg, err);
442 exit_err = 4;
443 goto exitHere;
444 }
445
446 if(start > ((info->component_size * 512) / chunk_size)) {
447 start = (info->component_size * 512) / chunk_size;
448 fprintf(stderr, "%s: start beyond disks size\n", prg);
449 }
450
451 if((length == 0) ||
452 ((length + start) > ((info->component_size * 512) / chunk_size))) {
453 length = (info->component_size * 512) / chunk_size - start;
454 }
455
456 disk_name = xmalloc(raid_disks * sizeof(*disk_name));
457 fds = xmalloc(raid_disks * sizeof(*fds));
458 offsets = xcalloc(raid_disks, sizeof(*offsets));
459 buf = xmalloc(raid_disks * chunk_size);
460
461 for(i=0; i<raid_disks; i++) {
462 fds[i] = -1;
463 }
464 close_flag = 1;
465
466 comp = info->devs;
467 for (i=0, active_disks=0; active_disks<raid_disks; i++) {
468 int disk_slot = comp->disk.raid_disk;
469 if(disk_slot >= 0) {
470 disk_name[disk_slot] = map_dev(comp->disk.major, comp->disk.minor, 0);
471 offsets[disk_slot] = comp->data_offset * 512;
472 fds[disk_slot] = open(disk_name[disk_slot], O_RDWR);
473 if (fds[disk_slot] < 0) {
474 perror(disk_name[disk_slot]);
475 fprintf(stderr,"%s: cannot open %s\n", prg, disk_name[disk_slot]);
476 exit_err = 6;
477 goto exitHere;
478 }
479 active_disks++;
480 }
481 comp = comp->next;
482 }
483
484 int rv = check_stripes(info, fds, offsets,
485 raid_disks, chunk_size, level, layout,
486 start, length, disk_name, repair, failed_disk1, failed_disk2);
487 if (rv != 0) {
488 fprintf(stderr,
489 "%s: check_stripes returned %d\n", prg, rv);
490 exit_err = 7;
491 goto exitHere;
492 }
493
494 exitHere:
495
496 if (close_flag)
497 for(i = 0; i < raid_disks; i++)
498 close(fds[i]);
499
500 free(disk_name);
501 free(fds);
502 free(offsets);
503 free(buf);
504
505 exit(exit_err);
506 }