]>
Commit | Line | Data |
---|---|---|
979afcb8 PS |
1 | /* |
2 | * raid6check - extended consistency check for RAID-6 | |
3 | * | |
4 | * Copyright (C) 2011 Piergiorgio Sartor | |
5 | * | |
6 | * | |
7 | * This program is free software; you can redistribute it and/or modify | |
8 | * it under the terms of the GNU General Public License as published by | |
9 | * the Free Software Foundation; either version 2 of the License, or | |
10 | * (at your option) any later version. | |
11 | * | |
12 | * This program is distributed in the hope that it will be useful, | |
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
15 | * GNU General Public License for more details. | |
16 | * | |
17 | * You should have received a copy of the GNU General Public License | |
18 | * along with this program; if not, write to the Free Software | |
19 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
20 | * | |
21 | * Author: Piergiorgio Sartor | |
22 | * Based on "restripe.c" from "mdadm" codebase | |
23 | */ | |
24 | ||
25 | #include "mdadm.h" | |
26 | #include <stdint.h> | |
8d8ab389 PS |
27 | #include <signal.h> |
28 | #include <sys/mman.h> | |
979afcb8 PS |
29 | |
30 | int geo_map(int block, unsigned long long stripe, int raid_disks, | |
31 | int level, int layout); | |
32 | void qsyndrome(uint8_t *p, uint8_t *q, uint8_t **sources, int disks, int size); | |
33 | void make_tables(void); | |
34 | ||
/*
 * Collect per stripe consistency information.
 *
 * For every byte position of the chunk, the stored parity bytes
 * (chunkP / chunkQ) are XORed with the freshly computed ones (p / q)
 * and the outcome is classified into results[]:
 *
 *   -255  both P and Q match
 *   -1    only P differs
 *   -2    only Q differs
 *   0..254  both differ: (gflog[Qdiff] - gflog[Pdiff]) modulo 255,
 *           presumably the index of the data block at fault
 *
 * chunk_size is the number of bytes (and of results[] entries) handled.
 */
void raid6_collect(int chunk_size, uint8_t *p, uint8_t *q,
		   char *chunkP, char *chunkQ, int *results)
{
	extern uint8_t raid6_gflog[];
	int pos;

	for (pos = 0; pos < chunk_size; pos++) {
		uint8_t p_diff = (uint8_t)chunkP[pos] ^ (uint8_t)p[pos];
		uint8_t q_diff = (uint8_t)chunkQ[pos] ^ (uint8_t)q[pos];
		int verdict;

		if (p_diff == 0) {
			/* P agrees: either all fine, or Q alone is off */
			verdict = (q_diff == 0) ? -255 : -2;
		} else if (q_diff == 0) {
			/* only P is off */
			verdict = -1;
		} else {
			/* both off: difference of the logarithms, wrapped into 0..254 */
			verdict = raid6_gflog[q_diff] - raid6_gflog[p_diff];
			if (verdict < 0)
				verdict += 255;
		}

		results[pos] = verdict;
	}
}
64 | ||
/*
 * Try to find out if a specific disk has problems.
 *
 * results[] holds chunk_size per-byte verdicts as written by
 * raid6_collect(): -255 means "no mismatch at this byte", any other
 * value names the suspected block (-1, -2 or a non-negative index).
 *
 * Returns:
 *   -255    no entry reported a mismatch (also when chunk_size is 0)
 *   value   every mismatching entry named this same block and it is
 *           below raid_disks
 *   -65535  the entries disagree with each other, or one of them is
 *           >= raid_disks, so no single disk can be blamed
 *
 * A disagreement is now reported even when it first shows up in the
 * very last entry; previously that case returned the last entry's
 * value as if it were a consistent result.
 */
int raid6_stats(int *results, int raid_disks, int chunk_size)
{
	int broken_disk = -255;
	int i;

	for (i = 0; i < chunk_size; i++) {
		int candidate = results[i];

		/* byte is consistent, nothing to learn from it */
		if (candidate == -255)
			continue;

		/* index outside the array: cannot be mapped to a disk */
		if (candidate >= raid_disks)
			return -65535;

		if (broken_disk == -255)
			broken_disk = candidate;
		else if (candidate != broken_disk)
			return -65535;
	}

	return broken_disk;
}
103 | ||
8d8ab389 | 104 | int check_stripes(struct mdinfo *info, int *source, unsigned long long *offsets, |
979afcb8 PS |
105 | int raid_disks, int chunk_size, int level, int layout, |
106 | unsigned long long start, unsigned long long length, char *name[]) | |
107 | { | |
108 | /* read the data and p and q blocks, and check we got them right */ | |
109 | char *stripe_buf = malloc(raid_disks * chunk_size); | |
110 | char **stripes = malloc(raid_disks * sizeof(char*)); | |
111 | char **blocks = malloc(raid_disks * sizeof(char*)); | |
112 | uint8_t *p = malloc(chunk_size); | |
113 | uint8_t *q = malloc(chunk_size); | |
114 | int *results = malloc(chunk_size * sizeof(int)); | |
115 | ||
116 | int i; | |
117 | int diskP, diskQ; | |
118 | int data_disks = raid_disks - 2; | |
af3c3750 | 119 | int err = 0; |
8d8ab389 PS |
120 | sighandler_t sig[3]; |
121 | int rv; | |
979afcb8 PS |
122 | |
123 | extern int tables_ready; | |
124 | ||
af3c3750 PS |
125 | if((stripe_buf == NULL) || |
126 | (stripes == NULL) || | |
127 | (blocks == NULL) || | |
128 | (p == NULL) || | |
129 | (q == NULL) || | |
130 | (results == NULL)) { | |
131 | err = 1; | |
132 | goto exitCheck; | |
133 | } | |
134 | ||
979afcb8 PS |
135 | if (!tables_ready) |
136 | make_tables(); | |
137 | ||
138 | for ( i = 0 ; i < raid_disks ; i++) | |
139 | stripes[i] = stripe_buf + i * chunk_size; | |
140 | ||
141 | while (length > 0) { | |
142 | int disk; | |
143 | ||
af3c3750 PS |
144 | printf("pos --> %llu\n", start); |
145 | ||
8d8ab389 PS |
146 | if(mlockall(MCL_CURRENT | MCL_FUTURE) != 0) { |
147 | err = 2; | |
148 | goto exitCheck; | |
149 | } | |
150 | sig[0] = signal(SIGTERM, SIG_IGN); | |
151 | sig[1] = signal(SIGINT, SIG_IGN); | |
152 | sig[2] = signal(SIGQUIT, SIG_IGN); | |
153 | rv = sysfs_set_num(info, NULL, "suspend_lo", start * chunk_size * data_disks); | |
154 | rv |= sysfs_set_num(info, NULL, "suspend_hi", (start + 1) * chunk_size * data_disks); | |
979afcb8 | 155 | for (i = 0 ; i < raid_disks ; i++) { |
af3c3750 | 156 | lseek64(source[i], offsets[i] + start * chunk_size, 0); |
979afcb8 PS |
157 | read(source[i], stripes[i], chunk_size); |
158 | } | |
8d8ab389 PS |
159 | rv |= sysfs_set_num(info, NULL, "suspend_lo", 0x7FFFFFFFFFFFFFFFULL); |
160 | rv |= sysfs_set_num(info, NULL, "suspend_hi", 0); | |
161 | rv |= sysfs_set_num(info, NULL, "suspend_lo", 0); | |
162 | signal(SIGQUIT, sig[2]); | |
163 | signal(SIGINT, sig[1]); | |
164 | signal(SIGTERM, sig[0]); | |
165 | if(munlockall() != 0) { | |
166 | err = 3; | |
167 | goto exitCheck; | |
168 | } | |
169 | ||
170 | if(rv != 0) { | |
171 | err = rv * 256; | |
172 | goto exitCheck; | |
173 | } | |
174 | ||
979afcb8 | 175 | for (i = 0 ; i < data_disks ; i++) { |
af3c3750 | 176 | int disk = geo_map(i, start, raid_disks, level, layout); |
979afcb8 PS |
177 | blocks[i] = stripes[disk]; |
178 | printf("%d->%d\n", i, disk); | |
179 | } | |
180 | ||
181 | qsyndrome(p, q, (uint8_t**)blocks, data_disks, chunk_size); | |
af3c3750 | 182 | diskP = geo_map(-1, start, raid_disks, level, layout); |
979afcb8 | 183 | if (memcmp(p, stripes[diskP], chunk_size) != 0) { |
af3c3750 | 184 | printf("P(%d) wrong at %llu\n", diskP, start); |
979afcb8 | 185 | } |
af3c3750 | 186 | diskQ = geo_map(-2, start, raid_disks, level, layout); |
979afcb8 | 187 | if (memcmp(q, stripes[diskQ], chunk_size) != 0) { |
af3c3750 | 188 | printf("Q(%d) wrong at %llu\n", diskQ, start); |
979afcb8 | 189 | } |
af3c3750 | 190 | raid6_collect(chunk_size, p, q, stripes[diskP], stripes[diskQ], results); |
979afcb8 PS |
191 | disk = raid6_stats(results, raid_disks, chunk_size); |
192 | ||
193 | if(disk >= -2) { | |
af3c3750 | 194 | disk = geo_map(disk, start, raid_disks, level, layout); |
979afcb8 PS |
195 | } |
196 | if(disk >= 0) { | |
af3c3750 PS |
197 | printf("Error detected at %llu: possible failed disk slot: %d --> %s\n", |
198 | start, disk, name[disk]); | |
979afcb8 PS |
199 | } |
200 | if(disk == -65535) { | |
af3c3750 | 201 | printf("Error detected at %llu: disk slot unknown\n", start); |
979afcb8 PS |
202 | } |
203 | ||
af3c3750 PS |
204 | length--; |
205 | start++; | |
979afcb8 PS |
206 | } |
207 | ||
af3c3750 PS |
208 | exitCheck: |
209 | ||
979afcb8 PS |
210 | free(stripe_buf); |
211 | free(stripes); | |
212 | free(blocks); | |
213 | free(p); | |
214 | free(q); | |
215 | free(results); | |
216 | ||
af3c3750 | 217 | return err; |
979afcb8 PS |
218 | } |
219 | ||
/*
 * Parse str as an unsigned decimal number.
 *
 * On success the value is returned and *err is left untouched.  When
 * str is empty, has trailing characters, or carries a minus sign, 0 is
 * returned and *err is set to str so the caller can report the
 * offending argument.
 *
 * strtoull() on its own accepts "-1" and returns the negated value
 * converted to unsigned, i.e. a huge number; since the whole string
 * must be consumed as a number, any '-' in it can only be that sign,
 * so it is rejected explicitly.
 */
unsigned long long getnum(char *str, char **err)
{
	char *end;
	unsigned long long value = strtoull(str, &end, 10);

	if (end == str || *end != '\0' || strchr(str, '-') != NULL) {
		*err = str;
		return 0;
	}
	return value;
}
230 | ||
231 | int main(int argc, char *argv[]) | |
232 | { | |
a9c2c6c6 | 233 | /* md_device start length */ |
af3c3750 PS |
234 | int *fds = NULL; |
235 | char *buf = NULL; | |
236 | char **disk_name = NULL; | |
237 | unsigned long long *offsets = NULL; | |
238 | int raid_disks = 0; | |
2cf31121 | 239 | int active_disks; |
af3c3750 PS |
240 | int chunk_size = 0; |
241 | int layout = -1; | |
979afcb8 PS |
242 | int level = 6; |
243 | unsigned long long start, length; | |
244 | int i; | |
a9c2c6c6 | 245 | int mdfd; |
8d8ab389 | 246 | struct mdinfo *info = NULL, *comp = NULL; |
979afcb8 | 247 | char *err = NULL; |
af3c3750 PS |
248 | int exit_err = 0; |
249 | int close_flag = 0; | |
250 | char *prg = strrchr(argv[0], '/'); | |
251 | ||
252 | if (prg == NULL) | |
253 | prg = argv[0]; | |
254 | else | |
255 | prg++; | |
256 | ||
257 | if (argc < 4) { | |
258 | fprintf(stderr, "Usage: %s md_device start_stripe length_stripes\n", prg); | |
259 | exit_err = 1; | |
260 | goto exitHere; | |
979afcb8 PS |
261 | } |
262 | ||
a9c2c6c6 PS |
263 | mdfd = open(argv[1], O_RDONLY); |
264 | if(mdfd < 0) { | |
265 | perror(argv[1]); | |
266 | fprintf(stderr,"%s: cannot open %s\n", prg, argv[1]); | |
af3c3750 PS |
267 | exit_err = 2; |
268 | goto exitHere; | |
a9c2c6c6 PS |
269 | } |
270 | ||
271 | info = sysfs_read(mdfd, -1, | |
272 | GET_LEVEL| | |
273 | GET_LAYOUT| | |
274 | GET_DISKS| | |
2cf31121 | 275 | GET_DEGRADED | |
a9c2c6c6 PS |
276 | GET_COMPONENT| |
277 | GET_CHUNK| | |
278 | GET_DEVS| | |
279 | GET_OFFSET| | |
280 | GET_SIZE); | |
281 | ||
8d8ab389 PS |
282 | if(info == NULL) { |
283 | fprintf(stderr, "%s: Error reading sysfs information of %s\n", prg, argv[1]); | |
284 | exit_err = 9; | |
285 | goto exitHere; | |
286 | } | |
287 | ||
a9c2c6c6 PS |
288 | if(info->array.level != level) { |
289 | fprintf(stderr, "%s: %s not a RAID-6\n", prg, argv[1]); | |
af3c3750 PS |
290 | exit_err = 3; |
291 | goto exitHere; | |
a9c2c6c6 PS |
292 | } |
293 | ||
2cf31121 PS |
294 | if(info->array.failed_disks > 0) { |
295 | fprintf(stderr, "%s: %s degraded array\n", prg, argv[1]); | |
296 | exit_err = 8; | |
297 | goto exitHere; | |
298 | } | |
299 | ||
a9c2c6c6 PS |
300 | printf("layout: %d\n", info->array.layout); |
301 | printf("disks: %d\n", info->array.raid_disks); | |
af3c3750 PS |
302 | printf("component size: %llu\n", info->component_size * 512); |
303 | printf("total stripes: %llu\n", (info->component_size * 512) / info->array.chunk_size); | |
a9c2c6c6 PS |
304 | printf("chunk size: %d\n", info->array.chunk_size); |
305 | printf("\n"); | |
306 | ||
307 | comp = info->devs; | |
2cf31121 | 308 | for(i = 0, active_disks = 0; active_disks < info->array.raid_disks; i++) { |
a9c2c6c6 | 309 | printf("disk: %d - offset: %llu - size: %llu - name: %s - slot: %d\n", |
af3c3750 | 310 | i, comp->data_offset * 512, comp->component_size * 512, |
a9c2c6c6 PS |
311 | map_dev(comp->disk.major, comp->disk.minor, 0), |
312 | comp->disk.raid_disk); | |
2cf31121 PS |
313 | if(comp->disk.raid_disk >= 0) |
314 | active_disks++; | |
a9c2c6c6 PS |
315 | comp = comp->next; |
316 | } | |
317 | printf("\n"); | |
318 | ||
319 | close(mdfd); | |
320 | ||
321 | raid_disks = info->array.raid_disks; | |
322 | chunk_size = info->array.chunk_size; | |
323 | layout = info->array.layout; | |
324 | start = getnum(argv[2], &err); | |
325 | length = getnum(argv[3], &err); | |
326 | ||
979afcb8 | 327 | if (err) { |
a9c2c6c6 | 328 | fprintf(stderr, "%s: Bad number: %s\n", prg, err); |
af3c3750 PS |
329 | exit_err = 4; |
330 | goto exitHere; | |
979afcb8 | 331 | } |
a9c2c6c6 | 332 | |
af3c3750 PS |
333 | if(start > ((info->component_size * 512) / chunk_size)) { |
334 | start = (info->component_size * 512) / chunk_size; | |
335 | fprintf(stderr, "%s: start beyond disks size\n", prg); | |
336 | } | |
a9c2c6c6 | 337 | |
af3c3750 PS |
338 | if((length == 0) || |
339 | ((length + start) > ((info->component_size * 512) / chunk_size))) { | |
340 | length = (info->component_size * 512) / chunk_size - start; | |
979afcb8 | 341 | } |
a9c2c6c6 PS |
342 | |
343 | disk_name = malloc(raid_disks * sizeof(*disk_name)); | |
979afcb8 PS |
344 | fds = malloc(raid_disks * sizeof(*fds)); |
345 | offsets = malloc(raid_disks * sizeof(*offsets)); | |
af3c3750 PS |
346 | buf = malloc(raid_disks * chunk_size); |
347 | ||
348 | if((disk_name == NULL) || | |
349 | (fds == NULL) || | |
350 | (offsets == NULL) || | |
351 | (buf == NULL)) { | |
352 | fprintf(stderr, "%s: allocation fail\n", prg); | |
353 | exit_err = 5; | |
354 | goto exitHere; | |
355 | } | |
356 | ||
979afcb8 | 357 | memset(offsets, 0, raid_disks * sizeof(*offsets)); |
af3c3750 PS |
358 | for(i=0; i<raid_disks; i++) { |
359 | fds[i] = -1; | |
360 | } | |
361 | close_flag = 1; | |
979afcb8 | 362 | |
a9c2c6c6 | 363 | comp = info->devs; |
2cf31121 | 364 | for (i=0, active_disks=0; active_disks<raid_disks; i++) { |
a9c2c6c6 | 365 | int disk_slot = comp->disk.raid_disk; |
2cf31121 PS |
366 | if(disk_slot >= 0) { |
367 | disk_name[disk_slot] = map_dev(comp->disk.major, comp->disk.minor, 0); | |
368 | offsets[disk_slot] = comp->data_offset * 512; | |
369 | fds[disk_slot] = open(disk_name[disk_slot], O_RDWR); | |
370 | if (fds[disk_slot] < 0) { | |
371 | perror(disk_name[disk_slot]); | |
372 | fprintf(stderr,"%s: cannot open %s\n", prg, disk_name[disk_slot]); | |
373 | exit_err = 6; | |
374 | goto exitHere; | |
375 | } | |
376 | active_disks++; | |
979afcb8 | 377 | } |
a9c2c6c6 | 378 | comp = comp->next; |
979afcb8 PS |
379 | } |
380 | ||
8d8ab389 | 381 | int rv = check_stripes(info, fds, offsets, |
979afcb8 | 382 | raid_disks, chunk_size, level, layout, |
a9c2c6c6 | 383 | start, length, disk_name); |
979afcb8 PS |
384 | if (rv != 0) { |
385 | fprintf(stderr, | |
a9c2c6c6 | 386 | "%s: check_stripes returned %d\n", prg, rv); |
af3c3750 PS |
387 | exit_err = 7; |
388 | goto exitHere; | |
979afcb8 PS |
389 | } |
390 | ||
af3c3750 PS |
391 | exitHere: |
392 | ||
393 | if (close_flag) | |
394 | for(i = 0; i < raid_disks; i++) | |
395 | close(fds[i]); | |
396 | ||
a9c2c6c6 | 397 | free(disk_name); |
979afcb8 PS |
398 | free(fds); |
399 | free(offsets); | |
400 | free(buf); | |
401 | ||
af3c3750 | 402 | exit(exit_err); |
979afcb8 | 403 | } |