]>
Commit | Line | Data |
---|---|---|
979afcb8 PS |
1 | /* |
2 | * raid6check - extended consistency check for RAID-6 | |
3 | * | |
4 | * Copyright (C) 2011 Piergiorgio Sartor | |
5 | * | |
6 | * | |
7 | * This program is free software; you can redistribute it and/or modify | |
8 | * it under the terms of the GNU General Public License as published by | |
9 | * the Free Software Foundation; either version 2 of the License, or | |
10 | * (at your option) any later version. | |
11 | * | |
12 | * This program is distributed in the hope that it will be useful, | |
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
15 | * GNU General Public License for more details. | |
16 | * | |
17 | * You should have received a copy of the GNU General Public License | |
18 | * along with this program; if not, write to the Free Software | |
19 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
20 | * | |
21 | * Author: Piergiorgio Sartor | |
22 | * Based on "restripe.c" from "mdadm" codebase | |
23 | */ | |
24 | ||
#include "mdadm.h"
#include <errno.h>
#include <signal.h>
#include <stdint.h>
#include <sys/mman.h>
979afcb8 PS |
29 | |
30 | int geo_map(int block, unsigned long long stripe, int raid_disks, | |
31 | int level, int layout); | |
32 | void qsyndrome(uint8_t *p, uint8_t *q, uint8_t **sources, int disks, int size); | |
33 | void make_tables(void); | |
34 | ||
/*
 * Collect per-byte consistency information for one stripe.
 *
 * chunk_size: number of bytes to compare
 * p, q:       P and Q values recomputed from the data blocks
 * chunkP/Q:   P and Q chunks as read from the component devices
 * results:    output array of chunk_size ints, one entry per byte:
 *               -255  stored P and Q both match the recomputed values
 *               -1    only P differs
 *               -2    only Q differs
 *               0..254 both differ; the entry is
 *                     (gflog[Qdiff] - gflog[Pdiff]) wrapped into 0..254,
 *                     which check_stripes() later treats as the index of
 *                     the suspect data block
 */
void raid6_collect(int chunk_size, uint8_t *p, uint8_t *q,
		   char *chunkP, char *chunkQ, int *results)
{
	extern uint8_t raid6_gflog[];
	int pos;

	for (pos = 0; pos < chunk_size; pos++) {
		/* XOR of stored and recomputed parity: zero means "matches" */
		uint8_t p_diff = (uint8_t)chunkP[pos] ^ p[pos];
		uint8_t q_diff = (uint8_t)chunkQ[pos] ^ q[pos];

		if (p_diff == 0 && q_diff == 0) {
			results[pos] = -255;
		} else if (q_diff == 0) {
			results[pos] = -1;
		} else if (p_diff == 0) {
			results[pos] = -2;
		} else {
			int log_delta = raid6_gflog[q_diff] - raid6_gflog[p_diff];

			/* keep the log difference in the range 0..254 */
			results[pos] = (log_delta < 0) ? log_delta + 255 : log_delta;
		}
	}
}
64 | ||
/*
 * Try to find out if a specific disk has problems.
 *
 * results:    per-byte codes as written by raid6_collect()
 *             (-255 = byte is consistent, anything else names a suspect)
 * raid_disks: number of disks in the array; any code >= raid_disks cannot
 *             name a real disk
 * chunk_size: number of entries in results
 *
 * Returns:
 *   -255    no entry reported a mismatch (also when chunk_size is 0)
 *   -65535  mismatches exist but do not all agree on one suspect, or a
 *           code is >= raid_disks
 *   other   the single code shared by every mismatching entry
 *           (-1 for P, -2 for Q, otherwise a data block index)
 *
 * A disagreement is reported as -65535 wherever it occurs, including when
 * the first conflicting code is the very last entry scanned.
 */
int raid6_stats(int *results, int raid_disks, int chunk_size)
{
	enum { STATS_CLEAN = -255, STATS_AMBIGUOUS = -65535 };
	int suspect = STATS_CLEAN;
	int i;

	for (i = 0; i < chunk_size; i++) {
		int code = results[i];

		if (code == STATS_CLEAN)
			continue;

		/* a code that cannot map to a disk makes the stripe undecidable */
		if (code >= raid_disks)
			return STATS_AMBIGUOUS;

		if (suspect == STATS_CLEAN)
			suspect = code;
		else if (code != suspect)
			return STATS_AMBIGUOUS;
	}

	return suspect;
}
103 | ||
8d8ab389 | 104 | int check_stripes(struct mdinfo *info, int *source, unsigned long long *offsets, |
979afcb8 PS |
105 | int raid_disks, int chunk_size, int level, int layout, |
106 | unsigned long long start, unsigned long long length, char *name[]) | |
107 | { | |
108 | /* read the data and p and q blocks, and check we got them right */ | |
503975b9 N |
109 | char *stripe_buf = xmalloc(raid_disks * chunk_size); |
110 | char **stripes = xmalloc(raid_disks * sizeof(char*)); | |
111 | char **blocks = xmalloc(raid_disks * sizeof(char*)); | |
112 | uint8_t *p = xmalloc(chunk_size); | |
113 | uint8_t *q = xmalloc(chunk_size); | |
114 | int *results = xmalloc(chunk_size * sizeof(int)); | |
979afcb8 PS |
115 | |
116 | int i; | |
117 | int diskP, diskQ; | |
118 | int data_disks = raid_disks - 2; | |
af3c3750 | 119 | int err = 0; |
8d8ab389 PS |
120 | sighandler_t sig[3]; |
121 | int rv; | |
979afcb8 PS |
122 | |
123 | extern int tables_ready; | |
124 | ||
125 | if (!tables_ready) | |
126 | make_tables(); | |
127 | ||
128 | for ( i = 0 ; i < raid_disks ; i++) | |
129 | stripes[i] = stripe_buf + i * chunk_size; | |
130 | ||
131 | while (length > 0) { | |
132 | int disk; | |
133 | ||
af3c3750 PS |
134 | printf("pos --> %llu\n", start); |
135 | ||
8d8ab389 PS |
136 | if(mlockall(MCL_CURRENT | MCL_FUTURE) != 0) { |
137 | err = 2; | |
138 | goto exitCheck; | |
139 | } | |
140 | sig[0] = signal(SIGTERM, SIG_IGN); | |
141 | sig[1] = signal(SIGINT, SIG_IGN); | |
142 | sig[2] = signal(SIGQUIT, SIG_IGN); | |
143 | rv = sysfs_set_num(info, NULL, "suspend_lo", start * chunk_size * data_disks); | |
144 | rv |= sysfs_set_num(info, NULL, "suspend_hi", (start + 1) * chunk_size * data_disks); | |
979afcb8 | 145 | for (i = 0 ; i < raid_disks ; i++) { |
af3c3750 | 146 | lseek64(source[i], offsets[i] + start * chunk_size, 0); |
979afcb8 PS |
147 | read(source[i], stripes[i], chunk_size); |
148 | } | |
8d8ab389 PS |
149 | rv |= sysfs_set_num(info, NULL, "suspend_lo", 0x7FFFFFFFFFFFFFFFULL); |
150 | rv |= sysfs_set_num(info, NULL, "suspend_hi", 0); | |
151 | rv |= sysfs_set_num(info, NULL, "suspend_lo", 0); | |
152 | signal(SIGQUIT, sig[2]); | |
153 | signal(SIGINT, sig[1]); | |
154 | signal(SIGTERM, sig[0]); | |
155 | if(munlockall() != 0) { | |
156 | err = 3; | |
157 | goto exitCheck; | |
158 | } | |
159 | ||
160 | if(rv != 0) { | |
161 | err = rv * 256; | |
162 | goto exitCheck; | |
163 | } | |
164 | ||
979afcb8 | 165 | for (i = 0 ; i < data_disks ; i++) { |
af3c3750 | 166 | int disk = geo_map(i, start, raid_disks, level, layout); |
979afcb8 PS |
167 | blocks[i] = stripes[disk]; |
168 | printf("%d->%d\n", i, disk); | |
169 | } | |
170 | ||
171 | qsyndrome(p, q, (uint8_t**)blocks, data_disks, chunk_size); | |
af3c3750 | 172 | diskP = geo_map(-1, start, raid_disks, level, layout); |
979afcb8 | 173 | if (memcmp(p, stripes[diskP], chunk_size) != 0) { |
af3c3750 | 174 | printf("P(%d) wrong at %llu\n", diskP, start); |
979afcb8 | 175 | } |
af3c3750 | 176 | diskQ = geo_map(-2, start, raid_disks, level, layout); |
979afcb8 | 177 | if (memcmp(q, stripes[diskQ], chunk_size) != 0) { |
af3c3750 | 178 | printf("Q(%d) wrong at %llu\n", diskQ, start); |
979afcb8 | 179 | } |
af3c3750 | 180 | raid6_collect(chunk_size, p, q, stripes[diskP], stripes[diskQ], results); |
979afcb8 PS |
181 | disk = raid6_stats(results, raid_disks, chunk_size); |
182 | ||
183 | if(disk >= -2) { | |
af3c3750 | 184 | disk = geo_map(disk, start, raid_disks, level, layout); |
979afcb8 PS |
185 | } |
186 | if(disk >= 0) { | |
af3c3750 PS |
187 | printf("Error detected at %llu: possible failed disk slot: %d --> %s\n", |
188 | start, disk, name[disk]); | |
979afcb8 PS |
189 | } |
190 | if(disk == -65535) { | |
af3c3750 | 191 | printf("Error detected at %llu: disk slot unknown\n", start); |
979afcb8 PS |
192 | } |
193 | ||
af3c3750 PS |
194 | length--; |
195 | start++; | |
979afcb8 PS |
196 | } |
197 | ||
af3c3750 PS |
198 | exitCheck: |
199 | ||
979afcb8 PS |
200 | free(stripe_buf); |
201 | free(stripes); | |
202 | free(blocks); | |
203 | free(p); | |
204 | free(q); | |
205 | free(results); | |
206 | ||
af3c3750 | 207 | return err; |
979afcb8 PS |
208 | } |
209 | ||
/*
 * Parse a non-negative decimal number.
 *
 * str: text to parse; the whole string must be consumed
 * err: on failure *err is set to str; on success it is left untouched, so
 *      a caller can parse several values and test *err once afterwards
 *
 * Returns the parsed value, or 0 on failure. Failure means: no digits,
 * trailing characters, a value that does not fit in unsigned long long,
 * or a minus sign (strtoull would otherwise silently wrap "-1" to the
 * largest representable value).
 */
unsigned long long getnum(char *str, char **err)
{
	char *end;
	unsigned long long value;

	errno = 0;
	value = strtoull(str, &end, 10);
	if (end == str || *end != '\0' || errno == ERANGE ||
	    strchr(str, '-') != NULL) {
		*err = str;
		return 0;
	}
	return value;
}
220 | ||
221 | int main(int argc, char *argv[]) | |
222 | { | |
a9c2c6c6 | 223 | /* md_device start length */ |
af3c3750 PS |
224 | int *fds = NULL; |
225 | char *buf = NULL; | |
226 | char **disk_name = NULL; | |
227 | unsigned long long *offsets = NULL; | |
228 | int raid_disks = 0; | |
2cf31121 | 229 | int active_disks; |
af3c3750 PS |
230 | int chunk_size = 0; |
231 | int layout = -1; | |
979afcb8 PS |
232 | int level = 6; |
233 | unsigned long long start, length; | |
234 | int i; | |
a9c2c6c6 | 235 | int mdfd; |
8d8ab389 | 236 | struct mdinfo *info = NULL, *comp = NULL; |
979afcb8 | 237 | char *err = NULL; |
af3c3750 PS |
238 | int exit_err = 0; |
239 | int close_flag = 0; | |
240 | char *prg = strrchr(argv[0], '/'); | |
241 | ||
242 | if (prg == NULL) | |
243 | prg = argv[0]; | |
244 | else | |
245 | prg++; | |
246 | ||
247 | if (argc < 4) { | |
248 | fprintf(stderr, "Usage: %s md_device start_stripe length_stripes\n", prg); | |
249 | exit_err = 1; | |
250 | goto exitHere; | |
979afcb8 PS |
251 | } |
252 | ||
a9c2c6c6 PS |
253 | mdfd = open(argv[1], O_RDONLY); |
254 | if(mdfd < 0) { | |
255 | perror(argv[1]); | |
e7b84f9d | 256 | fprintf(stderr, "%s: cannot open %s\n", prg, argv[1]); |
af3c3750 PS |
257 | exit_err = 2; |
258 | goto exitHere; | |
a9c2c6c6 PS |
259 | } |
260 | ||
261 | info = sysfs_read(mdfd, -1, | |
262 | GET_LEVEL| | |
263 | GET_LAYOUT| | |
264 | GET_DISKS| | |
2cf31121 | 265 | GET_DEGRADED | |
a9c2c6c6 PS |
266 | GET_COMPONENT| |
267 | GET_CHUNK| | |
268 | GET_DEVS| | |
269 | GET_OFFSET| | |
270 | GET_SIZE); | |
271 | ||
8d8ab389 PS |
272 | if(info == NULL) { |
273 | fprintf(stderr, "%s: Error reading sysfs information of %s\n", prg, argv[1]); | |
274 | exit_err = 9; | |
275 | goto exitHere; | |
276 | } | |
277 | ||
a9c2c6c6 PS |
278 | if(info->array.level != level) { |
279 | fprintf(stderr, "%s: %s not a RAID-6\n", prg, argv[1]); | |
af3c3750 PS |
280 | exit_err = 3; |
281 | goto exitHere; | |
a9c2c6c6 PS |
282 | } |
283 | ||
2cf31121 PS |
284 | if(info->array.failed_disks > 0) { |
285 | fprintf(stderr, "%s: %s degraded array\n", prg, argv[1]); | |
286 | exit_err = 8; | |
287 | goto exitHere; | |
288 | } | |
289 | ||
a9c2c6c6 PS |
290 | printf("layout: %d\n", info->array.layout); |
291 | printf("disks: %d\n", info->array.raid_disks); | |
af3c3750 PS |
292 | printf("component size: %llu\n", info->component_size * 512); |
293 | printf("total stripes: %llu\n", (info->component_size * 512) / info->array.chunk_size); | |
a9c2c6c6 PS |
294 | printf("chunk size: %d\n", info->array.chunk_size); |
295 | printf("\n"); | |
296 | ||
297 | comp = info->devs; | |
2cf31121 | 298 | for(i = 0, active_disks = 0; active_disks < info->array.raid_disks; i++) { |
a9c2c6c6 | 299 | printf("disk: %d - offset: %llu - size: %llu - name: %s - slot: %d\n", |
af3c3750 | 300 | i, comp->data_offset * 512, comp->component_size * 512, |
a9c2c6c6 PS |
301 | map_dev(comp->disk.major, comp->disk.minor, 0), |
302 | comp->disk.raid_disk); | |
2cf31121 PS |
303 | if(comp->disk.raid_disk >= 0) |
304 | active_disks++; | |
a9c2c6c6 PS |
305 | comp = comp->next; |
306 | } | |
307 | printf("\n"); | |
308 | ||
309 | close(mdfd); | |
310 | ||
311 | raid_disks = info->array.raid_disks; | |
312 | chunk_size = info->array.chunk_size; | |
313 | layout = info->array.layout; | |
314 | start = getnum(argv[2], &err); | |
315 | length = getnum(argv[3], &err); | |
316 | ||
979afcb8 | 317 | if (err) { |
a9c2c6c6 | 318 | fprintf(stderr, "%s: Bad number: %s\n", prg, err); |
af3c3750 PS |
319 | exit_err = 4; |
320 | goto exitHere; | |
979afcb8 | 321 | } |
a9c2c6c6 | 322 | |
af3c3750 PS |
323 | if(start > ((info->component_size * 512) / chunk_size)) { |
324 | start = (info->component_size * 512) / chunk_size; | |
325 | fprintf(stderr, "%s: start beyond disks size\n", prg); | |
326 | } | |
a9c2c6c6 | 327 | |
af3c3750 PS |
328 | if((length == 0) || |
329 | ((length + start) > ((info->component_size * 512) / chunk_size))) { | |
330 | length = (info->component_size * 512) / chunk_size - start; | |
979afcb8 | 331 | } |
a9c2c6c6 | 332 | |
503975b9 N |
333 | disk_name = xmalloc(raid_disks * sizeof(*disk_name)); |
334 | fds = xmalloc(raid_disks * sizeof(*fds)); | |
335 | offsets = xcalloc(raid_disks, sizeof(*offsets)); | |
336 | buf = xmalloc(raid_disks * chunk_size); | |
af3c3750 | 337 | |
af3c3750 PS |
338 | for(i=0; i<raid_disks; i++) { |
339 | fds[i] = -1; | |
340 | } | |
341 | close_flag = 1; | |
979afcb8 | 342 | |
a9c2c6c6 | 343 | comp = info->devs; |
2cf31121 | 344 | for (i=0, active_disks=0; active_disks<raid_disks; i++) { |
a9c2c6c6 | 345 | int disk_slot = comp->disk.raid_disk; |
2cf31121 PS |
346 | if(disk_slot >= 0) { |
347 | disk_name[disk_slot] = map_dev(comp->disk.major, comp->disk.minor, 0); | |
348 | offsets[disk_slot] = comp->data_offset * 512; | |
349 | fds[disk_slot] = open(disk_name[disk_slot], O_RDWR); | |
350 | if (fds[disk_slot] < 0) { | |
351 | perror(disk_name[disk_slot]); | |
352 | fprintf(stderr,"%s: cannot open %s\n", prg, disk_name[disk_slot]); | |
353 | exit_err = 6; | |
354 | goto exitHere; | |
355 | } | |
356 | active_disks++; | |
979afcb8 | 357 | } |
a9c2c6c6 | 358 | comp = comp->next; |
979afcb8 PS |
359 | } |
360 | ||
8d8ab389 | 361 | int rv = check_stripes(info, fds, offsets, |
979afcb8 | 362 | raid_disks, chunk_size, level, layout, |
a9c2c6c6 | 363 | start, length, disk_name); |
979afcb8 PS |
364 | if (rv != 0) { |
365 | fprintf(stderr, | |
a9c2c6c6 | 366 | "%s: check_stripes returned %d\n", prg, rv); |
af3c3750 PS |
367 | exit_err = 7; |
368 | goto exitHere; | |
979afcb8 PS |
369 | } |
370 | ||
af3c3750 PS |
371 | exitHere: |
372 | ||
373 | if (close_flag) | |
374 | for(i = 0; i < raid_disks; i++) | |
375 | close(fds[i]); | |
376 | ||
a9c2c6c6 | 377 | free(disk_name); |
979afcb8 PS |
378 | free(fds); |
379 | free(offsets); | |
380 | free(buf); | |
381 | ||
af3c3750 | 382 | exit(exit_err); |
979afcb8 | 383 | } |