]>
Commit | Line | Data |
---|---|---|
e86c9dd6 NB |
1 | /* |
2 | * mdadm - manage Linux "md" devices aka RAID arrays. | |
3 | * | |
4 | * Copyright (C) 2006 Neil Brown <neilb@suse.de> | |
5 | * | |
6 | * | |
7 | * This program is free software; you can redistribute it and/or modify | |
8 | * it under the terms of the GNU General Public License as published by | |
9 | * the Free Software Foundation; either version 2 of the License, or | |
10 | * (at your option) any later version. | |
11 | * | |
12 | * This program is distributed in the hope that it will be useful, | |
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
15 | * GNU General Public License for more details. | |
16 | * | |
17 | * You should have received a copy of the GNU General Public License | |
18 | * along with this program; if not, write to the Free Software | |
19 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
20 | * | |
21 | * Author: Neil Brown | |
22 | * Email: <neilb@suse.de> | |
23 | */ | |
24 | ||
25 | #include "mdadm.h" | |
26 | ||
27 | /* To restripe, we read from old geometry to a buffer, and | |
28 | * write from the buffer to the new geometry.
29 | * When reading we don't worry about parity. When writing we do. | |
30 | * | |
31 | */ | |
32 | ||
e0d95aac N |
33 | static int geo_map(int block, unsigned long long stripe, int raid_disks, |
34 | int level, int layout) | |
e86c9dd6 | 35 | { |
48327135 | 36 | /* On the given stripe, find which disk in the array will have |
e86c9dd6 | 37 | * block numbered 'block'. |
48327135 NB |
38 | * '-1' means the parity block. |
39 | * '-2' means the Q syndrome. | |
e86c9dd6 NB |
40 | */ |
41 | int pd; | |
42 | ||
43 | switch(level*100 + layout) { | |
44 | case 000: | |
45 | case 400: | |
e0d95aac | 46 | case 500 + ALGORITHM_PARITY_N: |
e86c9dd6 NB |
47 | /* raid 4 isn't messed around by parity blocks */ |
48 | if (block == -1) | |
49 | return raid_disks-1; /* parity block */ | |
50 | return block; | |
51 | case 500 + ALGORITHM_LEFT_ASYMMETRIC: | |
52 | pd = (raid_disks-1) - stripe % raid_disks; | |
53 | if (block == -1) return pd; | |
54 | if (block >= pd) | |
55 | block++; | |
56 | return block; | |
57 | ||
58 | case 500 + ALGORITHM_RIGHT_ASYMMETRIC: | |
59 | pd = stripe % raid_disks; | |
60 | if (block == -1) return pd; | |
61 | if (block >= pd) | |
62 | block++; | |
63 | return block; | |
64 | ||
65 | case 500 + ALGORITHM_LEFT_SYMMETRIC: | |
66 | pd = (raid_disks - 1) - stripe % raid_disks; | |
67 | if (block == -1) return pd; | |
68 | return (pd + 1 + block) % raid_disks; | |
69 | ||
70 | case 500 + ALGORITHM_RIGHT_SYMMETRIC: | |
71 | pd = stripe % raid_disks; | |
72 | if (block == -1) return pd; | |
73 | return (pd + 1 + block) % raid_disks; | |
74 | ||
e0d95aac N |
75 | case 500 + ALGORITHM_PARITY_0: |
76 | return block + 1; | |
77 | ||
78 | ||
79 | case 600 + ALGORITHM_PARITY_N_6: | |
80 | if (block == -2) | |
81 | return raid_disks - 1; | |
82 | if (block == -1) | |
83 | return raid_disks - 2; /* parity block */ | |
84 | return block; | |
85 | case 600 + ALGORITHM_LEFT_ASYMMETRIC_6: | |
86 | if (block == -2) | |
87 | return raid_disks - 1; | |
88 | raid_disks--; | |
89 | pd = (raid_disks-1) - stripe % raid_disks; | |
90 | if (block == -1) return pd; | |
91 | if (block >= pd) | |
92 | block++; | |
93 | return block; | |
94 | ||
95 | case 600 + ALGORITHM_RIGHT_ASYMMETRIC_6: | |
96 | if (block == -2) | |
97 | return raid_disks - 1; | |
98 | raid_disks--; | |
99 | pd = stripe % raid_disks; | |
100 | if (block == -1) return pd; | |
101 | if (block >= pd) | |
102 | block++; | |
103 | return block; | |
104 | ||
105 | case 600 + ALGORITHM_LEFT_SYMMETRIC_6: | |
106 | if (block == -2) | |
107 | return raid_disks - 1; | |
108 | raid_disks--; | |
109 | pd = (raid_disks - 1) - stripe % raid_disks; | |
110 | if (block == -1) return pd; | |
111 | return (pd + 1 + block) % raid_disks; | |
112 | ||
113 | case 600 + ALGORITHM_RIGHT_SYMMETRIC_6: | |
114 | if (block == -2) | |
115 | return raid_disks - 1; | |
116 | raid_disks--; | |
117 | pd = stripe % raid_disks; | |
118 | if (block == -1) return pd; | |
119 | return (pd + 1 + block) % raid_disks; | |
120 | ||
121 | case 600 + ALGORITHM_PARITY_0_6: | |
122 | if (block == -2) | |
123 | return raid_disks - 1; | |
124 | return block + 1; | |
125 | ||
126 | ||
127 | case 600 + ALGORITHM_PARITY_0: | |
128 | if (block == -1) | |
129 | return 0; | |
130 | if (block == -2) | |
131 | return 1; | |
132 | return block + 2; | |
133 | ||
e86c9dd6 NB |
134 | case 600 + ALGORITHM_LEFT_ASYMMETRIC: |
135 | pd = raid_disks - 1 - (stripe % raid_disks); | |
136 | if (block == -1) return pd; | |
48327135 | 137 | if (block == -2) return (pd+1) % raid_disks; |
e86c9dd6 NB |
138 | if (pd == raid_disks - 1) |
139 | return block+1; | |
140 | if (block >= pd) | |
141 | return block+2; | |
142 | return block; | |
143 | ||
e0d95aac N |
144 | case 600 + ALGORITHM_ROTATING_ZERO_RESTART: |
145 | /* Different order for calculating Q, otherwize same as ... */ | |
e86c9dd6 NB |
146 | case 600 + ALGORITHM_RIGHT_ASYMMETRIC: |
147 | pd = stripe % raid_disks; | |
148 | if (block == -1) return pd; | |
48327135 | 149 | if (block == -2) return (pd+1) % raid_disks; |
e86c9dd6 NB |
150 | if (pd == raid_disks - 1) |
151 | return block+1; | |
152 | if (block >= pd) | |
153 | return block+2; | |
154 | return block; | |
155 | ||
156 | case 600 + ALGORITHM_LEFT_SYMMETRIC: | |
157 | pd = raid_disks - 1 - (stripe % raid_disks); | |
158 | if (block == -1) return pd; | |
48327135 | 159 | if (block == -2) return (pd+1) % raid_disks; |
e86c9dd6 NB |
160 | return (pd + 2 + block) % raid_disks; |
161 | ||
162 | case 600 + ALGORITHM_RIGHT_SYMMETRIC: | |
163 | pd = stripe % raid_disks; | |
164 | if (block == -1) return pd; | |
48327135 | 165 | if (block == -2) return (pd+1) % raid_disks; |
e86c9dd6 | 166 | return (pd + 2 + block) % raid_disks; |
e0d95aac N |
167 | |
168 | ||
169 | case 600 + ALGORITHM_ROTATING_N_RESTART: | |
170 | /* Same a left_asymmetric, by first stripe is | |
171 | * D D D P Q rather than | |
172 | * Q D D D P | |
173 | */ | |
174 | pd = raid_disks - 1 - ((stripe + 1) % raid_disks); | |
175 | if (block == -1) return pd; | |
176 | if (block == -2) return (pd+1) % raid_disks; | |
177 | if (pd == raid_disks - 1) | |
178 | return block+1; | |
179 | if (block >= pd) | |
180 | return block+2; | |
181 | return block; | |
182 | ||
183 | case 600 + ALGORITHM_ROTATING_N_CONTINUE: | |
184 | /* Same as left_symmetric but Q is before P */ | |
185 | pd = raid_disks - 1 - (stripe % raid_disks); | |
186 | if (block == -1) return pd; | |
187 | if (block == -2) return (pd+raid_disks-1) % raid_disks; | |
188 | return (pd + 1 + block) % raid_disks; | |
e86c9dd6 NB |
189 | } |
190 | return -1; | |
191 | } | |
e0d95aac N |
192 | static int is_ddf(int layout) |
193 | { | |
194 | switch (layout) | |
195 | { | |
196 | default: | |
197 | return 0; | |
198 | case ALGORITHM_ROTATING_N_CONTINUE: | |
199 | case ALGORITHM_ROTATING_N_RESTART: | |
200 | case ALGORITHM_ROTATING_ZERO_RESTART: | |
201 | return 1; | |
202 | } | |
203 | } | |
e86c9dd6 NB |
204 | |
205 | ||
/* Store in 'target' the byte-wise XOR of the first 'size' bytes of each
 * of the 'disks' buffers in 'sources'.  With no sources the target is
 * filled with zeros.
 */
static void xor_blocks(char *target, char **sources, int disks, int size)
{
	int pos;

	/* Amazingly inefficient: one byte at a time across all sources. */
	for (pos = 0; pos < size; pos++) {
		char acc = 0;
		int d = 0;

		while (d < disks) {
			acc ^= sources[d][pos];
			d++;
		}
		target[pos] = acc;
	}
}
217 | ||
/* Compute the RAID6 P and Q blocks for a set of source blocks.
 *
 *   p, q    : output buffers, 'size' bytes each.
 *   sources : 'disks' input buffers, 'size' bytes each.
 *
 * P is the byte-wise XOR of all sources.
 * Q is accumulated from the highest-numbered source down to source 0:
 * at each step the running value is multiplied by 2 in GF(2^8)
 * (shift left one bit, XOR with 0x1d if the top bit was set) and then
 * XORed with the next source byte.
 *
 * All arithmetic is done on unsigned char so that the shift never
 * operates on a negative value, whatever the signedness of plain char.
 * If 'disks' is not positive there is nothing to combine and both
 * outputs are zero-filled.
 */
static void qsyndrome(char *p, char *q, char **sources, int disks, int size)
{
	int d, z;

	if (disks <= 0) {
		for (d = 0; d < size; d++)
			p[d] = q[d] = 0;
		return;
	}

	for (d = 0; d < size; d++) {
		unsigned char wp = (unsigned char)sources[disks-1][d];
		unsigned char wq = wp;

		for (z = disks - 2; z >= 0; z--) {
			unsigned char wd = (unsigned char)sources[z][d];
			/* reduction term for the bit shifted out of wq */
			unsigned char carry = (wq & 0x80) ? 0x1d : 0x00;

			wp ^= wd;
			wq = (unsigned char)((((unsigned)wq << 1) & 0xff) ^ carry ^ wd);
		}
		p[d] = (char)wp;
		q[d] = (char)wq;
	}
}
237 | ||
/* Save data:
 * We are given:
 *  source, offsets : fds of the active disks and the byte offset of the
 *                    data area on each.  For now we require all to be
 *                    present.
 *  A geometry      : raid_disks, chunk_size, level, layout
 *  nwrites, dest   : fds for mirrored targets.  They are already seeked
 *                    to the right (write) location.
 *  start, length   : byte range, in array data address space, to copy.
 *
 * Data is copied in pieces of at most 8192 bytes that never cross a
 * chunk boundary; each piece is read from the device geo_map() selects
 * and written to every target.
 *
 * Returns 0 on success, -1 on any seek/read/write failure or if the
 * geometry cannot be mapped to a device.
 */
int save_stripes(int *source, unsigned long long *offsets,
		 int raid_disks, int chunk_size, int level, int layout,
		 int nwrites, int *dest,
		 unsigned long long start, unsigned long long length)
{
	char abuf[8192+512];
	/* round up to a 512-byte aligned address inside abuf */
	char *buf = (char*)(((unsigned long)abuf+511)&~511UL);
	int data_disks = raid_disks - (level == 0 ? 0 : level <= 5 ? 1 : 2);
	int cpos;	/* where in chunk we are up to */

	/* These are used as divisors below. */
	if (chunk_size <= 0 || data_disks <= 0)
		return -1;
	cpos = start % chunk_size;

	while (length > 0) {
		unsigned long long stripe = start / chunk_size / data_disks;
		unsigned long long offset = stripe * chunk_size + cpos;
		int len = chunk_size - cpos;
		int disk;
		int i;

		if (len > 8192)
			len = 8192;
		if ((unsigned long long)len > length)
			len = length;
		/* len bytes to be moved from one device */

		disk = geo_map(start / chunk_size % data_disks, stripe,
			       raid_disks, level, layout);
		/* geo_map returns -1 for a level/layout it does not know;
		 * never use that as an index.
		 */
		if (disk < 0 || disk >= raid_disks)
			return -1;

		if (lseek64(source[disk], offsets[disk] + offset, 0) < 0)
			return -1;
		if (read(source[disk], buf, len) != len)
			return -1;
		for (i = 0; i < nwrites; i++)
			if (write(dest[i], buf, len) != len)
				return -1;

		length -= len;
		start += len;
		cpos += len;
		while (cpos >= chunk_size)
			cpos -= chunk_size;
	}
	return 0;
}
285 | ||
/* Restore data:
 * We are given:
 *  dest, offsets  : fds of the active disks and the byte offset of the
 *                   data area on each.  Some fds may be '-1' for
 *                   not-available; those devices are skipped on write.
 *  A geometry     : raid_disks, chunk_size, level, layout
 *  source         : an 'fd' to read from.
 *  read_offset    : where in 'source' the saved data begins; the fd is
 *                   seeked there before each chunk is read.
 *  start, length  : byte range, in array data address space.
 *                   The length must be a multiple of the stripe size.
 *
 * We build a full stripe in memory (data, then P and, for raid6, Q)
 * and then write it out.
 * We assume that there are enough working devices.
 *
 * Returns  0 on success,
 *         -1 on a seek/read/write failure or an unmappable geometry,
 *         -2 if memory could not be allocated,
 *         -3 if the remaining length is shorter than one full stripe
 *            (or the geometry gives an empty stripe).
 * All working buffers are released on every path.
 */
int restore_stripes(int *dest, unsigned long long *offsets,
		    int raid_disks, int chunk_size, int level, int layout,
		    int source, unsigned long long read_offset,
		    unsigned long long start, unsigned long long length)
{
	char *stripe_buf = malloc(raid_disks * chunk_size);
	char **stripes = malloc(raid_disks * sizeof(char*));
	char **blocks = malloc(raid_disks * sizeof(char*));
	char *zero = malloc(chunk_size);
	int data_disks = raid_disks - (level == 0 ? 0 : level <= 5 ? 1 : 2);
	int rv = 0;
	int i;

	if (stripe_buf == NULL || stripes == NULL || blocks == NULL
	    || zero == NULL) {
		rv = -2;
		goto out;
	}
	/* Both are used as divisors below. */
	if (chunk_size <= 0 || data_disks <= 0) {
		rv = -3;
		goto out;
	}

	memset(zero, 0, chunk_size);
	for (i = 0; i < raid_disks; i++)
		stripes[i] = stripe_buf + i * chunk_size;

	while (length > 0) {
		unsigned long long stripe = start / chunk_size / data_disks;
		unsigned long long offset = stripe * chunk_size;
		int len = data_disks * chunk_size;
		int pdisk, qdisk;

		if (length < (unsigned long long)len) {
			rv = -3;
			goto out;
		}

		/* Read each data chunk into the slot of the device it
		 * belongs on.
		 */
		for (i = 0; i < data_disks; i++) {
			int disk = geo_map(i, stripe,
					   raid_disks, level, layout);
			if (disk < 0 || disk >= raid_disks) {
				rv = -1;
				goto out;
			}
			if ((unsigned long long)lseek64(source, read_offset, 0)
			    != read_offset) {
				rv = -1;
				goto out;
			}
			if (read(source, stripes[disk], chunk_size) != chunk_size) {
				rv = -1;
				goto out;
			}
			read_offset += chunk_size;
		}

		/* We have the data, now do the parity */
		switch (level) {
		case 4:
		case 5:
			pdisk = geo_map(-1, stripe, raid_disks, level, layout);
			if (pdisk < 0 || pdisk >= raid_disks) {
				rv = -1;
				goto out;
			}
			/* every device other than P, starting after P */
			for (i = 0; i < data_disks; i++)
				blocks[i] = stripes[(pdisk+1+i) % raid_disks];
			xor_blocks(stripes[pdisk], blocks, data_disks, chunk_size);
			break;
		case 6:
			pdisk = geo_map(-1, stripe, raid_disks, level, layout);
			qdisk = geo_map(-2, stripe, raid_disks, level, layout);
			if (pdisk < 0 || pdisk >= raid_disks ||
			    qdisk < 0 || qdisk >= raid_disks) {
				rv = -1;
				goto out;
			}
			if (is_ddf(layout)) {
				/* q over 'raid_disks' blocks, in device order.
				 * 'p' and 'q' get to be all zero
				 */
				for (i = 0; i < raid_disks; i++)
					if (i == pdisk || i == qdisk)
						blocks[i] = zero;
					else
						blocks[i] = stripes[i];
				qsyndrome(stripes[pdisk], stripes[qdisk],
					  blocks, raid_disks, chunk_size);
			} else {
				/* for md, q is over 'data_disks' blocks,
				 * starting immediately after 'q'
				 */
				for (i = 0; i < data_disks; i++)
					blocks[i] = stripes[(qdisk+1+i) % raid_disks];

				qsyndrome(stripes[pdisk], stripes[qdisk], blocks,
					  data_disks, chunk_size);
			}
			break;
		}

		/* Write the whole stripe to every available device. */
		for (i = 0; i < raid_disks; i++) {
			if (dest[i] < 0)
				continue;
			if (lseek64(dest[i], offsets[i] + offset, 0) < 0) {
				rv = -1;
				goto out;
			}
			if (write(dest[i], stripes[i], chunk_size) != chunk_size) {
				rv = -1;
				goto out;
			}
		}
		length -= len;
		start += len;
	}

out:
	free(stripe_buf);
	free(stripes);
	free(blocks);
	free(zero);
	return rv;
}
387 | ||
388 | #ifdef MAIN | |
389 | ||
/* Read the data and p (and q) blocks, and check we got them right.
 *
 *  source, offsets : fds of all 'raid_disks' devices and the byte
 *                    offset of the data area on each.
 *  A geometry      : raid_disks, chunk_size, level, layout
 *  start, length   : per-device byte range to examine, one chunk at a
 *                    time; start/chunk_size is used as the stripe number.
 *
 * For each stripe the data-block-to-device mapping is printed.  For
 * level 6 the P and Q blocks are recomputed from the data and any
 * mismatch with what is on disk is reported on stdout.
 *
 * Returns 0 when the scan completes (mismatches are only reported),
 * -1 on a seek/read failure or unmappable geometry, -2 if memory
 * could not be allocated.
 */
int test_stripes(int *source, unsigned long long *offsets,
		 int raid_disks, int chunk_size, int level, int layout,
		 unsigned long long start, unsigned long long length)
{
	char *stripe_buf = malloc(raid_disks * chunk_size);
	char **stripes = malloc(raid_disks * sizeof(char*));
	char **blocks = malloc(raid_disks * sizeof(char*));
	char *p = malloc(chunk_size);
	char *q = malloc(chunk_size);
	int data_disks = raid_disks - (level == 5 ? 1: 2);
	int rv = 0;
	int i;

	if (stripe_buf == NULL || stripes == NULL || blocks == NULL
	    || p == NULL || q == NULL) {
		rv = -2;
		goto out;
	}
	/* chunk_size is a divisor and the loop step below. */
	if (chunk_size <= 0) {
		rv = -1;
		goto out;
	}

	for (i = 0; i < raid_disks; i++)
		stripes[i] = stripe_buf + i * chunk_size;

	while (length > 0) {
		unsigned long long stripe = start / chunk_size;
		int disk;

		for (i = 0; i < raid_disks; i++) {
			if (lseek64(source[i], offsets[i] + start, 0) < 0 ||
			    read(source[i], stripes[i], chunk_size) != chunk_size) {
				rv = -1;
				goto out;
			}
		}
		for (i = 0; i < data_disks; i++) {
			disk = geo_map(i, stripe, raid_disks, level, layout);
			if (disk < 0 || disk >= raid_disks) {
				rv = -1;
				goto out;
			}
			blocks[i] = stripes[disk];
			printf("%d->%d\n", i, disk);
		}
		switch (level) {
		case 6:
			qsyndrome(p, q, blocks, data_disks, chunk_size);
			disk = geo_map(-1, stripe, raid_disks, level, layout);
			if (disk < 0 || disk >= raid_disks) {
				rv = -1;
				goto out;
			}
			if (memcmp(p, stripes[disk], chunk_size) != 0) {
				printf("P(%d) wrong at %llu\n", disk, stripe);
			}
			disk = geo_map(-2, stripe, raid_disks, level, layout);
			if (disk < 0 || disk >= raid_disks) {
				rv = -1;
				goto out;
			}
			if (memcmp(q, stripes[disk], chunk_size) != 0) {
				printf("Q(%d) wrong at %llu\n", disk, stripe);
			}
			break;
		}
		/* 'length' is unsigned: clamp rather than wrap when the
		 * final piece is shorter than a chunk.
		 */
		if (length > (unsigned long long)chunk_size)
			length -= chunk_size;
		else
			length = 0;
		start += chunk_size;
	}

out:
	free(stripe_buf);
	free(stripes);
	free(blocks);
	free(p);
	free(q);
	return rv;
}
441 | ||
/* Parse 'str' as an unsigned decimal number.
 * If the string is empty or contains anything that strtoull() does not
 * consume, '*err' is set to 'str' and 0 is returned.  '*err' is left
 * untouched on success, so one 'err' can collect the outcome of
 * several calls.
 */
unsigned long long getnum(char *str, char **err)
{
	char *end = NULL;
	unsigned long long value = strtoull(str, &end, 10);

	if (end != str && *end == '\0')
		return value;

	*err = str;
	return 0;
}
452 | ||
/* Test driver, built only when MAIN is defined.
 *
 * Usage:
 *   save/restore/test file raid_disks chunk_size level layout start length devices...
 *
 * 'save' copies array data from the devices into 'file', 'restore'
 * writes data from 'file' back to the devices (recomputing parity),
 * and 'test' checks the parity blocks on the devices.
 *
 * Exit status: 0 on success, 1 for a usage error or a failed
 * operation, 2 for a bad argument, 3 if a file could not be opened or
 * memory could not be allocated.
 */
int main(int argc, char *argv[])
{
	int save;	/* 1 = save, 0 = restore, 2 = test */
	int *fds;
	char *file;
	int storefd;
	unsigned long long *offsets;
	int raid_disks, chunk_size, level, layout;
	unsigned long long start, length;
	int i;
	int rv;

	char *err = NULL;
	if (argc < 10) {
		fprintf(stderr, "Usage: test_stripe save/restore/test file raid_disks"
			" chunk_size level layout start length devices...\n");
		exit(1);
	}
	if (strcmp(argv[1], "save") == 0)
		save = 1;
	else if (strcmp(argv[1], "restore") == 0)
		save = 0;
	else if (strcmp(argv[1], "test") == 0)
		save = 2;
	else {
		fprintf(stderr, "test_stripe: must give 'save', 'restore' or 'test'.\n");
		exit(2);
	}

	file = argv[2];
	/* getnum() only ever sets 'err', so one check covers all six. */
	raid_disks = getnum(argv[3], &err);
	chunk_size = getnum(argv[4], &err);
	level = getnum(argv[5], &err);
	layout = getnum(argv[6], &err);
	start = getnum(argv[7], &err);
	length = getnum(argv[8], &err);
	if (err) {
		fprintf(stderr, "test_stripe: Bad number: %s\n", err);
		exit(2);
	}
	if (argc != raid_disks + 9) {
		fprintf(stderr, "test_stripe: wrong number of devices: want %d found %d\n",
			raid_disks, argc-9);
		exit(2);
	}
	fds = malloc(raid_disks * sizeof(*fds));
	/* every device's data is taken to start at offset 0 */
	offsets = calloc(raid_disks, sizeof(*offsets));
	if (fds == NULL || offsets == NULL) {
		fprintf(stderr, "test_stripe: out of memory.\n");
		exit(3);
	}

	storefd = open(file, O_RDWR);
	if (storefd < 0) {
		perror(file);
		fprintf(stderr, "test_stripe: could not open %s.\n", file);
		exit(3);
	}
	for (i = 0; i < raid_disks; i++) {
		fds[i] = open(argv[9+i], O_RDWR);
		if (fds[i] < 0) {
			perror(argv[9+i]);
			fprintf(stderr,"test_stripe: cannot open %s.\n", argv[9+i]);
			exit(3);
		}
	}

	if (save == 1) {
		rv = save_stripes(fds, offsets,
				  raid_disks, chunk_size, level, layout,
				  1, &storefd,
				  start, length);
		if (rv != 0) {
			fprintf(stderr,
				"test_stripe: save_stripes returned %d\n", rv);
			exit(1);
		}
	} else if (save == 2) {
		rv = test_stripes(fds, offsets,
				  raid_disks, chunk_size, level, layout,
				  start, length);
		if (rv != 0) {
			fprintf(stderr,
				"test_stripe: test_stripes returned %d\n", rv);
			exit(1);
		}
	} else {
		rv = restore_stripes(fds, offsets,
				     raid_disks, chunk_size, level, layout,
				     storefd, 0ULL,
				     start, length);
		if (rv != 0) {
			fprintf(stderr,
				"test_stripe: restore_stripes returned %d\n",
				rv);
			exit(1);
		}
	}
	exit(0);
}
551 | ||
552 | #endif /* MAIN */ |