]> git.ipfire.org Git - thirdparty/mdadm.git/blame - restripe.c
Examine/Detail: report raid6 layout
[thirdparty/mdadm.git] / restripe.c
CommitLineData
e86c9dd6
NB
1/*
2 * mdadm - manage Linux "md" devices aka RAID arrays.
3 *
4 * Copyright (C) 2006 Neil Brown <neilb@suse.de>
5 *
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 *
21 * Author: Neil Brown
22 * Email: <neilb@suse.de>
23 */
24
25#include "mdadm.h"
26
/* To restripe, we read from old geometry to a buffer, and
 * write from buffer to new geometry.
 * When reading we don't worry about parity. When writing we do.
 *
 */
32
e0d95aac
N
33static int geo_map(int block, unsigned long long stripe, int raid_disks,
34 int level, int layout)
e86c9dd6 35{
48327135 36 /* On the given stripe, find which disk in the array will have
e86c9dd6 37 * block numbered 'block'.
48327135
NB
38 * '-1' means the parity block.
39 * '-2' means the Q syndrome.
e86c9dd6
NB
40 */
41 int pd;
42
43 switch(level*100 + layout) {
44 case 000:
45 case 400:
e0d95aac 46 case 500 + ALGORITHM_PARITY_N:
e86c9dd6
NB
47 /* raid 4 isn't messed around by parity blocks */
48 if (block == -1)
49 return raid_disks-1; /* parity block */
50 return block;
51 case 500 + ALGORITHM_LEFT_ASYMMETRIC:
52 pd = (raid_disks-1) - stripe % raid_disks;
53 if (block == -1) return pd;
54 if (block >= pd)
55 block++;
56 return block;
57
58 case 500 + ALGORITHM_RIGHT_ASYMMETRIC:
59 pd = stripe % raid_disks;
60 if (block == -1) return pd;
61 if (block >= pd)
62 block++;
63 return block;
64
65 case 500 + ALGORITHM_LEFT_SYMMETRIC:
66 pd = (raid_disks - 1) - stripe % raid_disks;
67 if (block == -1) return pd;
68 return (pd + 1 + block) % raid_disks;
69
70 case 500 + ALGORITHM_RIGHT_SYMMETRIC:
71 pd = stripe % raid_disks;
72 if (block == -1) return pd;
73 return (pd + 1 + block) % raid_disks;
74
e0d95aac
N
75 case 500 + ALGORITHM_PARITY_0:
76 return block + 1;
77
78
79 case 600 + ALGORITHM_PARITY_N_6:
80 if (block == -2)
81 return raid_disks - 1;
82 if (block == -1)
83 return raid_disks - 2; /* parity block */
84 return block;
85 case 600 + ALGORITHM_LEFT_ASYMMETRIC_6:
86 if (block == -2)
87 return raid_disks - 1;
88 raid_disks--;
89 pd = (raid_disks-1) - stripe % raid_disks;
90 if (block == -1) return pd;
91 if (block >= pd)
92 block++;
93 return block;
94
95 case 600 + ALGORITHM_RIGHT_ASYMMETRIC_6:
96 if (block == -2)
97 return raid_disks - 1;
98 raid_disks--;
99 pd = stripe % raid_disks;
100 if (block == -1) return pd;
101 if (block >= pd)
102 block++;
103 return block;
104
105 case 600 + ALGORITHM_LEFT_SYMMETRIC_6:
106 if (block == -2)
107 return raid_disks - 1;
108 raid_disks--;
109 pd = (raid_disks - 1) - stripe % raid_disks;
110 if (block == -1) return pd;
111 return (pd + 1 + block) % raid_disks;
112
113 case 600 + ALGORITHM_RIGHT_SYMMETRIC_6:
114 if (block == -2)
115 return raid_disks - 1;
116 raid_disks--;
117 pd = stripe % raid_disks;
118 if (block == -1) return pd;
119 return (pd + 1 + block) % raid_disks;
120
121 case 600 + ALGORITHM_PARITY_0_6:
122 if (block == -2)
123 return raid_disks - 1;
124 return block + 1;
125
126
127 case 600 + ALGORITHM_PARITY_0:
128 if (block == -1)
129 return 0;
130 if (block == -2)
131 return 1;
132 return block + 2;
133
e86c9dd6
NB
134 case 600 + ALGORITHM_LEFT_ASYMMETRIC:
135 pd = raid_disks - 1 - (stripe % raid_disks);
136 if (block == -1) return pd;
48327135 137 if (block == -2) return (pd+1) % raid_disks;
e86c9dd6
NB
138 if (pd == raid_disks - 1)
139 return block+1;
140 if (block >= pd)
141 return block+2;
142 return block;
143
e0d95aac
N
144 case 600 + ALGORITHM_ROTATING_ZERO_RESTART:
145 /* Different order for calculating Q, otherwize same as ... */
e86c9dd6
NB
146 case 600 + ALGORITHM_RIGHT_ASYMMETRIC:
147 pd = stripe % raid_disks;
148 if (block == -1) return pd;
48327135 149 if (block == -2) return (pd+1) % raid_disks;
e86c9dd6
NB
150 if (pd == raid_disks - 1)
151 return block+1;
152 if (block >= pd)
153 return block+2;
154 return block;
155
156 case 600 + ALGORITHM_LEFT_SYMMETRIC:
157 pd = raid_disks - 1 - (stripe % raid_disks);
158 if (block == -1) return pd;
48327135 159 if (block == -2) return (pd+1) % raid_disks;
e86c9dd6
NB
160 return (pd + 2 + block) % raid_disks;
161
162 case 600 + ALGORITHM_RIGHT_SYMMETRIC:
163 pd = stripe % raid_disks;
164 if (block == -1) return pd;
48327135 165 if (block == -2) return (pd+1) % raid_disks;
e86c9dd6 166 return (pd + 2 + block) % raid_disks;
e0d95aac
N
167
168
169 case 600 + ALGORITHM_ROTATING_N_RESTART:
170 /* Same a left_asymmetric, by first stripe is
171 * D D D P Q rather than
172 * Q D D D P
173 */
174 pd = raid_disks - 1 - ((stripe + 1) % raid_disks);
175 if (block == -1) return pd;
176 if (block == -2) return (pd+1) % raid_disks;
177 if (pd == raid_disks - 1)
178 return block+1;
179 if (block >= pd)
180 return block+2;
181 return block;
182
183 case 600 + ALGORITHM_ROTATING_N_CONTINUE:
184 /* Same as left_symmetric but Q is before P */
185 pd = raid_disks - 1 - (stripe % raid_disks);
186 if (block == -1) return pd;
187 if (block == -2) return (pd+raid_disks-1) % raid_disks;
188 return (pd + 1 + block) % raid_disks;
e86c9dd6
NB
189 }
190 return -1;
191}
e0d95aac
N
192static int is_ddf(int layout)
193{
194 switch (layout)
195 {
196 default:
197 return 0;
198 case ALGORITHM_ROTATING_N_CONTINUE:
199 case ALGORITHM_ROTATING_N_RESTART:
200 case ALGORITHM_ROTATING_ZERO_RESTART:
201 return 1;
202 }
203}
e86c9dd6
NB
204
205
/* XOR the first 'size' bytes of each of the 'disks' buffers in 'sources'
 * together and store the result in 'target'.
 * With no sources at all the target is filled with zeros.
 */
static void xor_blocks(char *target, char **sources, int disks, int size)
{
	int pos;

	/* Simple byte-at-a-time implementation; not tuned for speed. */
	for (pos = 0; pos < size; pos++) {
		char acc = 0;
		int src = disks;

		while (src-- > 0)
			acc ^= sources[src][pos];
		target[pos] = acc;
	}
}
217
48327135
NB
/* Compute the P (XOR) and Q blocks over 'disks' source buffers of 'size'
 * bytes each, storing them in 'p' and 'q'.
 *
 * For every byte position Q is built by Horner's rule, starting from the
 * last source and working down to the first: the running value is doubled
 * (shift left by one, XOR 0x1d when the top bit was set, i.e. reduction by
 * the polynomial 0x11d) and the next source byte is XORed in.
 *
 * All arithmetic is done in 'unsigned char': with a signed plain 'char'
 * the shift would operate on negative values, which is undefined.
 * With disks <= 0 both outputs are filled with zeros.
 */
static void qsyndrome(char *p, char *q, char **sources, int disks, int size)
{
	int d, z;

	if (size <= 0)
		return;
	if (disks <= 0) {
		/* Nothing to combine: empty XOR / empty sum is zero */
		memset(p, 0, size);
		memset(q, 0, size);
		return;
	}

	for (d = 0; d < size; d++) {
		unsigned char wp = (unsigned char)sources[disks-1][d];
		unsigned char wq = wp;

		for (z = disks-2; z >= 0; z--) {
			unsigned char wd = (unsigned char)sources[z][d];
			unsigned char reduce = (wq & 0x80) ? 0x1d : 0x00;

			wp ^= wd;
			/* wq = 2*wq + wd in GF(2^8) */
			wq = (unsigned char)((wq << 1) & 0xff);
			wq ^= reduce;
			wq ^= wd;
		}
		p[d] = (char)wp;
		q[d] = (char)wq;
	}
}
237
e86c9dd6
NB
/* Save data:
 * We are given:
 *  A list of 'fds' of the active disks. For now we require all to be present.
 *  A per-disk list of 'offsets' that is added to every position read.
 *  A geometry: raid_disks, chunk_size, level, layout
 *  A list of 'fds' for mirrored targets.  They are already seeked to
 *    right (Write) location
 *  A start and length, in bytes of array data.
 *
 * The data is copied in pieces of at most 8192 bytes that never cross a
 * chunk boundary; each piece is read from the one device that geo_map()
 * says holds it and written to every one of the 'nwrites' targets.
 *
 * Returns 0 on success and -1 on failure: a bad geometry (non-positive
 * chunk_size or no data disks, or a level/layout that geo_map() cannot
 * place), a failed seek, or a short read or write.
 */
int save_stripes(int *source, unsigned long long *offsets,
		 int raid_disks, int chunk_size, int level, int layout,
		 int nwrites, int *dest,
		 unsigned long long start, unsigned long long length)
{
	char abuf[8192+512];
	/* Round up to a 512-byte boundary inside abuf
	 * (presumably for direct I/O - TODO confirm) */
	char *buf = (char*)(((unsigned long)abuf+511)&~511UL);
	int data_disks = raid_disks - (level == 0 ? 0 : level <= 5 ? 1 : 2);

	/* Both values are used as divisors below */
	if (chunk_size <= 0 || data_disks <= 0)
		return -1;

	while (length > 0) {
		unsigned long long chunk_nr = start / chunk_size;
		unsigned long long stripe = chunk_nr / data_disks;
		int cpos = start % chunk_size; /* where in chunk we are up to */
		unsigned long long offset = stripe * chunk_size + cpos;
		int len = chunk_size - cpos;
		int disk;
		int i;

		if (len > 8192)
			len = 8192;
		if ((unsigned long long)len > length)
			len = (int)length;
		/* len bytes to be moved from one device */

		disk = geo_map((int)(chunk_nr % data_disks), stripe,
			       raid_disks, level, layout);
		/* geo_map() returns -1 for a level/layout it does not know;
		 * never use that as an array index.
		 */
		if (disk < 0 || disk >= raid_disks)
			return -1;

		if (lseek64(source[disk], offsets[disk]+offset, SEEK_SET) < 0)
			return -1;
		if (read(source[disk], buf, len) != len)
			return -1;
		for (i = 0; i < nwrites; i++)
			if (write(dest[i], buf, len) != len)
				return -1;

		length -= len;
		start += len;
	}
	return 0;
}
285
/* Restore data:
 * We are given:
 *  A list of 'fds' of the active disks. Some may be '-1' for not-available.
 *  A per-disk list of 'offsets' that is added to every position written.
 *  A geometry: raid_disks, chunk_size, level, layout
 *  An 'fd' to read from, and the offset in it ('read_offset') at which
 *    the saved data starts.
 *  A start and length.
 * The length must be a multiple of the stripe size.
 *
 * We build a full stripe in memory and then write it out.
 * We assume that there are enough working devices.
 *
 * Returns:
 *   0  success
 *  -1  bad geometry (non-positive sizes, or a level/layout geo_map()
 *      cannot place), or a failed seek / short read / short write
 *  -2  out of memory
 *  -3  the remaining length is shorter than one full stripe
 * All working buffers are released on every path.
 */
int restore_stripes(int *dest, unsigned long long *offsets,
		    int raid_disks, int chunk_size, int level, int layout,
		    int source, unsigned long long read_offset,
		    unsigned long long start, unsigned long long length)
{
	char *stripe_buf = NULL;	/* one chunk per device, contiguous */
	char **stripes = NULL;		/* stripes[d] -> chunk of device d */
	char **blocks = NULL;		/* scratch list of parity inputs */
	char *zero = NULL;		/* a chunk of zeros */
	int data_disks = raid_disks - (level == 0 ? 0 : level <= 5 ? 1 : 2);
	int rv = 0;
	int i;

	/* These are used as allocation sizes and divisors */
	if (raid_disks <= 0 || chunk_size <= 0 || data_disks <= 0)
		return -1;

	stripe_buf = malloc((size_t)raid_disks * chunk_size);
	stripes = malloc(raid_disks * sizeof(char*));
	blocks = malloc(raid_disks * sizeof(char*));
	zero = calloc(1, chunk_size);
	if (stripe_buf == NULL || stripes == NULL || blocks == NULL
	    || zero == NULL) {
		rv = -2;
		goto out;
	}

	for (i = 0; i < raid_disks; i++)
		stripes[i] = stripe_buf + (size_t)i * chunk_size;

	while (length > 0) {
		unsigned long long len =
			(unsigned long long)data_disks * chunk_size;
		unsigned long long stripe_nr = start / chunk_size / data_disks;
		unsigned long long offset = stripe_nr * chunk_size;
		int disk = -1, qdisk = -1;

		if (length < len) {
			rv = -3;
			goto out;
		}

		/* Pull in the data blocks, each into its device's slot */
		for (i = 0; i < data_disks; i++) {
			int ddisk = geo_map(i, stripe_nr,
					    raid_disks, level, layout);

			if (ddisk < 0 || ddisk >= raid_disks) {
				/* unknown level/layout */
				rv = -1;
				goto out;
			}
			if ((unsigned long long)lseek64(source, read_offset,
							SEEK_SET)
			    != read_offset) {
				rv = -1;
				goto out;
			}
			if (read(source, stripes[ddisk], chunk_size)
			    != chunk_size) {
				rv = -1;
				goto out;
			}
			read_offset += chunk_size;
		}

		/* We have the data, now do the parity */
		switch (level) {
		case 4:
		case 5:
			disk = geo_map(-1, stripe_nr,
				       raid_disks, level, layout);
			if (disk < 0 || disk >= raid_disks) {
				rv = -1;
				goto out;
			}
			/* every device except the parity one holds data */
			for (i = 0; i < data_disks; i++)
				blocks[i] = stripes[(disk+1+i) % raid_disks];
			xor_blocks(stripes[disk], blocks,
				   data_disks, chunk_size);
			break;
		case 6:
			disk = geo_map(-1, stripe_nr,
				       raid_disks, level, layout);
			qdisk = geo_map(-2, stripe_nr,
					raid_disks, level, layout);
			if (disk < 0 || disk >= raid_disks ||
			    qdisk < 0 || qdisk >= raid_disks) {
				rv = -1;
				goto out;
			}
			if (is_ddf(layout)) {
				/* q over 'raid_disks' blocks, in device order.
				 * 'p' and 'q' get to be all zero
				 */
				for (i = 0; i < raid_disks; i++)
					if (i == disk || i == qdisk)
						blocks[i] = zero;
					else
						blocks[i] = stripes[i];
				qsyndrome(stripes[disk], stripes[qdisk],
					  blocks, raid_disks, chunk_size);
			} else {
				/* for md, q is over 'data_disks' blocks,
				 * starting immediately after 'q'
				 */
				for (i = 0; i < data_disks; i++)
					blocks[i] = stripes[(qdisk+1+i)
							    % raid_disks];
				qsyndrome(stripes[disk], stripes[qdisk],
					  blocks, data_disks, chunk_size);
			}
			break;
		default:
			/* no parity to compute */
			break;
		}

		/* Write the stripe to every device that is present */
		for (i = 0; i < raid_disks; i++) {
			if (dest[i] < 0)
				continue;
			if (lseek64(dest[i], offsets[i]+offset, SEEK_SET) < 0) {
				rv = -1;
				goto out;
			}
			if (write(dest[i], stripes[i], chunk_size)
			    != chunk_size) {
				rv = -1;
				goto out;
			}
		}
		length -= len;
		start += len;
	}

out:
	free(stripe_buf);
	free(stripes);
	free(blocks);
	free(zero);
	return rv;
}
387
388#ifdef MAIN
389
48327135
NB
/* Read back stripes from the member devices and check P and Q.
 *
 * 'source' / 'offsets' give one fd and base offset per device; 'start' and
 * 'length' are per-device byte positions, stepped one chunk at a time.
 * For each chunk-sized step every device is read, the data blocks are
 * located with geo_map() (each mapping is printed as "block->device"),
 * and for level 6 the recomputed P and Q are compared with what is on
 * disk; mismatches are reported on stdout.  Other levels are only read.
 *
 * Returns 0 once all stripes have been examined (mismatches do not change
 * the result), -1 on bad geometry or a failed seek / short read, and -2 if
 * memory cannot be allocated.
 */
int test_stripes(int *source, unsigned long long *offsets,
		 int raid_disks, int chunk_size, int level, int layout,
		 unsigned long long start, unsigned long long length)
{
	/* read the data and p (and q) blocks, and check we got them right */
	char *stripe_buf = NULL;
	char **stripes = NULL;
	char **blocks = NULL;
	char *p = NULL;
	char *q = NULL;
	int data_disks = raid_disks - (level == 5 ? 1: 2);
	int rv = 0;
	int i;

	if (raid_disks <= 0 || chunk_size <= 0)
		return -1;

	stripe_buf = malloc((size_t)raid_disks * chunk_size);
	stripes = malloc(raid_disks * sizeof(char*));
	blocks = malloc(raid_disks * sizeof(char*));
	p = malloc(chunk_size);
	q = malloc(chunk_size);
	if (stripe_buf == NULL || stripes == NULL || blocks == NULL ||
	    p == NULL || q == NULL) {
		rv = -2;
		goto out;
	}

	for (i = 0; i < raid_disks; i++)
		stripes[i] = stripe_buf + (size_t)i * chunk_size;

	while (length > 0) {
		unsigned long long stripe_nr = start / chunk_size;
		int disk;

		for (i = 0; i < raid_disks; i++) {
			if (lseek64(source[i], offsets[i]+start, SEEK_SET) < 0 ||
			    read(source[i], stripes[i], chunk_size)
			    != chunk_size) {
				rv = -1;
				goto out;
			}
		}
		for (i = 0; i < data_disks; i++) {
			int ddisk = geo_map(i, stripe_nr, raid_disks,
					    level, layout);

			if (ddisk < 0 || ddisk >= raid_disks) {
				/* level/layout not known to geo_map() */
				rv = -1;
				goto out;
			}
			blocks[i] = stripes[ddisk];
			printf("%d->%d\n", i, ddisk);
		}
		switch (level) {
		case 6:
			qsyndrome(p, q, blocks, data_disks, chunk_size);
			disk = geo_map(-1, stripe_nr, raid_disks,
				       level, layout);
			if (disk < 0 || disk >= raid_disks) {
				rv = -1;
				goto out;
			}
			if (memcmp(p, stripes[disk], chunk_size) != 0) {
				printf("P(%d) wrong at %llu\n", disk,
				       start / chunk_size);
			}
			disk = geo_map(-2, stripe_nr, raid_disks,
				       level, layout);
			if (disk < 0 || disk >= raid_disks) {
				rv = -1;
				goto out;
			}
			if (memcmp(q, stripes[disk], chunk_size) != 0) {
				printf("Q(%d) wrong at %llu\n", disk,
				       start / chunk_size);
			}
			break;
		default:
			break;
		}
		/* 'length' is unsigned: stop cleanly on a trailing partial
		 * chunk instead of wrapping around.
		 */
		if (length < (unsigned long long)chunk_size)
			length = 0;
		else
			length -= chunk_size;
		start += chunk_size;
	}

out:
	free(stripe_buf);
	free(stripes);
	free(blocks);
	free(p);
	free(q);
	return rv;
}
441
e86c9dd6
NB
/* Parse 'str' as an unsigned decimal number.
 *
 * On success the value is returned and '*err' is left untouched, so one
 * 'err' variable can collect the first... or rather the latest failure
 * across several calls.  On failure (empty string, trailing junk, or a
 * minus sign, which strtoull() would otherwise silently wrap to a huge
 * value) '*err' is set to 'str' and 0 is returned.
 */
unsigned long long getnum(char *str, char **err)
{
	char *e;
	unsigned long long rv = strtoull(str, &e, 10);

	if (e == str || *e != '\0' || strchr(str, '-') != NULL) {
		*err = str;
		return 0;
	}
	return rv;
}
452
/* Test driver, built only when MAIN is defined.
 *
 *   test_stripe save|restore|test file raid_disks chunk_size level layout
 *               start length devices...
 *
 * 'save' copies array data from the devices into 'file', 'restore' writes
 * the contents of 'file' back across the devices, and 'test' checks the
 * parity blocks on the devices.  All device offsets are zero.
 *
 * Exit status: 0 success, 1 operation failed or out of memory,
 * 2 bad arguments, 3 a file or device could not be opened.
 */
int main(int argc, char *argv[])
{
	int save;	/* 1 = save, 0 = restore, 2 = test */
	int *fds;
	char *file;
	int storefd;
	unsigned long long *offsets;
	int raid_disks, chunk_size, level, layout;
	unsigned long long start, length;
	int i;
	int rv;

	char *err = NULL;
	if (argc < 10) {
		fprintf(stderr, "Usage: test_stripe save/restore/test file raid_disks"
			" chunk_size level layout start length devices...\n");
		exit(1);
	}
	if (strcmp(argv[1], "save") == 0)
		save = 1;
	else if (strcmp(argv[1], "restore") == 0)
		save = 0;
	else if (strcmp(argv[1], "test") == 0)
		save = 2;
	else {
		fprintf(stderr, "test_stripe: must give 'save', 'restore' or 'test'.\n");
		exit(2);
	}

	file = argv[2];
	raid_disks = getnum(argv[3], &err);
	chunk_size = getnum(argv[4], &err);
	level = getnum(argv[5], &err);
	layout = getnum(argv[6], &err);
	start = getnum(argv[7], &err);
	length = getnum(argv[8], &err);
	if (err) {
		fprintf(stderr, "test_stripe: Bad number: %s\n", err);
		exit(2);
	}
	/* Both are used as divisors and allocation sizes further down */
	if (raid_disks <= 0 || chunk_size <= 0) {
		fprintf(stderr, "test_stripe: raid_disks and chunk_size must be positive\n");
		exit(2);
	}
	if (argc != raid_disks + 9) {
		fprintf(stderr, "test_stripe: wrong number of devices: want %d found %d\n",
			raid_disks, argc-9);
		exit(2);
	}
	fds = malloc(raid_disks * sizeof(*fds));
	offsets = calloc(raid_disks, sizeof(*offsets));
	if (fds == NULL || offsets == NULL) {
		fprintf(stderr, "test_stripe: out of memory\n");
		exit(1);
	}

	storefd = open(file, O_RDWR);
	if (storefd < 0) {
		perror(file);
		fprintf(stderr, "test_stripe: could not open %s.\n", file);
		exit(3);
	}
	for (i = 0; i < raid_disks; i++) {
		fds[i] = open(argv[9+i], O_RDWR);
		if (fds[i] < 0) {
			perror(argv[9+i]);
			fprintf(stderr,"test_stripe: cannot open %s.\n", argv[9+i]);
			exit(3);
		}
	}

	if (save == 1) {
		rv = save_stripes(fds, offsets,
				  raid_disks, chunk_size, level, layout,
				  1, &storefd,
				  start, length);
		if (rv != 0) {
			fprintf(stderr,
				"test_stripe: save_stripes returned %d\n", rv);
			exit(1);
		}
	} else if (save == 2) {
		rv = test_stripes(fds, offsets,
				  raid_disks, chunk_size, level, layout,
				  start, length);
		if (rv != 0) {
			fprintf(stderr,
				"test_stripe: test_stripes returned %d\n", rv);
			exit(1);
		}
	} else {
		rv = restore_stripes(fds, offsets,
				     raid_disks, chunk_size, level, layout,
				     storefd, 0ULL,
				     start, length);
		if (rv != 0) {
			fprintf(stderr,
				"test_stripe: restore_stripes returned %d\n",
				rv);
			exit(1);
		}
	}
	exit(0);
}
551
552#endif /* MAIN */