]> git.ipfire.org Git - thirdparty/mdadm.git/blame - restripe.c
Merge branch 'master' into devel-3.2
[thirdparty/mdadm.git] / restripe.c
CommitLineData
e86c9dd6
NB
/*
 * mdadm - manage Linux "md" devices aka RAID arrays.
 *
 * Copyright (C) 2006-2009 Neil Brown <neilb@suse.de>
 *
 *
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 *
 *    Author: Neil Brown
 *    Email: <neilb@suse.de>
 */
24
25#include "mdadm.h"
a6288483 26#include <stdint.h>
e86c9dd6
NB
27
/* To restripe, we read from the old geometry into a buffer, and
 * write from the buffer to the new geometry.
 * When reading, we might have missing devices and so could need
 * to reconstruct.
 * When writing, we need to create correct parity and Q.
 *
 */
35
e0d95aac
N
36static int geo_map(int block, unsigned long long stripe, int raid_disks,
37 int level, int layout)
e86c9dd6 38{
48327135 39 /* On the given stripe, find which disk in the array will have
e86c9dd6 40 * block numbered 'block'.
48327135
NB
41 * '-1' means the parity block.
42 * '-2' means the Q syndrome.
e86c9dd6
NB
43 */
44 int pd;
45
b6e317c8
AK
46 /* layout is not relevant for raid0 and raid4 */
47 if ((level == 0) ||
48 (level == 4))
49 layout = 0;
50
e86c9dd6
NB
51 switch(level*100 + layout) {
52 case 000:
53 case 400:
e0d95aac 54 case 500 + ALGORITHM_PARITY_N:
e86c9dd6
NB
55 /* raid 4 isn't messed around by parity blocks */
56 if (block == -1)
57 return raid_disks-1; /* parity block */
58 return block;
59 case 500 + ALGORITHM_LEFT_ASYMMETRIC:
60 pd = (raid_disks-1) - stripe % raid_disks;
61 if (block == -1) return pd;
62 if (block >= pd)
63 block++;
64 return block;
65
66 case 500 + ALGORITHM_RIGHT_ASYMMETRIC:
67 pd = stripe % raid_disks;
68 if (block == -1) return pd;
69 if (block >= pd)
70 block++;
71 return block;
72
73 case 500 + ALGORITHM_LEFT_SYMMETRIC:
74 pd = (raid_disks - 1) - stripe % raid_disks;
75 if (block == -1) return pd;
76 return (pd + 1 + block) % raid_disks;
77
78 case 500 + ALGORITHM_RIGHT_SYMMETRIC:
79 pd = stripe % raid_disks;
80 if (block == -1) return pd;
81 return (pd + 1 + block) % raid_disks;
82
e0d95aac
N
83 case 500 + ALGORITHM_PARITY_0:
84 return block + 1;
85
86
87 case 600 + ALGORITHM_PARITY_N_6:
88 if (block == -2)
89 return raid_disks - 1;
90 if (block == -1)
91 return raid_disks - 2; /* parity block */
92 return block;
93 case 600 + ALGORITHM_LEFT_ASYMMETRIC_6:
94 if (block == -2)
95 return raid_disks - 1;
96 raid_disks--;
97 pd = (raid_disks-1) - stripe % raid_disks;
98 if (block == -1) return pd;
99 if (block >= pd)
100 block++;
101 return block;
102
103 case 600 + ALGORITHM_RIGHT_ASYMMETRIC_6:
104 if (block == -2)
105 return raid_disks - 1;
106 raid_disks--;
107 pd = stripe % raid_disks;
108 if (block == -1) return pd;
109 if (block >= pd)
110 block++;
111 return block;
112
113 case 600 + ALGORITHM_LEFT_SYMMETRIC_6:
114 if (block == -2)
115 return raid_disks - 1;
116 raid_disks--;
117 pd = (raid_disks - 1) - stripe % raid_disks;
118 if (block == -1) return pd;
119 return (pd + 1 + block) % raid_disks;
120
121 case 600 + ALGORITHM_RIGHT_SYMMETRIC_6:
122 if (block == -2)
123 return raid_disks - 1;
124 raid_disks--;
125 pd = stripe % raid_disks;
126 if (block == -1) return pd;
127 return (pd + 1 + block) % raid_disks;
128
129 case 600 + ALGORITHM_PARITY_0_6:
130 if (block == -2)
131 return raid_disks - 1;
132 return block + 1;
133
134
135 case 600 + ALGORITHM_PARITY_0:
136 if (block == -1)
137 return 0;
138 if (block == -2)
139 return 1;
140 return block + 2;
141
e86c9dd6
NB
142 case 600 + ALGORITHM_LEFT_ASYMMETRIC:
143 pd = raid_disks - 1 - (stripe % raid_disks);
144 if (block == -1) return pd;
48327135 145 if (block == -2) return (pd+1) % raid_disks;
e86c9dd6
NB
146 if (pd == raid_disks - 1)
147 return block+1;
148 if (block >= pd)
149 return block+2;
150 return block;
151
e0d95aac
N
152 case 600 + ALGORITHM_ROTATING_ZERO_RESTART:
153 /* Different order for calculating Q, otherwize same as ... */
e86c9dd6
NB
154 case 600 + ALGORITHM_RIGHT_ASYMMETRIC:
155 pd = stripe % raid_disks;
156 if (block == -1) return pd;
48327135 157 if (block == -2) return (pd+1) % raid_disks;
e86c9dd6
NB
158 if (pd == raid_disks - 1)
159 return block+1;
160 if (block >= pd)
161 return block+2;
162 return block;
163
164 case 600 + ALGORITHM_LEFT_SYMMETRIC:
165 pd = raid_disks - 1 - (stripe % raid_disks);
166 if (block == -1) return pd;
48327135 167 if (block == -2) return (pd+1) % raid_disks;
e86c9dd6
NB
168 return (pd + 2 + block) % raid_disks;
169
170 case 600 + ALGORITHM_RIGHT_SYMMETRIC:
171 pd = stripe % raid_disks;
172 if (block == -1) return pd;
48327135 173 if (block == -2) return (pd+1) % raid_disks;
e86c9dd6 174 return (pd + 2 + block) % raid_disks;
e0d95aac
N
175
176
177 case 600 + ALGORITHM_ROTATING_N_RESTART:
178 /* Same a left_asymmetric, by first stripe is
179 * D D D P Q rather than
180 * Q D D D P
181 */
182 pd = raid_disks - 1 - ((stripe + 1) % raid_disks);
183 if (block == -1) return pd;
184 if (block == -2) return (pd+1) % raid_disks;
185 if (pd == raid_disks - 1)
186 return block+1;
187 if (block >= pd)
188 return block+2;
189 return block;
190
191 case 600 + ALGORITHM_ROTATING_N_CONTINUE:
192 /* Same as left_symmetric but Q is before P */
193 pd = raid_disks - 1 - (stripe % raid_disks);
194 if (block == -1) return pd;
195 if (block == -2) return (pd+raid_disks-1) % raid_disks;
196 return (pd + 1 + block) % raid_disks;
e86c9dd6
NB
197 }
198 return -1;
199}
e0d95aac
N
200static int is_ddf(int layout)
201{
202 switch (layout)
203 {
204 default:
205 return 0;
206 case ALGORITHM_ROTATING_N_CONTINUE:
207 case ALGORITHM_ROTATING_N_RESTART:
208 case ALGORITHM_ROTATING_ZERO_RESTART:
209 return 1;
210 }
211}
e86c9dd6
NB
212
213
/*
 * xor_blocks - compute the byte-wise XOR of several equally sized blocks.
 *
 * target:  receives 'size' bytes of output.
 * sources: array of 'disks' pointers, each to at least 'size' bytes.
 * disks:   number of source blocks; with 0 sources the target is zeroed.
 * size:    number of bytes to process.
 */
static void xor_blocks(char *target, char **sources, int disks, int size)
{
	int i, j;

	/* Amazingly inefficient... */
	for (i = 0; i < size; i++) {
		char c = 0;

		for (j = 0; j < disks; j++)
			c ^= sources[j][i];
		target[i] = c;
	}
}
225
/*
 * qsyndrome - compute the RAID6 P (XOR) and Q syndrome blocks.
 *
 * p, q:    output buffers of 'size' bytes each.
 * sources: array of 'disks' pointers to the input blocks, 'size' bytes each.
 * disks:   number of input blocks; must be at least 1 because
 *          sources[disks-1] is read unconditionally.
 * size:    number of bytes per block.
 *
 * For every byte position the inputs are folded from the highest index
 * down to 0: P accumulates the XOR, while Q is multiplied by 2 in
 * GF(2^8) (reduction constant 0x1d) before each lower-numbered input is
 * XORed in.
 */
static void qsyndrome(uint8_t *p, uint8_t *q, uint8_t **sources, int disks, int size)
{
	int d, z;
	uint8_t wq0, wp0, wd0, w10, w20;

	for (d = 0; d < size; d++) {
		wq0 = wp0 = sources[disks-1][d];
		for (z = disks-2; z >= 0; z--) {
			wd0 = sources[z][d];
			wp0 ^= wd0;
			/* multiply the running Q by 2: shift left and, if the
			 * top bit fell off, fold in the reduction constant.
			 */
			w20 = (wq0 & 0x80) ? 0x1d : 0x00;
			w10 = (uint8_t)((wq0 << 1) & 0xff);
			w10 ^= w20;
			wq0 = w10 ^ wd0;
		}
		p[d] = wp0;
		q[d] = wq0;
	}
}
245
a6288483
N
246
/*
 * The following was taken from linux/drivers/md/mktables.c, and modified
 * to create in-memory tables rather than C code
 */

/*
 * gfmul - multiply two elements of GF(2^8), reducing with the constant 0x1d.
 *
 * Shift-and-add: for every set bit of 'b' the current multiple of 'a'
 * is XORed into the result, and 'a' is doubled (with reduction) each step.
 */
static uint8_t gfmul(uint8_t a, uint8_t b)
{
	uint8_t v = 0;

	while (b) {
		if (b & 1)
			v ^= a;
		/* a *= 2; the truncation to 8 bits drops the carried-out bit */
		a = (uint8_t)((a << 1) ^ ((a & 0x80) ? 0x1d : 0));
		b >>= 1;
	}

	return v;
}
264
/*
 * gfpow - raise 'a' to the power 'b' in GF(2^8) using gfmul().
 *
 * The exponent is first reduced modulo 255 and made non-negative, so
 * negative exponents are accepted.  Square-and-multiply then walks the
 * bits of the reduced exponent.
 */
static uint8_t gfpow(uint8_t a, int b)
{
	uint8_t v = 1;

	b %= 255;
	if (b < 0)
		b += 255;

	while (b) {
		if (b & 1)
			v = gfmul(v, a);
		a = gfmul(a, a);
		b >>= 1;
	}

	return v;
}
282
/* Set to 1 once make_tables() has filled in the tables below. */
int tables_ready = 0;
/* raid6_gfmul[a][b] is the GF(2^8) product of a and b. */
uint8_t raid6_gfmul[256][256];
/* raid6_gfexp[i] is 2^i; entry 255 is stored as 0 (not a real entry). */
uint8_t raid6_gfexp[256];
/* raid6_gfinv[x] is x^254, i.e. x^-1. */
uint8_t raid6_gfinv[256];
/* raid6_gfexi[i] is the inverse of (2^i + 1). */
uint8_t raid6_gfexi[256];

/*
 * make_tables - fill in the GF(2^8) lookup tables used by the RAID6
 * recovery routines, then set tables_ready.
 */
void make_tables(void)
{
	int i, j;
	uint8_t v;

	/* Compute multiplication table */
	for (i = 0; i < 256; i++)
		for (j = 0; j < 256; j++)
			raid6_gfmul[i][j] = gfmul((uint8_t)i, (uint8_t)j);

	/* Compute power-of-2 table (exponent) */
	v = 1;
	for (i = 0; i < 256; i++) {
		raid6_gfexp[i] = v;
		v = gfmul(v, 2);
		if (v == 1)
			v = 0;	/* For entry 255, not a real entry */
	}

	/* Compute inverse table x^-1 == x^254 */
	for (i = 0; i < 256; i++)
		raid6_gfinv[i] = gfpow((uint8_t)i, 254);

	/* Compute inv(2^x + 1) (exponent-xor-inverse) table */
	for (i = 0; i < 256; i++)
		raid6_gfexi[i] = raid6_gfinv[raid6_gfexp[i] ^ 1];

	tables_ready = 1;
}
317
318uint8_t *zero;
319/* Following was taken from linux/drivers/md/raid6recov.c */
320
321/* Recover two failed data blocks. */
322void raid6_2data_recov(int disks, size_t bytes, int faila, int failb,
323 uint8_t **ptrs)
324{
325 uint8_t *p, *q, *dp, *dq;
326 uint8_t px, qx, db;
327 const uint8_t *pbmul; /* P multiplier table for B data */
328 const uint8_t *qmul; /* Q multiplier table (for both) */
329
330 p = ptrs[disks-2];
331 q = ptrs[disks-1];
332
333 /* Compute syndrome with zero for the missing data pages
334 Use the dead data pages as temporary storage for
335 delta p and delta q */
336 dp = ptrs[faila];
337 ptrs[faila] = zero;
338 dq = ptrs[failb];
339 ptrs[failb] = zero;
340
341 qsyndrome(dp, dq, ptrs, disks-2, bytes);
342
343 /* Restore pointer table */
344 ptrs[faila] = dp;
345 ptrs[failb] = dq;
346
347 /* Now, pick the proper data tables */
348 pbmul = raid6_gfmul[raid6_gfexi[failb-faila]];
349 qmul = raid6_gfmul[raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]]];
350
351 /* Now do it... */
352 while ( bytes-- ) {
353 px = *p ^ *dp;
354 qx = qmul[*q ^ *dq];
355 *dq++ = db = pbmul[px] ^ qx; /* Reconstructed B */
356 *dp++ = db ^ px; /* Reconstructed A */
357 p++; q++;
358 }
359}
360
361/* Recover failure of one data block plus the P block */
362void raid6_datap_recov(int disks, size_t bytes, int faila, uint8_t **ptrs)
363{
364 uint8_t *p, *q, *dq;
365 const uint8_t *qmul; /* Q multiplier table */
366
367 p = ptrs[disks-2];
368 q = ptrs[disks-1];
369
370 /* Compute syndrome with zero for the missing data page
371 Use the dead data page as temporary storage for delta q */
372 dq = ptrs[faila];
373 ptrs[faila] = zero;
374
375 qsyndrome(p, dq, ptrs, disks-2, bytes);
376
377 /* Restore pointer table */
378 ptrs[faila] = dq;
379
380 /* Now, pick the proper data tables */
381 qmul = raid6_gfmul[raid6_gfinv[raid6_gfexp[faila]]];
382
383 /* Now do it... */
384 while ( bytes-- ) {
385 *p++ ^= *dq = qmul[*q ^ *dq];
386 q++; dq++;
387 }
388}
389
e86c9dd6
NB
390/* Save data:
391 * We are given:
a6288483 392 * A list of 'fds' of the active disks. Some may be absent.
48327135 393 * A geometry: raid_disks, chunk_size, level, layout
e86c9dd6
NB
394 * A list of 'fds' for mirrored targets. They are already seeked to
395 * right (Write) location
a6288483
N
396 * A start and length which must be stripe-aligned
397 * 'buf' is large enough to hold one stripe, and is aligned
e86c9dd6
NB
398 */
399
400int save_stripes(int *source, unsigned long long *offsets,
401 int raid_disks, int chunk_size, int level, int layout,
402 int nwrites, int *dest,
a6288483
N
403 unsigned long long start, unsigned long long length,
404 char *buf)
e86c9dd6 405{
e86c9dd6
NB
406 int len;
407 int data_disks = raid_disks - (level == 0 ? 0 : level <=5 ? 1 : 2);
408 int disk;
a6288483 409 int i;
e86c9dd6 410
a6288483
N
411 if (!tables_ready)
412 make_tables();
413
414 if (zero == NULL) {
415 zero = malloc(chunk_size);
416 memset(zero, 0, chunk_size);
417 }
418
419 len = data_disks * chunk_size;
e86c9dd6 420 while (length > 0) {
a6288483
N
421 int failed = 0;
422 int fdisk[3], fblock[3];
423 for (disk = 0; disk < raid_disks ; disk++) {
424 unsigned long long offset;
425 int dnum;
a6288483
N
426
427 offset = (start/chunk_size/data_disks)*chunk_size;
428 dnum = geo_map(disk < data_disks ? disk : data_disks - disk - 1,
429 start/chunk_size/data_disks,
430 raid_disks, level, layout);
7236ee7a 431 if (dnum < 0) abort();
a6288483 432 if (source[dnum] < 0 ||
cc50ccdc 433 lseek64(source[dnum], offsets[dnum]+offset, 0) < 0 ||
7236ee7a
N
434 read(source[dnum], buf+disk * chunk_size, chunk_size)
435 != chunk_size)
a6288483
N
436 if (failed <= 2) {
437 fdisk[failed] = dnum;
438 fblock[failed] = disk;
439 failed++;
440 }
441 }
442 if (failed == 0 || fblock[0] >= data_disks)
443 /* all data disks are good */
444 ;
445 else if (failed == 1 || fblock[1] >= data_disks+1) {
446 /* one failed data disk and good parity */
447 char *bufs[data_disks];
448 for (i=0; i < data_disks; i++)
449 if (fblock[0] == i)
450 bufs[i] = buf + data_disks*chunk_size;
451 else
452 bufs[i] = buf + i*chunk_size;
453
454 xor_blocks(buf + fblock[0]*chunk_size,
455 bufs, data_disks, chunk_size);
456 } else if (failed > 2 || level != 6)
457 /* too much failure */
e86c9dd6 458 return -1;
a6288483
N
459 else {
460 /* RAID6 computations needed. */
461 uint8_t *bufs[data_disks+4];
462 int qdisk;
463 int syndrome_disks;
464 disk = geo_map(-1, start/chunk_size/data_disks,
465 raid_disks, level, layout);
466 qdisk = geo_map(-2, start/chunk_size/data_disks,
467 raid_disks, level, layout);
468 if (is_ddf(layout)) {
469 /* q over 'raid_disks' blocks, in device order.
470 * 'p' and 'q' get to be all zero
471 */
472 for (i = 0; i < raid_disks; i++)
cc50ccdc
N
473 bufs[i] = zero;
474 for (i = 0; i < data_disks; i++) {
475 int dnum = geo_map(i,
476 start/chunk_size/data_disks,
477 raid_disks, level, layout);
478 int snum;
479 /* i is the logical block number, so is index to 'buf'.
480 * dnum is physical disk number
481 * and thus the syndrome number.
482 */
483 snum = dnum;
484 bufs[snum] = (uint8_t*)buf + chunk_size * i;
485 }
a6288483
N
486 syndrome_disks = raid_disks;
487 } else {
488 /* for md, q is over 'data_disks' blocks,
489 * starting immediately after 'q'
1eac9f84
N
490 * Note that for the '_6' variety, the p block
491 * makes a hole that we need to be careful of.
a6288483 492 */
1eac9f84
N
493 int j;
494 int snum = 0;
495 for (j = 0; j < raid_disks; j++) {
496 int dnum = (qdisk + 1 + j) % raid_disks;
497 if (dnum == disk || dnum == qdisk)
498 continue;
499 for (i = 0; i < data_disks; i++)
500 if (geo_map(i,
501 start/chunk_size/data_disks,
502 raid_disks, level, layout) == dnum)
503 break;
cc50ccdc
N
504 /* i is the logical block number, so is index to 'buf'.
505 * dnum is physical disk number
506 * snum is syndrome disk for which 0 is immediately after Q
507 */
cc50ccdc 508 bufs[snum] = (uint8_t*)buf + chunk_size * i;
1eac9f84
N
509
510 if (fblock[0] == i)
511 fdisk[0] = snum;
512 if (fblock[1] == i)
513 fdisk[1] = snum;
514 snum++;
cc50ccdc 515 }
a6288483 516
a6288483
N
517 syndrome_disks = data_disks;
518 }
cc50ccdc
N
519
520 /* Place P and Q blocks at end of bufs */
521 bufs[syndrome_disks] = (uint8_t*)buf + chunk_size * data_disks;
522 bufs[syndrome_disks+1] = (uint8_t*)buf + chunk_size * (data_disks+1);
523
a6288483
N
524 if (fblock[1] == data_disks)
525 /* One data failed, and parity failed */
526 raid6_datap_recov(syndrome_disks+2, chunk_size,
527 fdisk[0], bufs);
cc50ccdc
N
528 else {
529 if (fdisk[0] > fdisk[1]) {
530 int t = fdisk[0];
531 fdisk[0] = fdisk[1];
532 fdisk[1] = t;
533 }
a6288483
N
534 /* Two data blocks failed, P,Q OK */
535 raid6_2data_recov(syndrome_disks+2, chunk_size,
536 fdisk[0], fdisk[1], bufs);
cc50ccdc 537 }
a6288483
N
538 }
539
e86c9dd6
NB
540 for (i=0; i<nwrites; i++)
541 if (write(dest[i], buf, len) != len)
542 return -1;
a6288483 543
e86c9dd6
NB
544 length -= len;
545 start += len;
e86c9dd6
NB
546 }
547 return 0;
548}
549
550/* Restore data:
551 * We are given:
552 * A list of 'fds' of the active disks. Some may be '-1' for not-available.
353632d9 553 * A geometry: raid_disks, chunk_size, level, layout
e86c9dd6
NB
554 * An 'fd' to read from. It is already seeked to the right (Read) location.
555 * A start and length.
556 * The length must be a multiple of the stripe size.
557 *
558 * We build a full stripe in memory and then write it out.
559 * We assume that there are enough working devices.
560 */
561int restore_stripes(int *dest, unsigned long long *offsets,
562 int raid_disks, int chunk_size, int level, int layout,
353632d9 563 int source, unsigned long long read_offset,
e86c9dd6
NB
564 unsigned long long start, unsigned long long length)
565{
e9e43ec3 566 char *stripe_buf;
e86c9dd6
NB
567 char **stripes = malloc(raid_disks * sizeof(char*));
568 char **blocks = malloc(raid_disks * sizeof(char*));
569 int i;
570
a6288483 571 int data_disks = raid_disks - (level == 0 ? 0 : level <= 5 ? 1 : 2);
e86c9dd6 572
fcf57625
N
573 if (posix_memalign((void**)&stripe_buf, 4096, raid_disks * chunk_size))
574 stripe_buf = NULL;
a6288483
N
575 if (zero == NULL) {
576 zero = malloc(chunk_size);
577 if (zero)
578 memset(zero, 0, chunk_size);
579 }
e0d95aac
N
580 if (stripe_buf == NULL || stripes == NULL || blocks == NULL
581 || zero == NULL) {
e86c9dd6
NB
582 free(stripe_buf);
583 free(stripes);
584 free(blocks);
e0d95aac 585 free(zero);
e86c9dd6
NB
586 return -2;
587 }
588 for (i=0; i<raid_disks; i++)
589 stripes[i] = stripe_buf + i * chunk_size;
590 while (length > 0) {
f21e18ca 591 unsigned int len = data_disks * chunk_size;
e86c9dd6 592 unsigned long long offset;
48327135 593 int disk, qdisk;
a6288483 594 int syndrome_disks;
e86c9dd6
NB
595 if (length < len)
596 return -3;
597 for (i=0; i < data_disks; i++) {
598 int disk = geo_map(i, start/chunk_size/data_disks,
599 raid_disks, level, layout);
f21e18ca
N
600 if ((unsigned long long)lseek64(source, read_offset, 0)
601 != read_offset)
353632d9 602 return -1;
f21e18ca
N
603 if (read(source, stripes[disk],
604 chunk_size) != chunk_size)
e86c9dd6 605 return -1;
353632d9 606 read_offset += chunk_size;
e86c9dd6
NB
607 }
608 /* We have the data, now do the parity */
609 offset = (start/chunk_size/data_disks) * chunk_size;
48327135
NB
610 switch (level) {
611 case 4:
612 case 5:
613 disk = geo_map(-1, start/chunk_size/data_disks,
e86c9dd6 614 raid_disks, level, layout);
e0d95aac
N
615 for (i = 0; i < data_disks; i++)
616 blocks[i] = stripes[(disk+1+i) % raid_disks];
e86c9dd6 617 xor_blocks(stripes[disk], blocks, data_disks, chunk_size);
48327135
NB
618 break;
619 case 6:
620 disk = geo_map(-1, start/chunk_size/data_disks,
621 raid_disks, level, layout);
622 qdisk = geo_map(-2, start/chunk_size/data_disks,
623 raid_disks, level, layout);
e0d95aac
N
624 if (is_ddf(layout)) {
625 /* q over 'raid_disks' blocks, in device order.
626 * 'p' and 'q' get to be all zero
627 */
628 for (i = 0; i < raid_disks; i++)
629 if (i == disk || i == qdisk)
a6288483 630 blocks[i] = (char*)zero;
e0d95aac
N
631 else
632 blocks[i] = stripes[i];
a6288483 633 syndrome_disks = raid_disks;
e0d95aac 634 } else {
a6288483 635 /* for md, q is over 'data_disks' blocks,
e0d95aac
N
636 * starting immediately after 'q'
637 */
638 for (i = 0; i < data_disks; i++)
639 blocks[i] = stripes[(qdisk+1+i) % raid_disks];
48327135 640
a6288483 641 syndrome_disks = data_disks;
e0d95aac 642 }
a6288483
N
643 qsyndrome((uint8_t*)stripes[disk],
644 (uint8_t*)stripes[qdisk],
645 (uint8_t**)blocks,
646 syndrome_disks, chunk_size);
48327135 647 break;
e86c9dd6
NB
648 }
649 for (i=0; i < raid_disks ; i++)
650 if (dest[i] >= 0) {
651 if (lseek64(dest[i], offsets[i]+offset, 0) < 0)
652 return -1;
653 if (write(dest[i], stripes[i], chunk_size) != chunk_size)
654 return -1;
655 }
656 length -= len;
657 start += len;
658 }
659 return 0;
660}
661
662#ifdef MAIN
663
48327135
NB
/*
 * test_stripes - read stripes from the devices and check P and Q.
 *
 * For each chunk-sized step from 'start', one chunk is read from every
 * device at offsets[i]+start.  The logical-to-device mapping is printed
 * and, for level 6, P and Q are recomputed from the data blocks (taken
 * in logical block order) and compared with what is on disk; mismatches
 * are reported on stdout.
 *
 * Returns 0 when the scan completes (mismatches are only printed),
 * -1 on a seek/read failure and -2 if memory could not be allocated.
 */
int test_stripes(int *source, unsigned long long *offsets,
		 int raid_disks, int chunk_size, int level, int layout,
		 unsigned long long start, unsigned long long length)
{
	/* ready the data and p (and q) blocks, and check we got them right */
	char *stripe_buf = malloc(raid_disks * chunk_size);
	char **stripes = malloc(raid_disks * sizeof(char*));
	char **blocks = malloc(raid_disks * sizeof(char*));
	uint8_t *p = malloc(chunk_size);
	uint8_t *q = malloc(chunk_size);
	int rv = 0;

	int i;
	int data_disks = raid_disks - (level == 5 ? 1: 2);

	if (stripe_buf == NULL || stripes == NULL || blocks == NULL ||
	    p == NULL || q == NULL) {
		rv = -2;
		goto out;
	}
	for ( i = 0 ; i < raid_disks ; i++)
		stripes[i] = stripe_buf + i * chunk_size;

	while (length > 0) {
		int disk;

		for (i = 0 ; i < raid_disks ; i++) {
			if (lseek64(source[i], offsets[i]+start, SEEK_SET) < 0 ||
			    read(source[i], stripes[i], chunk_size) != chunk_size) {
				rv = -1;
				goto out;
			}
		}
		for (i = 0 ; i < data_disks ; i++) {
			int dnum = geo_map(i, start/chunk_size, raid_disks,
					   level, layout);
			blocks[i] = stripes[dnum];
			printf("%d->%d\n", i, dnum);
		}
		switch(level) {
		case 6:
			qsyndrome(p, q, (uint8_t**)blocks, data_disks, chunk_size);
			disk = geo_map(-1, start/chunk_size, raid_disks,
				       level, layout);
			if (memcmp(p, stripes[disk], chunk_size) != 0) {
				printf("P(%d) wrong at %llu\n", disk,
				       start / chunk_size);
			}
			disk = geo_map(-2, start/chunk_size, raid_disks,
				       level, layout);
			if (memcmp(q, stripes[disk], chunk_size) != 0) {
				printf("Q(%d) wrong at %llu\n", disk,
				       start / chunk_size);
			}
			break;
		}
		length -= chunk_size;
		start += chunk_size;
	}

out:
	free(stripe_buf);
	free(stripes);
	free(blocks);
	free(p);
	free(q);
	return rv;
}
715
e86c9dd6
NB
/*
 * getnum - parse a whole string as an unsigned decimal number.
 *
 * str: the text to parse.
 * err: on failure (no digits, or trailing characters) *err is pointed at
 *      'str'; on success it is left untouched, so one 'err' can collect
 *      the result of several calls.
 *
 * Returns the parsed value, or 0 when the string is rejected.
 */
unsigned long long getnum(char *str, char **err)
{
	char *e;
	unsigned long long rv = strtoull(str, &e, 10);

	if (e == str || *e) {
		*err = str;
		return 0;
	}
	return rv;
}
726
/*
 * Test driver, built only when MAIN is defined.
 *
 * Arguments:
 *   save/restore/test file raid_disks chunk_size level layout start length devices...
 *
 * 'save' copies stripes from the devices into 'file', 'restore' writes
 * stripes from 'file' back onto the devices, and 'test' checks the
 * parity found on the devices.  Exit status is 0 on success, 1 when the
 * selected operation fails (or on a usage error), 2 for bad arguments
 * and 3 when a file or device cannot be opened or memory runs out.
 */
int main(int argc, char *argv[])
{
	int save;
	int *fds;
	char *file;
	char *buf;
	int storefd;
	unsigned long long *offsets;
	int raid_disks, chunk_size, level, layout;
	unsigned long long start, length;
	int i;

	char *err = NULL;
	if (argc < 10) {
		fprintf(stderr, "Usage: test_stripe save/restore/test file raid_disks"
			" chunk_size level layout start length devices...\n");
		exit(1);
	}
	if (strcmp(argv[1], "save")==0)
		save = 1;
	else if (strcmp(argv[1], "restore") == 0)
		save = 0;
	else if (strcmp(argv[1], "test") == 0)
		save = 2;
	else {
		fprintf(stderr, "test_stripe: must give 'save', 'restore' or 'test'.\n");
		exit(2);
	}

	file = argv[2];
	raid_disks = getnum(argv[3], &err);
	chunk_size = getnum(argv[4], &err);
	level = getnum(argv[5], &err);
	layout = getnum(argv[6], &err);
	start = getnum(argv[7], &err);
	length = getnum(argv[8], &err);
	if (err) {
		fprintf(stderr, "test_stripe: Bad number: %s\n", err);
		exit(2);
	}
	if (chunk_size <= 0) {
		/* chunk_size is used as a divisor by every operation */
		fprintf(stderr, "test_stripe: chunk_size must be positive\n");
		exit(2);
	}
	if (argc != raid_disks + 9) {
		fprintf(stderr, "test_stripe: wrong number of devices: want %d found %d\n",
			raid_disks, argc-9);
		exit(2);
	}
	fds = malloc(raid_disks * sizeof(*fds));
	/* calloc: all device offsets start out as zero */
	offsets = calloc(raid_disks, sizeof(*offsets));
	if (fds == NULL || offsets == NULL) {
		fprintf(stderr, "test_stripe: out of memory\n");
		exit(3);
	}

	storefd = open(file, O_RDWR);
	if (storefd < 0) {
		perror(file);
		fprintf(stderr, "test_stripe: could not open %s.\n", file);
		exit(3);
	}
	for (i=0; i<raid_disks; i++) {
		fds[i] = open(argv[9+i], O_RDWR);
		if (fds[i] < 0) {
			perror(argv[9+i]);
			fprintf(stderr,"test_stripe: cannot open %s.\n", argv[9+i]);
			exit(3);
		}
	}

	/* one full stripe, including the parity chunks */
	buf = malloc(raid_disks * chunk_size);
	if (buf == NULL) {
		fprintf(stderr, "test_stripe: out of memory\n");
		exit(3);
	}

	if (save == 1) {
		int rv = save_stripes(fds, offsets,
				      raid_disks, chunk_size, level, layout,
				      1, &storefd,
				      start, length, buf);
		if (rv != 0) {
			fprintf(stderr,
				"test_stripe: save_stripes returned %d\n", rv);
			exit(1);
		}
	} else if (save == 2) {
		int rv = test_stripes(fds, offsets,
				      raid_disks, chunk_size, level, layout,
				      start, length);
		if (rv != 0) {
			fprintf(stderr,
				"test_stripe: test_stripes returned %d\n", rv);
			exit(1);
		}
	} else {
		int rv = restore_stripes(fds, offsets,
					 raid_disks, chunk_size, level, layout,
					 storefd, 0ULL,
					 start, length);
		if (rv != 0) {
			fprintf(stderr,
				"test_stripe: restore_stripes returned %d\n",
				rv);
			exit(1);
		}
	}
	exit(0);
}
828
829#endif /* MAIN */