]> git.ipfire.org Git - thirdparty/mdadm.git/blob - restripe.c
tests: add test that DDF marks missing devices as failed on assembly.
[thirdparty/mdadm.git] / restripe.c
1 /*
2 * mdadm - manage Linux "md" devices aka RAID arrays.
3 *
4 * Copyright (C) 2006-2009 Neil Brown <neilb@suse.de>
5 *
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 *
21 * Author: Neil Brown
22 * Email: <neilb@suse.de>
23 */
24
25 #include "mdadm.h"
26 #include <stdint.h>
27
/* To restripe, we read from the old geometry into a buffer, and
 * write from the buffer to the new geometry.
 * When reading, we might have missing devices and so could need
 * to reconstruct.
 * When writing, we need to create correct parity and Q.
 *
 */
35
36 int geo_map(int block, unsigned long long stripe, int raid_disks,
37 int level, int layout)
38 {
39 /* On the given stripe, find which disk in the array will have
40 * block numbered 'block'.
41 * '-1' means the parity block.
42 * '-2' means the Q syndrome.
43 */
44 int pd;
45
46 /* layout is not relevant for raid0 and raid4 */
47 if ((level == 0) ||
48 (level == 4))
49 layout = 0;
50
51 switch(level*100 + layout) {
52 case 000:
53 case 400:
54 case 500 + ALGORITHM_PARITY_N:
55 /* raid 4 isn't messed around by parity blocks */
56 if (block == -1)
57 return raid_disks-1; /* parity block */
58 return block;
59 case 500 + ALGORITHM_LEFT_ASYMMETRIC:
60 pd = (raid_disks-1) - stripe % raid_disks;
61 if (block == -1) return pd;
62 if (block >= pd)
63 block++;
64 return block;
65
66 case 500 + ALGORITHM_RIGHT_ASYMMETRIC:
67 pd = stripe % raid_disks;
68 if (block == -1) return pd;
69 if (block >= pd)
70 block++;
71 return block;
72
73 case 500 + ALGORITHM_LEFT_SYMMETRIC:
74 pd = (raid_disks - 1) - stripe % raid_disks;
75 if (block == -1) return pd;
76 return (pd + 1 + block) % raid_disks;
77
78 case 500 + ALGORITHM_RIGHT_SYMMETRIC:
79 pd = stripe % raid_disks;
80 if (block == -1) return pd;
81 return (pd + 1 + block) % raid_disks;
82
83 case 500 + ALGORITHM_PARITY_0:
84 return block + 1;
85
86 case 600 + ALGORITHM_PARITY_N_6:
87 if (block == -2)
88 return raid_disks - 1;
89 if (block == -1)
90 return raid_disks - 2; /* parity block */
91 return block;
92 case 600 + ALGORITHM_LEFT_ASYMMETRIC_6:
93 if (block == -2)
94 return raid_disks - 1;
95 raid_disks--;
96 pd = (raid_disks-1) - stripe % raid_disks;
97 if (block == -1) return pd;
98 if (block >= pd)
99 block++;
100 return block;
101
102 case 600 + ALGORITHM_RIGHT_ASYMMETRIC_6:
103 if (block == -2)
104 return raid_disks - 1;
105 raid_disks--;
106 pd = stripe % raid_disks;
107 if (block == -1) return pd;
108 if (block >= pd)
109 block++;
110 return block;
111
112 case 600 + ALGORITHM_LEFT_SYMMETRIC_6:
113 if (block == -2)
114 return raid_disks - 1;
115 raid_disks--;
116 pd = (raid_disks - 1) - stripe % raid_disks;
117 if (block == -1) return pd;
118 return (pd + 1 + block) % raid_disks;
119
120 case 600 + ALGORITHM_RIGHT_SYMMETRIC_6:
121 if (block == -2)
122 return raid_disks - 1;
123 raid_disks--;
124 pd = stripe % raid_disks;
125 if (block == -1) return pd;
126 return (pd + 1 + block) % raid_disks;
127
128 case 600 + ALGORITHM_PARITY_0_6:
129 if (block == -2)
130 return raid_disks - 1;
131 return block + 1;
132
133 case 600 + ALGORITHM_PARITY_0:
134 if (block == -1)
135 return 0;
136 if (block == -2)
137 return 1;
138 return block + 2;
139
140 case 600 + ALGORITHM_LEFT_ASYMMETRIC:
141 pd = raid_disks - 1 - (stripe % raid_disks);
142 if (block == -1) return pd;
143 if (block == -2) return (pd+1) % raid_disks;
144 if (pd == raid_disks - 1)
145 return block+1;
146 if (block >= pd)
147 return block+2;
148 return block;
149
150 case 600 + ALGORITHM_ROTATING_ZERO_RESTART:
151 /* Different order for calculating Q, otherwize same as ... */
152 case 600 + ALGORITHM_RIGHT_ASYMMETRIC:
153 pd = stripe % raid_disks;
154 if (block == -1) return pd;
155 if (block == -2) return (pd+1) % raid_disks;
156 if (pd == raid_disks - 1)
157 return block+1;
158 if (block >= pd)
159 return block+2;
160 return block;
161
162 case 600 + ALGORITHM_LEFT_SYMMETRIC:
163 pd = raid_disks - 1 - (stripe % raid_disks);
164 if (block == -1) return pd;
165 if (block == -2) return (pd+1) % raid_disks;
166 return (pd + 2 + block) % raid_disks;
167
168 case 600 + ALGORITHM_RIGHT_SYMMETRIC:
169 pd = stripe % raid_disks;
170 if (block == -1) return pd;
171 if (block == -2) return (pd+1) % raid_disks;
172 return (pd + 2 + block) % raid_disks;
173
174 case 600 + ALGORITHM_ROTATING_N_RESTART:
175 /* Same a left_asymmetric, by first stripe is
176 * D D D P Q rather than
177 * Q D D D P
178 */
179 pd = raid_disks - 1 - ((stripe + 1) % raid_disks);
180 if (block == -1) return pd;
181 if (block == -2) return (pd+1) % raid_disks;
182 if (pd == raid_disks - 1)
183 return block+1;
184 if (block >= pd)
185 return block+2;
186 return block;
187
188 case 600 + ALGORITHM_ROTATING_N_CONTINUE:
189 /* Same as left_symmetric but Q is before P */
190 pd = raid_disks - 1 - (stripe % raid_disks);
191 if (block == -1) return pd;
192 if (block == -2) return (pd+raid_disks-1) % raid_disks;
193 return (pd + 1 + block) % raid_disks;
194 }
195 return -1;
196 }
197 static int is_ddf(int layout)
198 {
199 switch (layout)
200 {
201 default:
202 return 0;
203 case ALGORITHM_ROTATING_N_CONTINUE:
204 case ALGORITHM_ROTATING_N_RESTART:
205 case ALGORITHM_ROTATING_ZERO_RESTART:
206 return 1;
207 }
208 }
209
/* Store in target[0..size-1] the byte-wise XOR of the 'disks' buffers
 * listed in 'sources'.  With disks == 0 the target is filled with zeros.
 */
void xor_blocks(char *target, char **sources, int disks, int size)
{
	int pos;

	/* Amazingly inefficient: one byte position at a time. */
	for (pos = 0; pos < size; pos++) {
		char acc = 0;
		int d = 0;

		while (d < disks) {
			acc ^= sources[d][pos];
			d++;
		}
		target[pos] = acc;
	}
}
221
/* Compute the P (XOR) and Q blocks over 'disks' source buffers of
 * 'size' bytes each.
 *
 * For every byte position the sources are folded from the highest index
 * down to 0: the running Q value is doubled (left shift, with 0x1d
 * XORed in when the top bit was set) before each lower source is XORed
 * in.  'disks' must be at least 1, as sources[disks-1] is always read.
 */
void qsyndrome(uint8_t *p, uint8_t *q, uint8_t **sources, int disks, int size)
{
	int pos, z;

	for (pos = 0; pos < size; pos++) {
		uint8_t par = sources[disks - 1][pos];
		uint8_t syn = par;

		for (z = disks - 2; z >= 0; z--) {
			uint8_t data = sources[z][pos];
			uint8_t doubled = (uint8_t)(syn << 1);

			if (syn & 0x80)
				doubled ^= 0x1d;
			par ^= data;
			syn = doubled ^ data;
		}
		p[pos] = par;
		q[pos] = syn;
	}
}
241
242 /*
243 * The following was taken from linux/drivers/md/mktables.c, and modified
244 * to create in-memory tables rather than C code
245 */
/* Multiply a by b: shift-and-add over the bits of b, where each
 * doubling of a XORs in 0x1d whenever the top bit was set.
 */
static uint8_t gfmul(uint8_t a, uint8_t b)
{
	uint8_t product = 0;

	for (; b != 0; b >>= 1) {
		if (b & 1)
			product ^= a;
		a = (a << 1) ^ (a & 0x80 ? 0x1d : 0);
	}

	return product;
}
259
/* Raise a to the power b using gfmul() and square-and-multiply.
 * The exponent is first reduced modulo 255 into the range 0..254,
 * so negative exponents are accepted.
 */
static uint8_t gfpow(uint8_t a, int b)
{
	uint8_t result = 1;
	int exponent = b % 255;

	if (exponent < 0)
		exponent += 255;

	for (; exponent != 0; exponent >>= 1) {
		if (exponent & 1)
			result = gfmul(result, a);
		a = gfmul(a, a);
	}

	return result;
}
277
/* Lookup tables filled in by make_tables(); tables_ready is set to 1
 * once they have been computed.
 */
int tables_ready = 0;
uint8_t raid6_gfmul[256][256];	/* raid6_gfmul[a][b] = gfmul(a, b) */
uint8_t raid6_gfexp[256];	/* powers of 2 */
uint8_t raid6_gfinv[256];	/* x^254, i.e. x^-1 */
uint8_t raid6_gfexi[256];	/* inverse of (2^x ^ 1) */
uint8_t raid6_gflog[256];	/* log base 2 */
uint8_t raid6_gfilog[256];	/* inverse log */

/* Populate all of the tables above and mark them ready. */
void make_tables(void)
{
	int a, b;
	uint8_t power;
	uint32_t value, exponent;

	/* Compute multiplication table */
	for (a = 0; a < 256; a++) {
		for (b = 0; b < 256; b++)
			raid6_gfmul[a][b] = gfmul(a, b);
	}

	/* Compute power-of-2 table (exponent) */
	power = 1;
	for (a = 0; a < 256; a++) {
		raid6_gfexp[a] = power;
		power = gfmul(power, 2);
		if (power == 1)
			power = 0;	/* For entry 255, not a real entry */
	}

	/* Compute inverse table x^-1 == x^254 */
	for (a = 0; a < 256; a++)
		raid6_gfinv[a] = gfpow(a, 254);

	/* Compute inv(2^x + 1) (exponent-xor-inverse) table */
	for (a = 0; a < 256; a++)
		raid6_gfexi[a] = raid6_gfinv[raid6_gfexp[a] ^ 1];

	/* Compute log and inverse log */
	/* Modified code from:
	 *    http://web.eecs.utk.edu/~plank/plank/papers/CS-96-332.html
	 */
	raid6_gflog[0] = 0;
	raid6_gfilog[255] = 0;

	value = 1;
	for (exponent = 0; exponent < 255; exponent++) {
		raid6_gflog[value] = (uint8_t) exponent;
		raid6_gfilog[exponent] = (uint8_t) value;
		value <<= 1;
		if (value & 256)
			value ^= 0x11d;	/* same constant as octal 0435 */
	}

	tables_ready = 1;
}
330
/* Shared all-zero buffer and the size it was last allocated with. */
uint8_t *zero;
int zero_size;

/* Make sure 'zero' points at a zero-filled buffer of at least
 * chunk_size bytes.  An existing buffer that is already large enough
 * is kept; otherwise it is freed and a new one is allocated.
 */
void ensure_zero_has_size(int chunk_size)
{
	if (zero != NULL && chunk_size <= zero_size)
		return;

	free(zero);	/* free(NULL) is a no-op */
	zero = xcalloc(1, chunk_size);
	zero_size = chunk_size;
}
343
344 /* Following was taken from linux/drivers/md/raid6recov.c */
345
346 /* Recover two failed data blocks. */
347 void raid6_2data_recov(int disks, size_t bytes, int faila, int failb,
348 uint8_t **ptrs)
349 {
350 uint8_t *p, *q, *dp, *dq;
351 uint8_t px, qx, db;
352 const uint8_t *pbmul; /* P multiplier table for B data */
353 const uint8_t *qmul; /* Q multiplier table (for both) */
354
355 p = ptrs[disks-2];
356 q = ptrs[disks-1];
357
358 /* Compute syndrome with zero for the missing data pages
359 Use the dead data pages as temporary storage for
360 delta p and delta q */
361 dp = ptrs[faila];
362 ptrs[faila] = zero;
363 dq = ptrs[failb];
364 ptrs[failb] = zero;
365
366 qsyndrome(dp, dq, ptrs, disks-2, bytes);
367
368 /* Restore pointer table */
369 ptrs[faila] = dp;
370 ptrs[failb] = dq;
371
372 /* Now, pick the proper data tables */
373 pbmul = raid6_gfmul[raid6_gfexi[failb-faila]];
374 qmul = raid6_gfmul[raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]]];
375
376 /* Now do it... */
377 while ( bytes-- ) {
378 px = *p ^ *dp;
379 qx = qmul[*q ^ *dq];
380 *dq++ = db = pbmul[px] ^ qx; /* Reconstructed B */
381 *dp++ = db ^ px; /* Reconstructed A */
382 p++; q++;
383 }
384 }
385
386 /* Recover failure of one data block plus the P block */
387 void raid6_datap_recov(int disks, size_t bytes, int faila, uint8_t **ptrs)
388 {
389 uint8_t *p, *q, *dq;
390 const uint8_t *qmul; /* Q multiplier table */
391
392 p = ptrs[disks-2];
393 q = ptrs[disks-1];
394
395 /* Compute syndrome with zero for the missing data page
396 Use the dead data page as temporary storage for delta q */
397 dq = ptrs[faila];
398 ptrs[faila] = zero;
399
400 qsyndrome(p, dq, ptrs, disks-2, bytes);
401
402 /* Restore pointer table */
403 ptrs[faila] = dq;
404
405 /* Now, pick the proper data tables */
406 qmul = raid6_gfmul[raid6_gfinv[raid6_gfexp[faila]]];
407
408 /* Now do it... */
409 while ( bytes-- ) {
410 *p++ ^= *dq = qmul[*q ^ *dq];
411 q++; dq++;
412 }
413 }
414
415 /* Try to find out if a specific disk has a problem */
416 int raid6_check_disks(int data_disks, int start, int chunk_size,
417 int level, int layout, int diskP, int diskQ,
418 char *p, char *q, char **stripes)
419 {
420 int i;
421 int data_id, diskD;
422 uint8_t Px, Qx;
423 int curr_broken_disk = -1;
424 int prev_broken_disk = -1;
425 int broken_status = 0;
426
427 for(i = 0; i < chunk_size; i++) {
428 Px = (uint8_t)stripes[diskP][i] ^ (uint8_t)p[i];
429 Qx = (uint8_t)stripes[diskQ][i] ^ (uint8_t)q[i];
430
431 if((Px != 0) && (Qx == 0))
432 curr_broken_disk = diskP;
433
434 if((Px == 0) && (Qx != 0))
435 curr_broken_disk = diskQ;
436
437 if((Px != 0) && (Qx != 0)) {
438 data_id = (raid6_gflog[Qx] - raid6_gflog[Px]);
439 if(data_id < 0) data_id += 255;
440 diskD = geo_map(data_id, start/chunk_size,
441 data_disks + 2, level, layout);
442 curr_broken_disk = diskD;
443 }
444
445 if((Px == 0) && (Qx == 0))
446 curr_broken_disk = curr_broken_disk;
447
448 if(curr_broken_disk >= data_disks + 2)
449 broken_status = 2;
450
451 switch(broken_status) {
452 case 0:
453 if(curr_broken_disk != -1) {
454 prev_broken_disk = curr_broken_disk;
455 broken_status = 1;
456 }
457 break;
458
459 case 1:
460 if(curr_broken_disk != prev_broken_disk)
461 broken_status = 2;
462 break;
463
464 case 2:
465 default:
466 curr_broken_disk = prev_broken_disk = -2;
467 break;
468 }
469 }
470
471 return curr_broken_disk;
472 }
473
474 /*******************************************************************************
475 * Function: save_stripes
476 * Description:
477 * Function reads data (only data without P and Q) from array and writes
478 * it to buf and opcjonaly to backup files
479 * Parameters:
480 * source : A list of 'fds' of the active disks.
481 * Some may be absent
482 * offsets : A list of offsets on disk belonging
483 * to the array [bytes]
484 * raid_disks : geometry: number of disks in the array
485 * chunk_size : geometry: chunk size [bytes]
486 * level : geometry: RAID level
487 * layout : geometry: layout
488 * nwrites : number of backup files
489 * dest : A list of 'fds' for mirrored targets
490 * (e.g. backup files). They are already seeked to right
491 * (write) location. If NULL, data will be wrote
492 * to the buf only
493 * start : start address of data to read (must be stripe-aligned)
494 * [bytes]
495 * length - : length of data to read (must be stripe-aligned)
496 * [bytes]
497 * buf : buffer for data. It is large enough to hold
498 * one stripe. It is stripe aligned
499 * Returns:
500 * 0 : success
501 * -1 : fail
502 ******************************************************************************/
503 int save_stripes(int *source, unsigned long long *offsets,
504 int raid_disks, int chunk_size, int level, int layout,
505 int nwrites, int *dest,
506 unsigned long long start, unsigned long long length,
507 char *buf)
508 {
509 int len;
510 int data_disks = raid_disks - (level == 0 ? 0 : level <=5 ? 1 : 2);
511 int disk;
512 int i;
513 unsigned long long length_test;
514
515 if (!tables_ready)
516 make_tables();
517 ensure_zero_has_size(chunk_size);
518
519 len = data_disks * chunk_size;
520 length_test = length / len;
521 length_test *= len;
522
523 if (length != length_test) {
524 dprintf("Error: save_stripes(): Data are not alligned. EXIT\n");
525 dprintf("\tArea for saving stripes (length) = %llu\n", length);
526 dprintf("\tWork step (len) = %i\n", len);
527 dprintf("\tExpected save area (length_test) = %llu\n",
528 length_test);
529 abort();
530 }
531
532 while (length > 0) {
533 int failed = 0;
534 int fdisk[3], fblock[3];
535 for (disk = 0; disk < raid_disks ; disk++) {
536 unsigned long long offset;
537 int dnum;
538
539 offset = (start/chunk_size/data_disks)*chunk_size;
540 dnum = geo_map(disk < data_disks ? disk : data_disks - disk - 1,
541 start/chunk_size/data_disks,
542 raid_disks, level, layout);
543 if (dnum < 0) abort();
544 if (source[dnum] < 0 ||
545 lseek64(source[dnum], offsets[dnum]+offset, 0) < 0 ||
546 read(source[dnum], buf+disk * chunk_size, chunk_size)
547 != chunk_size)
548 if (failed <= 2) {
549 fdisk[failed] = dnum;
550 fblock[failed] = disk;
551 failed++;
552 }
553 }
554 if (failed == 0 || fblock[0] >= data_disks)
555 /* all data disks are good */
556 ;
557 else if (failed == 1 || fblock[1] >= data_disks+1) {
558 /* one failed data disk and good parity */
559 char *bufs[data_disks];
560 for (i=0; i < data_disks; i++)
561 if (fblock[0] == i)
562 bufs[i] = buf + data_disks*chunk_size;
563 else
564 bufs[i] = buf + i*chunk_size;
565
566 xor_blocks(buf + fblock[0]*chunk_size,
567 bufs, data_disks, chunk_size);
568 } else if (failed > 2 || level != 6)
569 /* too much failure */
570 return -1;
571 else {
572 /* RAID6 computations needed. */
573 uint8_t *bufs[data_disks+4];
574 int qdisk;
575 int syndrome_disks;
576 disk = geo_map(-1, start/chunk_size/data_disks,
577 raid_disks, level, layout);
578 qdisk = geo_map(-2, start/chunk_size/data_disks,
579 raid_disks, level, layout);
580 if (is_ddf(layout)) {
581 /* q over 'raid_disks' blocks, in device order.
582 * 'p' and 'q' get to be all zero
583 */
584 for (i = 0; i < raid_disks; i++)
585 bufs[i] = zero;
586 for (i = 0; i < data_disks; i++) {
587 int dnum = geo_map(i,
588 start/chunk_size/data_disks,
589 raid_disks, level, layout);
590 int snum;
591 /* i is the logical block number, so is index to 'buf'.
592 * dnum is physical disk number
593 * and thus the syndrome number.
594 */
595 snum = dnum;
596 bufs[snum] = (uint8_t*)buf + chunk_size * i;
597 }
598 syndrome_disks = raid_disks;
599 } else {
600 /* for md, q is over 'data_disks' blocks,
601 * starting immediately after 'q'
602 * Note that for the '_6' variety, the p block
603 * makes a hole that we need to be careful of.
604 */
605 int j;
606 int snum = 0;
607 for (j = 0; j < raid_disks; j++) {
608 int dnum = (qdisk + 1 + j) % raid_disks;
609 if (dnum == disk || dnum == qdisk)
610 continue;
611 for (i = 0; i < data_disks; i++)
612 if (geo_map(i,
613 start/chunk_size/data_disks,
614 raid_disks, level, layout) == dnum)
615 break;
616 /* i is the logical block number, so is index to 'buf'.
617 * dnum is physical disk number
618 * snum is syndrome disk for which 0 is immediately after Q
619 */
620 bufs[snum] = (uint8_t*)buf + chunk_size * i;
621
622 if (fblock[0] == i)
623 fdisk[0] = snum;
624 if (fblock[1] == i)
625 fdisk[1] = snum;
626 snum++;
627 }
628
629 syndrome_disks = data_disks;
630 }
631
632 /* Place P and Q blocks at end of bufs */
633 bufs[syndrome_disks] = (uint8_t*)buf + chunk_size * data_disks;
634 bufs[syndrome_disks+1] = (uint8_t*)buf + chunk_size * (data_disks+1);
635
636 if (fblock[1] == data_disks)
637 /* One data failed, and parity failed */
638 raid6_datap_recov(syndrome_disks+2, chunk_size,
639 fdisk[0], bufs);
640 else {
641 if (fdisk[0] > fdisk[1]) {
642 int t = fdisk[0];
643 fdisk[0] = fdisk[1];
644 fdisk[1] = t;
645 }
646 /* Two data blocks failed, P,Q OK */
647 raid6_2data_recov(syndrome_disks+2, chunk_size,
648 fdisk[0], fdisk[1], bufs);
649 }
650 }
651 if (dest) {
652 for (i = 0; i < nwrites; i++)
653 if (write(dest[i], buf, len) != len)
654 return -1;
655 } else {
656 /* build next stripe in buffer */
657 buf += len;
658 }
659 length -= len;
660 start += len;
661 }
662 return 0;
663 }
664
665 /* Restore data:
666 * We are given:
667 * A list of 'fds' of the active disks. Some may be '-1' for not-available.
668 * A geometry: raid_disks, chunk_size, level, layout
669 * An 'fd' to read from. It is already seeked to the right (Read) location.
670 * A start and length.
671 * The length must be a multiple of the stripe size.
672 *
673 * We build a full stripe in memory and then write it out.
674 * We assume that there are enough working devices.
675 */
676 int restore_stripes(int *dest, unsigned long long *offsets,
677 int raid_disks, int chunk_size, int level, int layout,
678 int source, unsigned long long read_offset,
679 unsigned long long start, unsigned long long length,
680 char *src_buf)
681 {
682 char *stripe_buf;
683 char **stripes = xmalloc(raid_disks * sizeof(char*));
684 char **blocks = xmalloc(raid_disks * sizeof(char*));
685 int i;
686 int rv;
687
688 int data_disks = raid_disks - (level == 0 ? 0 : level <= 5 ? 1 : 2);
689
690 if (posix_memalign((void**)&stripe_buf, 4096, raid_disks * chunk_size))
691 stripe_buf = NULL;
692
693 if (zero == NULL || chunk_size > zero_size) {
694 if (zero)
695 free(zero);
696 zero = xcalloc(1, chunk_size);
697 zero_size = chunk_size;
698 }
699
700 if (stripe_buf == NULL || stripes == NULL || blocks == NULL
701 || zero == NULL) {
702 rv = -2;
703 goto abort;
704 }
705 for (i = 0; i < raid_disks; i++)
706 stripes[i] = stripe_buf + i * chunk_size;
707 while (length > 0) {
708 unsigned int len = data_disks * chunk_size;
709 unsigned long long offset;
710 int disk, qdisk;
711 int syndrome_disks;
712 if (length < len) {
713 rv = -3;
714 goto abort;
715 }
716 for (i = 0; i < data_disks; i++) {
717 int disk = geo_map(i, start/chunk_size/data_disks,
718 raid_disks, level, layout);
719 if (src_buf == NULL) {
720 /* read from file */
721 if (lseek64(source, read_offset, 0) !=
722 (off64_t)read_offset) {
723 rv = -1;
724 goto abort;
725 }
726 if (read(source,
727 stripes[disk],
728 chunk_size) != chunk_size) {
729 rv = -1;
730 goto abort;
731 }
732 } else {
733 /* read from input buffer */
734 memcpy(stripes[disk],
735 src_buf + read_offset,
736 chunk_size);
737 }
738 read_offset += chunk_size;
739 }
740 /* We have the data, now do the parity */
741 offset = (start/chunk_size/data_disks) * chunk_size;
742 switch (level) {
743 case 4:
744 case 5:
745 disk = geo_map(-1, start/chunk_size/data_disks,
746 raid_disks, level, layout);
747 for (i = 0; i < data_disks; i++)
748 blocks[i] = stripes[(disk+1+i) % raid_disks];
749 xor_blocks(stripes[disk], blocks, data_disks, chunk_size);
750 break;
751 case 6:
752 disk = geo_map(-1, start/chunk_size/data_disks,
753 raid_disks, level, layout);
754 qdisk = geo_map(-2, start/chunk_size/data_disks,
755 raid_disks, level, layout);
756 if (is_ddf(layout)) {
757 /* q over 'raid_disks' blocks, in device order.
758 * 'p' and 'q' get to be all zero
759 */
760 for (i = 0; i < raid_disks; i++)
761 if (i == disk || i == qdisk)
762 blocks[i] = (char*)zero;
763 else
764 blocks[i] = stripes[i];
765 syndrome_disks = raid_disks;
766 } else {
767 /* for md, q is over 'data_disks' blocks,
768 * starting immediately after 'q'
769 */
770 for (i = 0; i < data_disks; i++)
771 blocks[i] = stripes[(qdisk+1+i) % raid_disks];
772
773 syndrome_disks = data_disks;
774 }
775 qsyndrome((uint8_t*)stripes[disk],
776 (uint8_t*)stripes[qdisk],
777 (uint8_t**)blocks,
778 syndrome_disks, chunk_size);
779 break;
780 }
781 for (i=0; i < raid_disks ; i++)
782 if (dest[i] >= 0) {
783 if (lseek64(dest[i],
784 offsets[i]+offset, 0) < 0) {
785 rv = -1;
786 goto abort;
787 }
788 if (write(dest[i], stripes[i],
789 chunk_size) != chunk_size) {
790 rv = -1;
791 goto abort;
792 }
793 }
794 length -= len;
795 start += len;
796 }
797 rv = 0;
798
799 abort:
800 free(stripe_buf);
801 free(stripes);
802 free(blocks);
803 return rv;
804 }
805
806 #ifdef MAIN
807
808 int test_stripes(int *source, unsigned long long *offsets,
809 int raid_disks, int chunk_size, int level, int layout,
810 unsigned long long start, unsigned long long length)
811 {
812 /* ready the data and p (and q) blocks, and check we got them right */
813 char *stripe_buf = xmalloc(raid_disks * chunk_size);
814 char **stripes = xmalloc(raid_disks * sizeof(char*));
815 char **blocks = xmalloc(raid_disks * sizeof(char*));
816 char *p = xmalloc(chunk_size);
817 char *q = xmalloc(chunk_size);
818
819 int i;
820 int diskP, diskQ;
821 int data_disks = raid_disks - (level == 5 ? 1: 2);
822
823 if (!tables_ready)
824 make_tables();
825
826 for ( i = 0 ; i < raid_disks ; i++)
827 stripes[i] = stripe_buf + i * chunk_size;
828
829 while (length > 0) {
830 int disk;
831
832 for (i = 0 ; i < raid_disks ; i++) {
833 lseek64(source[i], offsets[i]+start, 0);
834 read(source[i], stripes[i], chunk_size);
835 }
836 for (i = 0 ; i < data_disks ; i++) {
837 int disk = geo_map(i, start/chunk_size, raid_disks,
838 level, layout);
839 blocks[i] = stripes[disk];
840 printf("%d->%d\n", i, disk);
841 }
842 switch(level) {
843 case 6:
844 qsyndrome(p, q, (uint8_t**)blocks, data_disks, chunk_size);
845 diskP = geo_map(-1, start/chunk_size, raid_disks,
846 level, layout);
847 if (memcmp(p, stripes[diskP], chunk_size) != 0) {
848 printf("P(%d) wrong at %llu\n", diskP,
849 start / chunk_size);
850 }
851 diskQ = geo_map(-2, start/chunk_size, raid_disks,
852 level, layout);
853 if (memcmp(q, stripes[diskQ], chunk_size) != 0) {
854 printf("Q(%d) wrong at %llu\n", diskQ,
855 start / chunk_size);
856 }
857 disk = raid6_check_disks(data_disks, start, chunk_size,
858 level, layout, diskP, diskQ,
859 p, q, stripes);
860 if(disk >= 0) {
861 printf("Possible failed disk: %d\n", disk);
862 }
863 if(disk == -2) {
864 printf("Failure detected, but disk unknown\n");
865 }
866 break;
867 }
868 length -= chunk_size;
869 start += chunk_size;
870 }
871 return 0;
872 }
873
/* Parse 'str' as an unsigned decimal number.
 * On success the value is returned and *err is left untouched.
 * If 'str' is empty or has trailing characters, *err is set to 'str'
 * and 0 is returned.
 */
unsigned long long getnum(char *str, char **err)
{
	char *end;
	unsigned long long value = strtoull(str, &end, 10);

	if (end != str && *end == '\0')
		return value;

	*err = str;
	return 0;
}
884
/* Test driver:
 *   save/restore/test file raid_disks chunk_size level layout start length devices...
 *
 * Each device argument may carry a ":sectors" suffix, which is turned
 * into a byte offset (sectors * 512) for that device.
 * Exit status: 0 on success, 1 when the selected operation fails,
 * 2 for bad arguments, 3 when a file or device cannot be opened.
 */
int main(int argc, char *argv[])
{
	int save;	/* 1: save, 0: restore, 2: test */
	int *fds;
	char *file;
	char *buf;
	int storefd;
	unsigned long long *offsets;
	int raid_disks, chunk_size, level, layout;
	unsigned long long start, length;
	int i;

	char *err = NULL;
	if (argc < 10) {
		fprintf(stderr, "Usage: test_stripe save/restore/test file raid_disks"
			" chunk_size level layout start length devices...\n");
		exit(1);
	}
	if (strcmp(argv[1], "save") == 0)
		save = 1;
	else if (strcmp(argv[1], "restore") == 0)
		save = 0;
	else if (strcmp(argv[1], "test") == 0)
		save = 2;
	else {
		fprintf(stderr, "test_stripe: must give 'save', 'restore' or 'test'.\n");
		exit(2);
	}

	file = argv[2];
	/* getnum() records the first... or rather any bad argument in 'err'. */
	raid_disks = getnum(argv[3], &err);
	chunk_size = getnum(argv[4], &err);
	level = getnum(argv[5], &err);
	layout = getnum(argv[6], &err);
	start = getnum(argv[7], &err);
	length = getnum(argv[8], &err);
	if (err) {
		fprintf(stderr, "test_stripe: Bad number: %s\n", err);
		exit(2);
	}
	if (argc != raid_disks + 9) {
		fprintf(stderr, "test_stripe: wrong number of devices: want %d found %d\n",
			raid_disks, argc-9);
		exit(2);
	}
	fds = xmalloc(raid_disks * sizeof(*fds));
	offsets = xcalloc(raid_disks, sizeof(*offsets));

	storefd = open(file, O_RDWR);
	if (storefd < 0) {
		perror(file);
		fprintf(stderr, "test_stripe: could not open %s.\n", file);
		exit(3);
	}
	for (i = 0; i < raid_disks; i++) {
		char *p;
		p = strchr(argv[9+i], ':');

		if (p != NULL) {
			/* Split "device:sectors" in place. */
			*p++ = '\0';
			offsets[i] = atoll(p) * 512;
		}

		fds[i] = open(argv[9+i], O_RDWR);
		if (fds[i] < 0) {
			perror(argv[9+i]);
			fprintf(stderr,"test_stripe: cannot open %s.\n", argv[9+i]);
			exit(3);
		}
	}

	buf = xmalloc(raid_disks * chunk_size);

	if (save == 1) {
		int rv = save_stripes(fds, offsets,
				      raid_disks, chunk_size, level, layout,
				      1, &storefd,
				      start, length, buf);
		if (rv != 0) {
			fprintf(stderr,
				"test_stripe: save_stripes returned %d\n", rv);
			exit(1);
		}
	} else if (save == 2) {
		int rv = test_stripes(fds, offsets,
				      raid_disks, chunk_size, level, layout,
				      start, length);
		if (rv != 0) {
			fprintf(stderr,
				"test_stripe: test_stripes returned %d\n", rv);
			exit(1);
		}
	} else {
		int rv = restore_stripes(fds, offsets,
					 raid_disks, chunk_size, level, layout,
					 storefd, 0ULL,
					 start, length, NULL);
		if (rv != 0) {
			fprintf(stderr,
				"test_stripe: restore_stripes returned %d\n",
				rv);
			exit(1);
		}
	}
	exit(0);
}
993
994 #endif /* MAIN */