]> git.ipfire.org Git - thirdparty/mdadm.git/blob - restripe.c
Create: support --readonly flag.
[thirdparty/mdadm.git] / restripe.c
1 /*
2 * mdadm - manage Linux "md" devices aka RAID arrays.
3 *
4 * Copyright (C) 2006-2009 Neil Brown <neilb@suse.de>
5 *
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 *
21 * Author: Neil Brown
22 * Email: <neilb@suse.de>
23 */
24
25 #include "mdadm.h"
26 #include <stdint.h>
27
28 /* To restripe, we read from old geometry to a buffer, and
29 * write from buffer to new geometry.
30 * When reading, we might have missing devices and so could need
31 * to reconstruct.
32 * When writing, we need to create correct parity and Q.
33 *
34 */
35
36 int geo_map(int block, unsigned long long stripe, int raid_disks,
37 int level, int layout)
38 {
39 /* On the given stripe, find which disk in the array will have
40 * block numbered 'block'.
41 * '-1' means the parity block.
42 * '-2' means the Q syndrome.
43 */
44 int pd;
45
46 /* layout is not relevant for raid0 and raid4 */
47 if ((level == 0) ||
48 (level == 4))
49 layout = 0;
50
51 switch(level*100 + layout) {
52 case 000:
53 case 400:
54 case 500 + ALGORITHM_PARITY_N:
55 /* raid 4 isn't messed around by parity blocks */
56 if (block == -1)
57 return raid_disks-1; /* parity block */
58 return block;
59 case 500 + ALGORITHM_LEFT_ASYMMETRIC:
60 pd = (raid_disks-1) - stripe % raid_disks;
61 if (block == -1) return pd;
62 if (block >= pd)
63 block++;
64 return block;
65
66 case 500 + ALGORITHM_RIGHT_ASYMMETRIC:
67 pd = stripe % raid_disks;
68 if (block == -1) return pd;
69 if (block >= pd)
70 block++;
71 return block;
72
73 case 500 + ALGORITHM_LEFT_SYMMETRIC:
74 pd = (raid_disks - 1) - stripe % raid_disks;
75 if (block == -1) return pd;
76 return (pd + 1 + block) % raid_disks;
77
78 case 500 + ALGORITHM_RIGHT_SYMMETRIC:
79 pd = stripe % raid_disks;
80 if (block == -1) return pd;
81 return (pd + 1 + block) % raid_disks;
82
83 case 500 + ALGORITHM_PARITY_0:
84 return block + 1;
85
86
87 case 600 + ALGORITHM_PARITY_N_6:
88 if (block == -2)
89 return raid_disks - 1;
90 if (block == -1)
91 return raid_disks - 2; /* parity block */
92 return block;
93 case 600 + ALGORITHM_LEFT_ASYMMETRIC_6:
94 if (block == -2)
95 return raid_disks - 1;
96 raid_disks--;
97 pd = (raid_disks-1) - stripe % raid_disks;
98 if (block == -1) return pd;
99 if (block >= pd)
100 block++;
101 return block;
102
103 case 600 + ALGORITHM_RIGHT_ASYMMETRIC_6:
104 if (block == -2)
105 return raid_disks - 1;
106 raid_disks--;
107 pd = stripe % raid_disks;
108 if (block == -1) return pd;
109 if (block >= pd)
110 block++;
111 return block;
112
113 case 600 + ALGORITHM_LEFT_SYMMETRIC_6:
114 if (block == -2)
115 return raid_disks - 1;
116 raid_disks--;
117 pd = (raid_disks - 1) - stripe % raid_disks;
118 if (block == -1) return pd;
119 return (pd + 1 + block) % raid_disks;
120
121 case 600 + ALGORITHM_RIGHT_SYMMETRIC_6:
122 if (block == -2)
123 return raid_disks - 1;
124 raid_disks--;
125 pd = stripe % raid_disks;
126 if (block == -1) return pd;
127 return (pd + 1 + block) % raid_disks;
128
129 case 600 + ALGORITHM_PARITY_0_6:
130 if (block == -2)
131 return raid_disks - 1;
132 return block + 1;
133
134
135 case 600 + ALGORITHM_PARITY_0:
136 if (block == -1)
137 return 0;
138 if (block == -2)
139 return 1;
140 return block + 2;
141
142 case 600 + ALGORITHM_LEFT_ASYMMETRIC:
143 pd = raid_disks - 1 - (stripe % raid_disks);
144 if (block == -1) return pd;
145 if (block == -2) return (pd+1) % raid_disks;
146 if (pd == raid_disks - 1)
147 return block+1;
148 if (block >= pd)
149 return block+2;
150 return block;
151
152 case 600 + ALGORITHM_ROTATING_ZERO_RESTART:
153 /* Different order for calculating Q, otherwize same as ... */
154 case 600 + ALGORITHM_RIGHT_ASYMMETRIC:
155 pd = stripe % raid_disks;
156 if (block == -1) return pd;
157 if (block == -2) return (pd+1) % raid_disks;
158 if (pd == raid_disks - 1)
159 return block+1;
160 if (block >= pd)
161 return block+2;
162 return block;
163
164 case 600 + ALGORITHM_LEFT_SYMMETRIC:
165 pd = raid_disks - 1 - (stripe % raid_disks);
166 if (block == -1) return pd;
167 if (block == -2) return (pd+1) % raid_disks;
168 return (pd + 2 + block) % raid_disks;
169
170 case 600 + ALGORITHM_RIGHT_SYMMETRIC:
171 pd = stripe % raid_disks;
172 if (block == -1) return pd;
173 if (block == -2) return (pd+1) % raid_disks;
174 return (pd + 2 + block) % raid_disks;
175
176
177 case 600 + ALGORITHM_ROTATING_N_RESTART:
178 /* Same a left_asymmetric, by first stripe is
179 * D D D P Q rather than
180 * Q D D D P
181 */
182 pd = raid_disks - 1 - ((stripe + 1) % raid_disks);
183 if (block == -1) return pd;
184 if (block == -2) return (pd+1) % raid_disks;
185 if (pd == raid_disks - 1)
186 return block+1;
187 if (block >= pd)
188 return block+2;
189 return block;
190
191 case 600 + ALGORITHM_ROTATING_N_CONTINUE:
192 /* Same as left_symmetric but Q is before P */
193 pd = raid_disks - 1 - (stripe % raid_disks);
194 if (block == -1) return pd;
195 if (block == -2) return (pd+raid_disks-1) % raid_disks;
196 return (pd + 1 + block) % raid_disks;
197 }
198 return -1;
199 }
200 static int is_ddf(int layout)
201 {
202 switch (layout)
203 {
204 default:
205 return 0;
206 case ALGORITHM_ROTATING_N_CONTINUE:
207 case ALGORITHM_ROTATING_N_RESTART:
208 case ALGORITHM_ROTATING_ZERO_RESTART:
209 return 1;
210 }
211 }
212
213
/* Store in 'target' the byte-wise XOR of 'disks' source buffers, each
 * 'size' bytes long.  With no sources the target is filled with zeros.
 * Deliberately simple rather than fast.
 */
static void xor_blocks(char *target, char **sources, int disks, int size)
{
	int pos;

	for (pos = 0; pos < size; pos++) {
		char acc = 0;
		int src = 0;

		while (src < disks) {
			acc ^= sources[src][pos];
			src++;
		}
		target[pos] = acc;
	}
}
225
/* Compute the P (XOR) and Q (GF(2^8) syndrome) blocks over 'disks' source
 * buffers of 'size' bytes each.
 *
 * For each byte position the sources are folded from the last one down to
 * the first: P accumulates the XOR, while Q is multiplied by 2 in the field
 * (shift left, and XOR with 0x1d when the top bit was set) before each
 * source byte is XORed in.  Results are stored to p[] and q[] only after
 * all sources for that byte have been read.
 */
void qsyndrome(uint8_t *p, uint8_t *q, uint8_t **sources, int disks, int size)
{
	int pos;

	for (pos = 0; pos < size; pos++) {
		uint8_t par = sources[disks - 1][pos];
		uint8_t syn = par;
		int z;

		for (z = disks - 2; z >= 0; z--) {
			uint8_t data = sources[z][pos];
			uint8_t doubled = (uint8_t)(syn << 1);

			if (syn & 0x80)
				doubled ^= 0x1d;	/* reduce after overflow */
			par ^= data;
			syn = doubled ^ data;
		}
		p[pos] = par;
		q[pos] = syn;
	}
}
245
246
247 /*
248 * The following was taken from linux/drivers/md/mktables.c, and modified
249 * to create in-memory tables rather than C code
250 */
/* Multiply two bytes as elements of GF(2^8): shift-and-add over the bits
 * of 'b', reducing 'a' with 0x1d whenever doubling it overflows bit 7.
 */
static uint8_t gfmul(uint8_t a, uint8_t b)
{
	uint8_t product = 0;

	for (; b != 0; b >>= 1) {
		if (b & 1)
			product ^= a;
		a = (uint8_t)((a << 1) ^ ((a & 0x80) ? 0x1d : 0));
	}

	return product;
}
264
/* Raise 'a' to the power 'b' using gfmul(), by square-and-multiply.
 * The exponent is first reduced modulo 255 into the range 0..254, so
 * negative exponents are accepted.
 */
static uint8_t gfpow(uint8_t a, int b)
{
	uint8_t result = 1;
	int exponent = b % 255;

	if (exponent < 0)
		exponent += 255;

	for (; exponent != 0; exponent >>= 1) {
		if (exponent & 1)
			result = gfmul(result, a);
		a = gfmul(a, a);
	}

	return result;
}
282
/* Lookup tables filled in by make_tables(); tables_ready is set to 1 once
 * they have been built.
 */
int tables_ready = 0;
uint8_t raid6_gfmul[256][256];
uint8_t raid6_gfexp[256];
uint8_t raid6_gfinv[256];
uint8_t raid6_gfexi[256];
uint8_t raid6_gflog[256];
uint8_t raid6_gfilog[256];

/* Build all the in-memory GF(2^8) tables and mark them ready. */
void make_tables(void)
{
	int row, col, idx;
	uint8_t power;
	uint32_t elem, exponent;

	/* Full multiplication table */
	for (row = 0; row < 256; row++)
		for (col = 0; col < 256; col++)
			raid6_gfmul[row][col] = gfmul(row, col);

	/* Powers of 2.  Once the running power comes back round to 1 it is
	 * forced to 0, so entry 255 (not a real entry) is stored as 0.
	 */
	power = 1;
	for (idx = 0; idx < 256; idx++) {
		raid6_gfexp[idx] = power;
		power = gfmul(power, 2);
		if (power == 1)
			power = 0;
	}

	/* Inverses: x^-1 == x^254 */
	for (idx = 0; idx < 256; idx++)
		raid6_gfinv[idx] = gfpow(idx, 254);

	/* inv(2^x + 1), the exponent-xor-inverse table; needs the complete
	 * inverse table from the loop above.
	 */
	for (idx = 0; idx < 256; idx++)
		raid6_gfexi[idx] = raid6_gfinv[raid6_gfexp[idx] ^ 1];

	/* Log and inverse-log tables.  Modified code from:
	 * http://web.eecs.utk.edu/~plank/plank/papers/CS-96-332.html
	 * The two entries the loop never writes are set to 0 first.
	 */
	raid6_gflog[0] = 0;
	raid6_gfilog[255] = 0;
	elem = 1;
	for (exponent = 0; exponent < 255; exponent++) {
		raid6_gflog[elem] = (uint8_t) exponent;
		raid6_gfilog[exponent] = (uint8_t) elem;
		elem <<= 1;
		if (elem & 0x100)
			elem ^= 0x11d;
	}

	tables_ready = 1;
}
335
336 uint8_t *zero;
337 int zero_size;
338 /* Following was taken from linux/drivers/md/raid6recov.c */
339
340 /* Recover two failed data blocks. */
341 void raid6_2data_recov(int disks, size_t bytes, int faila, int failb,
342 uint8_t **ptrs)
343 {
344 uint8_t *p, *q, *dp, *dq;
345 uint8_t px, qx, db;
346 const uint8_t *pbmul; /* P multiplier table for B data */
347 const uint8_t *qmul; /* Q multiplier table (for both) */
348
349 p = ptrs[disks-2];
350 q = ptrs[disks-1];
351
352 /* Compute syndrome with zero for the missing data pages
353 Use the dead data pages as temporary storage for
354 delta p and delta q */
355 dp = ptrs[faila];
356 ptrs[faila] = zero;
357 dq = ptrs[failb];
358 ptrs[failb] = zero;
359
360 qsyndrome(dp, dq, ptrs, disks-2, bytes);
361
362 /* Restore pointer table */
363 ptrs[faila] = dp;
364 ptrs[failb] = dq;
365
366 /* Now, pick the proper data tables */
367 pbmul = raid6_gfmul[raid6_gfexi[failb-faila]];
368 qmul = raid6_gfmul[raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]]];
369
370 /* Now do it... */
371 while ( bytes-- ) {
372 px = *p ^ *dp;
373 qx = qmul[*q ^ *dq];
374 *dq++ = db = pbmul[px] ^ qx; /* Reconstructed B */
375 *dp++ = db ^ px; /* Reconstructed A */
376 p++; q++;
377 }
378 }
379
380 /* Recover failure of one data block plus the P block */
381 void raid6_datap_recov(int disks, size_t bytes, int faila, uint8_t **ptrs)
382 {
383 uint8_t *p, *q, *dq;
384 const uint8_t *qmul; /* Q multiplier table */
385
386 p = ptrs[disks-2];
387 q = ptrs[disks-1];
388
389 /* Compute syndrome with zero for the missing data page
390 Use the dead data page as temporary storage for delta q */
391 dq = ptrs[faila];
392 ptrs[faila] = zero;
393
394 qsyndrome(p, dq, ptrs, disks-2, bytes);
395
396 /* Restore pointer table */
397 ptrs[faila] = dq;
398
399 /* Now, pick the proper data tables */
400 qmul = raid6_gfmul[raid6_gfinv[raid6_gfexp[faila]]];
401
402 /* Now do it... */
403 while ( bytes-- ) {
404 *p++ ^= *dq = qmul[*q ^ *dq];
405 q++; dq++;
406 }
407 }
408
409 /* Try to find out if a specific disk has a problem */
410 int raid6_check_disks(int data_disks, int start, int chunk_size,
411 int level, int layout, int diskP, int diskQ,
412 char *p, char *q, char **stripes)
413 {
414 int i;
415 int data_id, diskD;
416 uint8_t Px, Qx;
417 int curr_broken_disk = -1;
418 int prev_broken_disk = -1;
419 int broken_status = 0;
420
421 for(i = 0; i < chunk_size; i++) {
422 Px = (uint8_t)stripes[diskP][i] ^ (uint8_t)p[i];
423 Qx = (uint8_t)stripes[diskQ][i] ^ (uint8_t)q[i];
424
425 if((Px != 0) && (Qx == 0))
426 curr_broken_disk = diskP;
427
428
429 if((Px == 0) && (Qx != 0))
430 curr_broken_disk = diskQ;
431
432
433 if((Px != 0) && (Qx != 0)) {
434 data_id = (raid6_gflog[Qx] - raid6_gflog[Px]);
435 if(data_id < 0) data_id += 255;
436 diskD = geo_map(data_id, start/chunk_size,
437 data_disks + 2, level, layout);
438 curr_broken_disk = diskD;
439 }
440
441 if((Px == 0) && (Qx == 0))
442 curr_broken_disk = curr_broken_disk;
443
444 if(curr_broken_disk >= data_disks + 2)
445 broken_status = 2;
446
447 switch(broken_status) {
448 case 0:
449 if(curr_broken_disk != -1) {
450 prev_broken_disk = curr_broken_disk;
451 broken_status = 1;
452 }
453 break;
454
455 case 1:
456 if(curr_broken_disk != prev_broken_disk)
457 broken_status = 2;
458 break;
459
460 case 2:
461 default:
462 curr_broken_disk = prev_broken_disk = -2;
463 break;
464 }
465 }
466
467 return curr_broken_disk;
468 }
469
470 /*******************************************************************************
471 * Function: save_stripes
472 * Description:
473 * Function reads data (only data without P and Q) from array and writes
474 * it to buf and opcjonaly to backup files
475 * Parameters:
476 * source : A list of 'fds' of the active disks.
477 * Some may be absent
478 * offsets : A list of offsets on disk belonging
479 * to the array [bytes]
480 * raid_disks : geometry: number of disks in the array
481 * chunk_size : geometry: chunk size [bytes]
482 * level : geometry: RAID level
483 * layout : geometry: layout
484 * nwrites : number of backup files
485 * dest : A list of 'fds' for mirrored targets
486 * (e.g. backup files). They are already seeked to right
487 * (write) location. If NULL, data will be wrote
488 * to the buf only
489 * start : start address of data to read (must be stripe-aligned)
490 * [bytes]
491 * length - : length of data to read (must be stripe-aligned)
492 * [bytes]
493 * buf : buffer for data. It is large enough to hold
494 * one stripe. It is stripe aligned
495 * Returns:
496 * 0 : success
497 * -1 : fail
498 ******************************************************************************/
499 int save_stripes(int *source, unsigned long long *offsets,
500 int raid_disks, int chunk_size, int level, int layout,
501 int nwrites, int *dest,
502 unsigned long long start, unsigned long long length,
503 char *buf)
504 {
505 int len;
506 int data_disks = raid_disks - (level == 0 ? 0 : level <=5 ? 1 : 2);
507 int disk;
508 int i;
509 unsigned long long length_test;
510
511 if (!tables_ready)
512 make_tables();
513
514 if (zero == NULL || chunk_size > zero_size) {
515 if (zero)
516 free(zero);
517 zero = xcalloc(1, chunk_size);
518 zero_size = chunk_size;
519 }
520
521 len = data_disks * chunk_size;
522 length_test = length / len;
523 length_test *= len;
524
525 if (length != length_test) {
526 dprintf("Error: save_stripes(): Data are not alligned. EXIT\n");
527 dprintf("\tArea for saving stripes (length) = %llu\n", length);
528 dprintf("\tWork step (len) = %i\n", len);
529 dprintf("\tExpected save area (length_test) = %llu\n",
530 length_test);
531 abort();
532 }
533
534 while (length > 0) {
535 int failed = 0;
536 int fdisk[3], fblock[3];
537 for (disk = 0; disk < raid_disks ; disk++) {
538 unsigned long long offset;
539 int dnum;
540
541 offset = (start/chunk_size/data_disks)*chunk_size;
542 dnum = geo_map(disk < data_disks ? disk : data_disks - disk - 1,
543 start/chunk_size/data_disks,
544 raid_disks, level, layout);
545 if (dnum < 0) abort();
546 if (source[dnum] < 0 ||
547 lseek64(source[dnum], offsets[dnum]+offset, 0) < 0 ||
548 read(source[dnum], buf+disk * chunk_size, chunk_size)
549 != chunk_size)
550 if (failed <= 2) {
551 fdisk[failed] = dnum;
552 fblock[failed] = disk;
553 failed++;
554 }
555 }
556 if (failed == 0 || fblock[0] >= data_disks)
557 /* all data disks are good */
558 ;
559 else if (failed == 1 || fblock[1] >= data_disks+1) {
560 /* one failed data disk and good parity */
561 char *bufs[data_disks];
562 for (i=0; i < data_disks; i++)
563 if (fblock[0] == i)
564 bufs[i] = buf + data_disks*chunk_size;
565 else
566 bufs[i] = buf + i*chunk_size;
567
568 xor_blocks(buf + fblock[0]*chunk_size,
569 bufs, data_disks, chunk_size);
570 } else if (failed > 2 || level != 6)
571 /* too much failure */
572 return -1;
573 else {
574 /* RAID6 computations needed. */
575 uint8_t *bufs[data_disks+4];
576 int qdisk;
577 int syndrome_disks;
578 disk = geo_map(-1, start/chunk_size/data_disks,
579 raid_disks, level, layout);
580 qdisk = geo_map(-2, start/chunk_size/data_disks,
581 raid_disks, level, layout);
582 if (is_ddf(layout)) {
583 /* q over 'raid_disks' blocks, in device order.
584 * 'p' and 'q' get to be all zero
585 */
586 for (i = 0; i < raid_disks; i++)
587 bufs[i] = zero;
588 for (i = 0; i < data_disks; i++) {
589 int dnum = geo_map(i,
590 start/chunk_size/data_disks,
591 raid_disks, level, layout);
592 int snum;
593 /* i is the logical block number, so is index to 'buf'.
594 * dnum is physical disk number
595 * and thus the syndrome number.
596 */
597 snum = dnum;
598 bufs[snum] = (uint8_t*)buf + chunk_size * i;
599 }
600 syndrome_disks = raid_disks;
601 } else {
602 /* for md, q is over 'data_disks' blocks,
603 * starting immediately after 'q'
604 * Note that for the '_6' variety, the p block
605 * makes a hole that we need to be careful of.
606 */
607 int j;
608 int snum = 0;
609 for (j = 0; j < raid_disks; j++) {
610 int dnum = (qdisk + 1 + j) % raid_disks;
611 if (dnum == disk || dnum == qdisk)
612 continue;
613 for (i = 0; i < data_disks; i++)
614 if (geo_map(i,
615 start/chunk_size/data_disks,
616 raid_disks, level, layout) == dnum)
617 break;
618 /* i is the logical block number, so is index to 'buf'.
619 * dnum is physical disk number
620 * snum is syndrome disk for which 0 is immediately after Q
621 */
622 bufs[snum] = (uint8_t*)buf + chunk_size * i;
623
624 if (fblock[0] == i)
625 fdisk[0] = snum;
626 if (fblock[1] == i)
627 fdisk[1] = snum;
628 snum++;
629 }
630
631 syndrome_disks = data_disks;
632 }
633
634 /* Place P and Q blocks at end of bufs */
635 bufs[syndrome_disks] = (uint8_t*)buf + chunk_size * data_disks;
636 bufs[syndrome_disks+1] = (uint8_t*)buf + chunk_size * (data_disks+1);
637
638 if (fblock[1] == data_disks)
639 /* One data failed, and parity failed */
640 raid6_datap_recov(syndrome_disks+2, chunk_size,
641 fdisk[0], bufs);
642 else {
643 if (fdisk[0] > fdisk[1]) {
644 int t = fdisk[0];
645 fdisk[0] = fdisk[1];
646 fdisk[1] = t;
647 }
648 /* Two data blocks failed, P,Q OK */
649 raid6_2data_recov(syndrome_disks+2, chunk_size,
650 fdisk[0], fdisk[1], bufs);
651 }
652 }
653 if (dest) {
654 for (i = 0; i < nwrites; i++)
655 if (write(dest[i], buf, len) != len)
656 return -1;
657 } else {
658 /* build next stripe in buffer */
659 buf += len;
660 }
661 length -= len;
662 start += len;
663 }
664 return 0;
665 }
666
667 /* Restore data:
668 * We are given:
669 * A list of 'fds' of the active disks. Some may be '-1' for not-available.
670 * A geometry: raid_disks, chunk_size, level, layout
671 * An 'fd' to read from. It is already seeked to the right (Read) location.
672 * A start and length.
673 * The length must be a multiple of the stripe size.
674 *
675 * We build a full stripe in memory and then write it out.
676 * We assume that there are enough working devices.
677 */
678 int restore_stripes(int *dest, unsigned long long *offsets,
679 int raid_disks, int chunk_size, int level, int layout,
680 int source, unsigned long long read_offset,
681 unsigned long long start, unsigned long long length,
682 char *src_buf)
683 {
684 char *stripe_buf;
685 char **stripes = xmalloc(raid_disks * sizeof(char*));
686 char **blocks = xmalloc(raid_disks * sizeof(char*));
687 int i;
688 int rv;
689
690 int data_disks = raid_disks - (level == 0 ? 0 : level <= 5 ? 1 : 2);
691
692 if (posix_memalign((void**)&stripe_buf, 4096, raid_disks * chunk_size))
693 stripe_buf = NULL;
694
695 if (zero == NULL || chunk_size > zero_size) {
696 if (zero)
697 free(zero);
698 zero = xcalloc(1, chunk_size);
699 zero_size = chunk_size;
700 }
701
702 if (stripe_buf == NULL || stripes == NULL || blocks == NULL
703 || zero == NULL) {
704 rv = -2;
705 goto abort;
706 }
707 for (i = 0; i < raid_disks; i++)
708 stripes[i] = stripe_buf + i * chunk_size;
709 while (length > 0) {
710 unsigned int len = data_disks * chunk_size;
711 unsigned long long offset;
712 int disk, qdisk;
713 int syndrome_disks;
714 if (length < len) {
715 rv = -3;
716 goto abort;
717 }
718 for (i = 0; i < data_disks; i++) {
719 int disk = geo_map(i, start/chunk_size/data_disks,
720 raid_disks, level, layout);
721 if (src_buf == NULL) {
722 /* read from file */
723 if (lseek64(source, read_offset, 0) !=
724 (off64_t)read_offset) {
725 rv = -1;
726 goto abort;
727 }
728 if (read(source,
729 stripes[disk],
730 chunk_size) != chunk_size) {
731 rv = -1;
732 goto abort;
733 }
734 } else {
735 /* read from input buffer */
736 memcpy(stripes[disk],
737 src_buf + read_offset,
738 chunk_size);
739 }
740 read_offset += chunk_size;
741 }
742 /* We have the data, now do the parity */
743 offset = (start/chunk_size/data_disks) * chunk_size;
744 switch (level) {
745 case 4:
746 case 5:
747 disk = geo_map(-1, start/chunk_size/data_disks,
748 raid_disks, level, layout);
749 for (i = 0; i < data_disks; i++)
750 blocks[i] = stripes[(disk+1+i) % raid_disks];
751 xor_blocks(stripes[disk], blocks, data_disks, chunk_size);
752 break;
753 case 6:
754 disk = geo_map(-1, start/chunk_size/data_disks,
755 raid_disks, level, layout);
756 qdisk = geo_map(-2, start/chunk_size/data_disks,
757 raid_disks, level, layout);
758 if (is_ddf(layout)) {
759 /* q over 'raid_disks' blocks, in device order.
760 * 'p' and 'q' get to be all zero
761 */
762 for (i = 0; i < raid_disks; i++)
763 if (i == disk || i == qdisk)
764 blocks[i] = (char*)zero;
765 else
766 blocks[i] = stripes[i];
767 syndrome_disks = raid_disks;
768 } else {
769 /* for md, q is over 'data_disks' blocks,
770 * starting immediately after 'q'
771 */
772 for (i = 0; i < data_disks; i++)
773 blocks[i] = stripes[(qdisk+1+i) % raid_disks];
774
775 syndrome_disks = data_disks;
776 }
777 qsyndrome((uint8_t*)stripes[disk],
778 (uint8_t*)stripes[qdisk],
779 (uint8_t**)blocks,
780 syndrome_disks, chunk_size);
781 break;
782 }
783 for (i=0; i < raid_disks ; i++)
784 if (dest[i] >= 0) {
785 if (lseek64(dest[i],
786 offsets[i]+offset, 0) < 0) {
787 rv = -1;
788 goto abort;
789 }
790 if (write(dest[i], stripes[i],
791 chunk_size) != chunk_size) {
792 rv = -1;
793 goto abort;
794 }
795 }
796 length -= len;
797 start += len;
798 }
799 rv = 0;
800
801 abort:
802 free(stripe_buf);
803 free(stripes);
804 free(blocks);
805 return rv;
806 }
807
808 #ifdef MAIN
809
810 int test_stripes(int *source, unsigned long long *offsets,
811 int raid_disks, int chunk_size, int level, int layout,
812 unsigned long long start, unsigned long long length)
813 {
814 /* ready the data and p (and q) blocks, and check we got them right */
815 char *stripe_buf = xmalloc(raid_disks * chunk_size);
816 char **stripes = xmalloc(raid_disks * sizeof(char*));
817 char **blocks = xmalloc(raid_disks * sizeof(char*));
818 char *p = xmalloc(chunk_size);
819 char *q = xmalloc(chunk_size);
820
821 int i;
822 int diskP, diskQ;
823 int data_disks = raid_disks - (level == 5 ? 1: 2);
824
825 if (!tables_ready)
826 make_tables();
827
828 for ( i = 0 ; i < raid_disks ; i++)
829 stripes[i] = stripe_buf + i * chunk_size;
830
831 while (length > 0) {
832 int disk;
833
834 for (i = 0 ; i < raid_disks ; i++) {
835 lseek64(source[i], offsets[i]+start, 0);
836 read(source[i], stripes[i], chunk_size);
837 }
838 for (i = 0 ; i < data_disks ; i++) {
839 int disk = geo_map(i, start/chunk_size, raid_disks,
840 level, layout);
841 blocks[i] = stripes[disk];
842 printf("%d->%d\n", i, disk);
843 }
844 switch(level) {
845 case 6:
846 qsyndrome(p, q, (uint8_t**)blocks, data_disks, chunk_size);
847 diskP = geo_map(-1, start/chunk_size, raid_disks,
848 level, layout);
849 if (memcmp(p, stripes[diskP], chunk_size) != 0) {
850 printf("P(%d) wrong at %llu\n", diskP,
851 start / chunk_size);
852 }
853 diskQ = geo_map(-2, start/chunk_size, raid_disks,
854 level, layout);
855 if (memcmp(q, stripes[diskQ], chunk_size) != 0) {
856 printf("Q(%d) wrong at %llu\n", diskQ,
857 start / chunk_size);
858 }
859 disk = raid6_check_disks(data_disks, start, chunk_size,
860 level, layout, diskP, diskQ,
861 p, q, stripes);
862 if(disk >= 0) {
863 printf("Possible failed disk: %d\n", disk);
864 }
865 if(disk == -2) {
866 printf("Failure detected, but disk unknown\n");
867 }
868 break;
869 }
870 length -= chunk_size;
871 start += chunk_size;
872 }
873 return 0;
874 }
875
/* Parse 'str' as a decimal unsigned number.
 *
 * The whole string must be consumed and must not be empty; otherwise
 * *err is pointed at 'str' and 0 is returned.  *err is left untouched on
 * success, so one 'err' variable can collect failures over several calls.
 */
unsigned long long getnum(char *str, char **err)
{
	char *end;
	unsigned long long value = strtoull(str, &end, 10);

	if (end != str && *end == '\0')
		return value;

	*err = str;
	return 0;
}
886
/* Stand-alone test driver:
 *
 *   test_stripe save/restore/test file raid_disks chunk_size level layout
 *               start length devices...
 *
 * Each device may be given as "path:sectors", in which case the number
 * after the colon (multiplied by 512) is used as the byte offset of the
 * array data on that device.
 *
 *   save    : save_stripes() from the devices into 'file'
 *   restore : restore_stripes() from 'file' onto the devices
 *   test    : test_stripes() on the devices
 *
 * Exit status: 0 on success, 1 for a usage error or a failed operation,
 * 2 for a bad argument, 3 if a file or device cannot be opened.
 */
int main(int argc, char *argv[])
{
	int save;
	int *fds;
	char *file;
	char *buf;
	int storefd;
	unsigned long long *offsets;
	int raid_disks, chunk_size, level, layout;
	unsigned long long start, length;
	int i;

	char *err = NULL;
	if (argc < 10) {
		fprintf(stderr, "Usage: test_stripe save/restore/test file raid_disks"
			" chunk_size level layout start length devices...\n");
		exit(1);
	}
	if (strcmp(argv[1], "save")==0)
		save = 1;
	else if (strcmp(argv[1], "restore") == 0)
		save = 0;
	else if (strcmp(argv[1], "test") == 0)
		save = 2;
	else {
		fprintf(stderr, "test_stripe: must give 'save', 'restore' or 'test'.\n");
		exit(2);
	}

	/* getnum() only sets 'err' on failure, so one check covers all */
	file = argv[2];
	raid_disks = getnum(argv[3], &err);
	chunk_size = getnum(argv[4], &err);
	level = getnum(argv[5], &err);
	layout = getnum(argv[6], &err);
	start = getnum(argv[7], &err);
	length = getnum(argv[8], &err);
	if (err) {
		fprintf(stderr, "test_stripe: Bad number: %s\n", err);
		exit(2);
	}
	/* the stripe routines divide by chunk_size */
	if (chunk_size <= 0) {
		fprintf(stderr, "test_stripe: chunk_size must be positive\n");
		exit(2);
	}
	if (argc != raid_disks + 9) {
		fprintf(stderr, "test_stripe: wrong number of devices: want %d found %d\n",
			raid_disks, argc-9);
		exit(2);
	}
	fds = xmalloc(raid_disks * sizeof(*fds));
	offsets = xcalloc(raid_disks, sizeof(*offsets));

	storefd = open(file, O_RDWR);
	if (storefd < 0) {
		perror(file);
		fprintf(stderr, "test_stripe: could not open %s.\n", file);
		exit(3);
	}
	for (i=0; i<raid_disks; i++) {
		/* split an optional ":sectors" suffix off the device name */
		char *p = strchr(argv[9+i], ':');

		if (p != NULL) {
			*p++ = '\0';
			offsets[i] = atoll(p) * 512;
		}

		fds[i] = open(argv[9+i], O_RDWR);
		if (fds[i] < 0) {
			perror(argv[9+i]);
			fprintf(stderr,"test_stripe: cannot open %s.\n", argv[9+i]);
			exit(3);
		}
	}

	buf = xmalloc(raid_disks * chunk_size);

	if (save == 1) {
		int rv = save_stripes(fds, offsets,
				      raid_disks, chunk_size, level, layout,
				      1, &storefd,
				      start, length, buf);
		if (rv != 0) {
			fprintf(stderr,
				"test_stripe: save_stripes returned %d\n", rv);
			exit(1);
		}
	} else if (save == 2) {
		int rv = test_stripes(fds, offsets,
				      raid_disks, chunk_size, level, layout,
				      start, length);
		if (rv != 0) {
			fprintf(stderr,
				"test_stripe: test_stripes returned %d\n", rv);
			exit(1);
		}
	} else {
		int rv = restore_stripes(fds, offsets,
					 raid_disks, chunk_size, level, layout,
					 storefd, 0ULL,
					 start, length, NULL);
		if (rv != 0) {
			fprintf(stderr,
				"test_stripe: restore_stripes returned %d\n",
				rv);
			exit(1);
		}
	}
	exit(0);
}
995
996
997 void *xmalloc(size_t len)
998 {
999 void *rv = malloc(len);
1000 char *msg;
1001 if (rv)
1002 return rv;
1003 msg = Name ": memory allocation failure - aborting\n";
1004 write(2, msg, strlen(msg));
1005 exit(4);
1006 }
1007
1008 void *xcalloc(size_t num, size_t size)
1009 {
1010 void *rv = calloc(num, size);
1011 char *msg;
1012 if (rv)
1013 return rv;
1014 msg = Name ": memory allocation failure - aborting\n";
1015 write(2, msg, strlen(msg));
1016 exit(4);
1017 }
1018 #endif /* MAIN */