]> git.ipfire.org Git - thirdparty/mdadm.git/blobdiff - restripe.c
sysfs: Avoid if and return on the same line
[thirdparty/mdadm.git] / restripe.c
index 509b45087bed453b6f76aa23e4a2d4639a6d1405..359ae86ed5845d109336639629bc737f3895e39a 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * mdadm - manage Linux "md" devices aka RAID arrays.
  *
- * Copyright (C) 2006 Neil Brown <neilb@suse.de>
+ * Copyright (C) 2006-2009 Neil Brown <neilb@suse.de>
  *
  *
  *    This program is free software; you can redistribute it and/or modify
  */
 
 #include "mdadm.h"
+#include <stdint.h>
 
 /* To restripe, we read from old geometry to a buffer, and
  * read from buffer to new geometry.
- * When reading we don't worry about parity. When writing we do.
+ * When reading, we might have missing devices and so could need
+ * to reconstruct.
+ * When writing, we need to create correct parity and Q.
  *
  */
 
-static int geo_map(int block, unsigned long long stripe, int raid_disks, int level, int layout)
+int geo_map(int block, unsigned long long stripe, int raid_disks,
+                  int level, int layout)
 {
        /* On the given stripe, find which disk in the array will have
         * block numbered 'block'.
@@ -39,51 +43,128 @@ static int geo_map(int block, unsigned long long stripe, int raid_disks, int lev
         */
        int pd;
 
+       /* layout is not relevant for raid0 and raid4 */
+       if ((level == 0) ||
+           (level == 4))
+               layout = 0;
+
        switch(level*100 + layout) {
        case 000:
        case 400:
+       case 500 + ALGORITHM_PARITY_N:
                /* raid 4 isn't messed around by parity blocks */
                if (block == -1)
                        return raid_disks-1; /* parity block */
                return block;
        case 500 + ALGORITHM_LEFT_ASYMMETRIC:
                pd = (raid_disks-1) - stripe % raid_disks;
-               if (block == -1) return pd;
+               if (block == -1)
+                       return pd;
                if (block >= pd)
                        block++;
                return block;
 
        case 500 + ALGORITHM_RIGHT_ASYMMETRIC:
                pd = stripe % raid_disks;
-               if (block == -1) return pd;
+               if (block == -1)
+                       return pd;
                if (block >= pd)
                        block++;
                return block;
 
        case 500 + ALGORITHM_LEFT_SYMMETRIC:
                pd = (raid_disks - 1) - stripe % raid_disks;
-               if (block == -1) return pd;
+               if (block == -1)
+                       return pd;
                return (pd + 1 + block) % raid_disks;
 
        case 500 + ALGORITHM_RIGHT_SYMMETRIC:
                pd = stripe % raid_disks;
-               if (block == -1) return pd;
+               if (block == -1)
+                       return pd;
                return (pd + 1 + block) % raid_disks;
 
+       case 500 + ALGORITHM_PARITY_0:
+               return block + 1;
+
+       case 600 + ALGORITHM_PARITY_N_6:
+               if (block == -2)
+                       return raid_disks - 1;
+               if (block == -1)
+                       return raid_disks - 2; /* parity block */
+               return block;
+       case 600 + ALGORITHM_LEFT_ASYMMETRIC_6:
+               if (block == -2)
+                       return raid_disks - 1;
+               raid_disks--;
+               pd = (raid_disks-1) - stripe % raid_disks;
+               if (block == -1)
+                       return pd;
+               if (block >= pd)
+                       block++;
+               return block;
+
+       case 600 + ALGORITHM_RIGHT_ASYMMETRIC_6:
+               if (block == -2)
+                       return raid_disks - 1;
+               raid_disks--;
+               pd = stripe % raid_disks;
+               if (block == -1)
+                       return pd;
+               if (block >= pd)
+                       block++;
+               return block;
+
+       case 600 + ALGORITHM_LEFT_SYMMETRIC_6:
+               if (block == -2)
+                       return raid_disks - 1;
+               raid_disks--;
+               pd = (raid_disks - 1) - stripe % raid_disks;
+               if (block == -1)
+                       return pd;
+               return (pd + 1 + block) % raid_disks;
+
+       case 600 + ALGORITHM_RIGHT_SYMMETRIC_6:
+               if (block == -2)
+                       return raid_disks - 1;
+               raid_disks--;
+               pd = stripe % raid_disks;
+               if (block == -1)
+                       return pd;
+               return (pd + 1 + block) % raid_disks;
+
+       case 600 + ALGORITHM_PARITY_0_6:
+               if (block == -2)
+                       return raid_disks - 1;
+               return block + 1;
+
+       case 600 + ALGORITHM_PARITY_0:
+               if (block == -1)
+                       return 0;
+               if (block == -2)
+                       return 1;
+               return block + 2;
+
        case 600 + ALGORITHM_LEFT_ASYMMETRIC:
                pd = raid_disks - 1 - (stripe % raid_disks);
-               if (block == -1) return pd;
-               if (block == -2) return (pd+1) % raid_disks;
+               if (block == -1)
+                       return pd;
+               if (block == -2)
+                       return (pd+1) % raid_disks;
                if (pd == raid_disks - 1)
                        return block+1;
                if (block >= pd)
                        return block+2;
                return block;
 
+       case 600 + ALGORITHM_ROTATING_ZERO_RESTART:
+               /* Different order for calculating Q, otherwise same as ... */
        case 600 + ALGORITHM_RIGHT_ASYMMETRIC:
                pd = stripe % raid_disks;
-               if (block == -1) return pd;
-               if (block == -2) return (pd+1) % raid_disks;
+               if (block == -1)
+                       return pd;
+               if (block == -2)
+                       return (pd+1) % raid_disks;
                if (pd == raid_disks - 1)
                        return block+1;
                if (block >= pd)
@@ -92,21 +173,62 @@ static int geo_map(int block, unsigned long long stripe, int raid_disks, int lev
 
        case 600 + ALGORITHM_LEFT_SYMMETRIC:
                pd = raid_disks - 1 - (stripe % raid_disks);
-               if (block == -1) return pd;
-               if (block == -2) return (pd+1) % raid_disks;
+               if (block == -1)
+                       return pd;
+               if (block == -2)
+                       return (pd+1) % raid_disks;
                return (pd + 2 + block) % raid_disks;
 
        case 600 + ALGORITHM_RIGHT_SYMMETRIC:
                pd = stripe % raid_disks;
-               if (block == -1) return pd;
-               if (block == -2) return (pd+1) % raid_disks;
+               if (block == -1)
+                       return pd;
+               if (block == -2)
+                       return (pd+1) % raid_disks;
                return (pd + 2 + block) % raid_disks;
+
+       case 600 + ALGORITHM_ROTATING_N_RESTART:
+               /* Same as left_asymmetric, but first stripe is
+                * D D D P Q  rather than
+                * Q D D D P
+                */
+               pd = raid_disks - 1 - ((stripe + 1) % raid_disks);
+               if (block == -1)
+                       return pd;
+               if (block == -2)
+                       return (pd+1) % raid_disks;
+               if (pd == raid_disks - 1)
+                       return block+1;
+               if (block >= pd)
+                       return block+2;
+               return block;
+
+       case 600 + ALGORITHM_ROTATING_N_CONTINUE:
+               /* Same as left_symmetric but Q is before P */
+               pd = raid_disks - 1 - (stripe % raid_disks);
+               if (block == -1)
+                       return pd;
+               if (block == -2)
+                       return (pd+raid_disks-1) % raid_disks;
+               return (pd + 1 + block) % raid_disks;
        }
        return -1;
 }
 
+/* Tell whether 'layout' is one of the three rotating layouts that this file
+ * treats as DDF layouts.
+ * Returns 1 for ALGORITHM_ROTATING_ZERO_RESTART, ALGORITHM_ROTATING_N_RESTART
+ * and ALGORITHM_ROTATING_N_CONTINUE, and 0 for every other value.
+ */
+int is_ddf(int layout)
+{
+       if (layout == ALGORITHM_ROTATING_ZERO_RESTART ||
+           layout == ALGORITHM_ROTATING_N_RESTART ||
+           layout == ALGORITHM_ROTATING_N_CONTINUE)
+               return 1;
+
+       return 0;
+}
 
-static void xor_blocks(char *target, char **sources, int disks, int size)
+void xor_blocks(char *target, char **sources, int disks, int size)
 {
        int i, j;
        /* Amazingly inefficient... */
@@ -118,10 +240,10 @@ static void xor_blocks(char *target, char **sources, int disks, int size)
        }
 }
 
-static void qsyndrome(char *p, char *q, char **sources, int disks, int size)
+void qsyndrome(uint8_t *p, uint8_t *q, uint8_t **sources, int disks, int size)
 {
        int d, z;
-       char wq0, wp0, wd0, w10, w20;
+       uint8_t wq0, wp0, wd0, w10, w20;
        for ( d = 0; d < size; d++) {
                wq0 = wp0 = sources[disks-1][d];
                for ( z = disks-2 ; z >= 0 ; z-- ) {
@@ -138,50 +260,438 @@ static void qsyndrome(char *p, char *q, char **sources, int disks, int size)
        }
 }
 
-/* Save data:
- * We are given:
- *  A list of 'fds' of the active disks.  For now we require all to be present.
- *  A geometry: raid_disks, chunk_size, level, layout
- *  A list of 'fds' for mirrored targets.  They are already seeked to
- *    right (Write) location
- *  A start and length
+/*
+ * The following was taken from linux/drivers/md/mktables.c, and modified
+ * to create in-memory tables rather than C code
  */
+/* Multiply a by b in GF(2^8): shift-and-add over the bits of b, folding the
+ * constant 0x1d back into a whenever its top bit is shifted out.
+ */
+static uint8_t gfmul(uint8_t a, uint8_t b)
+{
+       uint8_t product = 0;
+
+       for (; b != 0; b >>= 1) {
+               /* decided before a is shifted, applied after */
+               uint8_t reduce = (a & 0x80) ? 0x1d : 0;
+
+               if (b & 1)
+                       product ^= a;
+               a = (uint8_t)(a << 1) ^ reduce;
+       }
+
+       return product;
+}
+
+/* Raise a to the power b, using gfmul() for every product.  The exponent is
+ * first reduced modulo 255 into the range 0..254, so negative values of b
+ * are accepted.
+ */
+static uint8_t gfpow(uint8_t a, int b)
+{
+       uint8_t result = 1;
+       int exponent = b % 255;
+
+       if (exponent < 0)
+               exponent += 255;
+
+       /* square-and-multiply, lowest exponent bit first */
+       for (; exponent != 0; exponent >>= 1) {
+               if (exponent & 1)
+                       result = gfmul(result, a);
+               a = gfmul(a, a);
+       }
+
+       return result;
+}
+
+/* Lookup tables filled in by make_tables(); tables_ready is set to 1 once
+ * they have been built.
+ */
+int tables_ready = 0;
+uint8_t raid6_gfmul[256][256];
+uint8_t raid6_gfexp[256];
+uint8_t raid6_gfinv[256];
+uint8_t raid6_gfexi[256];
+uint8_t raid6_gflog[256];
+uint8_t raid6_gfilog[256];
+
+/* Build all of the raid6_gf* tables in memory, then mark them as ready. */
+void make_tables(void)
+{
+       int x, y;
+       uint8_t power;
+       uint32_t elem, exponent;
+
+       /* Multiplication table: the product of every pair of byte values */
+       for (x = 0; x < 256; x++) {
+               for (y = 0; y < 256; y++)
+                       raid6_gfmul[x][y] = gfmul(x, y);
+       }
+
+       /* Power-of-2 table (exponent) */
+       power = 1;
+       for (x = 0; x < 256; x++) {
+               raid6_gfexp[x] = power;
+               power = gfmul(power, 2);
+               if (power == 1)
+                       power = 0;  /* For entry 255, not a real entry */
+       }
+
+       /* Inverse table x^-1 == x^254 */
+       for (x = 0; x < 256; x++)
+               raid6_gfinv[x] = gfpow(x, 254);
+
+       /* inv(2^x + 1) (exponent-xor-inverse) table */
+       for (x = 0; x < 256; x++)
+               raid6_gfexi[x] = raid6_gfinv[raid6_gfexp[x] ^ 1];
+
+       /* Log and inverse log tables.
+        * Modified code from:
+        *    http://web.eecs.utk.edu/~plank/plank/papers/CS-96-332.html
+        */
+       raid6_gflog[0] = 0;
+       raid6_gfilog[255] = 0;
+
+       elem = 1;
+       exponent = 0;
+       while (exponent < 255) {
+               raid6_gflog[elem] = (uint8_t) exponent;
+               raid6_gfilog[exponent] = (uint8_t) elem;
+               elem <<= 1;
+               if (elem & 256)
+                       elem ^= 0435;
+               exponent++;
+       }
+
+       tables_ready = 1;
+}
+
+/* Shared buffer that the RAID6 recovery code substitutes for missing blocks.
+ * zero_size is the chunk_size it was last allocated with.
+ */
+uint8_t *zero;
+int zero_size;
+
+/* Make sure 'zero' refers to a buffer that was allocated with xcalloc() for
+ * at least chunk_size bytes.  An existing buffer that is already big enough
+ * is kept; otherwise it is released and a new one of exactly chunk_size
+ * bytes is allocated.
+ * NOTE(review): xcalloc() is presumably the abort-on-failure wrapper, which
+ * is why the result is not checked here - confirm.
+ */
+void ensure_zero_has_size(int chunk_size)
+{
+       if (zero != NULL && chunk_size <= zero_size)
+               return;
+
+       /* free(NULL) is a no-op, so no guard is needed on first use */
+       free(zero);
+       zero = xcalloc(1, chunk_size);
+       zero_size = chunk_size;
+}
+
+/* Following was taken from linux/drivers/md/raid6recov.c */
+
+/* Recover two failed data blocks.
+ *
+ * Rebuilds, in place, the two blocks ptrs[faila] and ptrs[failb] from the
+ * other blocks in ptrs together with the P and Q blocks.
+ *
+ *   disks       : the syndrome is computed over the first disks-2 entries
+ *                 of ptrs
+ *   bytes       : number of bytes processed in every block
+ *   faila/failb : indexes into ptrs of the two failed blocks, in any order
+ *   ptrs        : block pointers; the two failed entries are replaced
+ *                 temporarily and put back before returning
+ *   neg_offset  : when non-zero P is ptrs[-1] and Q is ptrs[-2], otherwise
+ *                 P is ptrs[disks-2] and Q is ptrs[disks-1]
+ *
+ * NOTE(review): this reads the global 'zero' buffer and the raid6_gf*
+ * tables, so presumably ensure_zero_has_size() (for at least 'bytes') and
+ * make_tables() must have run first - confirm against the callers.
+ */
+
+void raid6_2data_recov(int disks, size_t bytes, int faila, int failb,
+                      uint8_t **ptrs, int neg_offset)
+{
+       uint8_t *p, *q, *dp, *dq;
+       uint8_t px, qx, db;
+       const uint8_t *pbmul;   /* P multiplier table for B data */
+       const uint8_t *qmul;            /* Q multiplier table (for both) */
+
+       /* Order the two indexes so that faila < failb */
+       if (faila > failb) {
+               int t = faila;
+               faila = failb;
+               failb = t;
+       }
 
+       if (neg_offset) {
+               p = ptrs[-1];
+               q = ptrs[-2];
+       } else {
+               p = ptrs[disks-2];
+               q = ptrs[disks-1];
+       }
+
+       /* Compute syndrome with zero for the missing data pages
+          Use the dead data pages as temporary storage for
+          delta p and delta q */
+       dp = ptrs[faila];
+       ptrs[faila] = zero;
+       dq = ptrs[failb];
+       ptrs[failb] = zero;
+
+       qsyndrome(dp, dq, ptrs, disks-2, bytes);
+
+       /* Restore pointer table */
+       ptrs[faila]   = dp;
+       ptrs[failb]   = dq;
+
+       /* Now, pick the proper data tables */
+       pbmul = raid6_gfmul[raid6_gfexi[failb-faila]];
+       qmul  = raid6_gfmul[raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]]];
+
+       /* Now do it... */
+       while ( bytes-- ) {
+               px    = *p ^ *dp;
+               qx    = qmul[*q ^ *dq];
+               *dq++ = db = pbmul[px] ^ qx; /* Reconstructed B */
+               *dp++ = db ^ px; /* Reconstructed A */
+               p++; q++;
+       }
+}
+
+/* Recover failure of one data block plus the P block.
+ *
+ * Rebuilds the data block ptrs[faila] in place, using the other blocks in
+ * ptrs and the Q block, and rewrites the P block along the way.
+ *
+ *   disks      : the syndrome is computed over the first disks-2 entries
+ *                of ptrs
+ *   bytes      : number of bytes processed in every block
+ *   faila      : index into ptrs of the failed data block
+ *   ptrs       : block pointers; the failed entry is replaced temporarily
+ *                and put back before returning
+ *   neg_offset : when non-zero P is ptrs[-1] and Q is ptrs[-2], otherwise
+ *                P is ptrs[disks-2] and Q is ptrs[disks-1]
+ *
+ * NOTE(review): this reads the global 'zero' buffer and the raid6_gf*
+ * tables, so presumably ensure_zero_has_size() (for at least 'bytes') and
+ * make_tables() must have run first - confirm against the callers.
+ */
+void raid6_datap_recov(int disks, size_t bytes, int faila, uint8_t **ptrs,
+                      int neg_offset)
+{
+       uint8_t *p, *q, *dq;
+       const uint8_t *qmul;            /* Q multiplier table */
+
+       if (neg_offset) {
+               p = ptrs[-1];
+               q = ptrs[-2];
+       } else {
+               p = ptrs[disks-2];
+               q = ptrs[disks-1];
+       }
+
+       /* Compute syndrome with zero for the missing data page
+          Use the dead data page as temporary storage for delta q */
+       dq = ptrs[faila];
+       ptrs[faila] = zero;
+
+       /* The P buffer itself is passed as the first output here, so it is
+        * overwritten by this call */
+       qsyndrome(p, dq, ptrs, disks-2, bytes);
+
+       /* Restore pointer table */
+       ptrs[faila]   = dq;
+
+       /* Now, pick the proper data tables */
+       qmul  = raid6_gfmul[raid6_gfinv[raid6_gfexp[faila]]];
+
+       /* Now do it... */
+       while ( bytes-- ) {
+               /* the recovered byte is stored in *dq and also xor-ed
+                * into *p */
+               *p++ ^= *dq = qmul[*q ^ *dq];
+               q++; dq++;
+       }
+}
+
+/* Try to find out if a specific disk has a problem.
+ *
+ * Every byte of the P and Q blocks held in 'stripes' is compared with the
+ * matching byte of 'p' and 'q'.  A byte where only P differs points at
+ * diskP, one where only Q differs points at diskQ, and one where both
+ * differ points at the data block whose number is
+ * raid6_gflog[Qx] - raid6_gflog[Px] (modulo 255), translated to a disk
+ * number with geo_map().
+ *
+ *   data_disks   : number of data disks; geo_map() is called with
+ *                  data_disks + 2 as the number of raid disks
+ *   start        : start / chunk_size is passed to geo_map() as the stripe
+ *   chunk_size   : number of bytes that are compared
+ *   level, layout: passed through to geo_map()
+ *   diskP, diskQ : indexes into 'stripes' of the P and Q blocks
+ *   p, q         : reference P and Q blocks (presumably freshly computed
+ *                  from the data by the caller - confirm)
+ *   stripes      : the blocks as found on the disks
+ *
+ * Returns:
+ *     -1 : no difference was found
+ *   >= 0 : every difference found points at this single disk
+ *     -2 : the differences do not agree on one disk, or they point at a
+ *          disk number of data_disks + 2 or more
+ *
+ * NOTE(review): uses raid6_gflog[], so make_tables() presumably has to be
+ * called before this function - confirm against the callers.
+ */
+int raid6_check_disks(int data_disks, int start, int chunk_size,
+                     int level, int layout, int diskP, int diskQ,
+                     uint8_t *p, uint8_t *q, char **stripes)
+{
+       int i;
+       int data_id, diskD;
+       uint8_t Px, Qx;
+       int curr_broken_disk = -1;
+       int prev_broken_disk = -1;
+       int broken_status = 0;
+
+       for (i = 0; i < chunk_size; i++) {
+               Px = (uint8_t)stripes[diskP][i] ^ (uint8_t)p[i];
+               Qx = (uint8_t)stripes[diskQ][i] ^ (uint8_t)q[i];
+
+               if (Px != 0 && Qx == 0) {
+                       curr_broken_disk = diskP;
+               } else if (Px == 0 && Qx != 0) {
+                       curr_broken_disk = diskQ;
+               } else if (Px != 0 && Qx != 0) {
+                       data_id = raid6_gflog[Qx] - raid6_gflog[Px];
+                       if (data_id < 0)
+                               data_id += 255;
+                       diskD = geo_map(data_id, start/chunk_size,
+                                       data_disks + 2, level, layout);
+                       curr_broken_disk = diskD;
+               }
+               /* Px == 0 && Qx == 0: this byte is consistent, so the
+                * candidate found so far is kept as it is.
+                */
+
+               if (curr_broken_disk >= data_disks + 2)
+                       broken_status = 2;
+
+               switch (broken_status) {
+               case 0:
+                       /* nothing suspicious seen before this byte */
+                       if (curr_broken_disk != -1) {
+                               prev_broken_disk = curr_broken_disk;
+                               broken_status = 1;
+                       }
+                       break;
+
+               case 1:
+                       /* one candidate so far: it has to stay the same */
+                       if (curr_broken_disk != prev_broken_disk)
+                               broken_status = 2;
+                       break;
+
+               case 2:
+               default:
+                       curr_broken_disk = prev_broken_disk = -2;
+                       break;
+               }
+       }
+
+       /* When the first conflicting byte is the very last byte of the
+        * chunk, status 2 is entered from case 1 and no later iteration
+        * resets the result, so report the conflict explicitly.
+        */
+       if (broken_status == 2)
+               return -2;
+
+       return curr_broken_disk;
+}
+
+/*******************************************************************************
+ * Function:   save_stripes
+ * Description:
+ *     Function reads data (only data without P and Q) from array and writes
+ * it to buf and optionally to backup files
+ * Parameters:
+ *     source          : A list of 'fds' of the active disks.
+ *                       Some may be absent
+ *     offsets         : A list of offsets on disk belonging
+ *                      to the array [bytes]
+ *     raid_disks      : geometry: number of disks in the array
+ *     chunk_size      : geometry: chunk size [bytes]
+ *     level           : geometry: RAID level
+ *     layout          : geometry: layout
+ *     nwrites         : number of backup files
+ *     dest            : A list of 'fds' for mirrored targets
+ *                       (e.g. backup files). They are already seeked to right
+ *                       (write) location. If NULL, data will be written
+ *                       to the buf only
+ *     start           : start address of data to read (must be stripe-aligned)
+ *                       [bytes]
+ *     length          : length of data to read (must be stripe-aligned)
+ *                       [bytes]
+ *     buf             : buffer for data. It is large enough to hold
+ *                       one stripe. It is stripe aligned
+ * Returns:
+ *      0 : success
+ *     -1 : fail
+ ******************************************************************************/
 int save_stripes(int *source, unsigned long long *offsets,
                 int raid_disks, int chunk_size, int level, int layout,
                 int nwrites, int *dest,
-                unsigned long long start, unsigned long long length)
+                unsigned long long start, unsigned long long length,
+                char *buf)
 {
-       char abuf[8192+512];
-       char *buf = (char*)(((unsigned long)abuf+511)&~511UL);
-       int cpos = start % chunk_size; /* where in chunk we are up to */
        int len;
        int data_disks = raid_disks - (level == 0 ? 0 : level <=5 ? 1 : 2);
        int disk;
+       int i;
+       unsigned long long length_test;
+
+       if (!tables_ready)
+               make_tables();
+       ensure_zero_has_size(chunk_size);
+
+       len = data_disks * chunk_size;
+       length_test = length / len;
+       length_test *= len;
+
+       if (length != length_test) {
+               dprintf("Error: save_stripes(): Data are not alligned. EXIT\n");
+               dprintf("\tArea for saving stripes (length) = %llu\n", length);
+               dprintf("\tWork step (len)                  = %i\n", len);
+               dprintf("\tExpected save area (length_test) = %llu\n",
+                       length_test);
+               abort();
+       }
 
        while (length > 0) {
-               unsigned long long offset;
-               int i;
-               len = chunk_size - cpos;
-               if (len > 8192) len = 8192;
-               if (len > length) len = length;
-               /* len bytes to be moved from one device */
-
-               offset = (start/chunk_size/data_disks)*chunk_size + cpos;
-               disk = start/chunk_size % data_disks;
-               disk = geo_map(disk, start/chunk_size/data_disks,
-                              raid_disks, level, layout);
-               if (lseek64(source[disk], offsets[disk]+offset, 0) < 0)
-                       return -1;
-               if (read(source[disk], buf, len) != len)
+               int failed = 0;
+               int fdisk[3], fblock[3];
+               for (disk = 0; disk < raid_disks ; disk++) {
+                       unsigned long long offset;
+                       int dnum;
+
+                       offset = (start/chunk_size/data_disks)*chunk_size;
+                       dnum = geo_map(disk < data_disks ? disk : data_disks - disk - 1,
+                                      start/chunk_size/data_disks,
+                                      raid_disks, level, layout);
+                       if (dnum < 0) abort();
+                       if (source[dnum] < 0 ||
+                           lseek64(source[dnum], offsets[dnum]+offset, 0) < 0 ||
+                           read(source[dnum], buf+disk * chunk_size, chunk_size)
+                           != chunk_size)
+                               if (failed <= 2) {
+                                       fdisk[failed] = dnum;
+                                       fblock[failed] = disk;
+                                       failed++;
+                               }
+               }
+               if (failed == 0 || fblock[0] >= data_disks)
+                       /* all data disks are good */
+                       ;
+               else if (failed == 1 || fblock[1] >= data_disks+1) {
+                       /* one failed data disk and good parity */
+                       char *bufs[data_disks];
+                       for (i=0; i < data_disks; i++)
+                               if (fblock[0] == i)
+                                       bufs[i] = buf + data_disks*chunk_size;
+                               else
+                                       bufs[i] = buf + i*chunk_size;
+
+                       xor_blocks(buf + fblock[0]*chunk_size,
+                                  bufs, data_disks, chunk_size);
+               } else if (failed > 2 || level != 6)
+                       /* too much failure */
                        return -1;
-               for (i=0; i<nwrites; i++)
-                       if (write(dest[i], buf, len) != len)
-                               return -1;
+               else {
+                       /* RAID6 computations needed. */
+                       uint8_t *bufs[data_disks+4];
+                       int qdisk;
+                       int syndrome_disks;
+                       disk = geo_map(-1, start/chunk_size/data_disks,
+                                      raid_disks, level, layout);
+                       qdisk = geo_map(-2, start/chunk_size/data_disks,
+                                      raid_disks, level, layout);
+                       if (is_ddf(layout)) {
+                               /* q over 'raid_disks' blocks, in device order.
+                                * 'p' and 'q' get to be all zero
+                                */
+                               for (i = 0; i < raid_disks; i++)
+                                       bufs[i] = zero;
+                               for (i = 0; i < data_disks; i++) {
+                                       int dnum = geo_map(i,
+                                                          start/chunk_size/data_disks,
+                                                          raid_disks, level, layout);
+                                       int snum;
+                                       /* i is the logical block number, so is index to 'buf'.
+                                        * dnum is physical disk number
+                                        * and thus the syndrome number.
+                                        */
+                                       snum = dnum;
+                                       bufs[snum] = (uint8_t*)buf + chunk_size * i;
+                               }
+                               syndrome_disks = raid_disks;
+                       } else {
+                               /* for md, q is over 'data_disks' blocks,
+                                * starting immediately after 'q'
+                                * Note that for the '_6' variety, the p block
+                                * makes a hole that we need to be careful of.
+                                */
+                               int j;
+                               int snum = 0;
+                               for (j = 0; j < raid_disks; j++) {
+                                       int dnum = (qdisk + 1 + j) % raid_disks;
+                                       if (dnum == disk || dnum == qdisk)
+                                               continue;
+                                       for (i = 0; i < data_disks; i++)
+                                               if (geo_map(i,
+                                                           start/chunk_size/data_disks,
+                                                           raid_disks, level, layout) == dnum)
+                                                       break;
+                                       /* i is the logical block number, so is index to 'buf'.
+                                        * dnum is physical disk number
+                                        * snum is syndrome disk for which 0 is immediately after Q
+                                        */
+                                       bufs[snum] = (uint8_t*)buf + chunk_size * i;
+
+                                       if (fblock[0] == i)
+                                               fdisk[0] = snum;
+                                       if (fblock[1] == i)
+                                               fdisk[1] = snum;
+                                       snum++;
+                               }
+
+                               syndrome_disks = data_disks;
+                       }
+
+                       /* Place P and Q blocks at end of bufs */
+                       bufs[syndrome_disks] = (uint8_t*)buf + chunk_size * data_disks;
+                       bufs[syndrome_disks+1] = (uint8_t*)buf + chunk_size * (data_disks+1);
+
+                       if (fblock[1] == data_disks)
+                               /* One data failed, and parity failed */
+                               raid6_datap_recov(syndrome_disks+2, chunk_size,
+                                                 fdisk[0], bufs, 0);
+                       else {
+                               /* Two data blocks failed, P,Q OK */
+                               raid6_2data_recov(syndrome_disks+2, chunk_size,
+                                                 fdisk[0], fdisk[1], bufs, 0);
+                       }
+               }
+               if (dest) {
+                       for (i = 0; i < nwrites; i++)
+                               if (write(dest[i], buf, len) != len)
+                                       return -1;
+               } else {
+                       /* build next stripe in buffer */
+                       buf += len;
+               }
                length -= len;
                start += len;
-               cpos += len;
-               while (cpos >= chunk_size) cpos -= chunk_size;
        }
        return 0;
 }
@@ -200,37 +710,65 @@ int save_stripes(int *source, unsigned long long *offsets,
 int restore_stripes(int *dest, unsigned long long *offsets,
                    int raid_disks, int chunk_size, int level, int layout,
                    int source, unsigned long long read_offset,
-                   unsigned long long start, unsigned long long length)
+                   unsigned long long start, unsigned long long length,
+                   char *src_buf)
 {
-       char *stripe_buf = malloc(raid_disks * chunk_size);
-       char **stripes = malloc(raid_disks * sizeof(char*));
-       char **blocks = malloc(raid_disks * sizeof(char*));
+       char *stripe_buf;
+       char **stripes = xmalloc(raid_disks * sizeof(char*));
+       char **blocks = xmalloc(raid_disks * sizeof(char*));
        int i;
+       int rv;
 
-       int data_disks = raid_disks - (level == 0 ? 0 : level <=5 ? 1 : 2);
+       int data_disks = raid_disks - (level == 0 ? 0 : level <= 5 ? 1 : 2);
+
+       if (posix_memalign((void**)&stripe_buf, 4096, raid_disks * chunk_size))
+               stripe_buf = NULL;
+
+       if (zero == NULL || chunk_size > zero_size) {
+               if (zero)
+                       free(zero);
+               zero = xcalloc(1, chunk_size);
+               zero_size = chunk_size;
+       }
 
-       if (stripe_buf == NULL || stripes == NULL || blocks == NULL) {
-               free(stripe_buf);
-               free(stripes);
-               free(blocks);
-               return -2;
+       if (stripe_buf == NULL || stripes == NULL || blocks == NULL
+           || zero == NULL) {
+               rv = -2;
+               goto abort;
        }
-       for (i=0; i<raid_disks; i++)
+       for (i = 0; i < raid_disks; i++)
                stripes[i] = stripe_buf + i * chunk_size;
        while (length > 0) {
-               int len = data_disks * chunk_size;
+               unsigned int len = data_disks * chunk_size;
                unsigned long long offset;
                int disk, qdisk;
-               if (length < len)
-                       return -3;
-               for (i=0; i < data_disks; i++) {
+               int syndrome_disks;
+               if (length < len) {
+                       rv = -3;
+                       goto abort;
+               }
+               for (i = 0; i < data_disks; i++) {
                        int disk = geo_map(i, start/chunk_size/data_disks,
                                           raid_disks, level, layout);
-                       blocks[i] = stripes[disk];
-                       if (lseek64(source, read_offset, 0) != read_offset)
-                               return -1;
-                       if (read(source, stripes[disk], chunk_size) != chunk_size)
-                               return -1;
+                       if (src_buf == NULL) {
+                               /* read from file */
+                               if (lseek64(source, read_offset, 0) !=
+                                        (off64_t)read_offset) {
+                                       rv = -1;
+                                       goto abort;
+                               }
+                               if (read(source,
+                                        stripes[disk],
+                                        chunk_size) != chunk_size) {
+                                       rv = -1;
+                                       goto abort;
+                               }
+                       } else {
+                               /* read from input buffer */
+                               memcpy(stripes[disk],
+                                      src_buf + read_offset,
+                                      chunk_size);
+                       }
                        read_offset += chunk_size;
                }
                /* We have the data, now do the parity */
@@ -240,6 +778,8 @@ int restore_stripes(int *dest, unsigned long long *offsets,
                case 5:
                        disk = geo_map(-1, start/chunk_size/data_disks,
                                           raid_disks, level, layout);
+                       for (i = 0; i < data_disks; i++)
+                               blocks[i] = stripes[(disk+1+i) % raid_disks];
                        xor_blocks(stripes[disk], blocks, data_disks, chunk_size);
                        break;
                case 6:
@@ -247,22 +787,54 @@ int restore_stripes(int *dest, unsigned long long *offsets,
                                       raid_disks, level, layout);
                        qdisk = geo_map(-2, start/chunk_size/data_disks,
                                       raid_disks, level, layout);
+                       if (is_ddf(layout)) {
+                               /* q over 'raid_disks' blocks, in device order.
+                                * 'p' and 'q' get to be all zero
+                                */
+                               for (i = 0; i < raid_disks; i++)
+                                       if (i == disk || i == qdisk)
+                                               blocks[i] = (char*)zero;
+                                       else
+                                               blocks[i] = stripes[i];
+                               syndrome_disks = raid_disks;
+                       } else {
+                               /* for md, q is over 'data_disks' blocks,
+                                * starting immediately after 'q'
+                                */
+                               for (i = 0; i < data_disks; i++)
+                                       blocks[i] = stripes[(qdisk+1+i) % raid_disks];
 
-                       qsyndrome(stripes[disk], stripes[qdisk], blocks,
-                                 data_disks, chunk_size);
+                               syndrome_disks = data_disks;
+                       }
+                       qsyndrome((uint8_t*)stripes[disk],
+                                 (uint8_t*)stripes[qdisk],
+                                 (uint8_t**)blocks,
+                                 syndrome_disks, chunk_size);
                        break;
                }
                for (i=0; i < raid_disks ; i++)
                        if (dest[i] >= 0) {
-                               if (lseek64(dest[i], offsets[i]+offset, 0) < 0)
-                                       return -1;
-                               if (write(dest[i], stripes[i], chunk_size) != chunk_size)
-                                       return -1;
+                               if (lseek64(dest[i],
+                                        offsets[i]+offset, 0) < 0) {
+                                       rv = -1;
+                                       goto abort;
+                               }
+                               if (write(dest[i], stripes[i],
+                                        chunk_size) != chunk_size) {
+                                       rv = -1;
+                                       goto abort;
+                               }
                        }
                length -= len;
                start += len;
        }
-       return 0;
+       rv = 0;
+
+abort:
+       free(stripe_buf);
+       free(stripes);
+       free(blocks);
+       return rv;
 }
 
 #ifdef MAIN
@@ -272,14 +844,19 @@ int test_stripes(int *source, unsigned long long *offsets,
                 unsigned long long start, unsigned long long length)
 {
        /* ready the data and p (and q) blocks, and check we got them right */
-       char *stripe_buf = malloc(raid_disks * chunk_size);
-       char **stripes = malloc(raid_disks * sizeof(char*));
-       char **blocks = malloc(raid_disks * sizeof(char*));
-       char *p = malloc(chunk_size);
-       char *q = malloc(chunk_size);
+       char *stripe_buf = xmalloc(raid_disks * chunk_size);
+       char **stripes = xmalloc(raid_disks * sizeof(char*));
+       char **blocks = xmalloc(raid_disks * sizeof(char*));
+       uint8_t *p = xmalloc(chunk_size);
+       uint8_t *q = xmalloc(chunk_size);
 
        int i;
+       int diskP, diskQ;
        int data_disks = raid_disks - (level == 5 ? 1: 2);
+
+       if (!tables_ready)
+               make_tables();
+
        for ( i = 0 ; i < raid_disks ; i++)
                stripes[i] = stripe_buf + i * chunk_size;
 
@@ -298,19 +875,28 @@ int test_stripes(int *source, unsigned long long *offsets,
                }
                switch(level) {
                case 6:
-                       qsyndrome(p, q, blocks, data_disks, chunk_size);
-                       disk = geo_map(-1, start/chunk_size, raid_disks,
+                       qsyndrome(p, q, (uint8_t**)blocks, data_disks, chunk_size);
+                       diskP = geo_map(-1, start/chunk_size, raid_disks,
                                       level, layout);
-                       if (memcmp(p, stripes[disk], chunk_size) != 0) {
-                               printf("P(%d) wrong at %llu\n", disk,
+                       if (memcmp(p, stripes[diskP], chunk_size) != 0) {
+                               printf("P(%d) wrong at %llu\n", diskP,
                                       start / chunk_size);
                        }
-                       disk = geo_map(-2, start/chunk_size, raid_disks,
+                       diskQ = geo_map(-2, start/chunk_size, raid_disks,
                                       level, layout);
-                       if (memcmp(q, stripes[disk], chunk_size) != 0) {
-                               printf("Q(%d) wrong at %llu\n", disk,
+                       if (memcmp(q, stripes[diskQ], chunk_size) != 0) {
+                               printf("Q(%d) wrong at %llu\n", diskQ,
                                       start / chunk_size);
                        }
+                       disk = raid6_check_disks(data_disks, start, chunk_size,
+                                                level, layout, diskP, diskQ,
+                                                p, q, stripes);
+                       if(disk >= 0) {
+                         printf("Possible failed disk: %d\n", disk);
+                       }
+                       if(disk == -2) {
+                         printf("Failure detected, but disk unknown\n");
+                       }
                        break;
                }
                length -= chunk_size;
@@ -330,13 +916,15 @@ unsigned long long getnum(char *str, char **err)
        return rv;
 }
 
-main(int argc, char *argv[])
+char const Name[] = "test_restripe";
+int main(int argc, char *argv[])
 {
        /* save/restore file raid_disks chunk_size level layout start length devices...
         */
        int save;
        int *fds;
        char *file;
+       char *buf;
        int storefd;
        unsigned long long *offsets;
        int raid_disks, chunk_size, level, layout;
@@ -345,8 +933,7 @@ main(int argc, char *argv[])
 
        char *err = NULL;
        if (argc < 10) {
-               fprintf(stderr, "Usage: test_stripe save/restore file raid_disks"
-                       " chunk_size level layout start length devices...\n");
+               fprintf(stderr, "Usage: test_stripe save/restore file raid_disks chunk_size level layout start length devices...\n");
                exit(1);
        }
        if (strcmp(argv[1], "save")==0)
@@ -376,9 +963,8 @@ main(int argc, char *argv[])
                        raid_disks, argc-9);
                exit(2);
        }
-       fds = malloc(raid_disks * sizeof(*fds));
-       offsets = malloc(raid_disks * sizeof(*offsets));
-       memset(offsets, 0, raid_disks * sizeof(*offsets));
+       fds = xmalloc(raid_disks * sizeof(*fds));
+       offsets = xcalloc(raid_disks, sizeof(*offsets));
 
        storefd = open(file, O_RDWR);
        if (storefd < 0) {
@@ -387,6 +973,14 @@ main(int argc, char *argv[])
                exit(3);
        }
        for (i=0; i<raid_disks; i++) {
+               char *p;
+               p = strchr(argv[9+i], ':');
+
+               if(p != NULL) {
+                       *p++ = '\0';
+                       offsets[i] = atoll(p) * 512;
+               }
+
                fds[i] = open(argv[9+i], O_RDWR);
                if (fds[i] < 0) {
                        perror(argv[9+i]);
@@ -395,11 +989,13 @@ main(int argc, char *argv[])
                }
        }
 
+       buf = xmalloc(raid_disks * chunk_size);
+
        if (save == 1) {
                int rv = save_stripes(fds, offsets,
                                      raid_disks, chunk_size, level, layout,
                                      1, &storefd,
-                                     start, length);
+                                     start, length, buf);
                if (rv != 0) {
                        fprintf(stderr,
                                "test_stripe: save_stripes returned %d\n", rv);
@@ -418,7 +1014,7 @@ main(int argc, char *argv[])
                int rv = restore_stripes(fds, offsets,
                                         raid_disks, chunk_size, level, layout,
                                         storefd, 0ULL,
-                                        start, length);
+                                        start, length, NULL);
                if (rv != 0) {
                        fprintf(stderr,
                                "test_stripe: restore_stripes returned %d\n",