*
*/
-static int geo_map(int block, unsigned long long stripe, int raid_disks,
+int geo_map(int block, unsigned long long stripe, int raid_disks,
int level, int layout)
{
/* On the given stripe, find which disk in the array will have
*/
int pd;
+ /* layout is not relevant for raid0 and raid4 */
+ if ((level == 0) ||
+ (level == 4))
+ layout = 0;
+
switch(level*100 + layout) {
case 000:
case 400:
return block;
case 500 + ALGORITHM_LEFT_ASYMMETRIC:
pd = (raid_disks-1) - stripe % raid_disks;
- if (block == -1) return pd;
+ if (block == -1)
+ return pd;
if (block >= pd)
block++;
return block;
case 500 + ALGORITHM_RIGHT_ASYMMETRIC:
pd = stripe % raid_disks;
- if (block == -1) return pd;
+ if (block == -1)
+ return pd;
if (block >= pd)
block++;
return block;
case 500 + ALGORITHM_LEFT_SYMMETRIC:
pd = (raid_disks - 1) - stripe % raid_disks;
- if (block == -1) return pd;
+ if (block == -1)
+ return pd;
return (pd + 1 + block) % raid_disks;
case 500 + ALGORITHM_RIGHT_SYMMETRIC:
pd = stripe % raid_disks;
- if (block == -1) return pd;
+ if (block == -1)
+ return pd;
return (pd + 1 + block) % raid_disks;
case 500 + ALGORITHM_PARITY_0:
return block + 1;
-
case 600 + ALGORITHM_PARITY_N_6:
if (block == -2)
return raid_disks - 1;
return raid_disks - 1;
raid_disks--;
pd = (raid_disks-1) - stripe % raid_disks;
- if (block == -1) return pd;
+ if (block == -1)
+ return pd;
if (block >= pd)
block++;
return block;
return raid_disks - 1;
raid_disks--;
pd = stripe % raid_disks;
- if (block == -1) return pd;
+ if (block == -1)
+ return pd;
if (block >= pd)
block++;
return block;
return raid_disks - 1;
raid_disks--;
pd = (raid_disks - 1) - stripe % raid_disks;
- if (block == -1) return pd;
+ if (block == -1)
+ return pd;
return (pd + 1 + block) % raid_disks;
case 600 + ALGORITHM_RIGHT_SYMMETRIC_6:
return raid_disks - 1;
raid_disks--;
pd = stripe % raid_disks;
- if (block == -1) return pd;
+ if (block == -1)
+ return pd;
return (pd + 1 + block) % raid_disks;
case 600 + ALGORITHM_PARITY_0_6:
return raid_disks - 1;
return block + 1;
-
case 600 + ALGORITHM_PARITY_0:
if (block == -1)
return 0;
case 600 + ALGORITHM_LEFT_ASYMMETRIC:
pd = raid_disks - 1 - (stripe % raid_disks);
- if (block == -1) return pd;
- if (block == -2) return (pd+1) % raid_disks;
+ if (block == -1)
+ return pd;
+ if (block == -2)
+ return (pd+1) % raid_disks;
if (pd == raid_disks - 1)
return block+1;
if (block >= pd)
/* Different order for calculating Q, otherwize same as ... */
case 600 + ALGORITHM_RIGHT_ASYMMETRIC:
pd = stripe % raid_disks;
- if (block == -1) return pd;
- if (block == -2) return (pd+1) % raid_disks;
+ if (block == -1)
+ return pd;
+ if (block == -2)
+ return (pd+1) % raid_disks;
if (pd == raid_disks - 1)
return block+1;
if (block >= pd)
case 600 + ALGORITHM_LEFT_SYMMETRIC:
pd = raid_disks - 1 - (stripe % raid_disks);
- if (block == -1) return pd;
- if (block == -2) return (pd+1) % raid_disks;
+ if (block == -1)
+ return pd;
+ if (block == -2)
+ return (pd+1) % raid_disks;
return (pd + 2 + block) % raid_disks;
case 600 + ALGORITHM_RIGHT_SYMMETRIC:
pd = stripe % raid_disks;
- if (block == -1) return pd;
- if (block == -2) return (pd+1) % raid_disks;
+ if (block == -1)
+ return pd;
+ if (block == -2)
+ return (pd+1) % raid_disks;
return (pd + 2 + block) % raid_disks;
-
case 600 + ALGORITHM_ROTATING_N_RESTART:
/* Same a left_asymmetric, by first stripe is
* D D D P Q rather than
* Q D D D P
*/
pd = raid_disks - 1 - ((stripe + 1) % raid_disks);
- if (block == -1) return pd;
- if (block == -2) return (pd+1) % raid_disks;
+ if (block == -1)
+ return pd;
+ if (block == -2)
+ return (pd+1) % raid_disks;
if (pd == raid_disks - 1)
return block+1;
if (block >= pd)
case 600 + ALGORITHM_ROTATING_N_CONTINUE:
/* Same as left_symmetric but Q is before P */
pd = raid_disks - 1 - (stripe % raid_disks);
- if (block == -1) return pd;
- if (block == -2) return (pd+raid_disks-1) % raid_disks;
+ if (block == -1)
+ return pd;
+ if (block == -2)
+ return (pd+raid_disks-1) % raid_disks;
return (pd + 1 + block) % raid_disks;
}
return -1;
}
-static int is_ddf(int layout)
+
+int is_ddf(int layout)
{
switch (layout)
{
}
}
-
-static void xor_blocks(char *target, char **sources, int disks, int size)
+void xor_blocks(char *target, char **sources, int disks, int size)
{
int i, j;
/* Amazingly inefficient... */
}
}
-static void qsyndrome(uint8_t *p, uint8_t *q, uint8_t **sources, int disks, int size)
+void qsyndrome(uint8_t *p, uint8_t *q, uint8_t **sources, int disks, int size)
{
int d, z;
uint8_t wq0, wp0, wd0, w10, w20;
}
}
-
/*
* The following was taken from linux/drivers/md/mktables.c, and modified
* to create in-memory tables rather than C code
uint8_t raid6_gfexp[256];
uint8_t raid6_gfinv[256];
uint8_t raid6_gfexi[256];
+uint8_t raid6_gflog[256];
+uint8_t raid6_gfilog[256];
void make_tables(void)
{
int i, j;
uint8_t v;
+ uint32_t b, log;
/* Compute multiplication table */
for (i = 0; i < 256; i++)
for (i = 0; i < 256; i ++)
raid6_gfexi[i] = raid6_gfinv[raid6_gfexp[i] ^ 1];
+ /* Compute log and inverse log */
+ /* Modified code from:
+ * http://web.eecs.utk.edu/~plank/plank/papers/CS-96-332.html
+ */
+ b = 1;
+ raid6_gflog[0] = 0;
+ raid6_gfilog[255] = 0;
+
+ for (log = 0; log < 255; log++) {
+ raid6_gflog[b] = (uint8_t) log;
+ raid6_gfilog[log] = (uint8_t) b;
+ b = b << 1;
+ if (b & 256) b = b ^ 0435;
+ }
+
tables_ready = 1;
}
uint8_t *zero;
+int zero_size;
+
+void ensure_zero_has_size(int chunk_size)
+{
+ if (zero == NULL || chunk_size > zero_size) {
+ if (zero)
+ free(zero);
+ zero = xcalloc(1, chunk_size);
+ zero_size = chunk_size;
+ }
+}
+
/* Following was taken from linux/drivers/md/raid6recov.c */
/* Recover two failed data blocks. */
+
void raid6_2data_recov(int disks, size_t bytes, int faila, int failb,
- uint8_t **ptrs)
+ uint8_t **ptrs, int neg_offset)
{
uint8_t *p, *q, *dp, *dq;
uint8_t px, qx, db;
const uint8_t *pbmul; /* P multiplier table for B data */
const uint8_t *qmul; /* Q multiplier table (for both) */
- p = ptrs[disks-2];
- q = ptrs[disks-1];
+ if (faila > failb) {
+ int t = faila;
+ faila = failb;
+ failb = t;
+ }
+
+ if (neg_offset) {
+ p = ptrs[-1];
+ q = ptrs[-2];
+ } else {
+ p = ptrs[disks-2];
+ q = ptrs[disks-1];
+ }
/* Compute syndrome with zero for the missing data pages
Use the dead data pages as temporary storage for
}
/* Recover failure of one data block plus the P block */
-void raid6_datap_recov(int disks, size_t bytes, int faila, uint8_t **ptrs)
+void raid6_datap_recov(int disks, size_t bytes, int faila, uint8_t **ptrs,
+ int neg_offset)
{
uint8_t *p, *q, *dq;
const uint8_t *qmul; /* Q multiplier table */
- p = ptrs[disks-2];
- q = ptrs[disks-1];
+ if (neg_offset) {
+ p = ptrs[-1];
+ q = ptrs[-2];
+ } else {
+ p = ptrs[disks-2];
+ q = ptrs[disks-1];
+ }
/* Compute syndrome with zero for the missing data page
Use the dead data page as temporary storage for delta q */
}
}
-/* Save data:
- * We are given:
- * A list of 'fds' of the active disks. Some may be absent.
- * A geometry: raid_disks, chunk_size, level, layout
- * A list of 'fds' for mirrored targets. They are already seeked to
- * right (Write) location
- * A start and length which must be stripe-aligned
- * 'buf' is large enough to hold one stripe, and is aligned
- */
+/* Try to find out if a specific disk has a problem */
+int raid6_check_disks(int data_disks, int start, int chunk_size,
+ int level, int layout, int diskP, int diskQ,
+ uint8_t *p, uint8_t *q, char **stripes)
+{
+ int i;
+ int data_id, diskD;
+ uint8_t Px, Qx;
+ int curr_broken_disk = -1;
+ int prev_broken_disk = -1;
+ int broken_status = 0;
+
+ for(i = 0; i < chunk_size; i++) {
+ Px = (uint8_t)stripes[diskP][i] ^ (uint8_t)p[i];
+ Qx = (uint8_t)stripes[diskQ][i] ^ (uint8_t)q[i];
+
+ if((Px != 0) && (Qx == 0))
+ curr_broken_disk = diskP;
+
+ if((Px == 0) && (Qx != 0))
+ curr_broken_disk = diskQ;
+
+ if((Px != 0) && (Qx != 0)) {
+ data_id = (raid6_gflog[Qx] - raid6_gflog[Px]);
+ if(data_id < 0) data_id += 255;
+ diskD = geo_map(data_id, start/chunk_size,
+ data_disks + 2, level, layout);
+ curr_broken_disk = diskD;
+ }
+
+ if((Px == 0) && (Qx == 0))
+ curr_broken_disk = prev_broken_disk;
+
+ if(curr_broken_disk >= data_disks + 2)
+ broken_status = 2;
+
+ switch(broken_status) {
+ case 0:
+ if(curr_broken_disk != -1) {
+ prev_broken_disk = curr_broken_disk;
+ broken_status = 1;
+ }
+ break;
+
+ case 1:
+ if(curr_broken_disk != prev_broken_disk)
+ broken_status = 2;
+ break;
+ case 2:
+ default:
+ curr_broken_disk = prev_broken_disk = -2;
+ break;
+ }
+ }
+
+ return curr_broken_disk;
+}
+
+/*******************************************************************************
+ * Function: save_stripes
+ * Description:
+ * Function reads data (only data without P and Q) from array and writes
+ * it to buf and opcjonaly to backup files
+ * Parameters:
+ * source : A list of 'fds' of the active disks.
+ * Some may be absent
+ * offsets : A list of offsets on disk belonging
+ * to the array [bytes]
+ * raid_disks : geometry: number of disks in the array
+ * chunk_size : geometry: chunk size [bytes]
+ * level : geometry: RAID level
+ * layout : geometry: layout
+ * nwrites : number of backup files
+ * dest : A list of 'fds' for mirrored targets
+ * (e.g. backup files). They are already seeked to right
+ * (write) location. If NULL, data will be wrote
+ * to the buf only
+ * start : start address of data to read (must be stripe-aligned)
+ * [bytes]
+ * length - : length of data to read (must be stripe-aligned)
+ * [bytes]
+ * buf : buffer for data. It is large enough to hold
+ * one stripe. It is stripe aligned
+ * Returns:
+ * 0 : success
+ * -1 : fail
+ ******************************************************************************/
int save_stripes(int *source, unsigned long long *offsets,
int raid_disks, int chunk_size, int level, int layout,
int nwrites, int *dest,
int data_disks = raid_disks - (level == 0 ? 0 : level <=5 ? 1 : 2);
int disk;
int i;
+ unsigned long long length_test;
if (!tables_ready)
make_tables();
+ ensure_zero_has_size(chunk_size);
- if (zero == NULL) {
- zero = malloc(chunk_size);
- memset(zero, 0, chunk_size);
+ len = data_disks * chunk_size;
+ length_test = length / len;
+ length_test *= len;
+
+ if (length != length_test) {
+ dprintf("Error: save_stripes(): Data are not alligned. EXIT\n");
+ dprintf("\tArea for saving stripes (length) = %llu\n", length);
+ dprintf("\tWork step (len) = %i\n", len);
+ dprintf("\tExpected save area (length_test) = %llu\n",
+ length_test);
+ abort();
}
- len = data_disks * chunk_size;
while (length > 0) {
int failed = 0;
int fdisk[3], fblock[3];
if (fblock[1] == data_disks)
/* One data failed, and parity failed */
raid6_datap_recov(syndrome_disks+2, chunk_size,
- fdisk[0], bufs);
+ fdisk[0], bufs, 0);
else {
- if (fdisk[0] > fdisk[1]) {
- int t = fdisk[0];
- fdisk[0] = fdisk[1];
- fdisk[1] = t;
- }
/* Two data blocks failed, P,Q OK */
raid6_2data_recov(syndrome_disks+2, chunk_size,
- fdisk[0], fdisk[1], bufs);
+ fdisk[0], fdisk[1], bufs, 0);
}
}
-
- for (i=0; i<nwrites; i++)
- if (write(dest[i], buf, len) != len)
- return -1;
-
+ if (dest) {
+ for (i = 0; i < nwrites; i++)
+ if (write(dest[i], buf, len) != len)
+ return -1;
+ } else {
+ /* build next stripe in buffer */
+ buf += len;
+ }
length -= len;
start += len;
}
int restore_stripes(int *dest, unsigned long long *offsets,
int raid_disks, int chunk_size, int level, int layout,
int source, unsigned long long read_offset,
- unsigned long long start, unsigned long long length)
+ unsigned long long start, unsigned long long length,
+ char *src_buf)
{
char *stripe_buf;
- char **stripes = malloc(raid_disks * sizeof(char*));
- char **blocks = malloc(raid_disks * sizeof(char*));
+ char **stripes = xmalloc(raid_disks * sizeof(char*));
+ char **blocks = xmalloc(raid_disks * sizeof(char*));
int i;
+ int rv;
int data_disks = raid_disks - (level == 0 ? 0 : level <= 5 ? 1 : 2);
if (posix_memalign((void**)&stripe_buf, 4096, raid_disks * chunk_size))
stripe_buf = NULL;
- if (zero == NULL) {
- zero = malloc(chunk_size);
+
+ if (zero == NULL || chunk_size > zero_size) {
if (zero)
- memset(zero, 0, chunk_size);
+ free(zero);
+ zero = xcalloc(1, chunk_size);
+ zero_size = chunk_size;
}
+
if (stripe_buf == NULL || stripes == NULL || blocks == NULL
|| zero == NULL) {
- free(stripe_buf);
- free(stripes);
- free(blocks);
- free(zero);
- return -2;
+ rv = -2;
+ goto abort;
}
- for (i=0; i<raid_disks; i++)
+ for (i = 0; i < raid_disks; i++)
stripes[i] = stripe_buf + i * chunk_size;
while (length > 0) {
- int len = data_disks * chunk_size;
+ unsigned int len = data_disks * chunk_size;
unsigned long long offset;
int disk, qdisk;
int syndrome_disks;
- if (length < len)
- return -3;
- for (i=0; i < data_disks; i++) {
+ if (length < len) {
+ rv = -3;
+ goto abort;
+ }
+ for (i = 0; i < data_disks; i++) {
int disk = geo_map(i, start/chunk_size/data_disks,
raid_disks, level, layout);
- if (lseek64(source, read_offset, 0) != read_offset)
- return -1;
- if (read(source, stripes[disk], chunk_size) != chunk_size)
- return -1;
+ if (src_buf == NULL) {
+ /* read from file */
+ if (lseek64(source, read_offset, 0) !=
+ (off64_t)read_offset) {
+ rv = -1;
+ goto abort;
+ }
+ if (read(source,
+ stripes[disk],
+ chunk_size) != chunk_size) {
+ rv = -1;
+ goto abort;
+ }
+ } else {
+ /* read from input buffer */
+ memcpy(stripes[disk],
+ src_buf + read_offset,
+ chunk_size);
+ }
read_offset += chunk_size;
}
/* We have the data, now do the parity */
syndrome_disks = data_disks;
}
qsyndrome((uint8_t*)stripes[disk],
- (uint8_t*)stripes[qdisk],
+ (uint8_t*)stripes[qdisk],
(uint8_t**)blocks,
syndrome_disks, chunk_size);
break;
}
for (i=0; i < raid_disks ; i++)
if (dest[i] >= 0) {
- if (lseek64(dest[i], offsets[i]+offset, 0) < 0)
- return -1;
- if (write(dest[i], stripes[i], chunk_size) != chunk_size)
- return -1;
+ if (lseek64(dest[i],
+ offsets[i]+offset, 0) < 0) {
+ rv = -1;
+ goto abort;
+ }
+ if (write(dest[i], stripes[i],
+ chunk_size) != chunk_size) {
+ rv = -1;
+ goto abort;
+ }
}
length -= len;
start += len;
}
- return 0;
+ rv = 0;
+
+abort:
+ free(stripe_buf);
+ free(stripes);
+ free(blocks);
+ return rv;
}
#ifdef MAIN
unsigned long long start, unsigned long long length)
{
/* ready the data and p (and q) blocks, and check we got them right */
- char *stripe_buf = malloc(raid_disks * chunk_size);
- char **stripes = malloc(raid_disks * sizeof(char*));
- char **blocks = malloc(raid_disks * sizeof(char*));
- char *p = malloc(chunk_size);
- char *q = malloc(chunk_size);
+ char *stripe_buf = xmalloc(raid_disks * chunk_size);
+ char **stripes = xmalloc(raid_disks * sizeof(char*));
+ char **blocks = xmalloc(raid_disks * sizeof(char*));
+ uint8_t *p = xmalloc(chunk_size);
+ uint8_t *q = xmalloc(chunk_size);
int i;
+ int diskP, diskQ;
int data_disks = raid_disks - (level == 5 ? 1: 2);
+
+ if (!tables_ready)
+ make_tables();
+
for ( i = 0 ; i < raid_disks ; i++)
stripes[i] = stripe_buf + i * chunk_size;
switch(level) {
case 6:
qsyndrome(p, q, (uint8_t**)blocks, data_disks, chunk_size);
- disk = geo_map(-1, start/chunk_size, raid_disks,
+ diskP = geo_map(-1, start/chunk_size, raid_disks,
level, layout);
- if (memcmp(p, stripes[disk], chunk_size) != 0) {
- printf("P(%d) wrong at %llu\n", disk,
+ if (memcmp(p, stripes[diskP], chunk_size) != 0) {
+ printf("P(%d) wrong at %llu\n", diskP,
start / chunk_size);
}
- disk = geo_map(-2, start/chunk_size, raid_disks,
+ diskQ = geo_map(-2, start/chunk_size, raid_disks,
level, layout);
- if (memcmp(q, stripes[disk], chunk_size) != 0) {
- printf("Q(%d) wrong at %llu\n", disk,
+ if (memcmp(q, stripes[diskQ], chunk_size) != 0) {
+ printf("Q(%d) wrong at %llu\n", diskQ,
start / chunk_size);
}
+ disk = raid6_check_disks(data_disks, start, chunk_size,
+ level, layout, diskP, diskQ,
+ p, q, stripes);
+ if(disk >= 0) {
+ printf("Possible failed disk: %d\n", disk);
+ }
+ if(disk == -2) {
+ printf("Failure detected, but disk unknown\n");
+ }
break;
}
length -= chunk_size;
return rv;
}
-main(int argc, char *argv[])
+char const Name[] = "test_restripe";
+int main(int argc, char *argv[])
{
/* save/restore file raid_disks chunk_size level layout start length devices...
*/
char *err = NULL;
if (argc < 10) {
- fprintf(stderr, "Usage: test_stripe save/restore file raid_disks"
- " chunk_size level layout start length devices...\n");
+ fprintf(stderr, "Usage: test_stripe save/restore file raid_disks chunk_size level layout start length devices...\n");
exit(1);
}
if (strcmp(argv[1], "save")==0)
raid_disks, argc-9);
exit(2);
}
- fds = malloc(raid_disks * sizeof(*fds));
- offsets = malloc(raid_disks * sizeof(*offsets));
- memset(offsets, 0, raid_disks * sizeof(*offsets));
+ fds = xmalloc(raid_disks * sizeof(*fds));
+ offsets = xcalloc(raid_disks, sizeof(*offsets));
storefd = open(file, O_RDWR);
if (storefd < 0) {
exit(3);
}
for (i=0; i<raid_disks; i++) {
+ char *p;
+ p = strchr(argv[9+i], ':');
+
+ if(p != NULL) {
+ *p++ = '\0';
+ offsets[i] = atoll(p) * 512;
+ }
+
fds[i] = open(argv[9+i], O_RDWR);
if (fds[i] < 0) {
perror(argv[9+i]);
}
}
- buf = malloc(raid_disks * chunk_size);
+ buf = xmalloc(raid_disks * chunk_size);
if (save == 1) {
int rv = save_stripes(fds, offsets,
int rv = restore_stripes(fds, offsets,
raid_disks, chunk_size, level, layout,
storefd, 0ULL,
- start, length);
+ start, length, NULL);
if (rv != 0) {
fprintf(stderr,
"test_stripe: restore_stripes returned %d\n",