X-Git-Url: http://git.ipfire.org/?a=blobdiff_plain;f=restripe.c;h=de85ee46259f92e39d30e92b56c7582ce8306efe;hb=2d2b0eb7b9d5603edb2e41e2c2860042fbbe8c92;hp=3d4d1c61430ed8872c24b434c7d3dae654f59a47;hpb=521f349cb0b2549fc0b9f1a8141f0615841634da;p=thirdparty%2Fmdadm.git diff --git a/restripe.c b/restripe.c index 3d4d1c61..de85ee46 100644 --- a/restripe.c +++ b/restripe.c @@ -33,7 +33,7 @@ * */ -static int geo_map(int block, unsigned long long stripe, int raid_disks, +int geo_map(int block, unsigned long long stripe, int raid_disks, int level, int layout) { /* On the given stripe, find which disk in the array will have @@ -43,6 +43,11 @@ static int geo_map(int block, unsigned long long stripe, int raid_disks, */ int pd; + /* layout is not relevant for raid0 and raid4 */ + if ((level == 0) || + (level == 4)) + layout = 0; + switch(level*100 + layout) { case 000: case 400: @@ -53,32 +58,35 @@ static int geo_map(int block, unsigned long long stripe, int raid_disks, return block; case 500 + ALGORITHM_LEFT_ASYMMETRIC: pd = (raid_disks-1) - stripe % raid_disks; - if (block == -1) return pd; + if (block == -1) + return pd; if (block >= pd) block++; return block; case 500 + ALGORITHM_RIGHT_ASYMMETRIC: pd = stripe % raid_disks; - if (block == -1) return pd; + if (block == -1) + return pd; if (block >= pd) block++; return block; case 500 + ALGORITHM_LEFT_SYMMETRIC: pd = (raid_disks - 1) - stripe % raid_disks; - if (block == -1) return pd; + if (block == -1) + return pd; return (pd + 1 + block) % raid_disks; case 500 + ALGORITHM_RIGHT_SYMMETRIC: pd = stripe % raid_disks; - if (block == -1) return pd; + if (block == -1) + return pd; return (pd + 1 + block) % raid_disks; case 500 + ALGORITHM_PARITY_0: return block + 1; - case 600 + ALGORITHM_PARITY_N_6: if (block == -2) return raid_disks - 1; @@ -90,7 +98,8 @@ static int geo_map(int block, unsigned long long stripe, int raid_disks, return raid_disks - 1; raid_disks--; pd = (raid_disks-1) - stripe % raid_disks; - if (block == -1) return pd; + if (block == -1) + return pd; if (block >= pd) block++; return block; @@ -100,7 +109,8 @@ static int geo_map(int block, unsigned long long stripe, int raid_disks, return raid_disks - 1; raid_disks--; pd = stripe % raid_disks; - if (block == -1) return pd; + if (block == -1) + return pd; if (block >= pd) block++; return block; @@ -110,7 +120,8 @@ static int geo_map(int block, unsigned long long stripe, int raid_disks, return raid_disks - 1; raid_disks--; pd = (raid_disks - 1) - stripe % raid_disks; - if (block == -1) return pd; + if (block == -1) + return pd; return (pd + 1 + block) % raid_disks; case 600 + ALGORITHM_RIGHT_SYMMETRIC_6: @@ -118,7 +129,8 @@ static int geo_map(int block, unsigned long long stripe, int raid_disks, return raid_disks - 1; raid_disks--; pd = stripe % raid_disks; - if (block == -1) return pd; + if (block == -1) + return pd; return (pd + 1 + block) % raid_disks; case 600 + ALGORITHM_PARITY_0_6: @@ -126,7 +138,6 @@ static int geo_map(int block, unsigned long long stripe, int raid_disks, return raid_disks - 1; return block + 1; - case 600 + ALGORITHM_PARITY_0: if (block == -1) return 0; @@ -136,8 +147,10 @@ static int geo_map(int block, unsigned long long stripe, int raid_disks, case 600 + ALGORITHM_LEFT_ASYMMETRIC: pd = raid_disks - 1 - (stripe % raid_disks); - if (block == -1) return pd; - if (block == -2) return (pd+1) % raid_disks; + if (block == -1) + return pd; + if (block == -2) + return (pd+1) % raid_disks; if (pd == raid_disks - 1) return block+1; if (block >= pd) @@ -148,8 +161,10 @@ static int geo_map(int block, unsigned long long stripe, int raid_disks, /* Different order for calculating Q, otherwize same as ... */ case 600 + ALGORITHM_RIGHT_ASYMMETRIC: pd = stripe % raid_disks; - if (block == -1) return pd; - if (block == -2) return (pd+1) % raid_disks; + if (block == -1) + return pd; + if (block == -2) + return (pd+1) % raid_disks; if (pd == raid_disks - 1) return block+1; if (block >= pd) @@ -158,25 +173,30 @@ static int geo_map(int block, unsigned long long stripe, int raid_disks, case 600 + ALGORITHM_LEFT_SYMMETRIC: pd = raid_disks - 1 - (stripe % raid_disks); - if (block == -1) return pd; - if (block == -2) return (pd+1) % raid_disks; + if (block == -1) + return pd; + if (block == -2) + return (pd+1) % raid_disks; return (pd + 2 + block) % raid_disks; case 600 + ALGORITHM_RIGHT_SYMMETRIC: pd = stripe % raid_disks; - if (block == -1) return pd; - if (block == -2) return (pd+1) % raid_disks; + if (block == -1) + return pd; + if (block == -2) + return (pd+1) % raid_disks; return (pd + 2 + block) % raid_disks; - case 600 + ALGORITHM_ROTATING_N_RESTART: /* Same a left_asymmetric, by first stripe is * D D D P Q rather than * Q D D D P */ pd = raid_disks - 1 - ((stripe + 1) % raid_disks); - if (block == -1) return pd; - if (block == -2) return (pd+1) % raid_disks; + if (block == -1) + return pd; + if (block == -2) + return (pd+1) % raid_disks; if (pd == raid_disks - 1) return block+1; if (block >= pd) @@ -186,13 +206,16 @@ static int geo_map(int block, unsigned long long stripe, int raid_disks, case 600 + ALGORITHM_ROTATING_N_CONTINUE: /* Same as left_symmetric but Q is before P */ pd = raid_disks - 1 - (stripe % raid_disks); - if (block == -1) return pd; - if (block == -2) return (pd+raid_disks-1) % raid_disks; + if (block == -1) + return pd; + if (block == -2) + return (pd+raid_disks-1) % raid_disks; return (pd + 1 + block) % raid_disks; } return -1; } -static int is_ddf(int layout) + +int is_ddf(int layout) { switch (layout) { @@ -205,8 +228,7 @@ static int is_ddf(int layout) } } - -static void xor_blocks(char *target, char **sources, int disks, int size) +void xor_blocks(char *target, char **sources, int disks, int size) { int i, j; /* Amazingly inefficient... */ @@ -218,7 +240,7 @@ static void xor_blocks(char *target, char **sources, int disks, int size) } } -static void qsyndrome(uint8_t *p, uint8_t *q, uint8_t **sources, int disks, int size) +void qsyndrome(uint8_t *p, uint8_t *q, uint8_t **sources, int disks, int size) { int d, z; uint8_t wq0, wp0, wd0, w10, w20; @@ -238,7 +260,6 @@ static void qsyndrome(uint8_t *p, uint8_t *q, uint8_t **sources, int disks, int } } - /* * The following was taken from linux/drivers/md/mktables.c, and modified * to create in-memory tables rather than C code @@ -280,10 +301,13 @@ uint8_t raid6_gfmul[256][256]; uint8_t raid6_gfexp[256]; uint8_t raid6_gfinv[256]; uint8_t raid6_gfexi[256]; +uint8_t raid6_gflog[256]; +uint8_t raid6_gfilog[256]; void make_tables(void) { int i, j; uint8_t v; + uint32_t b, log; /* Compute multiplication table */ for (i = 0; i < 256; i++) @@ -307,23 +331,62 @@ void make_tables(void) for (i = 0; i < 256; i ++) raid6_gfexi[i] = raid6_gfinv[raid6_gfexp[i] ^ 1]; + /* Compute log and inverse log */ + /* Modified code from: + * http://web.eecs.utk.edu/~plank/plank/papers/CS-96-332.html + */ + b = 1; + raid6_gflog[0] = 0; + raid6_gfilog[255] = 0; + + for (log = 0; log < 255; log++) { + raid6_gflog[b] = (uint8_t) log; + raid6_gfilog[log] = (uint8_t) b; + b = b << 1; + if (b & 256) b = b ^ 0435; + } + tables_ready = 1; } uint8_t *zero; +int zero_size; + +void ensure_zero_has_size(int chunk_size) +{ + if (zero == NULL || chunk_size > zero_size) { + if (zero) + free(zero); + zero = xcalloc(1, chunk_size); + zero_size = chunk_size; + } +} + /* Following was taken from linux/drivers/md/raid6recov.c */ /* Recover two failed data blocks. */ + void raid6_2data_recov(int disks, size_t bytes, int faila, int failb, - uint8_t **ptrs) + uint8_t **ptrs, int neg_offset) { uint8_t *p, *q, *dp, *dq; uint8_t px, qx, db; const uint8_t *pbmul; /* P multiplier table for B data */ const uint8_t *qmul; /* Q multiplier table (for both) */ - p = ptrs[disks-2]; - q = ptrs[disks-1]; + if (faila > failb) { + int t = faila; + faila = failb; + failb = t; + } + + if (neg_offset) { + p = ptrs[-1]; + q = ptrs[-2]; + } else { + p = ptrs[disks-2]; + q = ptrs[disks-1]; + } /* Compute syndrome with zero for the missing data pages Use the dead data pages as temporary storage for @@ -354,13 +417,19 @@ void raid6_2data_recov(int disks, size_t bytes, int faila, int failb, } /* Recover failure of one data block plus the P block */ -void raid6_datap_recov(int disks, size_t bytes, int faila, uint8_t **ptrs) +void raid6_datap_recov(int disks, size_t bytes, int faila, uint8_t **ptrs, + int neg_offset) { uint8_t *p, *q, *dq; const uint8_t *qmul; /* Q multiplier table */ - p = ptrs[disks-2]; - q = ptrs[disks-1]; + if (neg_offset) { + p = ptrs[-1]; + q = ptrs[-2]; + } else { + p = ptrs[disks-2]; + q = ptrs[disks-1]; + } /* Compute syndrome with zero for the missing data page Use the dead data page as temporary storage for delta q */ @@ -382,16 +451,94 @@ void raid6_datap_recov(int disks, size_t bytes, int faila, uint8_t **ptrs) } } -/* Save data: - * We are given: - * A list of 'fds' of the active disks. Some may be absent. - * A geometry: raid_disks, chunk_size, level, layout - * A list of 'fds' for mirrored targets. They are already seeked to - * right (Write) location - * A start and length which must be stripe-aligned - * 'buf' is large enough to hold one stripe, and is aligned - */ +/* Try to find out if a specific disk has a problem */ +int raid6_check_disks(int data_disks, int start, int chunk_size, + int level, int layout, int diskP, int diskQ, + uint8_t *p, uint8_t *q, char **stripes) +{ + int i; + int data_id, diskD; + uint8_t Px, Qx; + int curr_broken_disk = -1; + int prev_broken_disk = -1; + int broken_status = 0; + + for(i = 0; i < chunk_size; i++) { + Px = (uint8_t)stripes[diskP][i] ^ (uint8_t)p[i]; + Qx = (uint8_t)stripes[diskQ][i] ^ (uint8_t)q[i]; + + if((Px != 0) && (Qx == 0)) + curr_broken_disk = diskP; + + if((Px == 0) && (Qx != 0)) + curr_broken_disk = diskQ; + + if((Px != 0) && (Qx != 0)) { + data_id = (raid6_gflog[Qx] - raid6_gflog[Px]); + if(data_id < 0) data_id += 255; + diskD = geo_map(data_id, start/chunk_size, + data_disks + 2, level, layout); + curr_broken_disk = diskD; + } + if((Px == 0) && (Qx == 0)) + curr_broken_disk = prev_broken_disk; + + if(curr_broken_disk >= data_disks + 2) + broken_status = 2; + + switch(broken_status) { + case 0: + if(curr_broken_disk != -1) { + prev_broken_disk = curr_broken_disk; + broken_status = 1; + } + break; + + case 1: + if(curr_broken_disk != prev_broken_disk) + broken_status = 2; + break; + + case 2: + default: + curr_broken_disk = prev_broken_disk = -2; + break; + } + } + + return curr_broken_disk; +} + +/******************************************************************************* + * Function: save_stripes + * Description: + * Function reads data (only data without P and Q) from array and writes + * it to buf and opcjonaly to backup files + * Parameters: + * source : A list of 'fds' of the active disks. + * Some may be absent + * offsets : A list of offsets on disk belonging + * to the array [bytes] + * raid_disks : geometry: number of disks in the array + * chunk_size : geometry: chunk size [bytes] + * level : geometry: RAID level + * layout : geometry: layout + * nwrites : number of backup files + * dest : A list of 'fds' for mirrored targets + * (e.g. backup files). They are already seeked to right + * (write) location. If NULL, data will be wrote + * to the buf only + * start : start address of data to read (must be stripe-aligned) + * [bytes] + * length - : length of data to read (must be stripe-aligned) + * [bytes] + * buf : buffer for data. It is large enough to hold + * one stripe. It is stripe aligned + * Returns: + * 0 : success + * -1 : fail + ******************************************************************************/ int save_stripes(int *source, unsigned long long *offsets, int raid_disks, int chunk_size, int level, int layout, int nwrites, int *dest, @@ -402,16 +549,25 @@ int save_stripes(int *source, unsigned long long *offsets, int data_disks = raid_disks - (level == 0 ? 0 : level <=5 ? 1 : 2); int disk; int i; + unsigned long long length_test; if (!tables_ready) make_tables(); + ensure_zero_has_size(chunk_size); - if (zero == NULL) { - zero = malloc(chunk_size); - memset(zero, 0, chunk_size); + len = data_disks * chunk_size; + length_test = length / len; + length_test *= len; + + if (length != length_test) { + dprintf("Error: save_stripes(): Data are not alligned. EXIT\n"); + dprintf("\tArea for saving stripes (length) = %llu\n", length); + dprintf("\tWork step (len) = %i\n", len); + dprintf("\tExpected save area (length_test) = %llu\n", + length_test); + abort(); } - len = data_disks * chunk_size; while (length > 0) { int failed = 0; int fdisk[3], fblock[3]; @@ -482,22 +638,33 @@ int save_stripes(int *source, unsigned long long *offsets, } else { /* for md, q is over 'data_disks' blocks, * starting immediately after 'q' + * Note that for the '_6' variety, the p block + * makes a hole that we need to be careful of. */ - for (i = 0; i < data_disks; i++) { - int dnum = geo_map(i, - start/chunk_size/data_disks, - raid_disks, level, layout); - int snum; + int j; + int snum = 0; + for (j = 0; j < raid_disks; j++) { + int dnum = (qdisk + 1 + j) % raid_disks; + if (dnum == disk || dnum == qdisk) + continue; + for (i = 0; i < data_disks; i++) + if (geo_map(i, + start/chunk_size/data_disks, + raid_disks, level, layout) == dnum) + break; /* i is the logical block number, so is index to 'buf'. * dnum is physical disk number * snum is syndrome disk for which 0 is immediately after Q */ - snum = (raid_disks + dnum - qdisk - 1) % raid_disks; bufs[snum] = (uint8_t*)buf + chunk_size * i; + + if (fblock[0] == i) + fdisk[0] = snum; + if (fblock[1] == i) + fdisk[1] = snum; + snum++; } - fdisk[0] = (raid_disks + fdisk[0] - qdisk - 1) % raid_disks; - fdisk[1] = (raid_disks + fdisk[1] - qdisk - 1) % raid_disks; syndrome_disks = data_disks; } @@ -508,23 +675,21 @@ int save_stripes(int *source, unsigned long long *offsets, if (fblock[1] == data_disks) /* One data failed, and parity failed */ raid6_datap_recov(syndrome_disks+2, chunk_size, - fdisk[0], bufs); + fdisk[0], bufs, 0); else { - if (fdisk[0] > fdisk[1]) { - int t = fdisk[0]; - fdisk[0] = fdisk[1]; - fdisk[1] = t; - } /* Two data blocks failed, P,Q OK */ raid6_2data_recov(syndrome_disks+2, chunk_size, - fdisk[0], fdisk[1], bufs); + fdisk[0], fdisk[1], bufs, 0); } } - - for (i=0; i zero_size) { if (zero) - memset(zero, 0, chunk_size); + free(zero); + zero = xcalloc(1, chunk_size); + zero_size = chunk_size; } + if (stripe_buf == NULL || stripes == NULL || blocks == NULL || zero == NULL) { - free(stripe_buf); - free(stripes); - free(blocks); - free(zero); - return -2; + rv = -2; + goto abort; } - for (i=0; i 0) { - int len = data_disks * chunk_size; + unsigned int len = data_disks * chunk_size; unsigned long long offset; int disk, qdisk; int syndrome_disks; - if (length < len) - return -3; - for (i=0; i < data_disks; i++) { + if (length < len) { + rv = -3; + goto abort; + } + for (i = 0; i < data_disks; i++) { int disk = geo_map(i, start/chunk_size/data_disks, raid_disks, level, layout); - if (lseek64(source, read_offset, 0) != read_offset) - return -1; - if (read(source, stripes[disk], chunk_size) != chunk_size) - return -1; + if (src_buf == NULL) { + /* read from file */ + if (lseek64(source, read_offset, 0) != + (off64_t)read_offset) { + rv = -1; + goto abort; + } + if (read(source, + stripes[disk], + chunk_size) != chunk_size) { + rv = -1; + goto abort; + } + } else { + /* read from input buffer */ + memcpy(stripes[disk], + src_buf + read_offset, + chunk_size); + } read_offset += chunk_size; } /* We have the data, now do the parity */ @@ -622,22 +807,34 @@ int restore_stripes(int *dest, unsigned long long *offsets, syndrome_disks = data_disks; } qsyndrome((uint8_t*)stripes[disk], - (uint8_t*)stripes[qdisk], + (uint8_t*)stripes[qdisk], (uint8_t**)blocks, syndrome_disks, chunk_size); break; } for (i=0; i < raid_disks ; i++) if (dest[i] >= 0) { - if (lseek64(dest[i], offsets[i]+offset, 0) < 0) - return -1; - if (write(dest[i], stripes[i], chunk_size) != chunk_size) - return -1; + if (lseek64(dest[i], + offsets[i]+offset, 0) < 0) { + rv = -1; + goto abort; + } + if (write(dest[i], stripes[i], + chunk_size) != chunk_size) { + rv = -1; + goto abort; + } } length -= len; start += len; } - return 0; + rv = 0; + +abort: + free(stripe_buf); + free(stripes); + free(blocks); + return rv; } #ifdef MAIN @@ -647,14 +844,19 @@ int test_stripes(int *source, unsigned long long *offsets, unsigned long long start, unsigned long long length) { /* ready the data and p (and q) blocks, and check we got them right */ - char *stripe_buf = malloc(raid_disks * chunk_size); - char **stripes = malloc(raid_disks * sizeof(char*)); - char **blocks = malloc(raid_disks * sizeof(char*)); - char *p = malloc(chunk_size); - char *q = malloc(chunk_size); + char *stripe_buf = xmalloc(raid_disks * chunk_size); + char **stripes = xmalloc(raid_disks * sizeof(char*)); + char **blocks = xmalloc(raid_disks * sizeof(char*)); + uint8_t *p = xmalloc(chunk_size); + uint8_t *q = xmalloc(chunk_size); int i; + int diskP, diskQ; int data_disks = raid_disks - (level == 5 ? 1: 2); + + if (!tables_ready) + make_tables(); + for ( i = 0 ; i < raid_disks ; i++) stripes[i] = stripe_buf + i * chunk_size; @@ -674,18 +876,27 @@ int test_stripes(int *source, unsigned long long *offsets, switch(level) { case 6: qsyndrome(p, q, (uint8_t**)blocks, data_disks, chunk_size); - disk = geo_map(-1, start/chunk_size, raid_disks, + diskP = geo_map(-1, start/chunk_size, raid_disks, level, layout); - if (memcmp(p, stripes[disk], chunk_size) != 0) { - printf("P(%d) wrong at %llu\n", disk, + if (memcmp(p, stripes[diskP], chunk_size) != 0) { + printf("P(%d) wrong at %llu\n", diskP, start / chunk_size); } - disk = geo_map(-2, start/chunk_size, raid_disks, + diskQ = geo_map(-2, start/chunk_size, raid_disks, level, layout); - if (memcmp(q, stripes[disk], chunk_size) != 0) { - printf("Q(%d) wrong at %llu\n", disk, + if (memcmp(q, stripes[diskQ], chunk_size) != 0) { + printf("Q(%d) wrong at %llu\n", diskQ, start / chunk_size); } + disk = raid6_check_disks(data_disks, start, chunk_size, + level, layout, diskP, diskQ, + p, q, stripes); + if(disk >= 0) { + printf("Possible failed disk: %d\n", disk); + } + if(disk == -2) { + printf("Failure detected, but disk unknown\n"); + } break; } length -= chunk_size; @@ -705,7 +916,8 @@ unsigned long long getnum(char *str, char **err) return rv; } -main(int argc, char *argv[]) +char const Name[] = "test_restripe"; +int main(int argc, char *argv[]) { /* save/restore file raid_disks chunk_size level layout start length devices... */ @@ -721,8 +933,7 @@ main(int argc, char *argv[]) char *err = NULL; if (argc < 10) { - fprintf(stderr, "Usage: test_stripe save/restore file raid_disks" - " chunk_size level layout start length devices...\n"); + fprintf(stderr, "Usage: test_stripe save/restore file raid_disks chunk_size level layout start length devices...\n"); exit(1); } if (strcmp(argv[1], "save")==0) @@ -752,9 +963,8 @@ main(int argc, char *argv[]) raid_disks, argc-9); exit(2); } - fds = malloc(raid_disks * sizeof(*fds)); - offsets = malloc(raid_disks * sizeof(*offsets)); - memset(offsets, 0, raid_disks * sizeof(*offsets)); + fds = xmalloc(raid_disks * sizeof(*fds)); + offsets = xcalloc(raid_disks, sizeof(*offsets)); storefd = open(file, O_RDWR); if (storefd < 0) { @@ -763,6 +973,14 @@ main(int argc, char *argv[]) exit(3); } for (i=0; i