X-Git-Url: http://git.ipfire.org/?a=blobdiff_plain;f=restripe.c;h=90896c89de0260a4c610d8ebdab7c5ffe3d031f8;hb=66eb2c93a619eb1d79dc653fd91add159aa3d1ff;hp=ea348e1f8cf8d71b0f2e889ace19e885cb2b9177;hpb=a628848379c07c79485a49c7f0c684ece02ae3b7;p=thirdparty%2Fmdadm.git diff --git a/restripe.c b/restripe.c index ea348e1f..90896c89 100644 --- a/restripe.c +++ b/restripe.c @@ -1,7 +1,7 @@ /* * mdadm - manage Linux "md" devices aka RAID arrays. * - * Copyright (C) 2006 Neil Brown + * Copyright (C) 2006-2009 Neil Brown * * * This program is free software; you can redistribute it and/or modify @@ -33,7 +33,7 @@ * */ -static int geo_map(int block, unsigned long long stripe, int raid_disks, +int geo_map(int block, unsigned long long stripe, int raid_disks, int level, int layout) { /* On the given stripe, find which disk in the array will have @@ -43,6 +43,11 @@ static int geo_map(int block, unsigned long long stripe, int raid_disks, */ int pd; + /* layout is not relevant for raid0 and raid4 */ + if ((level == 0) || + (level == 4)) + layout = 0; + switch(level*100 + layout) { case 000: case 400: @@ -206,7 +211,7 @@ static int is_ddf(int layout) } -static void xor_blocks(char *target, char **sources, int disks, int size) +void xor_blocks(char *target, char **sources, int disks, int size) { int i, j; /* Amazingly inefficient... */ @@ -218,7 +223,7 @@ static void xor_blocks(char *target, char **sources, int disks, int size) } } -static void qsyndrome(uint8_t *p, uint8_t *q, uint8_t **sources, int disks, int size) +void qsyndrome(uint8_t *p, uint8_t *q, uint8_t **sources, int disks, int size) { int d, z; uint8_t wq0, wp0, wd0, w10, w20; @@ -280,10 +285,13 @@ uint8_t raid6_gfmul[256][256]; uint8_t raid6_gfexp[256]; uint8_t raid6_gfinv[256]; uint8_t raid6_gfexi[256]; +uint8_t raid6_gflog[256]; +uint8_t raid6_gfilog[256]; void make_tables(void) { int i, j; uint8_t v; + uint32_t b, log; /* Compute multiplication table */ for (i = 0; i < 256; i++) @@ -307,10 +315,37 @@ void make_tables(void) for (i = 0; i < 256; i ++) raid6_gfexi[i] = raid6_gfinv[raid6_gfexp[i] ^ 1]; + /* Compute log and inverse log */ + /* Modified code from: + * http://web.eecs.utk.edu/~plank/plank/papers/CS-96-332.html + */ + b = 1; + raid6_gflog[0] = 0; + raid6_gfilog[255] = 0; + + for (log = 0; log < 255; log++) { + raid6_gflog[b] = (uint8_t) log; + raid6_gfilog[log] = (uint8_t) b; + b = b << 1; + if (b & 256) b = b ^ 0435; + } + tables_ready = 1; } uint8_t *zero; +int zero_size; + +void ensure_zero_has_size(int chunk_size) +{ + if (zero == NULL || chunk_size > zero_size) { + if (zero) + free(zero); + zero = xcalloc(1, chunk_size); + zero_size = chunk_size; + } +} + /* Following was taken from linux/drivers/md/raid6recov.c */ /* Recover two failed data blocks. */ @@ -382,16 +417,96 @@ void raid6_datap_recov(int disks, size_t bytes, int faila, uint8_t **ptrs) } } -/* Save data: - * We are given: - * A list of 'fds' of the active disks. Some may be absent. - * A geometry: raid_disks, chunk_size, level, layout - * A list of 'fds' for mirrored targets. They are already seeked to - * right (Write) location - * A start and length which must be stripe-aligned - * 'buf' is large enough to hold one stripe, and is aligned - */ +/* Try to find out if a specific disk has a problem */ +int raid6_check_disks(int data_disks, int start, int chunk_size, + int level, int layout, int diskP, int diskQ, + char *p, char *q, char **stripes) +{ + int i; + int data_id, diskD; + uint8_t Px, Qx; + int curr_broken_disk = -1; + int prev_broken_disk = -1; + int broken_status = 0; + + for(i = 0; i < chunk_size; i++) { + Px = (uint8_t)stripes[diskP][i] ^ (uint8_t)p[i]; + Qx = (uint8_t)stripes[diskQ][i] ^ (uint8_t)q[i]; + + if((Px != 0) && (Qx == 0)) + curr_broken_disk = diskP; + + if((Px == 0) && (Qx != 0)) + curr_broken_disk = diskQ; + + + if((Px != 0) && (Qx != 0)) { + data_id = (raid6_gflog[Qx] - raid6_gflog[Px]); + if(data_id < 0) data_id += 255; + diskD = geo_map(data_id, start/chunk_size, + data_disks + 2, level, layout); + curr_broken_disk = diskD; + } + + if((Px == 0) && (Qx == 0)) + curr_broken_disk = curr_broken_disk; + + if(curr_broken_disk >= data_disks + 2) + broken_status = 2; + + switch(broken_status) { + case 0: + if(curr_broken_disk != -1) { + prev_broken_disk = curr_broken_disk; + broken_status = 1; + } + break; + + case 1: + if(curr_broken_disk != prev_broken_disk) + broken_status = 2; + break; + + case 2: + default: + curr_broken_disk = prev_broken_disk = -2; + break; + } + } + + return curr_broken_disk; +} + +/******************************************************************************* + * Function: save_stripes + * Description: + * Function reads data (only data without P and Q) from array and writes + * it to buf and opcjonaly to backup files + * Parameters: + * source : A list of 'fds' of the active disks. + * Some may be absent + * offsets : A list of offsets on disk belonging + * to the array [bytes] + * raid_disks : geometry: number of disks in the array + * chunk_size : geometry: chunk size [bytes] + * level : geometry: RAID level + * layout : geometry: layout + * nwrites : number of backup files + * dest : A list of 'fds' for mirrored targets + * (e.g. backup files). They are already seeked to right + * (write) location. If NULL, data will be wrote + * to the buf only + * start : start address of data to read (must be stripe-aligned) + * [bytes] + * length - : length of data to read (must be stripe-aligned) + * [bytes] + * buf : buffer for data. It is large enough to hold + * one stripe. It is stripe aligned + * Returns: + * 0 : success + * -1 : fail + ******************************************************************************/ int save_stripes(int *source, unsigned long long *offsets, int raid_disks, int chunk_size, int level, int layout, int nwrites, int *dest, @@ -402,31 +517,41 @@ int save_stripes(int *source, unsigned long long *offsets, int data_disks = raid_disks - (level == 0 ? 0 : level <=5 ? 1 : 2); int disk; int i; + unsigned long long length_test; if (!tables_ready) make_tables(); + ensure_zero_has_size(chunk_size); - if (zero == NULL) { - zero = malloc(chunk_size); - memset(zero, 0, chunk_size); + len = data_disks * chunk_size; + length_test = length / len; + length_test *= len; + + if (length != length_test) { + dprintf("Error: save_stripes(): Data are not alligned. EXIT\n"); + dprintf("\tArea for saving stripes (length) = %llu\n", length); + dprintf("\tWork step (len) = %i\n", len); + dprintf("\tExpected save area (length_test) = %llu\n", + length_test); + abort(); } - len = data_disks * chunk_size; while (length > 0) { int failed = 0; int fdisk[3], fblock[3]; for (disk = 0; disk < raid_disks ; disk++) { unsigned long long offset; int dnum; - len = chunk_size; offset = (start/chunk_size/data_disks)*chunk_size; dnum = geo_map(disk < data_disks ? disk : data_disks - disk - 1, start/chunk_size/data_disks, raid_disks, level, layout); + if (dnum < 0) abort(); if (source[dnum] < 0 || - lseek64(source[dnum], offsets[disk]+offset, 0) < 0 || - read(source[dnum], buf+disk * chunk_size, len) != len) + lseek64(source[dnum], offsets[dnum]+offset, 0) < 0 || + read(source[dnum], buf+disk * chunk_size, chunk_size) + != chunk_size) if (failed <= 2) { fdisk[failed] = dnum; fblock[failed] = disk; @@ -464,38 +589,80 @@ int save_stripes(int *source, unsigned long long *offsets, * 'p' and 'q' get to be all zero */ for (i = 0; i < raid_disks; i++) - if (i == disk || i == qdisk) - bufs[i] = zero; - else - bufs[i] = (uint8_t*)buf+i*chunk_size; + bufs[i] = zero; + for (i = 0; i < data_disks; i++) { + int dnum = geo_map(i, + start/chunk_size/data_disks, + raid_disks, level, layout); + int snum; + /* i is the logical block number, so is index to 'buf'. + * dnum is physical disk number + * and thus the syndrome number. + */ + snum = dnum; + bufs[snum] = (uint8_t*)buf + chunk_size * i; + } syndrome_disks = raid_disks; } else { /* for md, q is over 'data_disks' blocks, * starting immediately after 'q' + * Note that for the '_6' variety, the p block + * makes a hole that we need to be careful of. */ - for (i = 0; i < data_disks; i++) - bufs[i] = (uint8_t*)buf + chunk_size * ((qdisk+1+i) % raid_disks); + int j; + int snum = 0; + for (j = 0; j < raid_disks; j++) { + int dnum = (qdisk + 1 + j) % raid_disks; + if (dnum == disk || dnum == qdisk) + continue; + for (i = 0; i < data_disks; i++) + if (geo_map(i, + start/chunk_size/data_disks, + raid_disks, level, layout) == dnum) + break; + /* i is the logical block number, so is index to 'buf'. + * dnum is physical disk number + * snum is syndrome disk for which 0 is immediately after Q + */ + bufs[snum] = (uint8_t*)buf + chunk_size * i; + + if (fblock[0] == i) + fdisk[0] = snum; + if (fblock[1] == i) + fdisk[1] = snum; + snum++; + } - fdisk[0] = (qdisk + 1 + fdisk[0]) * raid_disks; - fdisk[1] = (qdisk + 1 + fdisk[1]) * raid_disks; syndrome_disks = data_disks; } - bufs[syndrome_disks] = (uint8_t*)buf + chunk_size * disk; - bufs[syndrome_disks+1] = (uint8_t*)buf + chunk_size * qdisk; + + /* Place P and Q blocks at end of bufs */ + bufs[syndrome_disks] = (uint8_t*)buf + chunk_size * data_disks; + bufs[syndrome_disks+1] = (uint8_t*)buf + chunk_size * (data_disks+1); + if (fblock[1] == data_disks) /* One data failed, and parity failed */ raid6_datap_recov(syndrome_disks+2, chunk_size, fdisk[0], bufs); - else + else { + if (fdisk[0] > fdisk[1]) { + int t = fdisk[0]; + fdisk[0] = fdisk[1]; + fdisk[1] = t; + } /* Two data blocks failed, P,Q OK */ raid6_2data_recov(syndrome_disks+2, chunk_size, fdisk[0], fdisk[1], bufs); + } + } + if (dest) { + for (i = 0; i < nwrites; i++) + if (write(dest[i], buf, len) != len) + return -1; + } else { + /* build next stripe in buffer */ + buf += len; } - - for (i=0; i zero_size) { if (zero) - memset(zero, 0, chunk_size); + free(zero); + zero = xcalloc(1, chunk_size); + zero_size = chunk_size; } + if (stripe_buf == NULL || stripes == NULL || blocks == NULL || zero == NULL) { - free(stripe_buf); - free(stripes); - free(blocks); - free(zero); - return -2; + rv = -2; + goto abort; } - for (i=0; i 0) { - int len = data_disks * chunk_size; + unsigned int len = data_disks * chunk_size; unsigned long long offset; int disk, qdisk; int syndrome_disks; - if (length < len) - return -3; - for (i=0; i < data_disks; i++) { + if (length < len) { + rv = -3; + goto abort; + } + for (i = 0; i < data_disks; i++) { int disk = geo_map(i, start/chunk_size/data_disks, raid_disks, level, layout); - if (lseek64(source, read_offset, 0) != read_offset) - return -1; - if (read(source, stripes[disk], chunk_size) != chunk_size) - return -1; + if (src_buf == NULL) { + /* read from file */ + if (lseek64(source, read_offset, 0) != + (off64_t)read_offset) { + rv = -1; + goto abort; + } + if (read(source, + stripes[disk], + chunk_size) != chunk_size) { + rv = -1; + goto abort; + } + } else { + /* read from input buffer */ + memcpy(stripes[disk], + src_buf + read_offset, + chunk_size); + } read_offset += chunk_size; } /* We have the data, now do the parity */ @@ -599,15 +787,27 @@ int restore_stripes(int *dest, unsigned long long *offsets, } for (i=0; i < raid_disks ; i++) if (dest[i] >= 0) { - if (lseek64(dest[i], offsets[i]+offset, 0) < 0) - return -1; - if (write(dest[i], stripes[i], chunk_size) != chunk_size) - return -1; + if (lseek64(dest[i], + offsets[i]+offset, 0) < 0) { + rv = -1; + goto abort; + } + if (write(dest[i], stripes[i], + chunk_size) != chunk_size) { + rv = -1; + goto abort; + } } length -= len; start += len; } - return 0; + rv = 0; + +abort: + free(stripe_buf); + free(stripes); + free(blocks); + return rv; } #ifdef MAIN @@ -617,14 +817,19 @@ int test_stripes(int *source, unsigned long long *offsets, unsigned long long start, unsigned long long length) { /* ready the data and p (and q) blocks, and check we got them right */ - char *stripe_buf = malloc(raid_disks * chunk_size); - char **stripes = malloc(raid_disks * sizeof(char*)); - char **blocks = malloc(raid_disks * sizeof(char*)); - char *p = malloc(chunk_size); - char *q = malloc(chunk_size); + char *stripe_buf = xmalloc(raid_disks * chunk_size); + char **stripes = xmalloc(raid_disks * sizeof(char*)); + char **blocks = xmalloc(raid_disks * sizeof(char*)); + char *p = xmalloc(chunk_size); + char *q = xmalloc(chunk_size); int i; + int diskP, diskQ; int data_disks = raid_disks - (level == 5 ? 1: 2); + + if (!tables_ready) + make_tables(); + for ( i = 0 ; i < raid_disks ; i++) stripes[i] = stripe_buf + i * chunk_size; @@ -643,19 +848,28 @@ int test_stripes(int *source, unsigned long long *offsets, } switch(level) { case 6: - qsyndrome(p, q, blocks, data_disks, chunk_size); - disk = geo_map(-1, start/chunk_size, raid_disks, + qsyndrome(p, q, (uint8_t**)blocks, data_disks, chunk_size); + diskP = geo_map(-1, start/chunk_size, raid_disks, level, layout); - if (memcmp(p, stripes[disk], chunk_size) != 0) { - printf("P(%d) wrong at %llu\n", disk, + if (memcmp(p, stripes[diskP], chunk_size) != 0) { + printf("P(%d) wrong at %llu\n", diskP, start / chunk_size); } - disk = geo_map(-2, start/chunk_size, raid_disks, + diskQ = geo_map(-2, start/chunk_size, raid_disks, level, layout); - if (memcmp(q, stripes[disk], chunk_size) != 0) { - printf("Q(%d) wrong at %llu\n", disk, + if (memcmp(q, stripes[diskQ], chunk_size) != 0) { + printf("Q(%d) wrong at %llu\n", diskQ, start / chunk_size); } + disk = raid6_check_disks(data_disks, start, chunk_size, + level, layout, diskP, diskQ, + p, q, stripes); + if(disk >= 0) { + printf("Possible failed disk: %d\n", disk); + } + if(disk == -2) { + printf("Failure detected, but disk unknown\n"); + } break; } length -= chunk_size; @@ -722,9 +936,8 @@ main(int argc, char *argv[]) raid_disks, argc-9); exit(2); } - fds = malloc(raid_disks * sizeof(*fds)); - offsets = malloc(raid_disks * sizeof(*offsets)); - memset(offsets, 0, raid_disks * sizeof(*offsets)); + fds = xmalloc(raid_disks * sizeof(*fds)); + offsets = xcalloc(raid_disks, sizeof(*offsets)); storefd = open(file, O_RDWR); if (storefd < 0) { @@ -733,6 +946,14 @@ main(int argc, char *argv[]) exit(3); } for (i=0; i