]> git.ipfire.org Git - thirdparty/mdadm.git/blame - restripe.c
imsm: fix family number handling
[thirdparty/mdadm.git] / restripe.c
CommitLineData
/*
 * mdadm - manage Linux "md" devices aka RAID arrays.
 *
 * Copyright (C) 2006-2009 Neil Brown <neilb@suse.de>
 *
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *
 * Author: Neil Brown
 * Email: <neilb@suse.de>
 */
24
25#include "mdadm.h"
26
/* To restripe, we read from the old geometry into a buffer, and
 * write from the buffer to the new geometry.
 * When reading we don't worry about parity. When writing we do.
 *
 */
32
33static int geo_map(int block, unsigned long long stripe, int raid_disks, int level, int layout)
34{
48327135 35 /* On the given stripe, find which disk in the array will have
e86c9dd6 36 * block numbered 'block'.
48327135
NB
37 * '-1' means the parity block.
38 * '-2' means the Q syndrome.
e86c9dd6
NB
39 */
40 int pd;
41
42 switch(level*100 + layout) {
43 case 000:
44 case 400:
45 /* raid 4 isn't messed around by parity blocks */
46 if (block == -1)
47 return raid_disks-1; /* parity block */
48 return block;
49 case 500 + ALGORITHM_LEFT_ASYMMETRIC:
50 pd = (raid_disks-1) - stripe % raid_disks;
51 if (block == -1) return pd;
52 if (block >= pd)
53 block++;
54 return block;
55
56 case 500 + ALGORITHM_RIGHT_ASYMMETRIC:
57 pd = stripe % raid_disks;
58 if (block == -1) return pd;
59 if (block >= pd)
60 block++;
61 return block;
62
63 case 500 + ALGORITHM_LEFT_SYMMETRIC:
64 pd = (raid_disks - 1) - stripe % raid_disks;
65 if (block == -1) return pd;
66 return (pd + 1 + block) % raid_disks;
67
68 case 500 + ALGORITHM_RIGHT_SYMMETRIC:
69 pd = stripe % raid_disks;
70 if (block == -1) return pd;
71 return (pd + 1 + block) % raid_disks;
72
73 case 600 + ALGORITHM_LEFT_ASYMMETRIC:
74 pd = raid_disks - 1 - (stripe % raid_disks);
75 if (block == -1) return pd;
48327135 76 if (block == -2) return (pd+1) % raid_disks;
e86c9dd6
NB
77 if (pd == raid_disks - 1)
78 return block+1;
79 if (block >= pd)
80 return block+2;
81 return block;
82
83 case 600 + ALGORITHM_RIGHT_ASYMMETRIC:
84 pd = stripe % raid_disks;
85 if (block == -1) return pd;
48327135 86 if (block == -2) return (pd+1) % raid_disks;
e86c9dd6
NB
87 if (pd == raid_disks - 1)
88 return block+1;
89 if (block >= pd)
90 return block+2;
91 return block;
92
93 case 600 + ALGORITHM_LEFT_SYMMETRIC:
94 pd = raid_disks - 1 - (stripe % raid_disks);
95 if (block == -1) return pd;
48327135 96 if (block == -2) return (pd+1) % raid_disks;
e86c9dd6
NB
97 return (pd + 2 + block) % raid_disks;
98
99 case 600 + ALGORITHM_RIGHT_SYMMETRIC:
100 pd = stripe % raid_disks;
101 if (block == -1) return pd;
48327135 102 if (block == -2) return (pd+1) % raid_disks;
e86c9dd6
NB
103 return (pd + 2 + block) % raid_disks;
104 }
105 return -1;
106}
107
108
/*
 * Store the byte-wise XOR of 'disks' source buffers in 'target'.
 *
 * target:  output buffer of 'size' bytes.
 * sources: array of 'disks' pointers, each to a buffer of 'size' bytes.
 *
 * With disks == 0 the target is filled with zeros.  Each target byte is
 * written only after the corresponding byte of every source has been read.
 */
static void xor_blocks(char *target, char **sources, int disks, int size)
{
	int i, j;

	/* Simple byte-at-a-time implementation; not tuned for speed. */
	for (i = 0; i < size; i++) {
		char c = 0;

		for (j = 0; j < disks; j++)
			c ^= sources[j][i];
		target[i] = c;
	}
}
120
48327135
NB
/*
 * Compute the RAID-6 P (XOR parity) and Q (syndrome) blocks.
 *
 * p, q:    output buffers of 'size' bytes each.
 * sources: array of 'disks' pointers to the data blocks, in data-block
 *          order, each 'size' bytes long.
 *
 * For every byte position Q is evaluated by Horner's rule: starting
 * from the last data block, the running value is multiplied by 2 and
 * the next lower block is XORed in.  The multiply-by-2 is a left shift
 * with a conditional XOR of 0x1d when the top bit was set.
 *
 * All arithmetic is done on unsigned bytes so that shifting never
 * operates on a negative value.  With disks <= 0 both outputs are
 * zero-filled.
 */
static void qsyndrome(char *p, char *q, char **sources, int disks, int size)
{
	int d, z;
	unsigned char wq0, wp0, wd0;

	if (disks <= 0) {
		/* no data blocks: parity of nothing is zero */
		for (d = 0; d < size; d++) {
			p[d] = 0;
			q[d] = 0;
		}
		return;
	}

	for (d = 0; d < size; d++) {
		wq0 = wp0 = (unsigned char)sources[disks-1][d];
		for (z = disks-2; z >= 0; z--) {
			wd0 = (unsigned char)sources[z][d];
			wp0 ^= wd0;
			/* wq0 = wq0 * 2, reduced with 0x1d if bit 7 overflowed */
			wq0 = (unsigned char)(((wq0 << 1) & 0xff) ^
					      ((wq0 & 0x80) ? 0x1d : 0x00));
			wq0 ^= wd0;
		}
		p[d] = (char)wp0;
		q[d] = (char)wq0;
	}
}
140
/* Save data:
 * We are given:
 *  A list of 'fds' of the active disks. For now we require all to be present.
 *  A geometry: raid_disks, chunk_size, level, layout
 *  A list of 'fds' for mirrored targets. They have already been
 *    positioned at the right (write) location
 *  A start and length
 */
149
/*
 * Copy 'length' bytes of array data, starting at array byte offset
 * 'start', from the member devices to every descriptor in 'dest'.
 *
 * source:     one file descriptor per array device, indexed by device number.
 * offsets:    per-device byte offset added to every position in that device.
 * raid_disks, chunk_size, level, layout: geometry of the array being read.
 * nwrites:    number of descriptors in 'dest'.
 * dest:       descriptors that each receive a copy of the data; they are
 *             written sequentially from their current position.
 *
 * Only data blocks are read; parity is ignored.
 *
 * Returns 0 on success, -1 if the geometry is unusable or if any seek,
 * read or write fails or is short.
 */
int save_stripes(int *source, unsigned long long *offsets,
		 int raid_disks, int chunk_size, int level, int layout,
		 int nwrites, int *dest,
		 unsigned long long start, unsigned long long length)
{
	char abuf[8192+512];
	/* round up to a 512-byte boundary inside abuf
	 * (presumably so the buffer suits direct I/O - TODO confirm)
	 */
	char *buf = (char*)(((unsigned long)abuf+511)&~511UL);
	int data_disks = raid_disks - (level == 0 ? 0 : level <=5 ? 1 : 2);
	int cpos;	/* where in chunk we are up to */
	int len;
	int disk;

	/* both values are used as divisors below */
	if (chunk_size <= 0 || data_disks <= 0)
		return -1;

	cpos = start % chunk_size;

	while (length > 0) {
		unsigned long long stripe = start/chunk_size/data_disks;
		unsigned long long offset;
		int i;

		/* len bytes to be moved from one device: never cross a
		 * chunk boundary, overflow the buffer, or pass the end.
		 */
		len = chunk_size - cpos;
		if (len > 8192)
			len = 8192;
		if ((unsigned long long)len > length)
			len = length;

		offset = stripe*chunk_size + cpos;
		disk = start/chunk_size % data_disks;
		disk = geo_map(disk, stripe, raid_disks, level, layout);
		/* unknown level/layout: don't index the arrays with it */
		if (disk < 0 || disk >= raid_disks)
			return -1;
		if (lseek64(source[disk], offsets[disk]+offset, SEEK_SET) < 0)
			return -1;
		if (read(source[disk], buf, len) != len)
			return -1;
		for (i = 0; i < nwrites; i++)
			if (write(dest[i], buf, len) != len)
				return -1;
		length -= len;
		start += len;
		cpos += len;
		while (cpos >= chunk_size)
			cpos -= chunk_size;
	}
	return 0;
}
188
/* Restore data:
 * We are given:
 *  A list of 'fds' of the active disks. Some may be '-1' for not-available.
 *  A geometry: raid_disks, chunk_size, level, layout
 *  An 'fd' to read from, and the offset within it at which to start reading.
 *  A start and length.
 * The length must be a multiple of the stripe size.
 *
 * We build a full stripe in memory and then write it out.
 * We assume that there are enough working devices.
 */
/*
 * Write 'length' bytes of saved data back into an array, one full
 * stripe at a time, regenerating parity for each stripe.
 *
 * dest:        one file descriptor per array device; a negative entry
 *              means the device is unavailable and is skipped.
 * offsets:     per-device byte offset added to every position in that device.
 * raid_disks, chunk_size, level, layout: geometry of the array being written.
 * source:      descriptor holding the saved data.
 * read_offset: byte offset in 'source' at which the saved data begins.
 * start:       array byte offset of the first stripe to restore.
 * length:      number of data bytes to restore; must be a multiple of
 *              the stripe's data size (data disks * chunk_size).
 *
 * Returns 0 on success,
 *        -1 if a seek, read or write fails or is short,
 *        -2 if memory cannot be allocated,
 *        -3 if the geometry is unusable or 'length' is not a whole
 *           number of stripes.
 * All buffers allocated here are released before returning.
 */
int restore_stripes(int *dest, unsigned long long *offsets,
		    int raid_disks, int chunk_size, int level, int layout,
		    int source, unsigned long long read_offset,
		    unsigned long long start, unsigned long long length)
{
	char *stripe_buf = NULL;
	char **stripes = NULL;
	char **blocks = NULL;
	int data_disks = raid_disks - (level == 0 ? 0 : level <=5 ? 1 : 2);
	int rv = 0;
	int i;

	/* these are used as divisors and allocation sizes below */
	if (raid_disks <= 0 || chunk_size <= 0 || data_disks <= 0)
		return -3;

	stripe_buf = malloc((size_t)raid_disks * chunk_size);
	stripes = malloc(raid_disks * sizeof(*stripes));
	blocks = malloc(raid_disks * sizeof(*blocks));
	if (stripe_buf == NULL || stripes == NULL || blocks == NULL) {
		rv = -2;
		goto out;
	}

	/* stripes[d] is the chunk destined for device d */
	for (i = 0; i < raid_disks; i++)
		stripes[i] = stripe_buf + (size_t)i * chunk_size;

	while (length > 0) {
		unsigned long long stripe = start/chunk_size/data_disks;
		unsigned long long offset = stripe * chunk_size;
		int len = data_disks * chunk_size;
		int disk, qdisk;

		if (length < (unsigned long long)len) {
			rv = -3;
			goto out;
		}

		/* Read each data chunk straight into the slot of the device
		 * it belongs on; blocks[] keeps them in data-block order
		 * for the parity calculation.
		 */
		for (i = 0; i < data_disks; i++) {
			disk = geo_map(i, stripe, raid_disks, level, layout);
			if (disk < 0 || disk >= raid_disks) {
				rv = -3;
				goto out;
			}
			blocks[i] = stripes[disk];
			if ((unsigned long long)lseek64(source, read_offset, SEEK_SET)
			    != read_offset) {
				rv = -1;
				goto out;
			}
			if (read(source, stripes[disk], chunk_size) != chunk_size) {
				rv = -1;
				goto out;
			}
			read_offset += chunk_size;
		}

		/* We have the data, now do the parity */
		switch (level) {
		case 4:
		case 5:
			disk = geo_map(-1, stripe, raid_disks, level, layout);
			if (disk < 0 || disk >= raid_disks) {
				rv = -3;
				goto out;
			}
			xor_blocks(stripes[disk], blocks, data_disks, chunk_size);
			break;
		case 6:
			disk = geo_map(-1, stripe, raid_disks, level, layout);
			qdisk = geo_map(-2, stripe, raid_disks, level, layout);
			if (disk < 0 || disk >= raid_disks ||
			    qdisk < 0 || qdisk >= raid_disks) {
				rv = -3;
				goto out;
			}
			qsyndrome(stripes[disk], stripes[qdisk], blocks,
				  data_disks, chunk_size);
			break;
		default:
			/* no parity to generate */
			break;
		}

		for (i = 0; i < raid_disks; i++) {
			if (dest[i] < 0)
				continue;
			if (lseek64(dest[i], offsets[i]+offset, SEEK_SET) < 0) {
				rv = -1;
				goto out;
			}
			if (write(dest[i], stripes[i], chunk_size) != chunk_size) {
				rv = -1;
				goto out;
			}
		}
		length -= len;
		start += len;
	}

out:
	free(stripe_buf);
	free(stripes);
	free(blocks);
	return rv;
}
267
268#ifdef MAIN
269
48327135
NB
/*
 * Read the data and P (and Q) blocks of each stripe and check we got
 * them right.  Only level 6 is actually verified; for other levels the
 * block-to-device mapping is just printed.
 *
 * source:  one file descriptor per array device.
 * offsets: per-device byte offset added to every position in that device.
 * raid_disks, chunk_size, level, layout: geometry of the array.
 * start:   byte offset within each device of the first chunk to check;
 *          start / chunk_size is used as the stripe number.
 * length:  number of bytes per device to check, walked one chunk at a time.
 *
 * The mapping of every data block and any P/Q mismatch are printed on
 * stdout; a mismatch does not change the return value.
 *
 * Returns 0 once all stripes have been examined,
 *        -1 if a seek or read fails or is short,
 *        -2 if memory cannot be allocated,
 *        -3 if the geometry is unusable.
 */
int test_stripes(int *source, unsigned long long *offsets,
		 int raid_disks, int chunk_size, int level, int layout,
		 unsigned long long start, unsigned long long length)
{
	char *stripe_buf = NULL;
	char **stripes = NULL;
	char **blocks = NULL;
	char *p = NULL;
	char *q = NULL;
	int data_disks = raid_disks - (level == 0 ? 0 : level <= 5 ? 1 : 2);
	int rv = 0;
	int i;

	if (raid_disks <= 0 || chunk_size <= 0 || data_disks <= 0)
		return -3;

	stripe_buf = malloc((size_t)raid_disks * chunk_size);
	stripes = malloc(raid_disks * sizeof(*stripes));
	blocks = malloc(raid_disks * sizeof(*blocks));
	p = malloc(chunk_size);
	q = malloc(chunk_size);
	if (stripe_buf == NULL || stripes == NULL || blocks == NULL ||
	    p == NULL || q == NULL) {
		rv = -2;
		goto out;
	}

	for (i = 0; i < raid_disks; i++)
		stripes[i] = stripe_buf + (size_t)i * chunk_size;

	while (length > 0) {
		unsigned long long stripe = start / chunk_size;
		int disk;

		/* read this stripe's chunk from every device */
		for (i = 0; i < raid_disks; i++) {
			if (lseek64(source[i], offsets[i]+start, SEEK_SET) < 0 ||
			    read(source[i], stripes[i], chunk_size) != chunk_size) {
				rv = -1;
				goto out;
			}
		}
		/* collect the data chunks in data-block order */
		for (i = 0; i < data_disks; i++) {
			disk = geo_map(i, stripe, raid_disks, level, layout);
			if (disk < 0 || disk >= raid_disks) {
				rv = -3;
				goto out;
			}
			blocks[i] = stripes[disk];
			printf("%d->%d\n", i, disk);
		}
		switch (level) {
		case 6:
			/* recompute P and Q and compare with what is on disk */
			qsyndrome(p, q, blocks, data_disks, chunk_size);
			disk = geo_map(-1, stripe, raid_disks, level, layout);
			if (disk < 0 || disk >= raid_disks) {
				rv = -3;
				goto out;
			}
			if (memcmp(p, stripes[disk], chunk_size) != 0) {
				printf("P(%d) wrong at %llu\n", disk,
				       start / chunk_size);
			}
			disk = geo_map(-2, stripe, raid_disks, level, layout);
			if (disk < 0 || disk >= raid_disks) {
				rv = -3;
				goto out;
			}
			if (memcmp(q, stripes[disk], chunk_size) != 0) {
				printf("Q(%d) wrong at %llu\n", disk,
				       start / chunk_size);
			}
			break;
		default:
			break;
		}
		/* clamp so a length that is not a chunk multiple cannot wrap */
		if (length > (unsigned long long)chunk_size)
			length -= chunk_size;
		else
			length = 0;
		start += chunk_size;
	}

out:
	free(stripe_buf);
	free(stripes);
	free(blocks);
	free(p);
	free(q);
	return rv;
}
321
e86c9dd6
NB
/*
 * Parse 'str' as a base-10 unsigned number.
 *
 * The whole string must be consumed by strtoull().  If it is not (or
 * nothing could be parsed), *err is set to 'str' and 0 is returned.
 * On success *err is left untouched, so one 'err' variable can be
 * shared by a series of calls and checked once at the end.
 */
unsigned long long getnum(char *str, char **err)
{
	char *end;
	unsigned long long value = strtoull(str, &end, 10);

	if (end == str || *end != '\0') {
		*err = str;
		return 0;
	}
	return value;
}
332
/*
 * Test driver, built only when MAIN is defined:
 *
 *   test_stripe save|restore|test file raid_disks chunk_size level layout
 *               start length devices...
 *
 * 'save' copies array data from the devices into 'file', 'restore'
 * writes 'file' back onto the devices, and 'test' checks the P/Q
 * blocks on the devices.  Exactly raid_disks device names must follow
 * the numeric arguments.
 *
 * Exit status: 0 on success, 1 if the selected operation fails or too
 * few arguments are given, 2 for other bad arguments, 3 if a file
 * cannot be opened or memory cannot be allocated.
 */
int main(int argc, char *argv[])
{
	/* save/restore file raid_disks chunk_size level layout start length devices...
	 */
	enum { MODE_RESTORE = 0, MODE_SAVE = 1, MODE_TEST = 2 };
	int mode;
	int *fds;
	char *file;
	int storefd;
	unsigned long long *offsets;
	int raid_disks, chunk_size, level, layout;
	unsigned long long start, length;
	int rv;
	int i;

	char *err = NULL;
	if (argc < 10) {
		fprintf(stderr, "Usage: test_stripe save/restore/test file raid_disks"
			" chunk_size level layout start length devices...\n");
		exit(1);
	}
	if (strcmp(argv[1], "save") == 0)
		mode = MODE_SAVE;
	else if (strcmp(argv[1], "restore") == 0)
		mode = MODE_RESTORE;
	else if (strcmp(argv[1], "test") == 0)
		mode = MODE_TEST;
	else {
		fprintf(stderr, "test_stripe: must give 'save', 'restore' or 'test'.\n");
		exit(2);
	}

	file = argv[2];
	/* getnum() only sets 'err' on failure, so one check covers them all */
	raid_disks = getnum(argv[3], &err);
	chunk_size = getnum(argv[4], &err);
	level = getnum(argv[5], &err);
	layout = getnum(argv[6], &err);
	start = getnum(argv[7], &err);
	length = getnum(argv[8], &err);
	if (err) {
		fprintf(stderr, "test_stripe: Bad number: %s\n", err);
		exit(2);
	}
	/* both are used as allocation sizes and divisors later on */
	if (raid_disks < 1 || chunk_size < 1) {
		fprintf(stderr, "test_stripe: raid_disks and chunk_size must be positive\n");
		exit(2);
	}
	if (argc != raid_disks + 9) {
		fprintf(stderr, "test_stripe: wrong number of devices: want %d found %d\n",
			raid_disks, argc-9);
		exit(2);
	}
	fds = malloc(raid_disks * sizeof(*fds));
	/* all devices are accessed from offset 0 */
	offsets = calloc(raid_disks, sizeof(*offsets));
	if (fds == NULL || offsets == NULL) {
		fprintf(stderr, "test_stripe: out of memory\n");
		exit(3);
	}

	storefd = open(file, O_RDWR);
	if (storefd < 0) {
		perror(file);
		fprintf(stderr, "test_stripe: could not open %s.\n", file);
		exit(3);
	}
	for (i = 0; i < raid_disks; i++) {
		fds[i] = open(argv[9+i], O_RDWR);
		if (fds[i] < 0) {
			perror(argv[9+i]);
			fprintf(stderr,"test_stripe: cannot open %s.\n", argv[9+i]);
			exit(3);
		}
	}

	if (mode == MODE_SAVE) {
		rv = save_stripes(fds, offsets,
				  raid_disks, chunk_size, level, layout,
				  1, &storefd,
				  start, length);
		if (rv != 0) {
			fprintf(stderr,
				"test_stripe: save_stripes returned %d\n", rv);
			exit(1);
		}
	} else if (mode == MODE_TEST) {
		rv = test_stripes(fds, offsets,
				  raid_disks, chunk_size, level, layout,
				  start, length);
		if (rv != 0) {
			fprintf(stderr,
				"test_stripe: test_stripes returned %d\n", rv);
			exit(1);
		}
	} else {
		rv = restore_stripes(fds, offsets,
				     raid_disks, chunk_size, level, layout,
				     storefd, 0ULL,
				     start, length);
		if (rv != 0) {
			fprintf(stderr,
				"test_stripe: restore_stripes returned %d\n",
				rv);
			exit(1);
		}
	}
	exit(0);
}
431
432#endif /* MAIN */